522 || // SPDX-License-Identifier: GPL-2.0 /* * Provides code common for host and device side USB. * * If either host side (ie. CONFIG_USB=y) or device side USB stack * (ie. CONFIG_USB_GADGET=y) is compiled in the kernel, this module is * compiled-in as well. Otherwise, if either of the two stacks is * compiled as module, this file is compiled as module as well. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/of.h> #include <linux/platform_device.h> #include <linux/usb/ch9.h> #include <linux/usb/of.h> #include <linux/usb/otg.h> #include <linux/of_platform.h> #include <linux/debugfs.h> #include "common.h" static const char *const ep_type_names[] = { [USB_ENDPOINT_XFER_CONTROL] = "ctrl", [USB_ENDPOINT_XFER_ISOC] = "isoc", [USB_ENDPOINT_XFER_BULK] = "bulk", [USB_ENDPOINT_XFER_INT] = "intr", }; /** * usb_ep_type_string() - Returns human readable-name of the endpoint type. * @ep_type: The endpoint type to return human-readable name for. If it's not * any of the types: USB_ENDPOINT_XFER_{CONTROL, ISOC, BULK, INT}, * usually got by usb_endpoint_type(), the string 'unknown' will be returned. 
*/ const char *usb_ep_type_string(int ep_type) { if (ep_type < 0 || ep_type >= ARRAY_SIZE(ep_type_names)) return "unknown"; return ep_type_names[ep_type]; } EXPORT_SYMBOL_GPL(usb_ep_type_string); const char *usb_otg_state_string(enum usb_otg_state state) { static const char *const names[] = { [OTG_STATE_A_IDLE] = "a_idle", [OTG_STATE_A_WAIT_VRISE] = "a_wait_vrise", [OTG_STATE_A_WAIT_BCON] = "a_wait_bcon", [OTG_STATE_A_HOST] = "a_host", [OTG_STATE_A_SUSPEND] = "a_suspend", [OTG_STATE_A_PERIPHERAL] = "a_peripheral", [OTG_STATE_A_WAIT_VFALL] = "a_wait_vfall", [OTG_STATE_A_VBUS_ERR] = "a_vbus_err", [OTG_STATE_B_IDLE] = "b_idle", [OTG_STATE_B_SRP_INIT] = "b_srp_init", [OTG_STATE_B_PERIPHERAL] = "b_peripheral", [OTG_STATE_B_WAIT_ACON] = "b_wait_acon", [OTG_STATE_B_HOST] = "b_host", }; if (state < 0 || state >= ARRAY_SIZE(names)) return "UNDEFINED"; return names[state]; } EXPORT_SYMBOL_GPL(usb_otg_state_string); static const char *const speed_names[] = { [USB_SPEED_UNKNOWN] = "UNKNOWN", [USB_SPEED_LOW] = "low-speed", [USB_SPEED_FULL] = "full-speed", [USB_SPEED_HIGH] = "high-speed", [USB_SPEED_WIRELESS] = "wireless", [USB_SPEED_SUPER] = "super-speed", [USB_SPEED_SUPER_PLUS] = "super-speed-plus", }; static const char *const ssp_rate[] = { [USB_SSP_GEN_UNKNOWN] = "UNKNOWN", [USB_SSP_GEN_2x1] = "super-speed-plus-gen2x1", [USB_SSP_GEN_1x2] = "super-speed-plus-gen1x2", [USB_SSP_GEN_2x2] = "super-speed-plus-gen2x2", }; /** * usb_speed_string() - Returns human readable-name of the speed. * @speed: The speed to return human-readable name for. If it's not * any of the speeds defined in usb_device_speed enum, string for * USB_SPEED_UNKNOWN will be returned. */ const char *usb_speed_string(enum usb_device_speed speed) { if (speed < 0 || speed >= ARRAY_SIZE(speed_names)) speed = USB_SPEED_UNKNOWN; return speed_names[speed]; } EXPORT_SYMBOL_GPL(usb_speed_string); /** * usb_get_maximum_speed - Get maximum requested speed for a given USB * controller. 
* @dev: Pointer to the given USB controller device * * The function gets the maximum speed string from property "maximum-speed", * and returns the corresponding enum usb_device_speed. */ enum usb_device_speed usb_get_maximum_speed(struct device *dev) { const char *maximum_speed; int ret; ret = device_property_read_string(dev, "maximum-speed", &maximum_speed); if (ret < 0) return USB_SPEED_UNKNOWN; ret = match_string(ssp_rate, ARRAY_SIZE(ssp_rate), maximum_speed); if (ret > 0) return USB_SPEED_SUPER_PLUS; ret = match_string(speed_names, ARRAY_SIZE(speed_names), maximum_speed); return (ret < 0) ? USB_SPEED_UNKNOWN : ret; } EXPORT_SYMBOL_GPL(usb_get_maximum_speed); /** * usb_get_maximum_ssp_rate - Get the signaling rate generation and lane count * of a SuperSpeed Plus capable device. * @dev: Pointer to the given USB controller device * * If the string from "maximum-speed" property is super-speed-plus-genXxY where * 'X' is the generation number and 'Y' is the number of lanes, then this * function returns the corresponding enum usb_ssp_rate. */ enum usb_ssp_rate usb_get_maximum_ssp_rate(struct device *dev) { const char *maximum_speed; int ret; ret = device_property_read_string(dev, "maximum-speed", &maximum_speed); if (ret < 0) return USB_SSP_GEN_UNKNOWN; ret = match_string(ssp_rate, ARRAY_SIZE(ssp_rate), maximum_speed); return (ret < 0) ? USB_SSP_GEN_UNKNOWN : ret; } EXPORT_SYMBOL_GPL(usb_get_maximum_ssp_rate); /** * usb_state_string - Returns human readable name for the state. * @state: The state to return a human-readable name for. If it's not * any of the states devices in usb_device_state_string enum, * the string UNKNOWN will be returned. 
*/ const char *usb_state_string(enum usb_device_state state) { static const char *const names[] = { [USB_STATE_NOTATTACHED] = "not attached", [USB_STATE_ATTACHED] = "attached", [USB_STATE_POWERED] = "powered", [USB_STATE_RECONNECTING] = "reconnecting", [USB_STATE_UNAUTHENTICATED] = "unauthenticated", [USB_STATE_DEFAULT] = "default", [USB_STATE_ADDRESS] = "addressed", [USB_STATE_CONFIGURED] = "configured", [USB_STATE_SUSPENDED] = "suspended", }; if (state < 0 || state >= ARRAY_SIZE(names)) return "UNKNOWN"; return names[state]; } EXPORT_SYMBOL_GPL(usb_state_string); static const char *const usb_dr_modes[] = { [USB_DR_MODE_UNKNOWN] = "", [USB_DR_MODE_HOST] = "host", [USB_DR_MODE_PERIPHERAL] = "peripheral", [USB_DR_MODE_OTG] = "otg", }; static enum usb_dr_mode usb_get_dr_mode_from_string(const char *str) { int ret; ret = match_string(usb_dr_modes, ARRAY_SIZE(usb_dr_modes), str); return (ret < 0) ? USB_DR_MODE_UNKNOWN : ret; } enum usb_dr_mode usb_get_dr_mode(struct device *dev) { const char *dr_mode; int err; err = device_property_read_string(dev, "dr_mode", &dr_mode); if (err < 0) return USB_DR_MODE_UNKNOWN; return usb_get_dr_mode_from_string(dr_mode); } EXPORT_SYMBOL_GPL(usb_get_dr_mode); /** * usb_get_role_switch_default_mode - Get default mode for given device * @dev: Pointer to the given device * * The function gets string from property 'role-switch-default-mode', * and returns the corresponding enum usb_dr_mode. 
*/ enum usb_dr_mode usb_get_role_switch_default_mode(struct device *dev) { const char *str; int ret; ret = device_property_read_string(dev, "role-switch-default-mode", &str); if (ret < 0) return USB_DR_MODE_UNKNOWN; return usb_get_dr_mode_from_string(str); } EXPORT_SYMBOL_GPL(usb_get_role_switch_default_mode); /** * usb_decode_interval - Decode bInterval into the time expressed in 1us unit * @epd: The descriptor of the endpoint * @speed: The speed that the endpoint works as * * Function returns the interval expressed in 1us unit for servicing * endpoint for data transfers. */ unsigned int usb_decode_interval(const struct usb_endpoint_descriptor *epd, enum usb_device_speed speed) { unsigned int interval = 0; switch (usb_endpoint_type(epd)) { case USB_ENDPOINT_XFER_CONTROL: /* uframes per NAK */ if (speed == USB_SPEED_HIGH) interval = epd->bInterval; break; case USB_ENDPOINT_XFER_ISOC: interval = 1 << (epd->bInterval - 1); break; case USB_ENDPOINT_XFER_BULK: /* uframes per NAK */ if (speed == USB_SPEED_HIGH && usb_endpoint_dir_out(epd)) interval = epd->bInterval; break; case USB_ENDPOINT_XFER_INT: if (speed >= USB_SPEED_HIGH) interval = 1 << (epd->bInterval - 1); else interval = epd->bInterval; break; } interval *= (speed >= USB_SPEED_HIGH) ? 125 : 1000; return interval; } EXPORT_SYMBOL_GPL(usb_decode_interval); #ifdef CONFIG_OF /** * of_usb_get_dr_mode_by_phy - Get dual role mode for the controller device * which is associated with the given phy device_node * @np: Pointer to the given phy device_node * @arg0: phandle args[0] for phy's with #phy-cells >= 1, or -1 for * phys which do not have phy-cells * * In dts a usb controller associates with phy devices. The function gets * the string from property 'dr_mode' of the controller associated with the * given phy device node, and returns the correspondig enum usb_dr_mode. 
*/ enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *np, int arg0) { struct device_node *controller = NULL; struct of_phandle_args args; const char *dr_mode; int index; int err; do { controller = of_find_node_with_property(controller, "phys"); if (!of_device_is_available(controller)) continue; index = 0; do { if (arg0 == -1) { args.np = of_parse_phandle(controller, "phys", index); args.args_count = 0; } else { err = of_parse_phandle_with_args(controller, "phys", "#phy-cells", index, &args); if (err) break; } of_node_put(args.np); if (args.np == np && (args.args_count == 0 || args.args[0] == arg0)) goto finish; index++; } while (args.np); } while (controller); finish: err = of_property_read_string(controller, "dr_mode", &dr_mode); of_node_put(controller); if (err < 0) return USB_DR_MODE_UNKNOWN; return usb_get_dr_mode_from_string(dr_mode); } EXPORT_SYMBOL_GPL(of_usb_get_dr_mode_by_phy); /** * of_usb_host_tpl_support - to get if Targeted Peripheral List is supported * for given targeted hosts (non-PC hosts) * @np: Pointer to the given device_node * * The function gets if the targeted hosts support TPL or not */ bool of_usb_host_tpl_support(struct device_node *np) { return of_property_read_bool(np, "tpl-support"); } EXPORT_SYMBOL_GPL(of_usb_host_tpl_support); /** * of_usb_update_otg_caps - to update usb otg capabilities according to * the passed properties in DT. 
* @np: Pointer to the given device_node * @otg_caps: Pointer to the target usb_otg_caps to be set * * The function updates the otg capabilities */ int of_usb_update_otg_caps(struct device_node *np, struct usb_otg_caps *otg_caps) { u32 otg_rev; if (!otg_caps) return -EINVAL; if (!of_property_read_u32(np, "otg-rev", &otg_rev)) { switch (otg_rev) { case 0x0100: case 0x0120: case 0x0130: case 0x0200: /* Choose the lesser one if it's already been set */ if (otg_caps->otg_rev) otg_caps->otg_rev = min_t(u16, otg_rev, otg_caps->otg_rev); else otg_caps->otg_rev = otg_rev; break; default: pr_err("%pOF: unsupported otg-rev: 0x%x\n", np, otg_rev); return -EINVAL; } } else { /* * otg-rev is mandatory for otg properties, if not passed * we set it to be 0 and assume it's a legacy otg device. * Non-dt platform can set it afterwards. */ otg_caps->otg_rev = 0; } if (of_property_read_bool(np, "hnp-disable")) otg_caps->hnp_support = false; if (of_property_read_bool(np, "srp-disable")) otg_caps->srp_support = false; if (of_property_read_bool(np, "adp-disable") || (otg_caps->otg_rev < 0x0200)) otg_caps->adp_support = false; return 0; } EXPORT_SYMBOL_GPL(of_usb_update_otg_caps); /** * usb_of_get_companion_dev - Find the companion device * @dev: the device pointer to find a companion * * Find the companion device from platform bus. * * Takes a reference to the returned struct device which needs to be dropped * after use. * * Return: On success, a pointer to the companion device, %NULL on failure. */ struct device *usb_of_get_companion_dev(struct device *dev) { struct device_node *node; struct platform_device *pdev = NULL; node = of_parse_phandle(dev->of_node, "companion", 0); if (node) pdev = of_find_device_by_node(node); of_node_put(node); return pdev ? 
&pdev->dev : NULL; } EXPORT_SYMBOL_GPL(usb_of_get_companion_dev); #endif struct dentry *usb_debug_root; EXPORT_SYMBOL_GPL(usb_debug_root); static int __init usb_common_init(void) { usb_debug_root = debugfs_create_dir("usb", NULL); ledtrig_usb_init(); return 0; } static void __exit usb_common_exit(void) { ledtrig_usb_exit(); debugfs_remove_recursive(usb_debug_root); } subsys_initcall(usb_common_init); module_exit(usb_common_exit); MODULE_LICENSE("GPL"); |
4 138 128 291 272 41 21 20 158 135 25 173 6 50 17 149 14 1 8 5 2 4 2 3 163 8 5 3 26 17 1 18 10 2 1 3 8 27 20 2 10 17 1 9 1 6 3 3 6 3 6 3 3 2 1 156 207 1 1 6 66 101 102 20 16 13 13 15 7 12 1 38 33 5 14 14 96 16 16 96 110 97 13 101 9 110 111 110 97 19 3 5 109 105 106 55 55 105 106 83 96 96 46 56 89 76 76 13 7 76 7 46 56 3 3 157 3 158 1 1 1 164 2 5 7 16 139 65 88 160 145 5 1 151 96 1 5 1 57 6 1 13 11 1 12 9 1 1 1 1 2 3 1 1 2 1 1 1 190 1 13 1 18 163 161 2 160 1 1 10 27 1 2 1 1 9 20 3 162 3 17 4 97 70 96 3 5 5 5 5 6 6 5 3 5 5 8 8 7 7 6 6 6 1 1 1 6 6 7 22 1 3 1 1 1 14 1 5 7 9 8 15 1 1 7 1 1 3 3 3 2 2 2 1 1 7 3 2 10 1 2 1 3 1 402 || // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_api.c Packet scheduler API. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Fixes: * * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired. * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/kmod.h> #include <linux/list.h> #include <linux/hrtimer.h> #include <linux/slab.h> #include <linux/hashtable.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/tc_wrapper.h> #include <trace/events/qdisc.h> /* Short review. ------------- This file consists of two interrelated parts: 1. queueing disciplines manager frontend. 2. traffic classes manager frontend. Generally, queueing discipline ("qdisc") is a black box, which is able to enqueue packets and to dequeue them (when device is ready to send something) in order and at times determined by algorithm hidden in it. 
qdisc's are divided to two categories: - "queues", which have no internal structure visible from outside. - "schedulers", which split all the packets to "traffic classes", using "packet classifiers" (look at cls_api.c) In turn, classes may have child qdiscs (as rule, queues) attached to them etc. etc. etc. The goal of the routines in this file is to translate information supplied by user in the form of handles to more intelligible for kernel form, to make some sanity checks and part of work, which is common to all qdiscs and to provide rtnetlink notifications. All real intelligent work is done inside qdisc modules. Every discipline has two major routines: enqueue and dequeue. ---dequeue dequeue usually returns a skb to send. It is allowed to return NULL, but it does not mean that queue is empty, it just means that discipline does not want to send anything this time. Queue is really empty if q->q.qlen == 0. For complicated disciplines with multiple queues q->q is not real packet queue, but however q->q.qlen must be valid. ---enqueue enqueue returns 0, if packet was enqueued successfully. If packet (this one or another one) was dropped, it returns not zero error code. NET_XMIT_DROP - this packet dropped Expected action: do not backoff, but wait until queue will clear. NET_XMIT_CN - probably this packet enqueued, but another one dropped. Expected action: backoff or ignore Auxiliary routines: ---peek like dequeue but without removing a packet from the queue ---reset returns qdisc to initial state: purge all buffers, clear all timers, counters (except for statistics) etc. ---init initializes newly created qdisc. ---destroy destroys resources allocated by init and during lifetime of qdisc. ---change changes qdisc parameters. */ /* Protects list of registered TC modules. It is pure SMP lock. */ static DEFINE_RWLOCK(qdisc_mod_lock); /************************************************ * Queueing disciplines manipulation. 
* ************************************************/ /* The list of all installed queueing disciplines. */ static struct Qdisc_ops *qdisc_base; /* Register/unregister queueing discipline */ int register_qdisc(struct Qdisc_ops *qops) { struct Qdisc_ops *q, **qp; int rc = -EEXIST; write_lock(&qdisc_mod_lock); for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) if (!strcmp(qops->id, q->id)) goto out; if (qops->enqueue == NULL) qops->enqueue = noop_qdisc_ops.enqueue; if (qops->peek == NULL) { if (qops->dequeue == NULL) qops->peek = noop_qdisc_ops.peek; else goto out_einval; } if (qops->dequeue == NULL) qops->dequeue = noop_qdisc_ops.dequeue; if (qops->cl_ops) { const struct Qdisc_class_ops *cops = qops->cl_ops; if (!(cops->find && cops->walk && cops->leaf)) goto out_einval; if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf)) goto out_einval; } qops->next = NULL; *qp = qops; rc = 0; out: write_unlock(&qdisc_mod_lock); return rc; out_einval: rc = -EINVAL; goto out; } EXPORT_SYMBOL(register_qdisc); void unregister_qdisc(struct Qdisc_ops *qops) { struct Qdisc_ops *q, **qp; int err = -ENOENT; write_lock(&qdisc_mod_lock); for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) if (q == qops) break; if (q) { *qp = q->next; q->next = NULL; err = 0; } write_unlock(&qdisc_mod_lock); WARN(err, "unregister qdisc(%s) failed\n", qops->id); } EXPORT_SYMBOL(unregister_qdisc); /* Get default qdisc if not otherwise specified */ void qdisc_get_default(char *name, size_t len) { read_lock(&qdisc_mod_lock); strscpy(name, default_qdisc_ops->id, len); read_unlock(&qdisc_mod_lock); } static struct Qdisc_ops *qdisc_lookup_default(const char *name) { struct Qdisc_ops *q = NULL; for (q = qdisc_base; q; q = q->next) { if (!strcmp(name, q->id)) { if (!try_module_get(q->owner)) q = NULL; break; } } return q; } /* Set new default qdisc to use */ int qdisc_set_default(const char *name) { const struct Qdisc_ops *ops; if (!capable(CAP_NET_ADMIN)) return -EPERM; 
write_lock(&qdisc_mod_lock); ops = qdisc_lookup_default(name); if (!ops) { /* Not found, drop lock and try to load module */ write_unlock(&qdisc_mod_lock); request_module("sch_%s", name); write_lock(&qdisc_mod_lock); ops = qdisc_lookup_default(name); } if (ops) { /* Set new default */ module_put(default_qdisc_ops->owner); default_qdisc_ops = ops; } write_unlock(&qdisc_mod_lock); return ops ? 0 : -ENOENT; } #ifdef CONFIG_NET_SCH_DEFAULT /* Set default value from kernel config */ static int __init sch_default_qdisc(void) { return qdisc_set_default(CONFIG_DEFAULT_NET_SCH); } late_initcall(sch_default_qdisc); #endif /* We know handle. Find qdisc among all qdisc's attached to device * (root qdisc, all its children, children of children etc.) * Note: caller either uses rtnl or rcu_read_lock() */ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) { struct Qdisc *q; if (!qdisc_dev(root)) return (root->handle == handle ? root : NULL); if (!(root->flags & TCQ_F_BUILTIN) && root->handle == handle) return root; hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle, lockdep_rtnl_is_held()) { if (q->handle == handle) return q; } return NULL; } void qdisc_hash_add(struct Qdisc *q, bool invisible) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { ASSERT_RTNL(); hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); if (invisible) q->flags |= TCQ_F_INVISIBLE; } } EXPORT_SYMBOL(qdisc_hash_add); void qdisc_hash_del(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { ASSERT_RTNL(); hash_del_rcu(&q->hash); } } EXPORT_SYMBOL(qdisc_hash_del); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; if (!handle) return NULL; q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle); if (q) goto out; if (dev_ingress_queue(dev)) q = qdisc_match_from_root( rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping), handle); out: return q; } struct Qdisc 
*qdisc_lookup_rcu(struct net_device *dev, u32 handle) { struct netdev_queue *nq; struct Qdisc *q; if (!handle) return NULL; q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle); if (q) goto out; nq = dev_ingress_queue_rcu(dev); if (nq) q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping), handle); out: return q; } static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; const struct Qdisc_class_ops *cops = p->ops->cl_ops; if (cops == NULL) return NULL; cl = cops->find(p, classid); if (cl == 0) return NULL; return cops->leaf(p, cl); } /* Find queueing discipline by name */ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) { struct Qdisc_ops *q = NULL; if (kind) { read_lock(&qdisc_mod_lock); for (q = qdisc_base; q; q = q->next) { if (nla_strcmp(kind, q->id) == 0) { if (!try_module_get(q->owner)) q = NULL; break; } } read_unlock(&qdisc_mod_lock); } return q; } /* The linklayer setting were not transferred from iproute2, in older * versions, and the rate tables lookup systems have been dropped in * the kernel. To keep backward compatible with older iproute2 tc * utils, we detect the linklayer setting by detecting if the rate * table were modified. * * For linklayer ATM table entries, the rate table will be aligned to * 48 bytes, thus some table entries will contain the same value. The * mpu (min packet unit) is also encoded into the old rate table, thus * starting from the mpu, we find low and high table entries for * mapping this cell. If these entries contain the same value, when * the rate tables have been modified for linklayer ATM. * * This is done by rounding mpu to the nearest 48 bytes cell/entry, * and then roundup to the next cell, calc the table entry one below, * and compare. 
*/ static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) { int low = roundup(r->mpu, 48); int high = roundup(low+1, 48); int cell_low = low >> r->cell_log; int cell_high = (high >> r->cell_log) - 1; /* rtab is too inaccurate at rates > 100Mbit/s */ if ((r->rate > (100000000/8)) || (rtab[0] == 0)) { pr_debug("TC linklayer: Giving up ATM detection\n"); return TC_LINKLAYER_ETHERNET; } if ((cell_high > cell_low) && (cell_high < 256) && (rtab[cell_low] == rtab[cell_high])) { pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n", cell_low, cell_high, rtab[cell_high]); return TC_LINKLAYER_ATM; } return TC_LINKLAYER_ETHERNET; } static struct qdisc_rate_table *qdisc_rtab_list; struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab, struct netlink_ext_ack *extack) { struct qdisc_rate_table *rtab; if (tab == NULL || r->rate == 0 || r->cell_log == 0 || r->cell_log >= 32 || nla_len(tab) != TC_RTAB_SIZE) { NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching"); return NULL; } for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) { if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) && !memcmp(&rtab->data, nla_data(tab), 1024)) { rtab->refcnt++; return rtab; } } rtab = kmalloc(sizeof(*rtab), GFP_KERNEL); if (rtab) { rtab->rate = *r; rtab->refcnt = 1; memcpy(rtab->data, nla_data(tab), 1024); if (r->linklayer == TC_LINKLAYER_UNAWARE) r->linklayer = __detect_linklayer(r, rtab->data); rtab->next = qdisc_rtab_list; qdisc_rtab_list = rtab; } else { NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table"); } return rtab; } EXPORT_SYMBOL(qdisc_get_rtab); void qdisc_put_rtab(struct qdisc_rate_table *tab) { struct qdisc_rate_table *rtab, **rtabp; if (!tab || --tab->refcnt) return; for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) { if (rtab == tab) { *rtabp = rtab->next; kfree(rtab); return; } } } EXPORT_SYMBOL(qdisc_put_rtab); static LIST_HEAD(qdisc_stab_list); static const struct 
nla_policy stab_policy[TCA_STAB_MAX + 1] = { [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, [TCA_STAB_DATA] = { .type = NLA_BINARY }, }; static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_STAB_MAX + 1]; struct qdisc_size_table *stab; struct tc_sizespec *s; unsigned int tsize = 0; u16 *tab = NULL; int err; err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy, extack); if (err < 0) return ERR_PTR(err); if (!tb[TCA_STAB_BASE]) { NL_SET_ERR_MSG(extack, "Size table base attribute is missing"); return ERR_PTR(-EINVAL); } s = nla_data(tb[TCA_STAB_BASE]); if (s->tsize > 0) { if (!tb[TCA_STAB_DATA]) { NL_SET_ERR_MSG(extack, "Size table data attribute is missing"); return ERR_PTR(-EINVAL); } tab = nla_data(tb[TCA_STAB_DATA]); tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); } if (tsize != s->tsize || (!tab && tsize > 0)) { NL_SET_ERR_MSG(extack, "Invalid size of size table"); return ERR_PTR(-EINVAL); } list_for_each_entry(stab, &qdisc_stab_list, list) { if (memcmp(&stab->szopts, s, sizeof(*s))) continue; if (tsize > 0 && memcmp(stab->data, tab, flex_array_size(stab, data, tsize))) continue; stab->refcnt++; return stab; } if (s->size_log > STAB_SIZE_LOG_MAX || s->cell_log > STAB_SIZE_LOG_MAX) { NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table"); return ERR_PTR(-EINVAL); } stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL); if (!stab) return ERR_PTR(-ENOMEM); stab->refcnt = 1; stab->szopts = *s; if (tsize > 0) memcpy(stab->data, tab, flex_array_size(stab, data, tsize)); list_add_tail(&stab->list, &qdisc_stab_list); return stab; } void qdisc_put_stab(struct qdisc_size_table *tab) { if (!tab) return; if (--tab->refcnt == 0) { list_del(&tab->list); kfree_rcu(tab, rcu); } } EXPORT_SYMBOL(qdisc_put_stab); static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, TCA_STAB); if (nest == 
NULL) goto nla_put_failure; if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts)) goto nla_put_failure; nla_nest_end(skb, nest); return skb->len; nla_put_failure: return -1; } void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) { int pkt_len, slot; pkt_len = skb->len + stab->szopts.overhead; if (unlikely(!stab->szopts.tsize)) goto out; slot = pkt_len + stab->szopts.cell_align; if (unlikely(slot < 0)) slot = 0; slot >>= stab->szopts.cell_log; if (likely(slot < stab->szopts.tsize)) pkt_len = stab->data[slot]; else pkt_len = stab->data[stab->szopts.tsize - 1] * (slot / stab->szopts.tsize) + stab->data[slot % stab->szopts.tsize]; pkt_len <<= stab->szopts.size_log; out: if (unlikely(pkt_len < 1)) pkt_len = 1; qdisc_skb_cb(skb)->pkt_len = pkt_len; } EXPORT_SYMBOL(__qdisc_calculate_pkt_len); void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) { if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", txt, qdisc->ops->id, qdisc->handle >> 16); qdisc->flags |= TCQ_F_WARN_NONWC; } } EXPORT_SYMBOL(qdisc_warn_nonwc); static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); rcu_read_lock(); __netif_schedule(qdisc_root(wd->qdisc)); rcu_read_unlock(); return HRTIMER_NORESTART; } void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc, clockid_t clockid) { hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED); wd->timer.function = qdisc_watchdog; wd->qdisc = qdisc; } EXPORT_SYMBOL(qdisc_watchdog_init_clockid); void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) { qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC); } EXPORT_SYMBOL(qdisc_watchdog_init); void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, u64 delta_ns) { bool deactivated; rcu_read_lock(); deactivated = test_bit(__QDISC_STATE_DEACTIVATED, 
&qdisc_root_sleeping(wd->qdisc)->state); rcu_read_unlock(); if (deactivated) return; if (hrtimer_is_queued(&wd->timer)) { u64 softexpires; softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer)); /* If timer is already set in [expires, expires + delta_ns], * do not reprogram it. */ if (softexpires - expires <= delta_ns) return; } hrtimer_start_range_ns(&wd->timer, ns_to_ktime(expires), delta_ns, HRTIMER_MODE_ABS_PINNED); } EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { hrtimer_cancel(&wd->timer); } EXPORT_SYMBOL(qdisc_watchdog_cancel); static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) { struct hlist_head *h; unsigned int i; h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL); if (h != NULL) { for (i = 0; i < n; i++) INIT_HLIST_HEAD(&h[i]); } return h; } void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) { struct Qdisc_class_common *cl; struct hlist_node *next; struct hlist_head *nhash, *ohash; unsigned int nsize, nmask, osize; unsigned int i, h; /* Rehash when load factor exceeds 0.75 */ if (clhash->hashelems * 4 <= clhash->hashsize * 3) return; nsize = clhash->hashsize * 2; nmask = nsize - 1; nhash = qdisc_class_hash_alloc(nsize); if (nhash == NULL) return; ohash = clhash->hash; osize = clhash->hashsize; sch_tree_lock(sch); for (i = 0; i < osize; i++) { hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) { h = qdisc_class_hash(cl->classid, nmask); hlist_add_head(&cl->hnode, &nhash[h]); } } clhash->hash = nhash; clhash->hashsize = nsize; clhash->hashmask = nmask; sch_tree_unlock(sch); kvfree(ohash); } EXPORT_SYMBOL(qdisc_class_hash_grow); int qdisc_class_hash_init(struct Qdisc_class_hash *clhash) { unsigned int size = 4; clhash->hash = qdisc_class_hash_alloc(size); if (!clhash->hash) return -ENOMEM; clhash->hashsize = size; clhash->hashmask = size - 1; clhash->hashelems = 0; return 0; } EXPORT_SYMBOL(qdisc_class_hash_init); void 
qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	/* Only the bucket array is released here; @clhash itself is not freed. */
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

/* Insert class @cl into @clhash.
 *
 * The bucket is selected by qdisc_class_hash() from cl->classid and
 * clhash->hashmask; the element counter is incremented afterwards.
 */
void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

/* Unlink class @cl from its bucket and decrement the element counter. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);

/* Allocate a unique handle from space managed by kernel
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 *
 * Returns the first candidate for which qdisc_lookup() finds nothing on
 * @dev, or 0 when 0x8000 candidates were tried without success.
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	/* Search cursor; it is static, so it persists across calls. */
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		/* Wrap around to the start of the range. */
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while (--i > 0);

	return 0;
}

/* Propagate a queue-length/backlog reduction up the qdisc tree.
 *
 * Starting from @sch, every ancestor reached through ->parent has @n
 * subtracted from its q.qlen and @len from its qstats.backlog, and
 * max(@n, 0) is handed to __qdisc_qstats_drop(). The walk ends at an
 * ingress parent, at a qdisc flagged TCQ_F_NOPARENT, or when the parent
 * lookup fails. Nothing happens when both @n and @len are zero.
 */
void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty.
		 *
		 * If child was empty even before update then backlog
		 * counter is screwed and we skip notification because
		 * parent class is already passive.
		 *
		 * If the original child was offloaded then it is allowed
		 * to be seen as empty, so the parent is notified anyway.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
						       !qdisc_is_offloaded);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		/* From here on @sch is the parent; the counters below are its own. */
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);

/* Call the driver's ndo_setup_tc() for @sch and mirror the outcome in
 * TCQ_F_OFFLOADED.
 *
 * The flag is cleared first. Returns 0 when the device cannot offload, has
 * no ndo_setup_tc, or the callback returns -EOPNOTSUPP; otherwise returns
 * the callback's result, setting TCQ_F_OFFLOADED when that result is 0.
 */
int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
			      void *type_data)
{
	struct net_device *dev = qdisc_dev(sch);
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;
	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}
EXPORT_SYMBOL(qdisc_offload_dump_helper);

/* Forward a graft operation to the driver through ndo_setup_tc().
 *
 * Nothing is returned; a driver failure is only reported via @extack, and
 * only when @new is a real qdisc and at least one of @new, @sch or @old is
 * flagged TCQ_F_OFFLOADED.
 */
void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack)
{
	bool any_qdisc_is_offloaded;
	int err;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);

	/* Don't report error if the graft is part of destroy operation. */
	if (!err || !new || new == &noop_qdisc)
		return;

	/* Don't report error if the parent, the old child and the new
	 * one are not offloaded.
	 */
	any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;

	if (any_qdisc_is_offloaded)
		NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
}
EXPORT_SYMBOL(qdisc_offload_graft_helper);

/* Query driver capabilities for qdisc offload @type.
 *
 * @caps is zeroed over @caps_len bytes, then passed to ndo_setup_tc() with
 * TC_QUERY_CAPS if the device implements that callback. The callback's
 * return value is not checked here.
 */
void qdisc_offload_query_caps(struct net_device *dev,
			      enum tc_setup_type type,
			      void *caps, size_t caps_len)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_query_caps_base base = {
		.type = type,
		.caps = caps,
	};

	memset(caps, 0, caps_len);

	if (ops->ndo_setup_tc)
		ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
}
EXPORT_SYMBOL(qdisc_offload_query_caps);

/* Tell the driver that the root qdisc of @dev is being replaced by @new
 * (handle 0 when @new is NULL). The ingress field is set when either @new
 * or @old carries TCQ_F_INGRESS.
 */
static void qdisc_offload_graft_root(struct net_device *dev,
				     struct Qdisc *new, struct Qdisc *old,
				     struct netlink_ext_ack *extack)
{
	struct tc_root_qopt_offload graft_offload = {
		.command	= TC_ROOT_GRAFT,
		.handle		= new ? new->handle : 0,
		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
				  (old && old->flags & TCQ_F_INGRESS),
	};

	qdisc_offload_graft_helper(dev, NULL, new, old,
				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
}

/* Append one netlink message describing qdisc @q to @skb.
 *
 * Writes the tcmsg header, TCA_KIND, optional block indexes, the qdisc's own
 * dump, TCA_HW_OFFLOAD, the size table, statistics and, when @extack carries
 * a message, TCA_EXT_WARN_MSG.
 *
 * Returns skb->len on success. On any failure the partial message is
 * trimmed off @skb again and -1 is returned.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event,
			 struct netlink_ext_ack *extack)
{
	struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	/* Tail position before this message; used to size it and to trim it. */
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	/* Per-CPU counters are only passed on when the qdisc uses them. */
	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto out_nlmsg_trim;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;

	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

/* Return true when @q must be left out of dumps and notifications: builtin
 * qdiscs always, invisible ones unless @dump_invisible is set.
 */
static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}

/* Build and send a notification about @old being deleted and/or @new being
 * created.
 *
 * Returns -ENOBUFS if the skb cannot be allocated, the result of
 * rtnetlink_send() when at least one message was filled in, and -EINVAL
 * when filling failed or nothing was written (the skb is freed then).
 */
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new,
			struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC, extack) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

/* Send a notification if either qdisc is set, then call qdisc_put() on
 * @old if there is one. The result of qdisc_notify() is not checked.
 */
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new,
			       struct netlink_ext_ack *extack)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new, extack);

	if (old)
		qdisc_put(old);
}

/* Clear TCQ_F_NOLOCK on @sch and, if it carries TCQ_F_CPUSTATS, free its
 * per-CPU statistics, reset the pointers and clear that flag as well.
 */
static void qdisc_clear_nolock(struct Qdisc *sch)
{
	sch->flags &= ~TCQ_F_NOLOCK;
	if (!(sch->flags & TCQ_F_CPUSTATS))
		return;

	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	sch->cpu_bstats = NULL;
	sch->cpu_qstats = NULL;
	sch->flags &= ~TCQ_F_CPUSTATS;
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
*/
/* Returns 0 on success. Error returns visible in this function: -ENOENT (no
 * ingress queue on the device, or class not found), -EBUSY (the current
 * ingress/clsact qdisc still has filter requests holding it), -EOPNOTSUPP
 * (parent has no class ops or no graft op), -EINVAL (noqueue offered to a
 * class), or whatever cops->graft() returns.
 */
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		/* No parent qdisc: graft directly onto the device. */
		unsigned int i, num_q, ingress;
		struct netdev_queue *dev_queue;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			ingress = 1;
			dev_queue = dev_ingress_queue(dev);
			if (!dev_queue) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}

			q = rtnl_dereference(dev_queue->qdisc_sleeping);

			/* This is the counterpart of that qdisc_refcount_inc_nz() call in
			 * __tcf_qdisc_find() for filter requests.
			 */
			if (!qdisc_refcount_dec_if_one(q)) {
				NL_SET_ERR_MSG(extack,
					       "Current ingress or clsact Qdisc has ongoing filter requests");
				return -EBUSY;
			}
		}

		/* The device is deactivated for the swap and reactivated below. */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		/* Qdiscs with an ->attach op skip the per-queue grafting. */
		if (new && new->ops->attach && !ingress)
			goto skip;

		if (!ingress) {
			for (i = 0; i < num_q; i++) {
				dev_queue = netdev_get_tx_queue(dev, i);
				old = dev_graft_qdisc(dev_queue, new);

				/* @new is shared by all TX queues: one extra
				 * reference for every queue after the first.
				 */
				if (new && i > 0)
					qdisc_refcount_inc(new);
				qdisc_put(old);
			}
		} else {
			old = dev_graft_qdisc(dev_queue, NULL);

			/* {ingress,clsact}_destroy() @old before grafting @new to avoid
			 * unprotected concurrent accesses to net_device::miniq_{in,e}gress
			 * pointer(s) in mini_qdisc_pair_swap().
			 */
			qdisc_notify(net, skb, n, classid, old, new, extack);
			qdisc_destroy(old);

			dev_graft_qdisc(dev_queue, new);
		}

skip:
		if (!ingress) {
			old = rtnl_dereference(dev->qdisc);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			/* A NULL @new leaves the device with noop_qdisc as root. */
			rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);

			notify_and_destroy(net, skb, n, classid, old, new, extack);

			if (new && new->ops->attach)
				new->ops->attach(new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Graft under class @classid of @parent via its class ops. */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		if (new && new->ops == &noqueue_qdisc_ops) {
			NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
			return -EINVAL;
		}

		/* ->graft() hands back the previous child through &old. */
		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new, extack);
	}
	return 0;
}

/* Apply TCA_INGRESS_BLOCK / TCA_EGRESS_BLOCK from @tca to @sch.
 *
 * Returns -EINVAL for a zero block index, -EOPNOTSUPP when the qdisc lacks
 * the matching *_block_set op, and 0 otherwise (including when neither
 * attribute is present).
 */
static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
				   struct netlink_ext_ack *extack)
{
	u32 block_index;

	if (tca[TCA_INGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->ingress_block_set) {
			NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->ingress_block_set(sch, block_index);
	}
	if (tca[TCA_EGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->egress_block_set) {
			NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->egress_block_set(sch, block_index);
	}
	return 0;
}

/* Allocate and initialize new qdisc. Parameters are passed via opt.
*/
/* Returns the new qdisc, or NULL with a negative errno stored in *@errp.
 * -EAGAIN in *@errp means the RTNL lock was dropped to load a module, so
 * the caller has to replay the whole request.
 */
static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		if (!(sch->flags & TCQ_F_INGRESS)) {
			NL_SET_ERR_MSG(extack,
				       "Specified parent ID is reserved for ingress and clsact Qdiscs");
			err = -EINVAL;
			goto err_out3;
		}
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		/* A zero handle asks the kernel to pick one. */
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exist to keep backward compatible with a userspace
	 * loophole, what allowed userspace to get IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init), and then forgot to reinit tx_queue_len
	 * before again attaching a qdisc.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out3;
		}
		rcu_assign_pointer(sch->stab, stab);
	}

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out4;
	}

	if (tca[TCA_RATE]) {
		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					true,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);
	trace_qdisc_create(ops, dev, parent);

	return sch;

	/* Unwind labels, entered from the latest successful step downwards. */
err_out4:
	/* Even if ops->init() failed, we call ops->destroy()
	 * like qdisc_create_dflt().
	 */
	if (ops->destroy)
		ops->destroy(sch);
	qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
	netdev_put(dev, &sch->dev_tracker);
	qdisc_free(sch);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;
}

/* Change parameters of an existing qdisc: options through ops->change(),
 * then the size table and, if TCA_RATE is given, the rate estimator.
 *
 * sch->stab is replaced unconditionally, so it becomes NULL when no
 * TCA_STAB attribute was supplied. Returns 0 or a negative errno.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
			return -EOPNOTSUPP;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Publish the new table before releasing the old one. */
	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      true,
				      tca[TCA_RATE]);
	}
out:
	return 0;
}

/* Walker state for check_loop(): @w must stay the first member because
 * check_loop_fn() casts the walker pointer back to this struct.
 */
struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

/* Walk the classes of @q looking for @p among the leaf qdiscs.
 *
 * Returns -ELOOP when the walk ends with w.stop set, 0 otherwise (also 0
 * when @q has no class ops).
 */
static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

/* Per-class callback of check_loop(): -ELOOP if the class leaf is the
 * qdisc being searched for or the recursion depth exceeds 7, otherwise
 * the result of checking the leaf one level deeper (0 if there is no leaf).
 */
static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

/* Attribute policy passed to nlmsg_parse_deprecated() by the handlers in
 * this file.
 */
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};

/*
 * Delete/get qdisc.
 *
 * The qdisc is located through tcm_parent when that is non-zero, otherwise
 * through tcm_handle. For RTM_DELQDISC it is removed by grafting NULL in
 * its place; for any other message type a notification describing it is
 * sent. Returns 0 or a negative errno.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q, NULL);
	}
	return 0;
}

/* True when the request carries both NLM_F_CREATE and NLM_F_REPLACE. */
static bool req_create_or_replace(struct nlmsghdr *n)
{
	return (n->nlmsg_flags & NLM_F_CREATE &&
		n->nlmsg_flags & NLM_F_REPLACE);
}

/* True when the request carries both NLM_F_CREATE and NLM_F_EXCL. */
static bool req_create_exclusive(struct nlmsghdr *n)
{
	return (n->nlmsg_flags & NLM_F_CREATE &&
		n->nlmsg_flags & NLM_F_EXCL);
}

/* True when none of NLM_F_CREATE, NLM_F_REPLACE and NLM_F_EXCL is set. */
static bool req_change(struct nlmsghdr *n)
{
	return (!(n->nlmsg_flags & NLM_F_CREATE) &&
		!(n->nlmsg_flags & NLM_F_REPLACE) &&
		!(n->nlmsg_flags & NLM_F_EXCL));
}

/*
 * Create/change qdisc.
 *
 * Depending on the request and on what already exists, the handler ends on
 * one of three paths: change an existing qdisc in place, graft an existing
 * qdisc at a new position (label "graft"), or create a new qdisc and graft
 * it (labels "create_n_graft" / "create_n_graft2"). The request is parsed
 * again from "replay" when qdisc_create() reports -EAGAIN.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name");
					return -EINVAL;
				}
				if (q->flags & TCQ_F_INGRESS) {
					NL_SET_ERR_MSG(extack,
						       "Cannot regraft ingress or clsact Qdiscs");
					return -EINVAL;
				}
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				if (clid == TC_H_INGRESS) {
					NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
					return -EINVAL;
				}
				/* Reference taken for the graft; dropped again
				 * below if qdisc_graft() fails.
				 */
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know, that some child q is already
				 *   attached to this parent and have choice:
				 *   1) change it or 2) create/graft new one.
				 *   If the requested qdisc kind is different
				 *   than the existing one, then we choose graft.
				 *   If they are the same then this is "change"
				 *   operation - just let it fallthrough..
				 *
				 *   1. We are allowed to create/graft only
				 *   if the request is explicitly stating
				 *   "please create if it doesn't exist".
				 *
				 *   2. If the request is to exclusive create
				 *   then the qdisc tcm_handle is not expected
				 *   to exist, so that we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   This will happen when for example tc
				 *   utility issues a "change" command.
				 *   Alas, it is sort of hole in API, we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft.
				 */
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					if (req_create_or_replace(n) ||
					    req_create_exclusive(n))
						goto create_n_graft;
					else if (req_change(n))
						goto create_n_graft2;
				}
			}
		}
	} else {
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q, extack);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
	/* create_n_graft2 is entered directly by the "no flags" case above,
	 * which bypasses the NLM_F_CREATE check.
	 */
create_n_graft2:
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev),
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_put(q);
		return err;
	}

	return 0;
}

/* Dump @root and, when @recur is set, every qdisc in the hash table of
 * root's device.
 *
 * *@q_idx_p counts the qdiscs seen so far; entries below @s_q_idx are only
 * counted, not dumped. The counter is written back on every exit path
 * except the early return for a NULL @root. Returns 0, or -1 when
 * tc_fill_qdisc() did not return a positive length.
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC, NULL) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the global
	 * qdisc hashtable, we don't want to hit it again
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC, NULL) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}

/* Netlink dump callback for qdiscs.
 *
 * cb->args[0] holds the device index and cb->args[1] the qdisc index at
 * which the dump continues; both are stored back before returning
 * skb->len.
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
				     rtm_tca_policy, cb->extack);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		/* Past the device the dump resumes at: start from its first qdisc. */
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
				       skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
				       skb, cb, &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}



/************************************************
 *	Traffic classes manipulation.
* ************************************************/

/* Append one netlink message describing class @cl of qdisc @q to @skb.
 *
 * Returns skb->len on success. On any failure the partial message is
 * trimmed off @skb again and -1 is returned.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl, u32 portid, u32 seq, u16 flags,
			  int event, struct netlink_ext_ack *extack)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	/* Tail position before this message; used to size it and to trim it. */
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	/* Both fields start as the qdisc handle; cl_ops->dump() receives @tcm
	 * and may presumably overwrite them with class values -- confirm
	 * against the individual qdisc implementations.
	 */
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto out_nlmsg_trim;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;

	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

/* Build and send a class notification of type @event for class @cl.
 *
 * Returns -ENOBUFS if the skb cannot be allocated, -EINVAL if the message
 * cannot be filled in, otherwise the result of rtnetlink_send().
 */
static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event, struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

/* Delete class @cl and send an RTM_DELTCLASS notification.
 *
 * The message is filled in before cops->delete() is called and is only
 * sent if the deletion succeeds. Returns -EOPNOTSUPP without a delete op,
 * -ENOBUFS / -EINVAL on message build failures, the error from
 * cops->delete(), or the result of rtnetlink_send().
 */
static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl,
			     struct netlink_ext_ack *extack)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;
	int err = 0;

	if (!cops->delete)
		return -EOPNOTSUPP;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
			   RTM_DELTCLASS, extack) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = cops->delete(q, cl, extack);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	return err;
}

#ifdef CONFIG_NET_CLS

/* Walker state for tcf_node_bind(): @w must stay the first member because
 * the callback casts the walker pointer back to this struct.
 */
struct tcf_bind_args {
	struct tcf_walker w;
	unsigned long base;
	unsigned long cl;
	u32 classid;
};

/* Filter-walk callback: call the classifier's bind_class op for node @n
 * while holding the tree lock of the block's qdisc. Always returns 0.
 */
static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_bind_args *a = (void *)arg;

	if (n && tp->ops->bind_class) {
		struct Qdisc *q = tcf_block_q(tp->chain->block);

		sch_tree_lock(q);
		tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
		sch_tree_unlock(q);
	}
	return 0;
}

/* Walker state for tc_bind_class_walker(); @w must stay the first member
 * for the same cast reason.
 */
struct tc_bind_class_args {
	struct qdisc_walker w;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
};

/* Class-walk callback: for the filter block of class @cl, visit every
 * chain and every classifier on it and run tcf_node_bind() over the
 * classifier's nodes. Always returns 0.
 */
static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
				struct qdisc_walker *w)
{
	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;

	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return 0;
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		struct tcf_proto *tp;

		for (tp = tcf_get_next_proto(chain, NULL);
		     tp; tp = tcf_get_next_proto(chain, tp)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = a->clid;
			arg.base = cl;
			arg.cl = a->new_cl;
			tp->ops->walk(tp, &arg.w, true);
		}
	}

	return 0;
}

/* Walk all classes of @q and rebind filters for @clid to @new_cl (0 is
 * passed by the delete path). Does nothing when the qdisc has no
 * tcf_block op.
 */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tc_bind_class_args args = {};

	if (!cops->tcf_block)
		return;
	args.portid = portid;
	args.clid = clid;
	args.new_cl = new_cl;
	args.w.fn = tc_bind_class_walker;
	q->ops->cl_ops->walk(q, &args.w);
}

#else

/* Without CONFIG_NET_CLS this is a no-op. */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}

#endif

/* Handler for RTM_NEWTCLASS, RTM_DELTCLASS and RTM_GETTCLASS.
 *
 * Resolves the owning qdisc and the class from tcm_parent / tcm_handle,
 * then deletes, reports, or creates/changes the class depending on the
 * message type. Note that the local named "portid" holds tcm_parent (the
 * parent class id), not a netlink port id. Returns 0 or a negative errno.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = rtnl_dereference(dev->qdisc)->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = rtnl_dereference(dev->qdisc)->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		/* Class does not exist: only a create request may continue. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
			/* Unbind the class with filters with 0 */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS, extack);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
		return -EOPNOTSUPP;
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl, extack);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack);
		/* We just create a new class, need to do reverse binding. */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}

/* Walker state for qdisc_class_dump(); @w must stay the first member
 * because the callback casts the walker pointer back to this struct.
 */
struct qdisc_dump_args {
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

/* Class-walk callback: emit one RTM_NEWTCLASS message for class @cl. */
static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
			    struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			      RTM_NEWTCLASS, NULL);
}

/* Dump the classes of one qdisc.
 *
 * *@t_p counts qdiscs visited; @s_t is the qdisc index the dump resumes
 * at, and cb->args[1] the class index inside that qdisc. Qdiscs that are
 * ignored, already dumped, classless, or not matching tcm_parent are only
 * counted. Returns -1 when the class walk stopped early, 0 otherwise.
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* Past the resume point: forget the saved per-qdisc position. */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop  = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}

/* Dump the classes of @root and, when @recur is set, of the other qdiscs
 * on root's device: only the one matching tcm_parent's major if a parent
 * was given, otherwise every qdisc in the device hash. Returns 0 or -1.
 */
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t, bool recur)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root) || !recur)
		return 0;

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

/* Netlink dump callback for traffic classes of one device.
 *
 * Returns 0 when the request is too short for a tcmsg or the device does
 * not exist; otherwise stores the qdisc counter in cb->args[0] and returns
 * skb->len. The device reference taken by dev_get_by_index() is dropped
 * before returning.
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
				skb, tcm, cb, &t, s_t, true) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping),
				skb, tcm, cb, &t, s_t, false) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}

#ifdef CONFIG_PROC_FS
/* seq_file show handler for the "psched" proc entry: prints four
 * hexadecimal words derived from the scheduler clock constants.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}

/* Per-netns init: create the "psched" entry under net->proc_net. */
static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create_single("psched", 0, net->proc_net, psched_show);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

/* Per-netns exit: remove the "psched" entry again. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
/* Without CONFIG_PROC_FS both per-netns hooks do nothing. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};

#if IS_ENABLED(CONFIG_RETPOLINE)
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
#endif

/* Subsystem init: register the per-netns operations, the qdiscs listed
 * below and the rtnetlink handlers for qdisc and class messages.
 *
 * Only the result of register_pernet_subsys() is checked; the return
 * values of the register_qdisc() and rtnl_register() calls are ignored.
 */
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      0);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      0);

	tc_wrapper_init();

	return 0;
}

subsys_initcall(pktsched_init);
/* NOTE(review): the lone '|' below is not C; it looks like a cell
 * delimiter left behind by whatever produced this dump. Kept verbatim --
 * confirm before removing.
 */
|
4313 120 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/ethtool.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <net/net_namespace.h> #include <linux/if_arp.h> #include <net/rtnetlink.h> static netdev_tx_t nlmon_xmit(struct sk_buff *skb, struct net_device *dev) { dev_lstats_add(dev, skb->len); dev_kfree_skb(skb); return NETDEV_TX_OK; } static int nlmon_dev_init(struct net_device *dev) { dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); return dev->lstats == NULL ? 
-ENOMEM : 0; } static void nlmon_dev_uninit(struct net_device *dev) { free_percpu(dev->lstats); } struct nlmon { struct netlink_tap nt; }; static int nlmon_open(struct net_device *dev) { struct nlmon *nlmon = netdev_priv(dev); nlmon->nt.dev = dev; nlmon->nt.module = THIS_MODULE; return netlink_add_tap(&nlmon->nt); } static int nlmon_close(struct net_device *dev) { struct nlmon *nlmon = netdev_priv(dev); return netlink_remove_tap(&nlmon->nt); } static void nlmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { u64 packets, bytes; dev_lstats_read(dev, &packets, &bytes); stats->rx_packets = packets; stats->tx_packets = 0; stats->rx_bytes = bytes; stats->tx_bytes = 0; } static u32 always_on(struct net_device *dev) { return 1; } static const struct ethtool_ops nlmon_ethtool_ops = { .get_link = always_on, }; static const struct net_device_ops nlmon_ops = { .ndo_init = nlmon_dev_init, .ndo_uninit = nlmon_dev_uninit, .ndo_open = nlmon_open, .ndo_stop = nlmon_close, .ndo_start_xmit = nlmon_xmit, .ndo_get_stats64 = nlmon_get_stats64, }; static void nlmon_setup(struct net_device *dev) { dev->type = ARPHRD_NETLINK; dev->priv_flags |= IFF_NO_QUEUE; dev->netdev_ops = &nlmon_ops; dev->ethtool_ops = &nlmon_ethtool_ops; dev->needs_free_netdev = true; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_LLTX; dev->flags = IFF_NOARP; /* That's rather a softlimit here, which, of course, * can be altered. Not a real MTU, but what is to be * expected in most cases. 
*/ dev->mtu = NLMSG_GOODSIZE; dev->min_mtu = sizeof(struct nlmsghdr); } static int nlmon_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) return -EINVAL; return 0; } static struct rtnl_link_ops nlmon_link_ops __read_mostly = { .kind = "nlmon", .priv_size = sizeof(struct nlmon), .setup = nlmon_setup, .validate = nlmon_validate, }; static __init int nlmon_register(void) { return rtnl_link_register(&nlmon_link_ops); } static __exit void nlmon_unregister(void) { rtnl_link_unregister(&nlmon_link_ops); } module_init(nlmon_register); module_exit(nlmon_unregister); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); MODULE_AUTHOR("Mathieu Geli <geli@enseirb.fr>"); MODULE_DESCRIPTION("Netlink monitoring device"); MODULE_ALIAS_RTNL_LINK("nlmon"); |
2 2 1 1 1 1 4 4 4 1 3 4 4 1 3 4 4 1 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 | // SPDX-License-Identifier: GPL-2.0 /* * Native support for the I/O-Warrior USB devices * * Copyright (c) 2003-2005, 2020 Code Mercenaries GmbH * written by Christian Lucht <lucht@codemercs.com> and * Christoph Jung <jung@codemercs.com> * * based on * usb-skeleton.c by Greg Kroah-Hartman <greg@kroah.com> * brlvger.c by Stephane Dalton <sdalton@videotron.ca> * 
and Stephane Doyon <s.doyon@videotron.ca> * * Released under the GPLv2. */ #include <linux/module.h> #include <linux/usb.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/usb/iowarrior.h> #define DRIVER_AUTHOR "Christian Lucht <lucht@codemercs.com>" #define DRIVER_DESC "USB IO-Warrior driver" #define USB_VENDOR_ID_CODEMERCS 1984 /* low speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW40 0x1500 #define USB_DEVICE_ID_CODEMERCS_IOW24 0x1501 #define USB_DEVICE_ID_CODEMERCS_IOWPV1 0x1511 #define USB_DEVICE_ID_CODEMERCS_IOWPV2 0x1512 /* full speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW56 0x1503 /* fuller speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW28 0x1504 #define USB_DEVICE_ID_CODEMERCS_IOW28L 0x1505 #define USB_DEVICE_ID_CODEMERCS_IOW100 0x1506 /* OEMed devices */ #define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a #define USB_DEVICE_ID_CODEMERCS_IOW56AM 0x158b /* Get a minor range for your devices from the usb maintainer */ #ifdef CONFIG_USB_DYNAMIC_MINORS #define IOWARRIOR_MINOR_BASE 0 #else #define IOWARRIOR_MINOR_BASE 208 // SKELETON_MINOR_BASE 192 + 16, not official yet #endif /* interrupt input queue size */ #define MAX_INTERRUPT_BUFFER 16 /* maximum number of urbs that are submitted for writes at the same time, this applies to the IOWarrior56 only! IOWarrior24 and IOWarrior40 use synchronous usb_control_msg calls. 
*/ #define MAX_WRITES_IN_FLIGHT 4 MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); static struct usb_driver iowarrior_driver; /*--------------*/ /* data */ /*--------------*/ /* Structure to hold all of our device specific stuff */ struct iowarrior { struct mutex mutex; /* locks this structure */ struct usb_device *udev; /* save off the usb device pointer */ struct usb_interface *interface; /* the interface for this device */ unsigned char minor; /* the starting minor number for this device */ struct usb_endpoint_descriptor *int_out_endpoint; /* endpoint for reading (needed for IOW56 only) */ struct usb_endpoint_descriptor *int_in_endpoint; /* endpoint for reading */ struct urb *int_in_urb; /* the urb for reading data */ unsigned char *int_in_buffer; /* buffer for data to be read */ unsigned char serial_number; /* to detect lost packages */ unsigned char *read_queue; /* size is MAX_INTERRUPT_BUFFER * packet size */ wait_queue_head_t read_wait; wait_queue_head_t write_wait; /* wait-queue for writing to the device */ atomic_t write_busy; /* number of write-urbs submitted */ atomic_t read_idx; atomic_t intr_idx; atomic_t overflow_flag; /* signals an index 'rollover' */ int present; /* this is 1 as long as the device is connected */ int opened; /* this is 1 if the device is currently open */ char chip_serial[9]; /* the serial number string of the chip connected */ int report_size; /* number of bytes in a report */ u16 product_id; struct usb_anchor submitted; }; /*--------------*/ /* globals */ /*--------------*/ #define USB_REQ_GET_REPORT 0x01 //#if 0 static int usb_get_report(struct usb_device *dev, struct usb_host_interface *inter, unsigned char type, unsigned char id, void *buf, int size) { return usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), USB_REQ_GET_REPORT, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, (type << 8) + id, inter->desc.bInterfaceNumber, buf, size, USB_CTRL_GET_TIMEOUT); } //#endif #define USB_REQ_SET_REPORT 
0x09 static int usb_set_report(struct usb_interface *intf, unsigned char type, unsigned char id, void *buf, int size) { return usb_control_msg(interface_to_usbdev(intf), usb_sndctrlpipe(interface_to_usbdev(intf), 0), USB_REQ_SET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE, (type << 8) + id, intf->cur_altsetting->desc.bInterfaceNumber, buf, size, 1000); } /*---------------------*/ /* driver registration */ /*---------------------*/ /* table of devices that work with this driver */ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW40)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV1)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV2)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW100)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); /* * USB callback handler for reading data */ static void iowarrior_callback(struct urb *urb) { struct iowarrior *dev = urb->context; int intr_idx; int read_idx; int aux_idx; int offset; int status = urb->status; int retval; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: return; default: goto exit; } intr_idx = atomic_read(&dev->intr_idx); /* aux_idx become previous intr_idx */ aux_idx = (intr_idx == 0) ? 
(MAX_INTERRUPT_BUFFER - 1) : (intr_idx - 1); read_idx = atomic_read(&dev->read_idx); /* queue is not empty and it's interface 0 */ if ((intr_idx != read_idx) && (dev->interface->cur_altsetting->desc.bInterfaceNumber == 0)) { /* + 1 for serial number */ offset = aux_idx * (dev->report_size + 1); if (!memcmp (dev->read_queue + offset, urb->transfer_buffer, dev->report_size)) { /* equal values on interface 0 will be ignored */ goto exit; } } /* aux_idx become next intr_idx */ aux_idx = (intr_idx == (MAX_INTERRUPT_BUFFER - 1)) ? 0 : (intr_idx + 1); if (read_idx == aux_idx) { /* queue full, dropping oldest input */ read_idx = (++read_idx == MAX_INTERRUPT_BUFFER) ? 0 : read_idx; atomic_set(&dev->read_idx, read_idx); atomic_set(&dev->overflow_flag, 1); } /* +1 for serial number */ offset = intr_idx * (dev->report_size + 1); memcpy(dev->read_queue + offset, urb->transfer_buffer, dev->report_size); *(dev->read_queue + offset + (dev->report_size)) = dev->serial_number++; atomic_set(&dev->intr_idx, aux_idx); /* tell the blocking read about the new data */ wake_up_interruptible(&dev->read_wait); exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&dev->interface->dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } /* * USB Callback handler for write-ops */ static void iowarrior_write_callback(struct urb *urb) { struct iowarrior *dev; int status = urb->status; dev = urb->context; /* sync/async unlink faults aren't errors */ if (status && !(status == -ENOENT || status == -ECONNRESET || status == -ESHUTDOWN)) { dev_dbg(&dev->interface->dev, "nonzero write bulk status received: %d\n", status); } /* free up our allocated buffer */ usb_free_coherent(urb->dev, urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); /* tell a waiting writer the interrupt-out-pipe is available again */ atomic_dec(&dev->write_busy); wake_up_interruptible(&dev->write_wait); } /* * iowarrior_delete */ static inline void iowarrior_delete(struct iowarrior 
*dev) { dev_dbg(&dev->interface->dev, "minor %d\n", dev->minor); kfree(dev->int_in_buffer); usb_free_urb(dev->int_in_urb); kfree(dev->read_queue); usb_put_intf(dev->interface); kfree(dev); } /*---------------------*/ /* fops implementation */ /*---------------------*/ static int read_index(struct iowarrior *dev) { int intr_idx, read_idx; read_idx = atomic_read(&dev->read_idx); intr_idx = atomic_read(&dev->intr_idx); return (read_idx == intr_idx ? -1 : read_idx); } /* * iowarrior_read */ static ssize_t iowarrior_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct iowarrior *dev; int read_idx; int offset; dev = file->private_data; /* verify that the device wasn't unplugged */ if (!dev || !dev->present) return -ENODEV; dev_dbg(&dev->interface->dev, "minor %d, count = %zd\n", dev->minor, count); /* read count must be packet size (+ time stamp) */ if ((count != dev->report_size) && (count != (dev->report_size + 1))) return -EINVAL; /* repeat until no buffer overrun in callback handler occur */ do { atomic_set(&dev->overflow_flag, 0); if ((read_idx = read_index(dev)) == -1) { /* queue empty */ if (file->f_flags & O_NONBLOCK) return -EAGAIN; else { //next line will return when there is either new data, or the device is unplugged int r = wait_event_interruptible(dev->read_wait, (!dev->present || (read_idx = read_index (dev)) != -1)); if (r) { //we were interrupted by a signal return -ERESTART; } if (!dev->present) { //The device was unplugged return -ENODEV; } if (read_idx == -1) { // Can this happen ??? return 0; } } } offset = read_idx * (dev->report_size + 1); if (copy_to_user(buffer, dev->read_queue + offset, count)) { return -EFAULT; } } while (atomic_read(&dev->overflow_flag)); read_idx = ++read_idx == MAX_INTERRUPT_BUFFER ? 
0 : read_idx; atomic_set(&dev->read_idx, read_idx); return count; } /* * iowarrior_write */ static ssize_t iowarrior_write(struct file *file, const char __user *user_buffer, size_t count, loff_t *ppos) { struct iowarrior *dev; int retval = 0; char *buf = NULL; /* for IOW24 and IOW56 we need a buffer */ struct urb *int_out_urb = NULL; dev = file->private_data; mutex_lock(&dev->mutex); /* verify that the device wasn't unplugged */ if (!dev->present) { retval = -ENODEV; goto exit; } dev_dbg(&dev->interface->dev, "minor %d, count = %zd\n", dev->minor, count); /* if count is 0 we're already done */ if (count == 0) { retval = 0; goto exit; } /* We only accept full reports */ if (count != dev->report_size) { retval = -EINVAL; goto exit; } switch (dev->product_id) { case USB_DEVICE_ID_CODEMERCS_IOW24: case USB_DEVICE_ID_CODEMERCS_IOW24SAG: case USB_DEVICE_ID_CODEMERCS_IOWPV1: case USB_DEVICE_ID_CODEMERCS_IOWPV2: case USB_DEVICE_ID_CODEMERCS_IOW40: /* IOW24 and IOW40 use a synchronous call */ buf = memdup_user(user_buffer, count); if (IS_ERR(buf)) { retval = PTR_ERR(buf); goto exit; } retval = usb_set_report(dev->interface, 2, 0, buf, count); kfree(buf); goto exit; case USB_DEVICE_ID_CODEMERCS_IOW56: case USB_DEVICE_ID_CODEMERCS_IOW56AM: case USB_DEVICE_ID_CODEMERCS_IOW28: case USB_DEVICE_ID_CODEMERCS_IOW28L: case USB_DEVICE_ID_CODEMERCS_IOW100: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto exit; } else { retval = wait_event_interruptible(dev->write_wait, (!dev->present || (atomic_read (&dev-> write_busy) < MAX_WRITES_IN_FLIGHT))); if (retval) { /* we were interrupted by a signal */ retval = -ERESTART; goto exit; } if (!dev->present) { /* The device was unplugged */ retval = -ENODEV; goto exit; } if (!dev->opened) { /* We were closed while waiting for an URB */ retval = -ENODEV; goto exit; } } } 
atomic_inc(&dev->write_busy); int_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!int_out_urb) { retval = -ENOMEM; goto error_no_urb; } buf = usb_alloc_coherent(dev->udev, dev->report_size, GFP_KERNEL, &int_out_urb->transfer_dma); if (!buf) { retval = -ENOMEM; dev_dbg(&dev->interface->dev, "Unable to allocate buffer\n"); goto error_no_buffer; } usb_fill_int_urb(int_out_urb, dev->udev, usb_sndintpipe(dev->udev, dev->int_out_endpoint->bEndpointAddress), buf, dev->report_size, iowarrior_write_callback, dev, dev->int_out_endpoint->bInterval); int_out_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; if (copy_from_user(buf, user_buffer, count)) { retval = -EFAULT; goto error; } usb_anchor_urb(int_out_urb, &dev->submitted); retval = usb_submit_urb(int_out_urb, GFP_KERNEL); if (retval) { dev_dbg(&dev->interface->dev, "submit error %d for urb nr.%d\n", retval, atomic_read(&dev->write_busy)); usb_unanchor_urb(int_out_urb); goto error; } /* submit was ok */ retval = count; usb_free_urb(int_out_urb); goto exit; default: /* what do we have here ? An unsupported Product-ID ? 
*/ dev_err(&dev->interface->dev, "%s - not supported for product=0x%x\n", __func__, dev->product_id); retval = -EFAULT; goto exit; } error: usb_free_coherent(dev->udev, dev->report_size, buf, int_out_urb->transfer_dma); error_no_buffer: usb_free_urb(int_out_urb); error_no_urb: atomic_dec(&dev->write_busy); wake_up_interruptible(&dev->write_wait); exit: mutex_unlock(&dev->mutex); return retval; } /* * iowarrior_ioctl */ static long iowarrior_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct iowarrior *dev = NULL; __u8 *buffer; __u8 __user *user_buffer; int retval; int io_res; /* checks for bytes read/written and copy_to/from_user results */ dev = file->private_data; if (!dev) return -ENODEV; buffer = kzalloc(dev->report_size, GFP_KERNEL); if (!buffer) return -ENOMEM; mutex_lock(&dev->mutex); /* verify that the device wasn't unplugged */ if (!dev->present) { retval = -ENODEV; goto error_out; } dev_dbg(&dev->interface->dev, "minor %d, cmd 0x%.4x, arg %ld\n", dev->minor, cmd, arg); retval = 0; io_res = 0; switch (cmd) { case IOW_WRITE: if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24SAG || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV1 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV2 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW40) { user_buffer = (__u8 __user *)arg; io_res = copy_from_user(buffer, user_buffer, dev->report_size); if (io_res) { retval = -EFAULT; } else { io_res = usb_set_report(dev->interface, 2, 0, buffer, dev->report_size); if (io_res < 0) retval = io_res; } } else { retval = -EINVAL; dev_err(&dev->interface->dev, "ioctl 'IOW_WRITE' is not supported for product=0x%x.\n", dev->product_id); } break; case IOW_READ: user_buffer = (__u8 __user *)arg; io_res = usb_get_report(dev->udev, dev->interface->cur_altsetting, 1, 0, buffer, dev->report_size); if (io_res < 0) retval = io_res; else { io_res = copy_to_user(user_buffer, buffer, dev->report_size); if (io_res) retval = 
-EFAULT; } break; case IOW_GETINFO: { /* Report available information for the device */ struct iowarrior_info info; /* needed for power consumption */ struct usb_config_descriptor *cfg_descriptor = &dev->udev->actconfig->desc; memset(&info, 0, sizeof(info)); /* directly from the descriptor */ info.vendor = le16_to_cpu(dev->udev->descriptor.idVendor); info.product = dev->product_id; info.revision = le16_to_cpu(dev->udev->descriptor.bcdDevice); /* 0==UNKNOWN, 1==LOW(usb1.1) ,2=FULL(usb1.1), 3=HIGH(usb2.0) */ info.speed = dev->udev->speed; info.if_num = dev->interface->cur_altsetting->desc.bInterfaceNumber; info.report_size = dev->report_size; /* serial number string has been read earlier 8 chars or empty string */ memcpy(info.serial, dev->chip_serial, sizeof(dev->chip_serial)); if (cfg_descriptor == NULL) { info.power = -1; /* no information available */ } else { /* the MaxPower is stored in units of 2mA to make it fit into a byte-value */ info.power = cfg_descriptor->bMaxPower * 2; } io_res = copy_to_user((struct iowarrior_info __user *)arg, &info, sizeof(struct iowarrior_info)); if (io_res) retval = -EFAULT; break; } default: /* return that we did not understand this ioctl call */ retval = -ENOTTY; break; } error_out: /* unlock the device */ mutex_unlock(&dev->mutex); kfree(buffer); return retval; } /* * iowarrior_open */ static int iowarrior_open(struct inode *inode, struct file *file) { struct iowarrior *dev = NULL; struct usb_interface *interface; int subminor; int retval = 0; subminor = iminor(inode); interface = usb_find_interface(&iowarrior_driver, subminor); if (!interface) { pr_err("%s - error, can't find device for minor %d\n", __func__, subminor); return -ENODEV; } dev = usb_get_intfdata(interface); if (!dev) return -ENODEV; mutex_lock(&dev->mutex); /* Only one process can open each device, no sharing. 
*/ if (dev->opened) { retval = -EBUSY; goto out; } /* setup interrupt handler for receiving values */ if ((retval = usb_submit_urb(dev->int_in_urb, GFP_KERNEL)) < 0) { dev_err(&interface->dev, "Error %d while submitting URB\n", retval); retval = -EFAULT; goto out; } /* increment our usage count for the driver */ ++dev->opened; /* save our object in the file's private structure */ file->private_data = dev; retval = 0; out: mutex_unlock(&dev->mutex); return retval; } /* * iowarrior_release */ static int iowarrior_release(struct inode *inode, struct file *file) { struct iowarrior *dev; int retval = 0; dev = file->private_data; if (!dev) return -ENODEV; dev_dbg(&dev->interface->dev, "minor %d\n", dev->minor); /* lock our device */ mutex_lock(&dev->mutex); if (dev->opened <= 0) { retval = -ENODEV; /* close called more than once */ mutex_unlock(&dev->mutex); } else { dev->opened = 0; /* we're closing now */ retval = 0; if (dev->present) { /* The device is still connected so we only shutdown pending read-/write-ops. */ usb_kill_urb(dev->int_in_urb); wake_up_interruptible(&dev->read_wait); wake_up_interruptible(&dev->write_wait); mutex_unlock(&dev->mutex); } else { /* The device was unplugged, cleanup resources */ mutex_unlock(&dev->mutex); iowarrior_delete(dev); } } return retval; } static __poll_t iowarrior_poll(struct file *file, poll_table * wait) { struct iowarrior *dev = file->private_data; __poll_t mask = 0; if (!dev->present) return EPOLLERR | EPOLLHUP; poll_wait(file, &dev->read_wait, wait); poll_wait(file, &dev->write_wait, wait); if (!dev->present) return EPOLLERR | EPOLLHUP; if (read_index(dev) != -1) mask |= EPOLLIN | EPOLLRDNORM; if (atomic_read(&dev->write_busy) < MAX_WRITES_IN_FLIGHT) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } /* * File operations needed when we register this driver. * This assumes that this driver NEEDS file operations, * of course, which means that the driver is expected * to have a node in the /dev directory. 
If the USB * device were for a network interface then the driver * would use "struct net_driver" instead, and a serial * device would use "struct tty_driver". */ static const struct file_operations iowarrior_fops = { .owner = THIS_MODULE, .write = iowarrior_write, .read = iowarrior_read, .unlocked_ioctl = iowarrior_ioctl, .open = iowarrior_open, .release = iowarrior_release, .poll = iowarrior_poll, .llseek = noop_llseek, }; static char *iowarrior_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } /* * usb class driver info in order to get a minor number from the usb core, * and to have the device registered with devfs and the driver core */ static struct usb_class_driver iowarrior_class = { .name = "iowarrior%d", .devnode = iowarrior_devnode, .fops = &iowarrior_fops, .minor_base = IOWARRIOR_MINOR_BASE, }; /*---------------------------------*/ /* probe and disconnect functions */ /*---------------------------------*/ /* * iowarrior_probe * * Called by the usb core when a new device is connected that it thinks * this driver might be interested in. 
*/ static int iowarrior_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct iowarrior *dev = NULL; struct usb_host_interface *iface_desc; int retval = -ENOMEM; int res; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(struct iowarrior), GFP_KERNEL); if (!dev) return retval; mutex_init(&dev->mutex); atomic_set(&dev->intr_idx, 0); atomic_set(&dev->read_idx, 0); atomic_set(&dev->overflow_flag, 0); init_waitqueue_head(&dev->read_wait); atomic_set(&dev->write_busy, 0); init_waitqueue_head(&dev->write_wait); dev->udev = udev; dev->interface = usb_get_intf(interface); iface_desc = interface->cur_altsetting; dev->product_id = le16_to_cpu(udev->descriptor.idProduct); init_usb_anchor(&dev->submitted); res = usb_find_last_int_in_endpoint(iface_desc, &dev->int_in_endpoint); if (res) { dev_err(&interface->dev, "no interrupt-in endpoint found\n"); retval = res; goto error; } if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { dev_err(&interface->dev, "no interrupt-out endpoint found\n"); retval = res; goto error; } } /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); /* * Some devices need the report size to be different than the * endpoint size. 
*/ if (dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) { switch (dev->product_id) { case USB_DEVICE_ID_CODEMERCS_IOW56: case USB_DEVICE_ID_CODEMERCS_IOW56AM: dev->report_size = 7; break; case USB_DEVICE_ID_CODEMERCS_IOW28: case USB_DEVICE_ID_CODEMERCS_IOW28L: dev->report_size = 4; break; case USB_DEVICE_ID_CODEMERCS_IOW100: dev->report_size = 12; break; } } /* create the urb and buffer for reading */ dev->int_in_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->int_in_urb) goto error; dev->int_in_buffer = kmalloc(dev->report_size, GFP_KERNEL); if (!dev->int_in_buffer) goto error; usb_fill_int_urb(dev->int_in_urb, dev->udev, usb_rcvintpipe(dev->udev, dev->int_in_endpoint->bEndpointAddress), dev->int_in_buffer, dev->report_size, iowarrior_callback, dev, dev->int_in_endpoint->bInterval); /* create an internal buffer for interrupt data from the device */ dev->read_queue = kmalloc_array(dev->report_size + 1, MAX_INTERRUPT_BUFFER, GFP_KERNEL); if (!dev->read_queue) goto error; /* Get the serial-number of the chip */ memset(dev->chip_serial, 0x00, sizeof(dev->chip_serial)); usb_string(udev, udev->descriptor.iSerialNumber, dev->chip_serial, sizeof(dev->chip_serial)); if (strlen(dev->chip_serial) != 8) memset(dev->chip_serial, 0x00, sizeof(dev->chip_serial)); /* Set the idle timeout to 0, if this is interface 0 */ if (dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) { usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x0A, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } /* allow device read and ioctl */ dev->present = 1; /* we can register the device now, as it is ready */ usb_set_intfdata(interface, dev); retval = usb_register_dev(interface, &iowarrior_class); if (retval) { /* something prevented us from registering this driver */ dev_err(&interface->dev, "Not able to get a minor for this device.\n"); goto error; } dev->minor = interface->minor; /* let the user know what node this device is now attached to */ 
dev_info(&interface->dev, "IOWarrior product=0x%x, serial=%s interface=%d " "now attached to iowarrior%d\n", dev->product_id, dev->chip_serial, iface_desc->desc.bInterfaceNumber, dev->minor - IOWARRIOR_MINOR_BASE); return retval; error: iowarrior_delete(dev); return retval; } /* * iowarrior_disconnect * * Called by the usb core when the device is removed from the system. */ static void iowarrior_disconnect(struct usb_interface *interface) { struct iowarrior *dev = usb_get_intfdata(interface); int minor = dev->minor; usb_deregister_dev(interface, &iowarrior_class); mutex_lock(&dev->mutex); /* prevent device read, write and ioctl */ dev->present = 0; if (dev->opened) { /* There is a process that holds a filedescriptor to the device , so we only shutdown read-/write-ops going on. Deleting the device is postponed until close() was called. */ usb_kill_urb(dev->int_in_urb); usb_kill_anchored_urbs(&dev->submitted); wake_up_interruptible(&dev->read_wait); wake_up_interruptible(&dev->write_wait); mutex_unlock(&dev->mutex); } else { /* no process is using the device, cleanup now */ mutex_unlock(&dev->mutex); iowarrior_delete(dev); } dev_info(&interface->dev, "I/O-Warror #%d now disconnected\n", minor - IOWARRIOR_MINOR_BASE); } /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver iowarrior_driver = { .name = "iowarrior", .probe = iowarrior_probe, .disconnect = iowarrior_disconnect, .id_table = iowarrior_ids, }; module_usb_driver(iowarrior_driver); |
2 2 1 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2021 NXP */ #include "netlink.h" #include "common.h" struct phc_vclocks_req_info { struct ethnl_req_info base; }; struct phc_vclocks_reply_data { struct ethnl_reply_data base; int num; int *index; }; #define PHC_VCLOCKS_REPDATA(__reply_base) \ container_of(__reply_base, struct phc_vclocks_reply_data, base) const struct nla_policy ethnl_phc_vclocks_get_policy[] = { [ETHTOOL_A_PHC_VCLOCKS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; static int phc_vclocks_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct phc_vclocks_reply_data *data = PHC_VCLOCKS_REPDATA(reply_base); struct net_device *dev = reply_base->dev; int ret; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; data->num = ethtool_get_phc_vclocks(dev, &data->index); ethnl_ops_complete(dev); return ret; } static int phc_vclocks_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct phc_vclocks_reply_data *data = PHC_VCLOCKS_REPDATA(reply_base); int len = 0; if (data->num > 0) { len += nla_total_size(sizeof(u32)); len += nla_total_size(sizeof(s32) * data->num); } return len; } static int phc_vclocks_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct phc_vclocks_reply_data *data = PHC_VCLOCKS_REPDATA(reply_base); if (data->num <= 0) return 0; if (nla_put_u32(skb, ETHTOOL_A_PHC_VCLOCKS_NUM, data->num) || nla_put(skb, ETHTOOL_A_PHC_VCLOCKS_INDEX, sizeof(s32) * data->num, data->index)) return -EMSGSIZE; return 0; } static void phc_vclocks_cleanup_data(struct 
ethnl_reply_data *reply_base) { const struct phc_vclocks_reply_data *data = PHC_VCLOCKS_REPDATA(reply_base); kfree(data->index); } const struct ethnl_request_ops ethnl_phc_vclocks_request_ops = { .request_cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET, .reply_cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, .hdr_attr = ETHTOOL_A_PHC_VCLOCKS_HEADER, .req_info_size = sizeof(struct phc_vclocks_req_info), .reply_data_size = sizeof(struct phc_vclocks_reply_data), .prepare_data = phc_vclocks_prepare_data, .reply_size = phc_vclocks_reply_size, .fill_reply = phc_vclocks_fill_reply, .cleanup_data = phc_vclocks_cleanup_data, }; |
14 14 1 1 || // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd. * https://www.oppo.com/ */ #include <linux/sysfs.h> #include <linux/kobject.h> #include "internal.h" enum { attr_feature, attr_pointer_ui, attr_pointer_bool, }; enum { struct_erofs_sb_info, struct_erofs_mount_opts, }; struct erofs_attr { struct attribute attr; short attr_id; int struct_type, offset; }; #define EROFS_ATTR(_name, _mode, _id) \ static struct erofs_attr erofs_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ .attr_id = attr_##_id, \ } #define EROFS_ATTR_FUNC(_name, _mode) EROFS_ATTR(_name, _mode, _name) #define EROFS_ATTR_FEATURE(_name) EROFS_ATTR(_name, 0444, feature) #define EROFS_ATTR_OFFSET(_name, _mode, _id, _struct) \ static struct erofs_attr erofs_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ .attr_id = attr_##_id, \ .struct_type = struct_##_struct, \ .offset = offsetof(struct _struct, _name),\ } #define EROFS_ATTR_RW(_name, _id, _struct) \ EROFS_ATTR_OFFSET(_name, 0644, _id, _struct) #define EROFS_RO_ATTR(_name, _id, _struct) \ EROFS_ATTR_OFFSET(_name, 0444, _id, _struct) #define EROFS_ATTR_RW_UI(_name, _struct) \ EROFS_ATTR_RW(_name, pointer_ui, _struct) #define EROFS_ATTR_RW_BOOL(_name, _struct) \ EROFS_ATTR_RW(_name, pointer_bool, _struct) #define ATTR_LIST(name) (&erofs_attr_##name.attr) #ifdef CONFIG_EROFS_FS_ZIP EROFS_ATTR_RW_UI(sync_decompress, erofs_mount_opts); #endif static struct attribute *erofs_attrs[] = { #ifdef CONFIG_EROFS_FS_ZIP ATTR_LIST(sync_decompress), #endif NULL, }; ATTRIBUTE_GROUPS(erofs); /* Features this copy of erofs supports */ EROFS_ATTR_FEATURE(zero_padding); EROFS_ATTR_FEATURE(compr_cfgs); EROFS_ATTR_FEATURE(big_pcluster); EROFS_ATTR_FEATURE(chunked_file); EROFS_ATTR_FEATURE(device_table); EROFS_ATTR_FEATURE(compr_head2); EROFS_ATTR_FEATURE(sb_chksum); EROFS_ATTR_FEATURE(ztailpacking); EROFS_ATTR_FEATURE(fragments); EROFS_ATTR_FEATURE(dedupe); 
static struct attribute *erofs_feat_attrs[] = { ATTR_LIST(zero_padding), ATTR_LIST(compr_cfgs), ATTR_LIST(big_pcluster), ATTR_LIST(chunked_file), ATTR_LIST(device_table), ATTR_LIST(compr_head2), ATTR_LIST(sb_chksum), ATTR_LIST(ztailpacking), ATTR_LIST(fragments), ATTR_LIST(dedupe), NULL, }; ATTRIBUTE_GROUPS(erofs_feat); static unsigned char *__struct_ptr(struct erofs_sb_info *sbi, int struct_type, int offset) { if (struct_type == struct_erofs_sb_info) return (unsigned char *)sbi + offset; if (struct_type == struct_erofs_mount_opts) return (unsigned char *)&sbi->opt + offset; return NULL; } static ssize_t erofs_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info, s_kobj); struct erofs_attr *a = container_of(attr, struct erofs_attr, attr); unsigned char *ptr = __struct_ptr(sbi, a->struct_type, a->offset); switch (a->attr_id) { case attr_feature: return sysfs_emit(buf, "supported\n"); case attr_pointer_ui: if (!ptr) return 0; return sysfs_emit(buf, "%u\n", *(unsigned int *)ptr); case attr_pointer_bool: if (!ptr) return 0; return sysfs_emit(buf, "%d\n", *(bool *)ptr); } return 0; } static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t len) { struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info, s_kobj); struct erofs_attr *a = container_of(attr, struct erofs_attr, attr); unsigned char *ptr = __struct_ptr(sbi, a->struct_type, a->offset); unsigned long t; int ret; switch (a->attr_id) { case attr_pointer_ui: if (!ptr) return 0; ret = kstrtoul(skip_spaces(buf), 0, &t); if (ret) return ret; if (t != (unsigned int)t) return -ERANGE; #ifdef CONFIG_EROFS_FS_ZIP if (!strcmp(a->attr.name, "sync_decompress") && (t > EROFS_SYNC_DECOMPRESS_FORCE_OFF)) return -EINVAL; #endif *(unsigned int *)ptr = t; return len; case attr_pointer_bool: if (!ptr) return 0; ret = kstrtoul(skip_spaces(buf), 0, &t); if (ret) return ret; if (t != 0 && t != 1) 
return -EINVAL; *(bool *)ptr = !!t; return len; } return 0; } static void erofs_sb_release(struct kobject *kobj) { struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info, s_kobj); complete(&sbi->s_kobj_unregister); } static const struct sysfs_ops erofs_attr_ops = { .show = erofs_attr_show, .store = erofs_attr_store, }; static const struct kobj_type erofs_sb_ktype = { .default_groups = erofs_groups, .sysfs_ops = &erofs_attr_ops, .release = erofs_sb_release, }; static const struct kobj_type erofs_ktype = { .sysfs_ops = &erofs_attr_ops, }; static struct kset erofs_root = { .kobj = {.ktype = &erofs_ktype}, }; static const struct kobj_type erofs_feat_ktype = { .default_groups = erofs_feat_groups, .sysfs_ops = &erofs_attr_ops, }; static struct kobject erofs_feat = { .kset = &erofs_root, }; int erofs_register_sysfs(struct super_block *sb) { struct erofs_sb_info *sbi = EROFS_SB(sb); char *name; char *str = NULL; int err; if (erofs_is_fscache_mode(sb)) { if (sbi->domain_id) { str = kasprintf(GFP_KERNEL, "%s,%s", sbi->domain_id, sbi->fsid); if (!str) return -ENOMEM; name = str; } else { name = sbi->fsid; } } else { name = sb->s_id; } sbi->s_kobj.kset = &erofs_root; init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s", name); kfree(str); if (err) goto put_sb_kobj; return 0; put_sb_kobj: kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); return err; } void erofs_unregister_sysfs(struct super_block *sb) { struct erofs_sb_info *sbi = EROFS_SB(sb); if (sbi->s_kobj.state_in_sysfs) { kobject_del(&sbi->s_kobj); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); } } int __init erofs_init_sysfs(void) { int ret; kobject_set_name(&erofs_root.kobj, "erofs"); erofs_root.kobj.parent = fs_kobj; ret = kset_register(&erofs_root); if (ret) goto root_err; ret = kobject_init_and_add(&erofs_feat, &erofs_feat_ktype, NULL, "features"); if (ret) goto feat_err; return ret; feat_err: 
kobject_put(&erofs_feat); kset_unregister(&erofs_root); root_err: return ret; } void erofs_exit_sysfs(void) { kobject_put(&erofs_feat); kset_unregister(&erofs_root); } |
331 7 7 316 315 8 2 2 322 295 285 322 134 136 133 39 40 6 18 16 22 2 12 13 13 5 4 2 23 9 12 1 2 3 6 6 9 7 7 4 3 21 5 16 8 8 5 1 9 8 9 8 9 8 7 8 9 31 32 32 32 32 30 32 32 32 32 39 40 15 1 10 4 9 5 9 5 10 4 11 2 13 1 13 1 12 2 13 1 9 6 12 3 9 6 14 1 12 3 14 10 5 14 1 24 9 2 19 2 2 10 2 1 6 7 3 1 1 1 1 1 9 9 9 9 9 8 8 8 8 8 9 9 9 9 9 9 9 9 9 1 9 9 9 9 9 9 9 9 8 9 9 9 9 9 9 9 1 8 1 9 9 1 8 9 8 1 8 9 1 9 1 8 1 8 9 9 8 8 1 9 9 9 1 9 7 9 9 9 9 8 9 8 9 7 9 9 9 9 9 8 9 9 9 8 9 8 9 9 9 21 8 7 6 11 2 6 3 1 9 1 1 8 7 7 1 8 7 2 1 1 1 8 38 1 36 1 3 3 3 3 27 4 3 1 2 2 1 31 4 2 28 9 8 6 2 1 16 3 5 10 8 1 10 2 2 2 2 2 1 1 1 2 2 2 2 1 1 1 1 1 1 1 1 1 9 9 9 29 29 29 29 29 16 2 9 15 29 29 12 10 1 10 10 1 10 9 9 1 1 35 1 34 1 32 1 1 1 2 1 31 7 24 29 2 26 3 16 16 23 2 17 14 1 2 8 6 7 5 2 3 1 3 4 4 10 1 3 1 3 2 2 2 1 2 2 2 5 3 1 2 1 1 12 3 2 5 2 1 4 6 5 3 2 2 3 1 1 5 6 2 6 20 20 1 1 22 22 20 20 20 2 2 12 5 6 7 10 11 9 8 1 7 1 4 2 1 1 2 2 1 1 1 1 9 9 1 1 12 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 7 6 1 1 1 1 2 1 1 1 1 1 1 1 1 1 6 6 1 5 3 1 1 1 2 8 1 9 7 1 7 8 8 9 10 10 10 8 1 8 2 2 8 8 8 7 8 8 7 8 8 8 6 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2 2 4 3 1 1 1 2 1 1 2 1 1 1 1 2 1 1 2 2 5 3 2 3 2 4 1 5 4 1 5 4 1 3 1 3 9 2 5 3 3 1 1 4 1 1 9 1 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 1 2 1 3 2 1 3 3 3 2 2 2 2 2 3 2 1 1 1 1 2 1 1 2 2 6 1 1 1 3 1 4 3 1 1 5 1 12 11 12 2 4 3 1 2 1 1 14 3 3 3 8 6 3 7 1 7 1 1 7 7 7 7 7 7 5 6 1 7 7 4 1 1 4 4 1 1 1 1 1 1 1 1 1 1 5 5 2 3 1 2 2 2 1 1 1 1 1 1 1 1 1 1 3 1 1 2 1 5 4 1 4 1 1 1 1 1 1 5 5 5 5 4 4 1 4 4 3 1 3 2 2 1 1 3 3 1 1 1 3 3 1 1 1 3 3 3 1 18 1 17 1 1 11 7 18 2 1 15 15 15 15 15 8 6 1 8 1 3 6 1 1 2 2 1 7 2 1 4 5 2 3 2 1 1 2 2 2 1 1 2 1 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 1 1 1 2 2 2 1 4 2 1 1 2 1 3 2 1 1 1 1 1 2 2 1 1 2 1 1 2 1 6 2 1 3 3 3 1 18 11 4 1 3 4 4 47 1 46 42 4 45 39 1 6 43 43 41 2 1 4 38 14 2 26 2 1 1 1 1 1 1 1 5 1 2 
1 21 12 1 8 12 9 1 9 1 7 1 7 7 1 5 1 1 5 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 3 3 8 2 5 1 1 2 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 3 3 3 3 3 3 3 1 2 4 1 4 1 1 379 23 6 350 13 307 314 358 1 55 304 50 324 355 58 51 317 50 319 39 1 1 19 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 3 2 2 12 174 9 168 168 168 168 168 1 || // SPDX-License-Identifier: GPL-2.0-only /* * This is the new netlink-based wireless configuration interface. * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH * Copyright (C) 2018-2023 Intel Corporation */ #include <linux/if.h> #include <linux/module.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/if_ether.h> #include <linux/ieee80211.h> #include <linux/nl80211.h> #include <linux/rtnetlink.h> #include <linux/netlink.h> #include <linux/nospec.h> #include <linux/etherdevice.h> #include <linux/if_vlan.h> #include <net/net_namespace.h> #include <net/genetlink.h> #include <net/cfg80211.h> #include <net/sock.h> #include <net/inet_connection_sock.h> #include "core.h" #include "nl80211.h" #include "reg.h" #include "rdev-ops.h" static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, struct genl_info *info, struct cfg80211_crypto_settings *settings, int cipher_limit); /* the netlink family */ static struct genl_family nl80211_fam; /* multicast groups */ enum nl80211_multicast_groups { NL80211_MCGRP_CONFIG, NL80211_MCGRP_SCAN, NL80211_MCGRP_REGULATORY, NL80211_MCGRP_MLME, NL80211_MCGRP_VENDOR, NL80211_MCGRP_NAN, NL80211_MCGRP_TESTMODE /* keep last - ifdef! 
*/ }; static const struct genl_multicast_group nl80211_mcgrps[] = { [NL80211_MCGRP_CONFIG] = { .name = NL80211_MULTICAST_GROUP_CONFIG }, [NL80211_MCGRP_SCAN] = { .name = NL80211_MULTICAST_GROUP_SCAN }, [NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG }, [NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME }, [NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR }, [NL80211_MCGRP_NAN] = { .name = NL80211_MULTICAST_GROUP_NAN }, #ifdef CONFIG_NL80211_TESTMODE [NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE } #endif }; /* returns ERR_PTR values */ static struct wireless_dev * __cfg80211_wdev_from_attrs(struct cfg80211_registered_device *rdev, struct net *netns, struct nlattr **attrs) { struct wireless_dev *result = NULL; bool have_ifidx = attrs[NL80211_ATTR_IFINDEX]; bool have_wdev_id = attrs[NL80211_ATTR_WDEV]; u64 wdev_id = 0; int wiphy_idx = -1; int ifidx = -1; if (!have_ifidx && !have_wdev_id) return ERR_PTR(-EINVAL); if (have_ifidx) ifidx = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); if (have_wdev_id) { wdev_id = nla_get_u64(attrs[NL80211_ATTR_WDEV]); wiphy_idx = wdev_id >> 32; } if (rdev) { struct wireless_dev *wdev; lockdep_assert_held(&rdev->wiphy.mtx); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (have_ifidx && wdev->netdev && wdev->netdev->ifindex == ifidx) { result = wdev; break; } if (have_wdev_id && wdev->identifier == (u32)wdev_id) { result = wdev; break; } } return result ?: ERR_PTR(-ENODEV); } ASSERT_RTNL(); for_each_rdev(rdev) { struct wireless_dev *wdev; if (wiphy_net(&rdev->wiphy) != netns) continue; if (have_wdev_id && rdev->wiphy_idx != wiphy_idx) continue; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (have_ifidx && wdev->netdev && wdev->netdev->ifindex == ifidx) { result = wdev; break; } if (have_wdev_id && wdev->identifier == (u32)wdev_id) { result = wdev; break; } } if (result) break; } if (result) return result; return ERR_PTR(-ENODEV); } static struct 
cfg80211_registered_device * __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) { struct cfg80211_registered_device *rdev = NULL, *tmp; struct net_device *netdev; ASSERT_RTNL(); if (!attrs[NL80211_ATTR_WIPHY] && !attrs[NL80211_ATTR_IFINDEX] && !attrs[NL80211_ATTR_WDEV]) return ERR_PTR(-EINVAL); if (attrs[NL80211_ATTR_WIPHY]) rdev = cfg80211_rdev_by_wiphy_idx( nla_get_u32(attrs[NL80211_ATTR_WIPHY])); if (attrs[NL80211_ATTR_WDEV]) { u64 wdev_id = nla_get_u64(attrs[NL80211_ATTR_WDEV]); struct wireless_dev *wdev; bool found = false; tmp = cfg80211_rdev_by_wiphy_idx(wdev_id >> 32); if (tmp) { /* make sure wdev exists */ list_for_each_entry(wdev, &tmp->wiphy.wdev_list, list) { if (wdev->identifier != (u32)wdev_id) continue; found = true; break; } if (!found) tmp = NULL; if (rdev && tmp != rdev) return ERR_PTR(-EINVAL); rdev = tmp; } } if (attrs[NL80211_ATTR_IFINDEX]) { int ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); netdev = __dev_get_by_index(netns, ifindex); if (netdev) { if (netdev->ieee80211_ptr) tmp = wiphy_to_rdev( netdev->ieee80211_ptr->wiphy); else tmp = NULL; /* not wireless device -- return error */ if (!tmp) return ERR_PTR(-EINVAL); /* mismatch -- return error */ if (rdev && tmp != rdev) return ERR_PTR(-EINVAL); rdev = tmp; } } if (!rdev) return ERR_PTR(-ENODEV); if (netns != wiphy_net(&rdev->wiphy)) return ERR_PTR(-ENODEV); return rdev; } /* * This function returns a pointer to the driver * that the genl_info item that is passed refers to. * * The result of this can be a PTR_ERR and hence must * be checked with IS_ERR() for errors. 
*/ static struct cfg80211_registered_device * cfg80211_get_dev_from_info(struct net *netns, struct genl_info *info) { return __cfg80211_rdev_from_attrs(netns, info->attrs); } static int validate_beacon_head(const struct nlattr *attr, struct netlink_ext_ack *extack) { const u8 *data = nla_data(attr); unsigned int len = nla_len(attr); const struct element *elem; const struct ieee80211_mgmt *mgmt = (void *)data; unsigned int fixedlen, hdrlen; bool s1g_bcn; if (len < offsetofend(typeof(*mgmt), frame_control)) goto err; s1g_bcn = ieee80211_is_s1g_beacon(mgmt->frame_control); if (s1g_bcn) { fixedlen = offsetof(struct ieee80211_ext, u.s1g_beacon.variable); hdrlen = offsetof(struct ieee80211_ext, u.s1g_beacon); } else { fixedlen = offsetof(struct ieee80211_mgmt, u.beacon.variable); hdrlen = offsetof(struct ieee80211_mgmt, u.beacon); } if (len < fixedlen) goto err; if (ieee80211_hdrlen(mgmt->frame_control) != hdrlen) goto err; data += fixedlen; len -= fixedlen; for_each_element(elem, data, len) { /* nothing */ } if (for_each_element_completed(elem, data, len)) return 0; err: NL_SET_ERR_MSG_ATTR(extack, attr, "malformed beacon head"); return -EINVAL; } static int validate_ie_attr(const struct nlattr *attr, struct netlink_ext_ack *extack) { const u8 *data = nla_data(attr); unsigned int len = nla_len(attr); const struct element *elem; for_each_element(elem, data, len) { /* nothing */ } if (for_each_element_completed(elem, data, len)) return 0; NL_SET_ERR_MSG_ATTR(extack, attr, "malformed information elements"); return -EINVAL; } static int validate_he_capa(const struct nlattr *attr, struct netlink_ext_ack *extack) { if (!ieee80211_he_capa_size_ok(nla_data(attr), nla_len(attr))) return -EINVAL; return 0; } /* policy for the attributes */ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR]; static const struct nla_policy nl80211_ftm_responder_policy[NL80211_FTM_RESP_ATTR_MAX + 1] = { [NL80211_FTM_RESP_ATTR_ENABLED] = { .type = NLA_FLAG, }, [NL80211_FTM_RESP_ATTR_LCI] 
= { .type = NLA_BINARY, .len = U8_MAX }, [NL80211_FTM_RESP_ATTR_CIVICLOC] = { .type = NLA_BINARY, .len = U8_MAX }, }; static const struct nla_policy nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = { [NL80211_PMSR_FTM_REQ_ATTR_ASAP] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE] = { .type = NLA_U32 }, [NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP] = NLA_POLICY_MAX(NLA_U8, 15), [NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD] = { .type = NLA_U16 }, [NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION] = NLA_POLICY_MAX(NLA_U8, 15), [NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] = NLA_POLICY_MAX(NLA_U8, 31), [NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES] = { .type = NLA_U8 }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR] = { .type = NLA_U8 }, }; static const struct nla_policy nl80211_pmsr_req_data_policy[NL80211_PMSR_TYPE_MAX + 1] = { [NL80211_PMSR_TYPE_FTM] = NLA_POLICY_NESTED(nl80211_pmsr_ftm_req_attr_policy), }; static const struct nla_policy nl80211_pmsr_req_attr_policy[NL80211_PMSR_REQ_ATTR_MAX + 1] = { [NL80211_PMSR_REQ_ATTR_DATA] = NLA_POLICY_NESTED(nl80211_pmsr_req_data_policy), [NL80211_PMSR_REQ_ATTR_GET_AP_TSF] = { .type = NLA_FLAG }, }; static const struct nla_policy nl80211_pmsr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = { [NL80211_PMSR_PEER_ATTR_ADDR] = NLA_POLICY_ETH_ADDR, [NL80211_PMSR_PEER_ATTR_CHAN] = NLA_POLICY_NESTED(nl80211_policy), [NL80211_PMSR_PEER_ATTR_REQ] = NLA_POLICY_NESTED(nl80211_pmsr_req_attr_policy), [NL80211_PMSR_PEER_ATTR_RESP] = { .type = NLA_REJECT }, }; static const struct nla_policy nl80211_pmsr_attr_policy[NL80211_PMSR_ATTR_MAX + 1] = { [NL80211_PMSR_ATTR_MAX_PEERS] = { .type = NLA_REJECT }, 
[NL80211_PMSR_ATTR_REPORT_AP_TSF] = { .type = NLA_REJECT }, [NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR] = { .type = NLA_REJECT }, [NL80211_PMSR_ATTR_TYPE_CAPA] = { .type = NLA_REJECT }, [NL80211_PMSR_ATTR_PEERS] = NLA_POLICY_NESTED_ARRAY(nl80211_pmsr_peer_attr_policy), }; static const struct nla_policy he_obss_pd_policy[NL80211_HE_OBSS_PD_ATTR_MAX + 1] = { [NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET] = NLA_POLICY_RANGE(NLA_U8, 1, 20), [NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET] = NLA_POLICY_RANGE(NLA_U8, 1, 20), [NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET] = NLA_POLICY_RANGE(NLA_U8, 1, 20), [NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP] = NLA_POLICY_EXACT_LEN(8), [NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP] = NLA_POLICY_EXACT_LEN(8), [NL80211_HE_OBSS_PD_ATTR_SR_CTRL] = { .type = NLA_U8 }, }; static const struct nla_policy he_bss_color_policy[NL80211_HE_BSS_COLOR_ATTR_MAX + 1] = { [NL80211_HE_BSS_COLOR_ATTR_COLOR] = NLA_POLICY_RANGE(NLA_U8, 1, 63), [NL80211_HE_BSS_COLOR_ATTR_DISABLED] = { .type = NLA_FLAG }, [NL80211_HE_BSS_COLOR_ATTR_PARTIAL] = { .type = NLA_FLAG }, }; static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, [NL80211_TXRATE_HT] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_HT_RATES }, [NL80211_TXRATE_VHT] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_txrate_vht)), [NL80211_TXRATE_GI] = { .type = NLA_U8 }, [NL80211_TXRATE_HE] = NLA_POLICY_EXACT_LEN(sizeof(struct nl80211_txrate_he)), [NL80211_TXRATE_HE_GI] = NLA_POLICY_RANGE(NLA_U8, NL80211_RATE_INFO_HE_GI_0_8, NL80211_RATE_INFO_HE_GI_3_2), [NL80211_TXRATE_HE_LTF] = NLA_POLICY_RANGE(NLA_U8, NL80211_RATE_INFO_HE_1XLTF, NL80211_RATE_INFO_HE_4XLTF), }; static const struct nla_policy nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { [NL80211_TID_CONFIG_ATTR_VIF_SUPP] = { .type = NLA_U64 }, [NL80211_TID_CONFIG_ATTR_PEER_SUPP] = { .type = NLA_U64 }, [NL80211_TID_CONFIG_ATTR_OVERRIDE] = { .type = 
NLA_FLAG }, [NL80211_TID_CONFIG_ATTR_TIDS] = NLA_POLICY_RANGE(NLA_U16, 1, 0xff), [NL80211_TID_CONFIG_ATTR_NOACK] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), [NL80211_TID_CONFIG_ATTR_RETRY_SHORT] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_TID_CONFIG_ATTR_RETRY_LONG] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_TID_CONFIG_ATTR_AMPDU_CTRL] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), [NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), [NL80211_TID_CONFIG_ATTR_AMSDU_CTRL] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), [NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE] = NLA_POLICY_MAX(NLA_U8, NL80211_TX_RATE_FIXED), [NL80211_TID_CONFIG_ATTR_TX_RATE] = NLA_POLICY_NESTED(nl80211_txattr_policy), }; static const struct nla_policy nl80211_fils_discovery_policy[NL80211_FILS_DISCOVERY_ATTR_MAX + 1] = { [NL80211_FILS_DISCOVERY_ATTR_INT_MIN] = NLA_POLICY_MAX(NLA_U32, 10000), [NL80211_FILS_DISCOVERY_ATTR_INT_MAX] = NLA_POLICY_MAX(NLA_U32, 10000), [NL80211_FILS_DISCOVERY_ATTR_TMPL] = NLA_POLICY_RANGE(NLA_BINARY, NL80211_FILS_DISCOVERY_TMPL_MIN_LEN, IEEE80211_MAX_DATA_LEN), }; static const struct nla_policy nl80211_unsol_bcast_probe_resp_policy[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX + 1] = { [NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] = NLA_POLICY_MAX(NLA_U32, 20), [NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN } }; static const struct nla_policy sar_specs_policy[NL80211_SAR_ATTR_SPECS_MAX + 1] = { [NL80211_SAR_ATTR_SPECS_POWER] = { .type = NLA_S32 }, [NL80211_SAR_ATTR_SPECS_RANGE_INDEX] = {.type = NLA_U32 }, }; static const struct nla_policy sar_policy[NL80211_SAR_ATTR_MAX + 1] = { [NL80211_SAR_ATTR_TYPE] = NLA_POLICY_MAX(NLA_U32, NUM_NL80211_SAR_TYPE), [NL80211_SAR_ATTR_SPECS] = NLA_POLICY_NESTED_ARRAY(sar_specs_policy), }; static const struct nla_policy nl80211_mbssid_config_policy[NL80211_MBSSID_CONFIG_ATTR_MAX + 1] = { [NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES] = NLA_POLICY_MIN(NLA_U8, 
2), [NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_MBSSID_CONFIG_ATTR_INDEX] = { .type = NLA_U8 }, [NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX] = { .type = NLA_U32 }, [NL80211_MBSSID_CONFIG_ATTR_EMA] = { .type = NLA_FLAG }, }; static const struct nla_policy nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] = { [NL80211_STA_WME_UAPSD_QUEUES] = { .type = NLA_U8 }, [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, }; static const struct netlink_range_validation nl80211_punct_bitmap_range = { .min = 0, .max = 0xffff, }; static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = 20-1 }, [NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED }, [NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_EDMG_CHANNELS] = NLA_POLICY_RANGE(NLA_U8, NL80211_EDMG_CHANNELS_MIN, NL80211_EDMG_CHANNELS_MAX), [NL80211_ATTR_WIPHY_EDMG_BW_CONFIG] = NLA_POLICY_RANGE(NLA_U8, NL80211_EDMG_BW_CONFIG_MIN, NL80211_EDMG_BW_CONFIG_MAX), [NL80211_ATTR_CHANNEL_WIDTH] = { .type = NLA_U32 }, [NL80211_ATTR_CENTER_FREQ1] = { .type = NLA_U32 }, [NL80211_ATTR_CENTER_FREQ1_OFFSET] = NLA_POLICY_RANGE(NLA_U32, 0, 999), [NL80211_ATTR_CENTER_FREQ2] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_RETRY_SHORT] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_ATTR_WIPHY_RETRY_LONG] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_RTS_THRESHOLD] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_COVERAGE_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_WIPHY_DYN_ACK] = { .type = NLA_FLAG }, [NL80211_ATTR_IFTYPE] = NLA_POLICY_MAX(NLA_U32, NL80211_IFTYPE_MAX), [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, [NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, [NL80211_ATTR_MAC] = 
NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN), [NL80211_ATTR_PREV_BSSID] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN), [NL80211_ATTR_KEY] = { .type = NLA_NESTED, }, [NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, [NL80211_ATTR_KEY_IDX] = NLA_POLICY_MAX(NLA_U8, 7), [NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 }, [NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG }, [NL80211_ATTR_KEY_SEQ] = { .type = NLA_BINARY, .len = 16 }, [NL80211_ATTR_KEY_TYPE] = NLA_POLICY_MAX(NLA_U32, NUM_NL80211_KEYTYPES), [NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 }, [NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 }, [NL80211_ATTR_BEACON_HEAD] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_beacon_head, IEEE80211_MAX_DATA_LEN), [NL80211_ATTR_BEACON_TAIL] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr, IEEE80211_MAX_DATA_LEN), [NL80211_ATTR_STA_AID] = NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID), [NL80211_ATTR_STA_FLAGS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_LISTEN_INTERVAL] = { .type = NLA_U16 }, [NL80211_ATTR_STA_SUPPORTED_RATES] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, [NL80211_ATTR_STA_PLINK_ACTION] = NLA_POLICY_MAX(NLA_U8, NUM_NL80211_PLINK_ACTIONS - 1), [NL80211_ATTR_STA_TX_POWER_SETTING] = NLA_POLICY_RANGE(NLA_U8, NL80211_TX_POWER_AUTOMATIC, NL80211_TX_POWER_FIXED), [NL80211_ATTR_STA_TX_POWER] = { .type = NLA_S16 }, [NL80211_ATTR_STA_VLAN] = { .type = NLA_U32 }, [NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ }, [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT, /* allow 3 for NUL-termination, we used to declare this NLA_STRING */ [NL80211_ATTR_REG_ALPHA2] = NLA_POLICY_RANGE(NLA_BINARY, 2, 3), [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_BASIC_RATES] = { 
.type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, [NL80211_ATTR_BSS_HT_OPMODE] = { .type = NLA_U16 }, [NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED }, [NL80211_ATTR_SUPPORT_MESH_AUTH] = { .type = NLA_FLAG }, [NL80211_ATTR_HT_CAPABILITY] = NLA_POLICY_EXACT_LEN_WARN(NL80211_HT_CAPABILITY_LEN), [NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 }, [NL80211_ATTR_IE] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr, IEEE80211_MAX_DATA_LEN), [NL80211_ATTR_SCAN_FREQUENCIES] = { .type = NLA_NESTED }, [NL80211_ATTR_SCAN_SSIDS] = { .type = NLA_NESTED }, [NL80211_ATTR_SSID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_SSID_LEN }, [NL80211_ATTR_AUTH_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_REASON_CODE] = { .type = NLA_U16 }, [NL80211_ATTR_FREQ_FIXED] = { .type = NLA_FLAG }, [NL80211_ATTR_TIMED_OUT] = { .type = NLA_FLAG }, [NL80211_ATTR_USE_MFP] = NLA_POLICY_RANGE(NLA_U32, NL80211_MFP_NO, NL80211_MFP_OPTIONAL), [NL80211_ATTR_STA_FLAGS2] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_sta_flag_update)), [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 }, [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG }, [NL80211_ATTR_CONTROL_PORT_OVER_NL80211] = { .type = NLA_FLAG }, [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, [NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, [NL80211_ATTR_PID] = { .type = NLA_U32 }, [NL80211_ATTR_4ADDR] = { .type = NLA_U8 }, [NL80211_ATTR_PMKID] = NLA_POLICY_EXACT_LEN_WARN(WLAN_PMKID_LEN), [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, [NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED }, [NL80211_ATTR_FRAME] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, [NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, }, [NL80211_ATTR_PS_STATE] = NLA_POLICY_RANGE(NLA_U32, NL80211_PS_DISABLED, NL80211_PS_ENABLED), 
[NL80211_ATTR_CQM] = { .type = NLA_NESTED, }, [NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG }, [NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 }, [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, [NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, [NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG }, [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_PLINK_STATE] = NLA_POLICY_MAX(NLA_U8, NUM_NL80211_PLINK_STATES - 1), [NL80211_ATTR_MEASUREMENT_DURATION] = { .type = NLA_U16 }, [NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY] = { .type = NLA_FLAG }, [NL80211_ATTR_MESH_PEER_AID] = NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID), [NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 }, [NL80211_ATTR_REKEY_DATA] = { .type = NLA_NESTED }, [NL80211_ATTR_SCAN_SUPP_RATES] = { .type = NLA_NESTED }, [NL80211_ATTR_HIDDEN_SSID] = NLA_POLICY_RANGE(NLA_U32, NL80211_HIDDEN_SSID_NOT_IN_USE, NL80211_HIDDEN_SSID_ZERO_CONTENTS), [NL80211_ATTR_IE_PROBE_RESP] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr, IEEE80211_MAX_DATA_LEN), [NL80211_ATTR_IE_ASSOC_RESP] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr, IEEE80211_MAX_DATA_LEN), [NL80211_ATTR_ROAM_SUPPORT] = { .type = NLA_FLAG }, [NL80211_ATTR_STA_WME] = NLA_POLICY_NESTED(nl80211_sta_wme_policy), [NL80211_ATTR_SCHED_SCAN_MATCH] = { .type = NLA_NESTED }, [NL80211_ATTR_TX_NO_CCK_RATE] = { .type = NLA_FLAG }, [NL80211_ATTR_TDLS_ACTION] = { .type = NLA_U8 }, [NL80211_ATTR_TDLS_DIALOG_TOKEN] = { .type = NLA_U8 }, [NL80211_ATTR_TDLS_OPERATION] = { .type = NLA_U8 }, [NL80211_ATTR_TDLS_SUPPORT] = { .type = NLA_FLAG }, [NL80211_ATTR_TDLS_EXTERNAL_SETUP] = { .type = NLA_FLAG }, [NL80211_ATTR_TDLS_INITIATOR] = { .type = 
NLA_FLAG }, [NL80211_ATTR_DONT_WAIT_FOR_ACK] = { .type = NLA_FLAG }, [NL80211_ATTR_PROBE_RESP] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, [NL80211_ATTR_DFS_REGION] = { .type = NLA_U8 }, [NL80211_ATTR_DISABLE_HT] = { .type = NLA_FLAG }, [NL80211_ATTR_HT_CAPABILITY_MASK] = { .len = NL80211_HT_CAPABILITY_LEN }, [NL80211_ATTR_NOACK_MAP] = { .type = NLA_U16 }, [NL80211_ATTR_INACTIVITY_TIMEOUT] = { .type = NLA_U16 }, [NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 }, [NL80211_ATTR_WDEV] = { .type = NLA_U64 }, [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 }, /* need to include at least Auth Transaction and Status Code */ [NL80211_ATTR_AUTH_DATA] = NLA_POLICY_MIN_LEN(4), [NL80211_ATTR_VHT_CAPABILITY] = NLA_POLICY_EXACT_LEN_WARN(NL80211_VHT_CAPABILITY_LEN), [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_P2P_CTWINDOW] = NLA_POLICY_MAX(NLA_U8, 127), [NL80211_ATTR_P2P_OPPPS] = NLA_POLICY_MAX(NLA_U8, 1), [NL80211_ATTR_LOCAL_MESH_POWER_MODE] = NLA_POLICY_RANGE(NLA_U32, NL80211_MESH_POWER_UNKNOWN + 1, NL80211_MESH_POWER_MAX), [NL80211_ATTR_ACL_POLICY] = {. 
type = NLA_U32 }, [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, [NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, }, [NL80211_ATTR_SPLIT_WIPHY_DUMP] = { .type = NLA_FLAG, }, [NL80211_ATTR_DISABLE_VHT] = { .type = NLA_FLAG }, [NL80211_ATTR_VHT_CAPABILITY_MASK] = { .len = NL80211_VHT_CAPABILITY_LEN, }, [NL80211_ATTR_MDID] = { .type = NLA_U16 }, [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, [NL80211_ATTR_CRIT_PROT_ID] = { .type = NLA_U16 }, [NL80211_ATTR_MAX_CRIT_PROT_DURATION] = NLA_POLICY_MAX(NLA_U16, NL80211_CRIT_PROTO_MAX_DURATION), [NL80211_ATTR_PEER_AID] = NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID), [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 }, [NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG }, [NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED }, [NL80211_ATTR_CNTDWN_OFFS_BEACON] = { .type = NLA_BINARY }, [NL80211_ATTR_CNTDWN_OFFS_PRESP] = { .type = NLA_BINARY }, [NL80211_ATTR_STA_SUPPORTED_CHANNELS] = NLA_POLICY_MIN_LEN(2), /* * The value of the Length field of the Supported Operating * Classes element is between 2 and 253. 
*/ [NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = NLA_POLICY_RANGE(NLA_BINARY, 2, 253), [NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG }, [NL80211_ATTR_OPMODE_NOTIF] = { .type = NLA_U8 }, [NL80211_ATTR_VENDOR_ID] = { .type = NLA_U32 }, [NL80211_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 }, [NL80211_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, [NL80211_ATTR_QOS_MAP] = NLA_POLICY_RANGE(NLA_BINARY, IEEE80211_QOS_MAP_LEN_MIN, IEEE80211_QOS_MAP_LEN_MAX), [NL80211_ATTR_MAC_HINT] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN), [NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 }, [NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 }, [NL80211_ATTR_SOCKET_OWNER] = { .type = NLA_FLAG }, [NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY }, [NL80211_ATTR_USE_RRM] = { .type = NLA_FLAG }, [NL80211_ATTR_TSID] = NLA_POLICY_MAX(NLA_U8, IEEE80211_NUM_TIDS - 1), [NL80211_ATTR_USER_PRIO] = NLA_POLICY_MAX(NLA_U8, IEEE80211_NUM_UPS - 1), [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, [NL80211_ATTR_OPER_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_MAC_MASK] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN), [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 }, [NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG }, [NL80211_ATTR_PBSS] = { .type = NLA_FLAG }, [NL80211_ATTR_BSS_SELECT] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_SUPPORT_P2P_PS] = NLA_POLICY_MAX(NLA_U8, NUM_NL80211_P2P_PS_STATUS - 1), [NL80211_ATTR_MU_MIMO_GROUP_DATA] = { .len = VHT_MUMIMO_GROUPS_DATA_LEN }, [NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN), [NL80211_ATTR_NAN_MASTER_PREF] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_ATTR_BANDS] = { .type = NLA_U32 }, [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, [NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY, .len = FILS_MAX_KEK_LEN }, [NL80211_ATTR_FILS_NONCES] = NLA_POLICY_EXACT_LEN_WARN(2 * FILS_NONCE_LEN), 
[NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED] = { .type = NLA_FLAG, }, [NL80211_ATTR_BSSID] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN), [NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI] = { .type = NLA_S8 }, [NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST] = { .len = sizeof(struct nl80211_bss_select_rssi_adjust) }, [NL80211_ATTR_TIMEOUT_REASON] = { .type = NLA_U32 }, [NL80211_ATTR_FILS_ERP_USERNAME] = { .type = NLA_BINARY, .len = FILS_ERP_MAX_USERNAME_LEN }, [NL80211_ATTR_FILS_ERP_REALM] = { .type = NLA_BINARY, .len = FILS_ERP_MAX_REALM_LEN }, [NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] = { .type = NLA_U16 }, [NL80211_ATTR_FILS_ERP_RRK] = { .type = NLA_BINARY, .len = FILS_ERP_MAX_RRK_LEN }, [NL80211_ATTR_FILS_CACHE_ID] = NLA_POLICY_EXACT_LEN_WARN(2), [NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN }, [NL80211_ATTR_PMKR0_NAME] = NLA_POLICY_EXACT_LEN(WLAN_PMK_NAME_LEN), [NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG }, [NL80211_ATTR_EXTERNAL_AUTH_SUPPORT] = { .type = NLA_FLAG }, [NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 }, [NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 }, [NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 }, [NL80211_ATTR_HE_CAPABILITY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_he_capa, NL80211_HE_MAX_CAPABILITY_LEN), [NL80211_ATTR_FTM_RESPONDER] = NLA_POLICY_NESTED(nl80211_ftm_responder_policy), [NL80211_ATTR_TIMEOUT] = NLA_POLICY_MIN(NLA_U32, 1), [NL80211_ATTR_PEER_MEASUREMENTS] = NLA_POLICY_NESTED(nl80211_pmsr_attr_policy), [NL80211_ATTR_AIRTIME_WEIGHT] = NLA_POLICY_MIN(NLA_U16, 1), [NL80211_ATTR_SAE_PASSWORD] = { .type = NLA_BINARY, .len = SAE_PASSWORD_MAX_LEN }, [NL80211_ATTR_TWT_RESPONDER] = { .type = NLA_FLAG }, [NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy), [NL80211_ATTR_VLAN_ID] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), [NL80211_ATTR_HE_BSS_COLOR] = NLA_POLICY_NESTED(he_bss_color_policy), [NL80211_ATTR_TID_CONFIG] = NLA_POLICY_NESTED_ARRAY(nl80211_tid_config_attr_policy), [NL80211_ATTR_CONTROL_PORT_NO_PREAUTH] = { .type = 
NLA_FLAG }, [NL80211_ATTR_PMK_LIFETIME] = NLA_POLICY_MIN(NLA_U32, 1), [NL80211_ATTR_PMK_REAUTH_THRESHOLD] = NLA_POLICY_RANGE(NLA_U8, 1, 100), [NL80211_ATTR_RECEIVE_MULTICAST] = { .type = NLA_FLAG }, [NL80211_ATTR_WIPHY_FREQ_OFFSET] = NLA_POLICY_RANGE(NLA_U32, 0, 999), [NL80211_ATTR_SCAN_FREQ_KHZ] = { .type = NLA_NESTED }, [NL80211_ATTR_HE_6GHZ_CAPABILITY] = NLA_POLICY_EXACT_LEN(sizeof(struct ieee80211_he_6ghz_capa)), [NL80211_ATTR_FILS_DISCOVERY] = NLA_POLICY_NESTED(nl80211_fils_discovery_policy), [NL80211_ATTR_UNSOL_BCAST_PROBE_RESP] = NLA_POLICY_NESTED(nl80211_unsol_bcast_probe_resp_policy), [NL80211_ATTR_S1G_CAPABILITY] = NLA_POLICY_EXACT_LEN(IEEE80211_S1G_CAPABILITY_LEN), [NL80211_ATTR_S1G_CAPABILITY_MASK] = NLA_POLICY_EXACT_LEN(IEEE80211_S1G_CAPABILITY_LEN), [NL80211_ATTR_SAE_PWE] = NLA_POLICY_RANGE(NLA_U8, NL80211_SAE_PWE_HUNT_AND_PECK, NL80211_SAE_PWE_BOTH), [NL80211_ATTR_RECONNECT_REQUESTED] = { .type = NLA_REJECT }, [NL80211_ATTR_SAR_SPEC] = NLA_POLICY_NESTED(sar_policy), [NL80211_ATTR_DISABLE_HE] = { .type = NLA_FLAG }, [NL80211_ATTR_OBSS_COLOR_BITMAP] = { .type = NLA_U64 }, [NL80211_ATTR_COLOR_CHANGE_COUNT] = { .type = NLA_U8 }, [NL80211_ATTR_COLOR_CHANGE_COLOR] = { .type = NLA_U8 }, [NL80211_ATTR_COLOR_CHANGE_ELEMS] = NLA_POLICY_NESTED(nl80211_policy), [NL80211_ATTR_MBSSID_CONFIG] = NLA_POLICY_NESTED(nl80211_mbssid_config_policy), [NL80211_ATTR_MBSSID_ELEMS] = { .type = NLA_NESTED }, [NL80211_ATTR_RADAR_BACKGROUND] = { .type = NLA_FLAG }, [NL80211_ATTR_AP_SETTINGS_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_EHT_CAPABILITY] = NLA_POLICY_RANGE(NLA_BINARY, NL80211_EHT_MIN_CAPABILITY_LEN, NL80211_EHT_MAX_CAPABILITY_LEN), [NL80211_ATTR_DISABLE_EHT] = { .type = NLA_FLAG }, [NL80211_ATTR_MLO_LINKS] = NLA_POLICY_NESTED_ARRAY(nl80211_policy), [NL80211_ATTR_MLO_LINK_ID] = NLA_POLICY_RANGE(NLA_U8, 0, IEEE80211_MLD_MAX_NUM_LINKS), [NL80211_ATTR_MLD_ADDR] = NLA_POLICY_EXACT_LEN(ETH_ALEN), [NL80211_ATTR_MLO_SUPPORT] = { .type = NLA_FLAG }, 
[NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT },
	[NL80211_ATTR_PUNCT_BITMAP] =
		NLA_POLICY_FULL_RANGE(NLA_U32, &nl80211_punct_bitmap_range),
	[NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS] = { .type = NLA_U16 },
	[NL80211_ATTR_HW_TIMESTAMP_ENABLED] = { .type = NLA_FLAG },
	[NL80211_ATTR_EMA_RNR_ELEMS] = { .type = NLA_NESTED },
	[NL80211_ATTR_MLO_LINK_DISABLED] = { .type = NLA_FLAG },
};

/*
 * policy for the key attributes
 *
 * nl80211_parse_key_new() validates the attributes nested inside a key
 * container against this table.
 */
static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = {
	[NL80211_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN },
	[NL80211_KEY_IDX] = { .type = NLA_U8 },
	[NL80211_KEY_CIPHER] = { .type = NLA_U32 },
	[NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 16 },
	[NL80211_KEY_DEFAULT] = { .type = NLA_FLAG },
	[NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG },
	[NL80211_KEY_TYPE] = NLA_POLICY_MAX(NLA_U32, NUM_NL80211_KEYTYPES - 1),
	[NL80211_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED },
	[NL80211_KEY_MODE] = NLA_POLICY_RANGE(NLA_U8, 0, NL80211_KEY_SET_TX),
};

/*
 * policy for the key default flags
 *
 * Used by the key parsers for the nest carried in the "default types"
 * attribute; both entries are presence-only flags.
 */
static const struct nla_policy
nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = {
	[NL80211_KEY_DEFAULT_TYPE_UNICAST] = { .type = NLA_FLAG },
	[NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG },
};

#ifdef CONFIG_PM
/* policy for WoWLAN attributes */
static const struct nla_policy
nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = {
	[NL80211_WOWLAN_TRIG_ANY] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_DISCONNECT] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_MAGIC_PKT] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_PKT_PATTERN] = { .type = NLA_NESTED },
	[NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_RFKILL_RELEASE] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_TCP_CONNECTION] = { .type = NLA_NESTED },
	[NL80211_WOWLAN_TRIG_NET_DETECT] = { .type = NLA_NESTED },
};

/* policy for the attributes nested in a WoWLAN TCP connection trigger */
static const struct nla_policy
nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
	[NL80211_WOWLAN_TCP_SRC_IPV4] = { .type = NLA_U32 },
	[NL80211_WOWLAN_TCP_DST_IPV4] = { .type = NLA_U32 },
	[NL80211_WOWLAN_TCP_DST_MAC] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
	[NL80211_WOWLAN_TCP_SRC_PORT] = { .type = NLA_U16 },
	[NL80211_WOWLAN_TCP_DST_PORT] = { .type = NLA_U16 },
	[NL80211_WOWLAN_TCP_DATA_PAYLOAD] = NLA_POLICY_MIN_LEN(1),
	[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ] = {
		.len = sizeof(struct nl80211_wowlan_tcp_data_seq)
	},
	[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN] = {
		.len = sizeof(struct nl80211_wowlan_tcp_data_token)
	},
	[NL80211_WOWLAN_TCP_DATA_INTERVAL] = { .type = NLA_U32 },
	[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = NLA_POLICY_MIN_LEN(1),
	[NL80211_WOWLAN_TCP_WAKE_MASK] = NLA_POLICY_MIN_LEN(1),
};
#endif /* CONFIG_PM */

/* policy for coalesce rule attributes */
static const struct nla_policy
nl80211_coalesce_policy[NUM_NL80211_ATTR_COALESCE_RULE] = {
	[NL80211_ATTR_COALESCE_RULE_DELAY] = { .type = NLA_U32 },
	[NL80211_ATTR_COALESCE_RULE_CONDITION] =
		NLA_POLICY_RANGE(NLA_U32,
				 NL80211_COALESCE_CONDITION_MATCH,
				 NL80211_COALESCE_CONDITION_NO_MATCH),
	[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN] = { .type = NLA_NESTED },
};

/* policy for GTK rekey offload attributes */
static const struct nla_policy
nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
	[NL80211_REKEY_DATA_KEK] = {
		.type = NLA_BINARY,
		.len = NL80211_KEK_EXT_LEN
	},
	[NL80211_REKEY_DATA_KCK] = {
		.type = NLA_BINARY,
		.len = NL80211_KCK_EXT_LEN_32
	},
	[NL80211_REKEY_DATA_REPLAY_CTR] =
		NLA_POLICY_EXACT_LEN(NL80211_REPLAY_CTR_LEN),
	[NL80211_REKEY_DATA_AKM] = { .type = NLA_U32 },
};

/*
 * policy for per-band RSSI values: one signed 32-bit attribute per band.
 * Referenced as the nested policy of
 * NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI in nl80211_match_policy.
 */
static const struct nla_policy
nl80211_match_band_rssi_policy[NUM_NL80211_BANDS] = {
	[NL80211_BAND_2GHZ] = { .type = NLA_S32 },
	[NL80211_BAND_5GHZ] = { .type = NLA_S32 },
	[NL80211_BAND_6GHZ] = { .type = NLA_S32 },
	[NL80211_BAND_60GHZ] = { .type = NLA_S32 },
	[NL80211_BAND_LC] = { .type = NLA_S32 },
};

/* policy for scheduled scan match attributes */
static const struct nla_policy
nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = {
	[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = {
		.type = NLA_BINARY,
		.len = IEEE80211_MAX_SSID_LEN
	},
	[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] =
		NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
	[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 },
	[NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI] =
		NLA_POLICY_NESTED(nl80211_match_band_rssi_policy),
};

/* policy for scheduled scan plan attributes */
static const struct nla_policy
nl80211_plan_policy[NL80211_SCHED_SCAN_PLAN_MAX + 1] = {
	[NL80211_SCHED_SCAN_PLAN_INTERVAL] = { .type = NLA_U32 },
	[NL80211_SCHED_SCAN_PLAN_ITERATIONS] = { .type = NLA_U32 },
};

/* policy for BSS selection attributes */
static const struct nla_policy
nl80211_bss_select_policy[NL80211_BSS_SELECT_ATTR_MAX + 1] = {
	[NL80211_BSS_SELECT_ATTR_RSSI] = { .type = NLA_FLAG },
	[NL80211_BSS_SELECT_ATTR_BAND_PREF] = { .type = NLA_U32 },
	[NL80211_BSS_SELECT_ATTR_RSSI_ADJUST] = {
		.len = sizeof(struct nl80211_bss_select_rssi_adjust)
	},
};

/* policy for NAN function attributes */
static const struct nla_policy
nl80211_nan_func_policy[NL80211_NAN_FUNC_ATTR_MAX + 1] = {
	[NL80211_NAN_FUNC_TYPE] =
		NLA_POLICY_MAX(NLA_U8, NL80211_NAN_FUNC_MAX_TYPE),
	[NL80211_NAN_FUNC_SERVICE_ID] = {
		.len = NL80211_NAN_FUNC_SERVICE_ID_LEN
	},
	[NL80211_NAN_FUNC_PUBLISH_TYPE] = { .type = NLA_U8 },
	[NL80211_NAN_FUNC_PUBLISH_BCAST] = { .type = NLA_FLAG },
	[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE] = { .type = NLA_FLAG },
	[NL80211_NAN_FUNC_FOLLOW_UP_ID] = { .type = NLA_U8 },
	[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] = { .type = NLA_U8 },
	[NL80211_NAN_FUNC_FOLLOW_UP_DEST] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
	[NL80211_NAN_FUNC_CLOSE_RANGE] = { .type = NLA_FLAG },
	[NL80211_NAN_FUNC_TTL] = { .type = NLA_U32 },
	[NL80211_NAN_FUNC_SERVICE_INFO] = {
		.type = NLA_BINARY,
		.len = NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN
	},
	[NL80211_NAN_FUNC_SRF] = { .type = NLA_NESTED },
	[NL80211_NAN_FUNC_RX_MATCH_FILTER] = { .type = NLA_NESTED },
	[NL80211_NAN_FUNC_TX_MATCH_FILTER] = { .type = NLA_NESTED },
	[NL80211_NAN_FUNC_INSTANCE_ID] = { .type = NLA_U8 },
[NL80211_NAN_FUNC_TERM_REASON] = { .type = NLA_U8 }, }; /* policy for Service Response Filter attributes */ static const struct nla_policy nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = { [NL80211_NAN_SRF_INCLUDE] = { .type = NLA_FLAG }, [NL80211_NAN_SRF_BF] = { .type = NLA_BINARY, .len = NL80211_NAN_FUNC_SRF_MAX_LEN }, [NL80211_NAN_SRF_BF_IDX] = { .type = NLA_U8 }, [NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED }, }; /* policy for packet pattern attributes */ static const struct nla_policy nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = { [NL80211_PKTPAT_MASK] = { .type = NLA_BINARY, }, [NL80211_PKTPAT_PATTERN] = { .type = NLA_BINARY, }, [NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 }, }; static int nl80211_prepare_wdev_dump(struct netlink_callback *cb, struct cfg80211_registered_device **rdev, struct wireless_dev **wdev, struct nlattr **attrbuf) { int err; if (!cb->args[0]) { struct nlattr **attrbuf_free = NULL; if (!attrbuf) { attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL); if (!attrbuf) return -ENOMEM; attrbuf_free = attrbuf; } err = nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, attrbuf, nl80211_fam.maxattr, nl80211_policy, NULL); if (err) { kfree(attrbuf_free); return err; } rtnl_lock(); *wdev = __cfg80211_wdev_from_attrs(NULL, sock_net(cb->skb->sk), attrbuf); kfree(attrbuf_free); if (IS_ERR(*wdev)) { rtnl_unlock(); return PTR_ERR(*wdev); } *rdev = wiphy_to_rdev((*wdev)->wiphy); mutex_lock(&(*rdev)->wiphy.mtx); rtnl_unlock(); /* 0 is the first index - add 1 to parse only once */ cb->args[0] = (*rdev)->wiphy_idx + 1; cb->args[1] = (*wdev)->identifier; } else { /* subtract the 1 again here */ struct wiphy *wiphy; struct wireless_dev *tmp; rtnl_lock(); wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); if (!wiphy) { rtnl_unlock(); return -ENODEV; } *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; list_for_each_entry(tmp, &(*rdev)->wiphy.wdev_list, list) { if (tmp->identifier == cb->args[1]) { *wdev = tmp; break; } } 
if (!*wdev) { rtnl_unlock(); return -ENODEV; } mutex_lock(&(*rdev)->wiphy.mtx); rtnl_unlock(); } return 0; } /* message building helper */ void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq, int flags, u8 cmd) { /* since there is no private header just add the generic one */ return genlmsg_put(skb, portid, seq, &nl80211_fam, flags, cmd); } static int nl80211_msg_put_wmm_rules(struct sk_buff *msg, const struct ieee80211_reg_rule *rule) { int j; struct nlattr *nl_wmm_rules = nla_nest_start_noflag(msg, NL80211_FREQUENCY_ATTR_WMM); if (!nl_wmm_rules) goto nla_put_failure; for (j = 0; j < IEEE80211_NUM_ACS; j++) { struct nlattr *nl_wmm_rule = nla_nest_start_noflag(msg, j); if (!nl_wmm_rule) goto nla_put_failure; if (nla_put_u16(msg, NL80211_WMMR_CW_MIN, rule->wmm_rule.client[j].cw_min) || nla_put_u16(msg, NL80211_WMMR_CW_MAX, rule->wmm_rule.client[j].cw_max) || nla_put_u8(msg, NL80211_WMMR_AIFSN, rule->wmm_rule.client[j].aifsn) || nla_put_u16(msg, NL80211_WMMR_TXOP, rule->wmm_rule.client[j].cot)) goto nla_put_failure; nla_nest_end(msg, nl_wmm_rule); } nla_nest_end(msg, nl_wmm_rules); return 0; nla_put_failure: return -ENOBUFS; } static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, struct ieee80211_channel *chan, bool large) { /* Some channels must be completely excluded from the * list to protect old user-space tools from breaking */ if (!large && chan->flags & (IEEE80211_CHAN_NO_10MHZ | IEEE80211_CHAN_NO_20MHZ)) return 0; if (!large && chan->freq_offset) return 0; if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ, chan->center_freq)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_OFFSET, chan->freq_offset)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_PSD) && nla_put_s8(msg, NL80211_FREQUENCY_ATTR_PSD, chan->psd)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_DISABLED) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DISABLED)) goto nla_put_failure; if (chan->flags & IEEE80211_CHAN_NO_IR) { if 
(nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IR)) goto nla_put_failure; if (nla_put_flag(msg, __NL80211_FREQUENCY_ATTR_NO_IBSS)) goto nla_put_failure; } if (chan->flags & IEEE80211_CHAN_RADAR) { if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) goto nla_put_failure; if (large) { u32 time; time = elapsed_jiffies_msecs(chan->dfs_state_entered); if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_STATE, chan->dfs_state)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME, time)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_CAC_TIME, chan->dfs_cac_ms)) goto nla_put_failure; } } if (large) { if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_NO_HT40PLUS) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_PLUS)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_NO_80MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_80MHZ)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_IR_CONCURRENT) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_IR_CONCURRENT)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_NO_10MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_10MHZ)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_NO_HE) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HE)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_1MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_1MHZ)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_2MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_2MHZ)) goto nla_put_failure; if 
((chan->flags & IEEE80211_CHAN_4MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_4MHZ)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_8MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_8MHZ)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_16MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_16MHZ)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_NO_320MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_320MHZ)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_NO_EHT) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_EHT)) goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, DBM_TO_MBM(chan->max_power))) goto nla_put_failure; if (large) { const struct ieee80211_reg_rule *rule = freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq)); if (!IS_ERR_OR_NULL(rule) && rule->has_wmm) { if (nl80211_msg_put_wmm_rules(msg, rule)) goto nla_put_failure; } } return 0; nla_put_failure: return -ENOBUFS; } static bool nl80211_put_txq_stats(struct sk_buff *msg, struct cfg80211_txq_stats *txqstats, int attrtype) { struct nlattr *txqattr; #define PUT_TXQVAL_U32(attr, memb) do { \ if (txqstats->filled & BIT(NL80211_TXQ_STATS_ ## attr) && \ nla_put_u32(msg, NL80211_TXQ_STATS_ ## attr, txqstats->memb)) \ return false; \ } while (0) txqattr = nla_nest_start_noflag(msg, attrtype); if (!txqattr) return false; PUT_TXQVAL_U32(BACKLOG_BYTES, backlog_bytes); PUT_TXQVAL_U32(BACKLOG_PACKETS, backlog_packets); PUT_TXQVAL_U32(FLOWS, flows); PUT_TXQVAL_U32(DROPS, drops); PUT_TXQVAL_U32(ECN_MARKS, ecn_marks); PUT_TXQVAL_U32(OVERLIMIT, overlimit); PUT_TXQVAL_U32(OVERMEMORY, overmemory); PUT_TXQVAL_U32(COLLISIONS, collisions); PUT_TXQVAL_U32(TX_BYTES, tx_bytes); PUT_TXQVAL_U32(TX_PACKETS, tx_packets); PUT_TXQVAL_U32(MAX_FLOWS, max_flows); nla_nest_end(msg, txqattr); #undef PUT_TXQVAL_U32 return true; } /* netlink command implementations */ /** * nl80211_link_id - return link ID * @attrs: attributes to look at * * Returns: the link ID or 0 if not 
given * * Note this function doesn't do any validation of the link * ID validity wrt. links that were actually added, so it must * be called only from ops with %NL80211_FLAG_MLO_VALID_LINK_ID * or if additional validation is done. */ static unsigned int nl80211_link_id(struct nlattr **attrs) { struct nlattr *linkid = attrs[NL80211_ATTR_MLO_LINK_ID]; if (!linkid) return 0; return nla_get_u8(linkid); } static int nl80211_link_id_or_invalid(struct nlattr **attrs) { struct nlattr *linkid = attrs[NL80211_ATTR_MLO_LINK_ID]; if (!linkid) return -1; return nla_get_u8(linkid); } struct key_parse { struct key_params p; int idx; int type; bool def, defmgmt, defbeacon; bool def_uni, def_multi; }; static int nl80211_parse_key_new(struct genl_info *info, struct nlattr *key, struct key_parse *k) { struct nlattr *tb[NL80211_KEY_MAX + 1]; int err = nla_parse_nested_deprecated(tb, NL80211_KEY_MAX, key, nl80211_key_policy, info->extack); if (err) return err; k->def = !!tb[NL80211_KEY_DEFAULT]; k->defmgmt = !!tb[NL80211_KEY_DEFAULT_MGMT]; k->defbeacon = !!tb[NL80211_KEY_DEFAULT_BEACON]; if (k->def) { k->def_uni = true; k->def_multi = true; } if (k->defmgmt || k->defbeacon) k->def_multi = true; if (tb[NL80211_KEY_IDX]) k->idx = nla_get_u8(tb[NL80211_KEY_IDX]); if (tb[NL80211_KEY_DATA]) { k->p.key = nla_data(tb[NL80211_KEY_DATA]); k->p.key_len = nla_len(tb[NL80211_KEY_DATA]); } if (tb[NL80211_KEY_SEQ]) { k->p.seq = nla_data(tb[NL80211_KEY_SEQ]); k->p.seq_len = nla_len(tb[NL80211_KEY_SEQ]); } if (tb[NL80211_KEY_CIPHER]) k->p.cipher = nla_get_u32(tb[NL80211_KEY_CIPHER]); if (tb[NL80211_KEY_TYPE]) k->type = nla_get_u32(tb[NL80211_KEY_TYPE]); if (tb[NL80211_KEY_DEFAULT_TYPES]) { struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES]; err = nla_parse_nested_deprecated(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1, tb[NL80211_KEY_DEFAULT_TYPES], nl80211_key_default_policy, info->extack); if (err) return err; k->def_uni = kdt[NL80211_KEY_DEFAULT_TYPE_UNICAST]; k->def_multi = 
kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST]; } if (tb[NL80211_KEY_MODE]) k->p.mode = nla_get_u8(tb[NL80211_KEY_MODE]); return 0; } static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k) { if (info->attrs[NL80211_ATTR_KEY_DATA]) { k->p.key = nla_data(info->attrs[NL80211_ATTR_KEY_DATA]); k->p.key_len = nla_len(info->attrs[NL80211_ATTR_KEY_DATA]); } if (info->attrs[NL80211_ATTR_KEY_SEQ]) { k->p.seq = nla_data(info->attrs[NL80211_ATTR_KEY_SEQ]); k->p.seq_len = nla_len(info->attrs[NL80211_ATTR_KEY_SEQ]); } if (info->attrs[NL80211_ATTR_KEY_IDX]) k->idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); if (info->attrs[NL80211_ATTR_KEY_CIPHER]) k->p.cipher = nla_get_u32(info->attrs[NL80211_ATTR_KEY_CIPHER]); k->def = !!info->attrs[NL80211_ATTR_KEY_DEFAULT]; k->defmgmt = !!info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]; if (k->def) { k->def_uni = true; k->def_multi = true; } if (k->defmgmt) k->def_multi = true; if (info->attrs[NL80211_ATTR_KEY_TYPE]) k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]); if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) { struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES]; int err = nla_parse_nested_deprecated(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1, info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES], nl80211_key_default_policy, info->extack); if (err) return err; k->def_uni = kdt[NL80211_KEY_DEFAULT_TYPE_UNICAST]; k->def_multi = kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST]; } return 0; } static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) { int err; memset(k, 0, sizeof(*k)); k->idx = -1; k->type = -1; if (info->attrs[NL80211_ATTR_KEY]) err = nl80211_parse_key_new(info, info->attrs[NL80211_ATTR_KEY], k); else err = nl80211_parse_key_old(info, k); if (err) return err; if ((k->def ? 1 : 0) + (k->defmgmt ? 1 : 0) + (k->defbeacon ? 
1 : 0) > 1) { GENL_SET_ERR_MSG(info, "key with multiple default flags is invalid"); return -EINVAL; } if (k->defmgmt || k->defbeacon) { if (k->def_uni || !k->def_multi) { GENL_SET_ERR_MSG(info, "defmgmt/defbeacon key must be mcast"); return -EINVAL; } } if (k->idx != -1) { if (k->defmgmt) { if (k->idx < 4 || k->idx > 5) { GENL_SET_ERR_MSG(info, "defmgmt key idx not 4 or 5"); return -EINVAL; } } else if (k->defbeacon) { if (k->idx < 6 || k->idx > 7) { GENL_SET_ERR_MSG(info, "defbeacon key idx not 6 or 7"); return -EINVAL; } } else if (k->def) { if (k->idx < 0 || k->idx > 3) { GENL_SET_ERR_MSG(info, "def key idx not 0-3"); return -EINVAL; } } else { if (k->idx < 0 || k->idx > 7) { GENL_SET_ERR_MSG(info, "key idx not 0-7"); return -EINVAL; } } } return 0; } static struct cfg80211_cached_keys * nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, struct genl_info *info, bool *no_ht) { struct nlattr *keys = info->attrs[NL80211_ATTR_KEYS]; struct key_parse parse; struct nlattr *key; struct cfg80211_cached_keys *result; int rem, err, def = 0; bool have_key = false; nla_for_each_nested(key, keys, rem) { have_key = true; break; } if (!have_key) return NULL; result = kzalloc(sizeof(*result), GFP_KERNEL); if (!result) return ERR_PTR(-ENOMEM); result->def = -1; nla_for_each_nested(key, keys, rem) { memset(&parse, 0, sizeof(parse)); parse.idx = -1; err = nl80211_parse_key_new(info, key, &parse); if (err) goto error; err = -EINVAL; if (!parse.p.key) goto error; if (parse.idx < 0 || parse.idx > 3) { GENL_SET_ERR_MSG(info, "key index out of range [0-3]"); goto error; } if (parse.def) { if (def) { GENL_SET_ERR_MSG(info, "only one key can be default"); goto error; } def = 1; result->def = parse.idx; if (!parse.def_uni || !parse.def_multi) goto error; } else if (parse.defmgmt) goto error; err = cfg80211_validate_key_settings(rdev, &parse.p, parse.idx, false, NULL); if (err) goto error; if (parse.p.cipher != WLAN_CIPHER_SUITE_WEP40 && parse.p.cipher != 
WLAN_CIPHER_SUITE_WEP104) { GENL_SET_ERR_MSG(info, "connect key must be WEP"); err = -EINVAL; goto error; } result->params[parse.idx].cipher = parse.p.cipher; result->params[parse.idx].key_len = parse.p.key_len; result->params[parse.idx].key = result->data[parse.idx]; memcpy(result->data[parse.idx], parse.p.key, parse.p.key_len); /* must be WEP key if we got here */ if (no_ht) *no_ht = true; } if (result->def < 0) { err = -EINVAL; GENL_SET_ERR_MSG(info, "need a default/TX key"); goto error; } return result; error: kfree(result); return ERR_PTR(err); } static int nl80211_key_allowed(struct wireless_dev *wdev) { lockdep_assert_wiphy(wdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_MESH_POINT: break; case NL80211_IFTYPE_ADHOC: if (wdev->u.ibss.current_bss) return 0; return -ENOLINK; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->connected) return 0; return -ENOLINK; case NL80211_IFTYPE_NAN: if (wiphy_ext_feature_isset(wdev->wiphy, NL80211_EXT_FEATURE_SECURE_NAN)) return 0; return -EINVAL; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: return -EINVAL; } return 0; } static struct ieee80211_channel *nl80211_get_valid_chan(struct wiphy *wiphy, u32 freq) { struct ieee80211_channel *chan; chan = ieee80211_get_channel_khz(wiphy, freq); if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) return NULL; return chan; } static int nl80211_put_iftypes(struct sk_buff *msg, u32 attr, u16 ifmodes) { struct nlattr *nl_modes = nla_nest_start_noflag(msg, attr); int i; if (!nl_modes) goto nla_put_failure; i = 0; while (ifmodes) { if ((ifmodes & 1) && nla_put_flag(msg, i)) goto nla_put_failure; ifmodes >>= 1; i++; } nla_nest_end(msg, nl_modes); return 0; nla_put_failure: return -ENOBUFS; } static int nl80211_put_iface_combinations(struct wiphy *wiphy, 
struct sk_buff *msg, bool large) { struct nlattr *nl_combis; int i, j; nl_combis = nla_nest_start_noflag(msg, NL80211_ATTR_INTERFACE_COMBINATIONS); if (!nl_combis) goto nla_put_failure; for (i = 0; i < wiphy->n_iface_combinations; i++) { const struct ieee80211_iface_combination *c; struct nlattr *nl_combi, *nl_limits; c = &wiphy->iface_combinations[i]; nl_combi = nla_nest_start_noflag(msg, i + 1); if (!nl_combi) goto nla_put_failure; nl_limits = nla_nest_start_noflag(msg, NL80211_IFACE_COMB_LIMITS); if (!nl_limits) goto nla_put_failure; for (j = 0; j < c->n_limits; j++) { struct nlattr *nl_limit; nl_limit = nla_nest_start_noflag(msg, j + 1); if (!nl_limit) goto nla_put_failure; if (nla_put_u32(msg, NL80211_IFACE_LIMIT_MAX, c->limits[j].max)) goto nla_put_failure; if (nl80211_put_iftypes(msg, NL80211_IFACE_LIMIT_TYPES, c->limits[j].types)) goto nla_put_failure; nla_nest_end(msg, nl_limit); } nla_nest_end(msg, nl_limits); if (c->beacon_int_infra_match && nla_put_flag(msg, NL80211_IFACE_COMB_STA_AP_BI_MATCH)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_IFACE_COMB_NUM_CHANNELS, c->num_different_channels) || nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM, c->max_interfaces)) goto nla_put_failure; if (large && (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, c->radar_detect_widths) || nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, c->radar_detect_regions))) goto nla_put_failure; if (c->beacon_int_min_gcd && nla_put_u32(msg, NL80211_IFACE_COMB_BI_MIN_GCD, c->beacon_int_min_gcd)) goto nla_put_failure; nla_nest_end(msg, nl_combi); } nla_nest_end(msg, nl_combis); return 0; nla_put_failure: return -ENOBUFS; } #ifdef CONFIG_PM static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan->tcp; struct nlattr *nl_tcp; if (!tcp) return 0; nl_tcp = nla_nest_start_noflag(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); if (!nl_tcp) return -ENOBUFS; if 
(nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, tcp->data_payload_max)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, tcp->data_payload_max)) return -ENOBUFS; if (tcp->seq && nla_put_flag(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ)) return -ENOBUFS; if (tcp->tok && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, sizeof(*tcp->tok), tcp->tok)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, tcp->data_interval_max)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, tcp->wake_payload_max)) return -ENOBUFS; nla_nest_end(msg, nl_tcp); return 0; } static int nl80211_send_wowlan(struct sk_buff *msg, struct cfg80211_registered_device *rdev, bool large) { struct nlattr *nl_wowlan; if (!rdev->wiphy.wowlan) return 0; nl_wowlan = nla_nest_start_noflag(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); if (!nl_wowlan) return -ENOBUFS; if (((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) return -ENOBUFS; if (rdev->wiphy.wowlan->n_patterns) { struct nl80211_pattern_support pat = { .max_patterns = rdev->wiphy.wowlan->n_patterns, 
.min_pattern_len = rdev->wiphy.wowlan->pattern_min_len, .max_pattern_len = rdev->wiphy.wowlan->pattern_max_len, .max_pkt_offset = rdev->wiphy.wowlan->max_pkt_offset, }; if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, sizeof(pat), &pat)) return -ENOBUFS; } if ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_NET_DETECT) && nla_put_u32(msg, NL80211_WOWLAN_TRIG_NET_DETECT, rdev->wiphy.wowlan->max_nd_match_sets)) return -ENOBUFS; if (large && nl80211_send_wowlan_tcp_caps(rdev, msg)) return -ENOBUFS; nla_nest_end(msg, nl_wowlan); return 0; } #endif static int nl80211_send_coalesce(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { struct nl80211_coalesce_rule_support rule; if (!rdev->wiphy.coalesce) return 0; rule.max_rules = rdev->wiphy.coalesce->n_rules; rule.max_delay = rdev->wiphy.coalesce->max_delay; rule.pat.max_patterns = rdev->wiphy.coalesce->n_patterns; rule.pat.min_pattern_len = rdev->wiphy.coalesce->pattern_min_len; rule.pat.max_pattern_len = rdev->wiphy.coalesce->pattern_max_len; rule.pat.max_pkt_offset = rdev->wiphy.coalesce->max_pkt_offset; if (nla_put(msg, NL80211_ATTR_COALESCE_RULE, sizeof(rule), &rule)) return -ENOBUFS; return 0; } static int nl80211_send_iftype_data(struct sk_buff *msg, const struct ieee80211_supported_band *sband, const struct ieee80211_sband_iftype_data *iftdata) { const struct ieee80211_sta_he_cap *he_cap = &iftdata->he_cap; const struct ieee80211_sta_eht_cap *eht_cap = &iftdata->eht_cap; if (nl80211_put_iftypes(msg, NL80211_BAND_IFTYPE_ATTR_IFTYPES, iftdata->types_mask)) return -ENOBUFS; if (he_cap->has_he) { if (nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC, sizeof(he_cap->he_cap_elem.mac_cap_info), he_cap->he_cap_elem.mac_cap_info) || nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY, sizeof(he_cap->he_cap_elem.phy_cap_info), he_cap->he_cap_elem.phy_cap_info) || nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET, sizeof(he_cap->he_mcs_nss_supp), &he_cap->he_mcs_nss_supp) || nla_put(msg, 
NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE, sizeof(he_cap->ppe_thres), he_cap->ppe_thres)) return -ENOBUFS; } if (eht_cap->has_eht && he_cap->has_he) { u8 mcs_nss_size, ppe_thresh_size; u16 ppe_thres_hdr; bool is_ap; is_ap = iftdata->types_mask & BIT(NL80211_IFTYPE_AP) || iftdata->types_mask & BIT(NL80211_IFTYPE_P2P_GO); mcs_nss_size = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem, &eht_cap->eht_cap_elem, is_ap); ppe_thres_hdr = get_unaligned_le16(&eht_cap->eht_ppe_thres[0]); ppe_thresh_size = ieee80211_eht_ppe_size(ppe_thres_hdr, eht_cap->eht_cap_elem.phy_cap_info); if (nla_put(msg, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MAC, sizeof(eht_cap->eht_cap_elem.mac_cap_info), eht_cap->eht_cap_elem.mac_cap_info) || nla_put(msg, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY, sizeof(eht_cap->eht_cap_elem.phy_cap_info), eht_cap->eht_cap_elem.phy_cap_info) || nla_put(msg, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET, mcs_nss_size, &eht_cap->eht_mcs_nss_supp) || nla_put(msg, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE, ppe_thresh_size, eht_cap->eht_ppe_thres)) return -ENOBUFS; } if (sband->band == NL80211_BAND_6GHZ && nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA, sizeof(iftdata->he_6ghz_capa), &iftdata->he_6ghz_capa)) return -ENOBUFS; if (iftdata->vendor_elems.data && iftdata->vendor_elems.len && nla_put(msg, NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS, iftdata->vendor_elems.len, iftdata->vendor_elems.data)) return -ENOBUFS; return 0; } static int nl80211_send_band_rateinfo(struct sk_buff *msg, struct ieee80211_supported_band *sband, bool large) { struct nlattr *nl_rates, *nl_rate; struct ieee80211_rate *rate; int i; /* add HT info */ if (sband->ht_cap.ht_supported && (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET, sizeof(sband->ht_cap.mcs), &sband->ht_cap.mcs) || nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA, sband->ht_cap.cap) || nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR, sband->ht_cap.ampdu_factor) || nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, sband->ht_cap.ampdu_density))) return -ENOBUFS; /* 
add VHT info */ if (sband->vht_cap.vht_supported && (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET, sizeof(sband->vht_cap.vht_mcs), &sband->vht_cap.vht_mcs) || nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA, sband->vht_cap.cap))) return -ENOBUFS; if (large && sband->n_iftype_data) { struct nlattr *nl_iftype_data = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_IFTYPE_DATA); const struct ieee80211_sband_iftype_data *iftd; int err; if (!nl_iftype_data) return -ENOBUFS; for_each_sband_iftype_data(sband, i, iftd) { struct nlattr *iftdata; iftdata = nla_nest_start_noflag(msg, i + 1); if (!iftdata) return -ENOBUFS; err = nl80211_send_iftype_data(msg, sband, iftd); if (err) return err; nla_nest_end(msg, iftdata); } nla_nest_end(msg, nl_iftype_data); } /* add EDMG info */ if (large && sband->edmg_cap.channels && (nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_CHANNELS, sband->edmg_cap.channels) || nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_BW_CONFIG, sband->edmg_cap.bw_config))) return -ENOBUFS; /* add bitrates */ nl_rates = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_RATES); if (!nl_rates) return -ENOBUFS; for (i = 0; i < sband->n_bitrates; i++) { nl_rate = nla_nest_start_noflag(msg, i); if (!nl_rate) return -ENOBUFS; rate = &sband->bitrates[i]; if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE, rate->bitrate)) return -ENOBUFS; if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) && nla_put_flag(msg, NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE)) return -ENOBUFS; nla_nest_end(msg, nl_rate); } nla_nest_end(msg, nl_rates); /* S1G capabilities */ if (sband->band == NL80211_BAND_S1GHZ && sband->s1g_cap.s1g && (nla_put(msg, NL80211_BAND_ATTR_S1G_CAPA, sizeof(sband->s1g_cap.cap), sband->s1g_cap.cap) || nla_put(msg, NL80211_BAND_ATTR_S1G_MCS_NSS_SET, sizeof(sband->s1g_cap.nss_mcs), sband->s1g_cap.nss_mcs))) return -ENOBUFS; return 0; } static int nl80211_send_mgmt_stypes(struct sk_buff *msg, const struct ieee80211_txrx_stypes *mgmt_stypes) { u16 stypes; struct nlattr *nl_ftypes, *nl_ifs; enum nl80211_iftype 
ift; int i; if (!mgmt_stypes) return 0; nl_ifs = nla_nest_start_noflag(msg, NL80211_ATTR_TX_FRAME_TYPES); if (!nl_ifs) return -ENOBUFS; for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { nl_ftypes = nla_nest_start_noflag(msg, ift); if (!nl_ftypes) return -ENOBUFS; i = 0; stypes = mgmt_stypes[ift].tx; while (stypes) { if ((stypes & 1) && nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, (i << 4) | IEEE80211_FTYPE_MGMT)) return -ENOBUFS; stypes >>= 1; i++; } nla_nest_end(msg, nl_ftypes); } nla_nest_end(msg, nl_ifs); nl_ifs = nla_nest_start_noflag(msg, NL80211_ATTR_RX_FRAME_TYPES); if (!nl_ifs) return -ENOBUFS; for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { nl_ftypes = nla_nest_start_noflag(msg, ift); if (!nl_ftypes) return -ENOBUFS; i = 0; stypes = mgmt_stypes[ift].rx; while (stypes) { if ((stypes & 1) && nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, (i << 4) | IEEE80211_FTYPE_MGMT)) return -ENOBUFS; stypes >>= 1; i++; } nla_nest_end(msg, nl_ftypes); } nla_nest_end(msg, nl_ifs); return 0; } #define CMD(op, n) \ do { \ if (rdev->ops->op) { \ i++; \ if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ goto nla_put_failure; \ } \ } while (0) static int nl80211_add_commands_unsplit(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { int i = 0; /* * do *NOT* add anything into this function, new things need to be * advertised only to new versions of userspace that can deal with * the split (and they can't possibly care about new features... 
*/ CMD(add_virtual_intf, NEW_INTERFACE); CMD(change_virtual_intf, SET_INTERFACE); CMD(add_key, NEW_KEY); CMD(start_ap, START_AP); CMD(add_station, NEW_STATION); CMD(add_mpath, NEW_MPATH); CMD(update_mesh_config, SET_MESH_CONFIG); CMD(change_bss, SET_BSS); CMD(auth, AUTHENTICATE); CMD(assoc, ASSOCIATE); CMD(deauth, DEAUTHENTICATE); CMD(disassoc, DISASSOCIATE); CMD(join_ibss, JOIN_IBSS); CMD(join_mesh, JOIN_MESH); CMD(set_pmksa, SET_PMKSA); CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); CMD(mgmt_tx, FRAME); CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; } if (rdev->ops->set_monitor_channel || rdev->ops->start_ap || rdev->ops->join_mesh) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) goto nla_put_failure; } if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { CMD(tdls_mgmt, TDLS_MGMT); CMD(tdls_oper, TDLS_OPER); } if (rdev->wiphy.max_sched_scan_reqs) CMD(sched_scan_start, START_SCHED_SCAN); CMD(probe_client, PROBE_CLIENT); CMD(set_noack_map, SET_NOACK_MAP); if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { i++; if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) goto nla_put_failure; } CMD(start_p2p_device, START_P2P_DEVICE); CMD(set_mcast_rate, SET_MCAST_RATE); #ifdef CONFIG_NL80211_TESTMODE CMD(testmode_cmd, TESTMODE); #endif if (rdev->ops->connect || rdev->ops->auth) { i++; if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) goto nla_put_failure; } if (rdev->ops->disconnect || rdev->ops->deauth) { i++; if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) goto nla_put_failure; } return i; nla_put_failure: return -ENOBUFS; } static int nl80211_send_pmsr_ftm_capa(const struct cfg80211_pmsr_capabilities *cap, struct sk_buff *msg) { struct nlattr *ftm; if (!cap->ftm.supported) return 0; ftm 
= nla_nest_start_noflag(msg, NL80211_PMSR_TYPE_FTM); if (!ftm) return -ENOBUFS; if (cap->ftm.asap && nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_ASAP)) return -ENOBUFS; if (cap->ftm.non_asap && nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP)) return -ENOBUFS; if (cap->ftm.request_lci && nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI)) return -ENOBUFS; if (cap->ftm.request_civicloc && nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES, cap->ftm.preambles)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS, cap->ftm.bandwidths)) return -ENOBUFS; if (cap->ftm.max_bursts_exponent >= 0 && nla_put_u32(msg, NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT, cap->ftm.max_bursts_exponent)) return -ENOBUFS; if (cap->ftm.max_ftms_per_burst && nla_put_u32(msg, NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST, cap->ftm.max_ftms_per_burst)) return -ENOBUFS; if (cap->ftm.trigger_based && nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_TRIGGER_BASED)) return -ENOBUFS; if (cap->ftm.non_trigger_based && nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_NON_TRIGGER_BASED)) return -ENOBUFS; nla_nest_end(msg, ftm); return 0; } static int nl80211_send_pmsr_capa(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { const struct cfg80211_pmsr_capabilities *cap = rdev->wiphy.pmsr_capa; struct nlattr *pmsr, *caps; if (!cap) return 0; /* * we don't need to clean up anything here since the caller * will genlmsg_cancel() if we fail */ pmsr = nla_nest_start_noflag(msg, NL80211_ATTR_PEER_MEASUREMENTS); if (!pmsr) return -ENOBUFS; if (nla_put_u32(msg, NL80211_PMSR_ATTR_MAX_PEERS, cap->max_peers)) return -ENOBUFS; if (cap->report_ap_tsf && nla_put_flag(msg, NL80211_PMSR_ATTR_REPORT_AP_TSF)) return -ENOBUFS; if (cap->randomize_mac_addr && nla_put_flag(msg, NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR)) return -ENOBUFS; caps = nla_nest_start_noflag(msg, NL80211_PMSR_ATTR_TYPE_CAPA); if 
(!caps) return -ENOBUFS; if (nl80211_send_pmsr_ftm_capa(cap, msg)) return -ENOBUFS; nla_nest_end(msg, caps); nla_nest_end(msg, pmsr); return 0; } static int nl80211_put_iftype_akm_suites(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { int i; struct nlattr *nested, *nested_akms; const struct wiphy_iftype_akm_suites *iftype_akms; if (!rdev->wiphy.num_iftype_akm_suites || !rdev->wiphy.iftype_akm_suites) return 0; nested = nla_nest_start(msg, NL80211_ATTR_IFTYPE_AKM_SUITES); if (!nested) return -ENOBUFS; for (i = 0; i < rdev->wiphy.num_iftype_akm_suites; i++) { nested_akms = nla_nest_start(msg, i + 1); if (!nested_akms) return -ENOBUFS; iftype_akms = &rdev->wiphy.iftype_akm_suites[i]; if (nl80211_put_iftypes(msg, NL80211_IFTYPE_AKM_ATTR_IFTYPES, iftype_akms->iftypes_mask)) return -ENOBUFS; if (nla_put(msg, NL80211_IFTYPE_AKM_ATTR_SUITES, sizeof(u32) * iftype_akms->n_akm_suites, iftype_akms->akm_suites)) { return -ENOBUFS; } nla_nest_end(msg, nested_akms); } nla_nest_end(msg, nested); return 0; } static int nl80211_put_tid_config_support(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { struct nlattr *supp; if (!rdev->wiphy.tid_config_support.vif && !rdev->wiphy.tid_config_support.peer) return 0; supp = nla_nest_start(msg, NL80211_ATTR_TID_CONFIG); if (!supp) return -ENOSPC; if (rdev->wiphy.tid_config_support.vif && nla_put_u64_64bit(msg, NL80211_TID_CONFIG_ATTR_VIF_SUPP, rdev->wiphy.tid_config_support.vif, NL80211_TID_CONFIG_ATTR_PAD)) goto fail; if (rdev->wiphy.tid_config_support.peer && nla_put_u64_64bit(msg, NL80211_TID_CONFIG_ATTR_PEER_SUPP, rdev->wiphy.tid_config_support.peer, NL80211_TID_CONFIG_ATTR_PAD)) goto fail; /* for now we just use the same value ... 
makes more sense */ if (nla_put_u8(msg, NL80211_TID_CONFIG_ATTR_RETRY_SHORT, rdev->wiphy.tid_config_support.max_retry)) goto fail; if (nla_put_u8(msg, NL80211_TID_CONFIG_ATTR_RETRY_LONG, rdev->wiphy.tid_config_support.max_retry)) goto fail; nla_nest_end(msg, supp); return 0; fail: nla_nest_cancel(msg, supp); return -ENOBUFS; } static int nl80211_put_sar_specs(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { struct nlattr *sar_capa, *specs, *sub_freq_range; u8 num_freq_ranges; int i; if (!rdev->wiphy.sar_capa) return 0; num_freq_ranges = rdev->wiphy.sar_capa->num_freq_ranges; sar_capa = nla_nest_start(msg, NL80211_ATTR_SAR_SPEC); if (!sar_capa) return -ENOSPC; if (nla_put_u32(msg, NL80211_SAR_ATTR_TYPE, rdev->wiphy.sar_capa->type)) goto fail; specs = nla_nest_start(msg, NL80211_SAR_ATTR_SPECS); if (!specs) goto fail; /* report supported freq_ranges */ for (i = 0; i < num_freq_ranges; i++) { sub_freq_range = nla_nest_start(msg, i + 1); if (!sub_freq_range) goto fail; if (nla_put_u32(msg, NL80211_SAR_ATTR_SPECS_START_FREQ, rdev->wiphy.sar_capa->freq_ranges[i].start_freq)) goto fail; if (nla_put_u32(msg, NL80211_SAR_ATTR_SPECS_END_FREQ, rdev->wiphy.sar_capa->freq_ranges[i].end_freq)) goto fail; nla_nest_end(msg, sub_freq_range); } nla_nest_end(msg, specs); nla_nest_end(msg, sar_capa); return 0; fail: nla_nest_cancel(msg, sar_capa); return -ENOBUFS; } static int nl80211_put_mbssid_support(struct wiphy *wiphy, struct sk_buff *msg) { struct nlattr *config; if (!wiphy->mbssid_max_interfaces) return 0; config = nla_nest_start(msg, NL80211_ATTR_MBSSID_CONFIG); if (!config) return -ENOBUFS; if (nla_put_u8(msg, NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES, wiphy->mbssid_max_interfaces)) goto fail; if (wiphy->ema_max_profile_periodicity && nla_put_u8(msg, NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY, wiphy->ema_max_profile_periodicity)) goto fail; nla_nest_end(msg, config); return 0; fail: nla_nest_cancel(msg, config); return -ENOBUFS; } struct 
nl80211_dump_wiphy_state { s64 filter_wiphy; long start; long split_start, band_start, chan_start, capa_start; bool split; }; static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, enum nl80211_commands cmd, struct sk_buff *msg, u32 portid, u32 seq, int flags, struct nl80211_dump_wiphy_state *state) { void *hdr; struct nlattr *nl_bands, *nl_band; struct nlattr *nl_freqs, *nl_freq; struct nlattr *nl_cmds; enum nl80211_band band; struct ieee80211_channel *chan; int i; const struct ieee80211_txrx_stypes *mgmt_stypes = rdev->wiphy.mgmt_stypes; u32 features; hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -ENOBUFS; if (WARN_ON(!state)) return -EINVAL; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&rdev->wiphy)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, cfg80211_rdev_list_generation)) goto nla_put_failure; if (cmd != NL80211_CMD_NEW_WIPHY) goto finish; switch (state->split_start) { case 0: if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, rdev->wiphy.retry_short) || nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, rdev->wiphy.retry_long) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, rdev->wiphy.frag_threshold) || nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, rdev->wiphy.rts_threshold) || nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, rdev->wiphy.coverage_class) || nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, rdev->wiphy.max_scan_ssids) || nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, rdev->wiphy.max_sched_scan_ssids) || nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, rdev->wiphy.max_scan_ie_len) || nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, rdev->wiphy.max_sched_scan_ie_len) || nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, rdev->wiphy.max_match_sets)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN)) goto nla_put_failure; if ((rdev->wiphy.flags & 
WIPHY_FLAG_MESH_AUTH) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; state->split_start++; if (state->split) break; fallthrough; case 1: if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, sizeof(u32) * rdev->wiphy.n_cipher_suites, rdev->wiphy.cipher_suites)) goto nla_put_failure; if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, rdev->wiphy.max_num_pmkids)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, rdev->wiphy.available_antennas_tx) || nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, rdev->wiphy.available_antennas_rx)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, rdev->wiphy.probe_resp_offload)) goto nla_put_failure; if ((rdev->wiphy.available_antennas_tx || rdev->wiphy.available_antennas_rx) && rdev->ops->get_antenna) { u32 tx_ant = 0, rx_ant = 0; int res; res = rdev_get_antenna(rdev, &tx_ant, &rx_ant); if (!res) { if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, tx_ant) || nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, rx_ant)) goto nla_put_failure; } } state->split_start++; if (state->split) break; fallthrough; case 2: if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, rdev->wiphy.interface_modes)) goto nla_put_failure; state->split_start++; if (state->split) break; 
fallthrough; case 3: nl_bands = nla_nest_start_noflag(msg, NL80211_ATTR_WIPHY_BANDS); if (!nl_bands) goto nla_put_failure; for (band = state->band_start; band < (state->split ? NUM_NL80211_BANDS : NL80211_BAND_60GHZ + 1); band++) { struct ieee80211_supported_band *sband; /* omit higher bands for ancient software */ if (band > NL80211_BAND_5GHZ && !state->split) break; sband = rdev->wiphy.bands[band]; if (!sband) continue; nl_band = nla_nest_start_noflag(msg, band); if (!nl_band) goto nla_put_failure; switch (state->chan_start) { case 0: if (nl80211_send_band_rateinfo(msg, sband, state->split)) goto nla_put_failure; state->chan_start++; if (state->split) break; fallthrough; default: /* add frequencies */ nl_freqs = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_FREQS); if (!nl_freqs) goto nla_put_failure; for (i = state->chan_start - 1; i < sband->n_channels; i++) { nl_freq = nla_nest_start_noflag(msg, i); if (!nl_freq) goto nla_put_failure; chan = &sband->channels[i]; if (nl80211_msg_put_channel( msg, &rdev->wiphy, chan, state->split)) goto nla_put_failure; nla_nest_end(msg, nl_freq); if (state->split) break; } if (i < sband->n_channels) state->chan_start = i + 2; else state->chan_start = 0; nla_nest_end(msg, nl_freqs); } nla_nest_end(msg, nl_band); if (state->split) { /* start again here */ if (state->chan_start) band--; break; } } nla_nest_end(msg, nl_bands); if (band < NUM_NL80211_BANDS) state->band_start = band + 1; else state->band_start = 0; /* if bands & channels are done, continue outside */ if (state->band_start == 0 && state->chan_start == 0) state->split_start++; if (state->split) break; fallthrough; case 4: nl_cmds = nla_nest_start_noflag(msg, NL80211_ATTR_SUPPORTED_COMMANDS); if (!nl_cmds) goto nla_put_failure; i = nl80211_add_commands_unsplit(rdev, msg); if (i < 0) goto nla_put_failure; if (state->split) { CMD(crit_proto_start, CRIT_PROTOCOL_START); CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) 
CMD(channel_switch, CHANNEL_SWITCH); CMD(set_qos_map, SET_QOS_MAP); if (rdev->wiphy.features & NL80211_FEATURE_SUPPORTS_WMM_ADMISSION) CMD(add_tx_ts, ADD_TX_TS); CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST); CMD(update_connect_params, UPDATE_CONNECT_PARAMS); CMD(update_ft_ies, UPDATE_FT_IES); if (rdev->wiphy.sar_capa) CMD(set_sar_specs, SET_SAR_SPECS); } #undef CMD nla_nest_end(msg, nl_cmds); state->split_start++; if (state->split) break; fallthrough; case 5: if (rdev->ops->remain_on_channel && (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && nla_put_u32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, rdev->wiphy.max_remain_on_channel_duration)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) goto nla_put_failure; state->split_start++; if (state->split) break; fallthrough; case 6: #ifdef CONFIG_PM if (nl80211_send_wowlan(msg, rdev, state->split)) goto nla_put_failure; state->split_start++; if (state->split) break; #else state->split_start++; #endif fallthrough; case 7: if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, rdev->wiphy.software_iftypes)) goto nla_put_failure; if (nl80211_put_iface_combinations(&rdev->wiphy, msg, state->split)) goto nla_put_failure; state->split_start++; if (state->split) break; fallthrough; case 8: if ((rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, rdev->wiphy.ap_sme_capa)) goto nla_put_failure; features = rdev->wiphy.features; /* * We can only add the per-channel limit information if the * dump is split, otherwise it makes it too big. Therefore * only advertise it in that case. 
*/ if (state->split) features |= NL80211_FEATURE_ADVERTISE_CHAN_LIMITS; if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features)) goto nla_put_failure; if (rdev->wiphy.ht_capa_mod_mask && nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, sizeof(*rdev->wiphy.ht_capa_mod_mask), rdev->wiphy.ht_capa_mod_mask)) goto nla_put_failure; if (rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && rdev->wiphy.max_acl_mac_addrs && nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, rdev->wiphy.max_acl_mac_addrs)) goto nla_put_failure; /* * Any information below this point is only available to * applications that can deal with it being split. This * helps ensure that newly added capabilities don't break * older tools by overrunning their buffers. * * We still increment split_start so that in the split * case we'll continue with more data in the next round, * but break unconditionally so unsplit data stops here. */ if (state->split) state->split_start++; else state->split_start = 0; break; case 9: if (nl80211_send_mgmt_stypes(msg, mgmt_stypes)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_PLANS, rdev->wiphy.max_sched_scan_plans) || nla_put_u32(msg, NL80211_ATTR_MAX_SCAN_PLAN_INTERVAL, rdev->wiphy.max_sched_scan_plan_interval) || nla_put_u32(msg, NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS, rdev->wiphy.max_sched_scan_plan_iterations)) goto nla_put_failure; if (rdev->wiphy.extended_capabilities && (nla_put(msg, NL80211_ATTR_EXT_CAPA, rdev->wiphy.extended_capabilities_len, rdev->wiphy.extended_capabilities) || nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, rdev->wiphy.extended_capabilities_len, rdev->wiphy.extended_capabilities_mask))) goto nla_put_failure; if (rdev->wiphy.vht_capa_mod_mask && nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK, sizeof(*rdev->wiphy.vht_capa_mod_mask), rdev->wiphy.vht_capa_mod_mask)) goto nla_put_failure; if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, rdev->wiphy.perm_addr)) goto nla_put_failure; if (!is_zero_ether_addr(rdev->wiphy.addr_mask) && nla_put(msg, 
NL80211_ATTR_MAC_MASK, ETH_ALEN, rdev->wiphy.addr_mask)) goto nla_put_failure; if (rdev->wiphy.n_addresses > 1) { void *attr; attr = nla_nest_start(msg, NL80211_ATTR_MAC_ADDRS); if (!attr) goto nla_put_failure; for (i = 0; i < rdev->wiphy.n_addresses; i++) if (nla_put(msg, i + 1, ETH_ALEN, rdev->wiphy.addresses[i].addr)) goto nla_put_failure; nla_nest_end(msg, attr); } state->split_start++; break; case 10: if (nl80211_send_coalesce(msg, rdev)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && (nla_put_flag(msg, NL80211_ATTR_SUPPORT_5_MHZ) || nla_put_flag(msg, NL80211_ATTR_SUPPORT_10_MHZ))) goto nla_put_failure; if (rdev->wiphy.max_ap_assoc_sta && nla_put_u32(msg, NL80211_ATTR_MAX_AP_ASSOC_STA, rdev->wiphy.max_ap_assoc_sta)) goto nla_put_failure; state->split_start++; break; case 11: if (rdev->wiphy.n_vendor_commands) { const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; nested = nla_nest_start_noflag(msg, NL80211_ATTR_VENDOR_DATA); if (!nested) goto nla_put_failure; for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) { info = &rdev->wiphy.vendor_commands[i].info; if (nla_put(msg, i + 1, sizeof(*info), info)) goto nla_put_failure; } nla_nest_end(msg, nested); } if (rdev->wiphy.n_vendor_events) { const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; nested = nla_nest_start_noflag(msg, NL80211_ATTR_VENDOR_EVENTS); if (!nested) goto nla_put_failure; for (i = 0; i < rdev->wiphy.n_vendor_events; i++) { info = &rdev->wiphy.vendor_events[i]; if (nla_put(msg, i + 1, sizeof(*info), info)) goto nla_put_failure; } nla_nest_end(msg, nested); } state->split_start++; break; case 12: if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH && nla_put_u8(msg, NL80211_ATTR_MAX_CSA_COUNTERS, rdev->wiphy.max_num_csa_counters)) goto nla_put_failure; if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) goto nla_put_failure; if (rdev->wiphy.max_sched_scan_reqs && 
nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_MAX_REQS, rdev->wiphy.max_sched_scan_reqs)) goto nla_put_failure; if (nla_put(msg, NL80211_ATTR_EXT_FEATURES, sizeof(rdev->wiphy.ext_features), rdev->wiphy.ext_features)) goto nla_put_failure; if (rdev->wiphy.bss_select_support) { struct nlattr *nested; u32 bss_select_support = rdev->wiphy.bss_select_support; nested = nla_nest_start_noflag(msg, NL80211_ATTR_BSS_SELECT); if (!nested) goto nla_put_failure; i = 0; while (bss_select_support) { if ((bss_select_support & 1) && nla_put_flag(msg, i)) goto nla_put_failure; i++; bss_select_support >>= 1; } nla_nest_end(msg, nested); } state->split_start++; break; case 13: if (rdev->wiphy.num_iftype_ext_capab && rdev->wiphy.iftype_ext_capab) { struct nlattr *nested_ext_capab, *nested; nested = nla_nest_start_noflag(msg, NL80211_ATTR_IFTYPE_EXT_CAPA); if (!nested) goto nla_put_failure; for (i = state->capa_start; i < rdev->wiphy.num_iftype_ext_capab; i++) { const struct wiphy_iftype_ext_capab *capab; capab = &rdev->wiphy.iftype_ext_capab[i]; nested_ext_capab = nla_nest_start_noflag(msg, i); if (!nested_ext_capab || nla_put_u32(msg, NL80211_ATTR_IFTYPE, capab->iftype) || nla_put(msg, NL80211_ATTR_EXT_CAPA, capab->extended_capabilities_len, capab->extended_capabilities) || nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, capab->extended_capabilities_len, capab->extended_capabilities_mask)) goto nla_put_failure; if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO && (nla_put_u16(msg, NL80211_ATTR_EML_CAPABILITY, capab->eml_capabilities) || nla_put_u16(msg, NL80211_ATTR_MLD_CAPA_AND_OPS, capab->mld_capa_and_ops))) goto nla_put_failure; nla_nest_end(msg, nested_ext_capab); if (state->split) break; } nla_nest_end(msg, nested); if (i < rdev->wiphy.num_iftype_ext_capab) { state->capa_start = i + 1; break; } } if (nla_put_u32(msg, NL80211_ATTR_BANDS, rdev->wiphy.nan_supported_bands)) goto nla_put_failure; if (wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_TXQS)) { struct cfg80211_txq_stats 
txqstats = {}; int res; res = rdev_get_txq_stats(rdev, NULL, &txqstats); if (!res && !nl80211_put_txq_stats(msg, &txqstats, NL80211_ATTR_TXQ_STATS)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_TXQ_LIMIT, rdev->wiphy.txq_limit)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_TXQ_MEMORY_LIMIT, rdev->wiphy.txq_memory_limit)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_TXQ_QUANTUM, rdev->wiphy.txq_quantum)) goto nla_put_failure; } state->split_start++; break; case 14: if (nl80211_send_pmsr_capa(rdev, msg)) goto nla_put_failure; state->split_start++; break; case 15: if (rdev->wiphy.akm_suites && nla_put(msg, NL80211_ATTR_AKM_SUITES, sizeof(u32) * rdev->wiphy.n_akm_suites, rdev->wiphy.akm_suites)) goto nla_put_failure; if (nl80211_put_iftype_akm_suites(rdev, msg)) goto nla_put_failure; if (nl80211_put_tid_config_support(rdev, msg)) goto nla_put_failure; state->split_start++; break; case 16: if (nl80211_put_sar_specs(rdev, msg)) goto nla_put_failure; if (nl80211_put_mbssid_support(&rdev->wiphy, msg)) goto nla_put_failure; if (nla_put_u16(msg, NL80211_ATTR_MAX_NUM_AKM_SUITES, rdev->wiphy.max_num_akm_suites)) goto nla_put_failure; if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO) nla_put_flag(msg, NL80211_ATTR_MLO_SUPPORT); if (rdev->wiphy.hw_timestamp_max_peers && nla_put_u16(msg, NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS, rdev->wiphy.hw_timestamp_max_peers)) goto nla_put_failure; /* done */ state->split_start = 0; break; } finish: genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nl80211_dump_wiphy_parse(struct sk_buff *skb, struct netlink_callback *cb, struct nl80211_dump_wiphy_state *state) { struct nlattr **tb = kcalloc(NUM_NL80211_ATTR, sizeof(*tb), GFP_KERNEL); int ret; if (!tb) return -ENOMEM; ret = nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb, nl80211_fam.maxattr, nl80211_policy, NULL); /* ignore parse errors for backward compatibility */ if (ret) { ret = 0; 
goto out; } state->split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; if (tb[NL80211_ATTR_WIPHY]) state->filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]); if (tb[NL80211_ATTR_WDEV]) state->filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32; if (tb[NL80211_ATTR_IFINDEX]) { struct net_device *netdev; struct cfg80211_registered_device *rdev; int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); netdev = __dev_get_by_index(sock_net(skb->sk), ifidx); if (!netdev) { ret = -ENODEV; goto out; } if (netdev->ieee80211_ptr) { rdev = wiphy_to_rdev( netdev->ieee80211_ptr->wiphy); state->filter_wiphy = rdev->wiphy_idx; } } ret = 0; out: kfree(tb); return ret; } static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) { int idx = 0, ret; struct nl80211_dump_wiphy_state *state = (void *)cb->args[0]; struct cfg80211_registered_device *rdev; rtnl_lock(); if (!state) { state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) { rtnl_unlock(); return -ENOMEM; } state->filter_wiphy = -1; ret = nl80211_dump_wiphy_parse(skb, cb, state); if (ret) { kfree(state); rtnl_unlock(); return ret; } cb->args[0] = (long)state; } for_each_rdev(rdev) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; if (++idx <= state->start) continue; if (state->filter_wiphy != -1 && state->filter_wiphy != rdev->wiphy_idx) continue; wiphy_lock(&rdev->wiphy); /* attempt to fit multiple wiphy data chunks into the skb */ do { ret = nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, state); if (ret < 0) { /* * If sending the wiphy data didn't fit (ENOBUFS * or EMSGSIZE returned), this SKB is still * empty (so it's not too big because another * wiphy dataset is already in the skb) and * we've not tried to adjust the dump allocation * yet ... then adjust the alloc size to be * bigger, and return 1 but with the empty skb. * This results in an empty message being RX'ed * in userspace, but that is ignored. 
* * We can then retry with the larger buffer. */ if ((ret == -ENOBUFS || ret == -EMSGSIZE) && !skb->len && !state->split && cb->min_dump_alloc < 4096) { cb->min_dump_alloc = 4096; state->split_start = 0; wiphy_unlock(&rdev->wiphy); rtnl_unlock(); return 1; } idx--; break; } } while (state->split_start > 0); wiphy_unlock(&rdev->wiphy); break; } rtnl_unlock(); state->start = idx; return skb->len; } static int nl80211_dump_wiphy_done(struct netlink_callback *cb) { kfree((void *)cb->args[0]); return 0; } static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nl80211_dump_wiphy_state state = {}; msg = nlmsg_new(4096, GFP_KERNEL); if (!msg) return -ENOMEM; if (nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, msg, info->snd_portid, info->snd_seq, 0, &state) < 0) { nlmsg_free(msg); return -ENOBUFS; } return genlmsg_reply(msg, info); } static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { [NL80211_TXQ_ATTR_QUEUE] = { .type = NLA_U8 }, [NL80211_TXQ_ATTR_TXOP] = { .type = NLA_U16 }, [NL80211_TXQ_ATTR_CWMIN] = { .type = NLA_U16 }, [NL80211_TXQ_ATTR_CWMAX] = { .type = NLA_U16 }, [NL80211_TXQ_ATTR_AIFS] = { .type = NLA_U8 }, }; static int parse_txq_params(struct nlattr *tb[], struct ieee80211_txq_params *txq_params) { u8 ac; if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] || !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || !tb[NL80211_TXQ_ATTR_AIFS]) return -EINVAL; ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); if (ac >= NL80211_NUM_ACS) return -EINVAL; txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS); return 0; } static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) { /* 
* You can only set the channel explicitly for some interfaces, * most have their channel managed via their respective * "establish a connection" command (connect, join, ...) * * For AP/GO and mesh mode, the channel can be set with the * channel userspace API, but is only stored and passed to the * low-level driver when the AP starts or the mesh is joined. * This is for backward compatibility, userspace can also give * the channel in the start-ap or join-mesh commands instead. * * Monitors are special as they are normally slaved to * whatever else is going on, so they have their own special * operation to set the monitor channel if possible. */ return !wdev || wdev->iftype == NL80211_IFTYPE_AP || wdev->iftype == NL80211_IFTYPE_MESH_POINT || wdev->iftype == NL80211_IFTYPE_MONITOR || wdev->iftype == NL80211_IFTYPE_P2P_GO; } static int nl80211_parse_punct_bitmap(struct cfg80211_registered_device *rdev, struct genl_info *info, const struct cfg80211_chan_def *chandef, u16 *punct_bitmap) { if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_PUNCT)) return -EINVAL; *punct_bitmap = nla_get_u32(info->attrs[NL80211_ATTR_PUNCT_BITMAP]); if (!cfg80211_valid_disable_subchannel_bitmap(punct_bitmap, chandef)) return -EINVAL; return 0; } int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, struct genl_info *info, struct cfg80211_chan_def *chandef) { struct netlink_ext_ack *extack = info->extack; struct nlattr **attrs = info->attrs; u32 control_freq; if (!attrs[NL80211_ATTR_WIPHY_FREQ]) { NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ], "Frequency is missing"); return -EINVAL; } control_freq = MHZ_TO_KHZ( nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]) control_freq += nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]); memset(chandef, 0, sizeof(*chandef)); chandef->chan = ieee80211_get_channel_khz(&rdev->wiphy, control_freq); chandef->width = NL80211_CHAN_WIDTH_20_NOHT; 
chandef->center_freq1 = KHZ_TO_MHZ(control_freq); chandef->freq1_offset = control_freq % 1000; chandef->center_freq2 = 0; /* Primary channel not allowed */ if (!chandef->chan || chandef->chan->flags & IEEE80211_CHAN_DISABLED) { NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ], "Channel is disabled"); return -EINVAL; } if (attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { enum nl80211_channel_type chantype; chantype = nla_get_u32(attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); switch (chantype) { case NL80211_CHAN_NO_HT: case NL80211_CHAN_HT20: case NL80211_CHAN_HT40PLUS: case NL80211_CHAN_HT40MINUS: cfg80211_chandef_create(chandef, chandef->chan, chantype); /* user input for center_freq is incorrect */ if (attrs[NL80211_ATTR_CENTER_FREQ1] && chandef->center_freq1 != nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1])) { NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_CENTER_FREQ1], "bad center frequency 1"); return -EINVAL; } /* center_freq2 must be zero */ if (attrs[NL80211_ATTR_CENTER_FREQ2] && nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2])) { NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_CENTER_FREQ2], "center frequency 2 can't be used"); return -EINVAL; } break; default: NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE], "invalid channel type"); return -EINVAL; } } else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) { chandef->width = nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]); if (chandef->chan->band == NL80211_BAND_S1GHZ) { /* User input error for channel width doesn't match channel */ if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) { NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_CHANNEL_WIDTH], "bad channel width"); return -EINVAL; } } if (attrs[NL80211_ATTR_CENTER_FREQ1]) { chandef->center_freq1 = nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]); if (attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET]) chandef->freq1_offset = nla_get_u32( attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET]); else chandef->freq1_offset = 0; } if (attrs[NL80211_ATTR_CENTER_FREQ2]) 
chandef->center_freq2 = nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2]); } if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) { chandef->edmg.channels = nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]); if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]) chandef->edmg.bw_config = nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]); } else { chandef->edmg.bw_config = 0; chandef->edmg.channels = 0; } if (!cfg80211_chandef_valid(chandef)) { NL_SET_ERR_MSG(extack, "invalid channel definition"); return -EINVAL; } if (!cfg80211_chandef_usable(&rdev->wiphy, chandef, IEEE80211_CHAN_DISABLED)) { NL_SET_ERR_MSG(extack, "(extension) channel is disabled"); return -EINVAL; } if ((chandef->width == NL80211_CHAN_WIDTH_5 || chandef->width == NL80211_CHAN_WIDTH_10) && !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ)) { NL_SET_ERR_MSG(extack, "5/10 MHz not supported"); return -EINVAL; } return 0; } static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, struct net_device *dev, struct genl_info *info, int _link_id) { struct cfg80211_chan_def chandef; int result; enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR; struct wireless_dev *wdev = NULL; int link_id = _link_id; if (dev) wdev = dev->ieee80211_ptr; if (!nl80211_can_set_dev_channel(wdev)) return -EOPNOTSUPP; if (wdev) iftype = wdev->iftype; if (link_id < 0) { if (wdev && wdev->valid_links) return -EINVAL; link_id = 0; } result = nl80211_parse_chandef(rdev, info, &chandef); if (result) return result; switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef, iftype)) return -EINVAL; if (wdev->links[link_id].ap.beacon_interval) { struct ieee80211_channel *cur_chan; if (!dev || !rdev->ops->set_ap_chanwidth || !(rdev->wiphy.features & NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE)) return -EBUSY; /* Only allow dynamic channel width changes */ cur_chan = wdev->links[link_id].ap.chandef.chan; if (chandef.chan != cur_chan) return 
-EBUSY; result = rdev_set_ap_chanwidth(rdev, dev, link_id, &chandef); if (result) return result; wdev->links[link_id].ap.chandef = chandef; } else { wdev->u.ap.preset_chandef = chandef; } return 0; case NL80211_IFTYPE_MESH_POINT: return cfg80211_set_mesh_channel(rdev, wdev, &chandef); case NL80211_IFTYPE_MONITOR: return cfg80211_set_monitor_channel(rdev, &chandef); default: break; } return -EINVAL; } static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int link_id = nl80211_link_id_or_invalid(info->attrs); struct net_device *netdev = info->user_ptr[1]; return __nl80211_set_channel(rdev, netdev, info, link_id); } static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = NULL; struct net_device *netdev = NULL; struct wireless_dev *wdev; int result = 0, rem_txq_params = 0; struct nlattr *nl_txq_params; u32 changed; u8 retry_short = 0, retry_long = 0; u32 frag_threshold = 0, rts_threshold = 0; u8 coverage_class = 0; u32 txq_limit = 0, txq_memory_limit = 0, txq_quantum = 0; rtnl_lock(); /* * Try to find the wiphy and netdev. Normally this * function shouldn't need the netdev, but this is * done for backward compatibility -- previously * setting the channel was done per wiphy, but now * it is per netdev. Previous userland like hostapd * also passed a netdev to set_wiphy, so that it is * possible to let that go to the right netdev! 
*/ if (info->attrs[NL80211_ATTR_IFINDEX]) { int ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); netdev = __dev_get_by_index(genl_info_net(info), ifindex); if (netdev && netdev->ieee80211_ptr) rdev = wiphy_to_rdev(netdev->ieee80211_ptr->wiphy); else netdev = NULL; } if (!netdev) { rdev = __cfg80211_rdev_from_attrs(genl_info_net(info), info->attrs); if (IS_ERR(rdev)) { rtnl_unlock(); return PTR_ERR(rdev); } wdev = NULL; netdev = NULL; result = 0; } else wdev = netdev->ieee80211_ptr; wiphy_lock(&rdev->wiphy); /* * end workaround code, by now the rdev is available * and locked, and wdev may or may not be NULL. */ if (info->attrs[NL80211_ATTR_WIPHY_NAME]) result = cfg80211_dev_rename( rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); rtnl_unlock(); if (result) goto out; if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) { struct ieee80211_txq_params txq_params; struct nlattr *tb[NL80211_TXQ_ATTR_MAX + 1]; if (!rdev->ops->set_txq_params) { result = -EOPNOTSUPP; goto out; } if (!netdev) { result = -EINVAL; goto out; } if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { result = -EINVAL; goto out; } if (!netif_running(netdev)) { result = -ENETDOWN; goto out; } nla_for_each_nested(nl_txq_params, info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], rem_txq_params) { result = nla_parse_nested_deprecated(tb, NL80211_TXQ_ATTR_MAX, nl_txq_params, txq_params_policy, info->extack); if (result) goto out; result = parse_txq_params(tb, &txq_params); if (result) goto out; txq_params.link_id = nl80211_link_id_or_invalid(info->attrs); if (txq_params.link_id >= 0 && !(netdev->ieee80211_ptr->valid_links & BIT(txq_params.link_id))) result = -ENOLINK; else if (txq_params.link_id >= 0 && !netdev->ieee80211_ptr->valid_links) result = -EINVAL; else result = rdev_set_txq_params(rdev, netdev, &txq_params); if (result) goto out; } } if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { int link_id = nl80211_link_id_or_invalid(info->attrs); 
if (wdev) { result = __nl80211_set_channel( rdev, nl80211_can_set_dev_channel(wdev) ? netdev : NULL, info, link_id); } else { result = __nl80211_set_channel(rdev, netdev, info, link_id); } if (result) goto out; } if (info->attrs[NL80211_ATTR_WIPHY_TX_POWER_SETTING]) { struct wireless_dev *txp_wdev = wdev; enum nl80211_tx_power_setting type; int idx, mbm = 0; if (!(rdev->wiphy.features & NL80211_FEATURE_VIF_TXPOWER)) txp_wdev = NULL; if (!rdev->ops->set_tx_power) { result = -EOPNOTSUPP; goto out; } idx = NL80211_ATTR_WIPHY_TX_POWER_SETTING; type = nla_get_u32(info->attrs[idx]); if (!info->attrs[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] && (type != NL80211_TX_POWER_AUTOMATIC)) { result = -EINVAL; goto out; } if (type != NL80211_TX_POWER_AUTOMATIC) { idx = NL80211_ATTR_WIPHY_TX_POWER_LEVEL; mbm = nla_get_u32(info->attrs[idx]); } result = rdev_set_tx_power(rdev, txp_wdev, type, mbm); if (result) goto out; } if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] && info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) { u32 tx_ant, rx_ant; if ((!rdev->wiphy.available_antennas_tx && !rdev->wiphy.available_antennas_rx) || !rdev->ops->set_antenna) { result = -EOPNOTSUPP; goto out; } tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]); rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]); /* reject antenna configurations which don't match the * available antenna masks, except for the "all" mask */ if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas_tx)) || (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) { result = -EINVAL; goto out; } tx_ant = tx_ant & rdev->wiphy.available_antennas_tx; rx_ant = rx_ant & rdev->wiphy.available_antennas_rx; result = rdev_set_antenna(rdev, tx_ant, rx_ant); if (result) goto out; } changed = 0; if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) { retry_short = nla_get_u8( info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]); changed |= WIPHY_PARAM_RETRY_SHORT; } if (info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]) { retry_long = nla_get_u8( 
info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]); changed |= WIPHY_PARAM_RETRY_LONG; } if (info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]) { frag_threshold = nla_get_u32( info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]); if (frag_threshold < 256) { result = -EINVAL; goto out; } if (frag_threshold != (u32) -1) { /* * Fragments (apart from the last one) are required to * have even length. Make the fragmentation code * simpler by stripping LSB should someone try to use * odd threshold value. */ frag_threshold &= ~0x1; } changed |= WIPHY_PARAM_FRAG_THRESHOLD; } if (info->attrs[NL80211_ATTR_WIPHY_RTS_THRESHOLD]) { rts_threshold = nla_get_u32( info->attrs[NL80211_ATTR_WIPHY_RTS_THRESHOLD]); changed |= WIPHY_PARAM_RTS_THRESHOLD; } if (info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]) { if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) { result = -EINVAL; goto out; } coverage_class = nla_get_u8( info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]); changed |= WIPHY_PARAM_COVERAGE_CLASS; } if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) { if (!(rdev->wiphy.features & NL80211_FEATURE_ACKTO_ESTIMATION)) { result = -EOPNOTSUPP; goto out; } changed |= WIPHY_PARAM_DYN_ACK; } if (info->attrs[NL80211_ATTR_TXQ_LIMIT]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_TXQS)) { result = -EOPNOTSUPP; goto out; } txq_limit = nla_get_u32( info->attrs[NL80211_ATTR_TXQ_LIMIT]); changed |= WIPHY_PARAM_TXQ_LIMIT; } if (info->attrs[NL80211_ATTR_TXQ_MEMORY_LIMIT]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_TXQS)) { result = -EOPNOTSUPP; goto out; } txq_memory_limit = nla_get_u32( info->attrs[NL80211_ATTR_TXQ_MEMORY_LIMIT]); changed |= WIPHY_PARAM_TXQ_MEMORY_LIMIT; } if (info->attrs[NL80211_ATTR_TXQ_QUANTUM]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_TXQS)) { result = -EOPNOTSUPP; goto out; } txq_quantum = nla_get_u32( info->attrs[NL80211_ATTR_TXQ_QUANTUM]); changed |= WIPHY_PARAM_TXQ_QUANTUM; } if (changed) { u8 old_retry_short, old_retry_long; u32 
old_frag_threshold, old_rts_threshold; u8 old_coverage_class; u32 old_txq_limit, old_txq_memory_limit, old_txq_quantum; if (!rdev->ops->set_wiphy_params) { result = -EOPNOTSUPP; goto out; } old_retry_short = rdev->wiphy.retry_short; old_retry_long = rdev->wiphy.retry_long; old_frag_threshold = rdev->wiphy.frag_threshold; old_rts_threshold = rdev->wiphy.rts_threshold; old_coverage_class = rdev->wiphy.coverage_class; old_txq_limit = rdev->wiphy.txq_limit; old_txq_memory_limit = rdev->wiphy.txq_memory_limit; old_txq_quantum = rdev->wiphy.txq_quantum; if (changed & WIPHY_PARAM_RETRY_SHORT) rdev->wiphy.retry_short = retry_short; if (changed & WIPHY_PARAM_RETRY_LONG) rdev->wiphy.retry_long = retry_long; if (changed & WIPHY_PARAM_FRAG_THRESHOLD) rdev->wiphy.frag_threshold = frag_threshold; if (changed & WIPHY_PARAM_RTS_THRESHOLD) rdev->wiphy.rts_threshold = rts_threshold; if (changed & WIPHY_PARAM_COVERAGE_CLASS) rdev->wiphy.coverage_class = coverage_class; if (changed & WIPHY_PARAM_TXQ_LIMIT) rdev->wiphy.txq_limit = txq_limit; if (changed & WIPHY_PARAM_TXQ_MEMORY_LIMIT) rdev->wiphy.txq_memory_limit = txq_memory_limit; if (changed & WIPHY_PARAM_TXQ_QUANTUM) rdev->wiphy.txq_quantum = txq_quantum; result = rdev_set_wiphy_params(rdev, changed); if (result) { rdev->wiphy.retry_short = old_retry_short; rdev->wiphy.retry_long = old_retry_long; rdev->wiphy.frag_threshold = old_frag_threshold; rdev->wiphy.rts_threshold = old_rts_threshold; rdev->wiphy.coverage_class = old_coverage_class; rdev->wiphy.txq_limit = old_txq_limit; rdev->wiphy.txq_memory_limit = old_txq_memory_limit; rdev->wiphy.txq_quantum = old_txq_quantum; goto out; } } result = 0; out: wiphy_unlock(&rdev->wiphy); return result; } int nl80211_send_chandef(struct sk_buff *msg, const struct cfg80211_chan_def *chandef) { if (WARN_ON(!cfg80211_chandef_valid(chandef))) return -EINVAL; if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, chandef->chan->center_freq)) return -ENOBUFS; if (nla_put_u32(msg, 
NL80211_ATTR_WIPHY_FREQ_OFFSET, chandef->chan->freq_offset)) return -ENOBUFS; switch (chandef->width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_40: if (nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, cfg80211_get_chandef_type(chandef))) return -ENOBUFS; break; default: break; } if (nla_put_u32(msg, NL80211_ATTR_CHANNEL_WIDTH, chandef->width)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_ATTR_CENTER_FREQ1, chandef->center_freq1)) return -ENOBUFS; if (chandef->center_freq2 && nla_put_u32(msg, NL80211_ATTR_CENTER_FREQ2, chandef->center_freq2)) return -ENOBUFS; return 0; } EXPORT_SYMBOL(nl80211_send_chandef); static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_commands cmd) { struct net_device *dev = wdev->netdev; void *hdr; lockdep_assert_wiphy(&rdev->wiphy); WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE && cmd != NL80211_CMD_DEL_INTERFACE && cmd != NL80211_CMD_SET_INTERFACE); hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; if (dev && (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put_string(msg, NL80211_ATTR_IFNAME, dev->name))) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFTYPE, wdev->iftype) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, wdev_address(wdev)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->devlist_generation ^ (cfg80211_rdev_list_generation << 2)) || nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr)) goto nla_put_failure; if (rdev->ops->get_channel && !wdev->valid_links) { struct cfg80211_chan_def chandef = {}; int ret; ret = rdev_get_channel(rdev, wdev, 0, &chandef); if (ret == 0 && nl80211_send_chandef(msg, &chandef)) goto nla_put_failure; } if (rdev->ops->get_tx_power) { int dbm, ret; ret = 
rdev_get_tx_power(rdev, wdev, &dbm); if (ret == 0 && nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, DBM_TO_MBM(dbm))) goto nla_put_failure; } switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: if (wdev->u.ap.ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->u.ap.ssid_len, wdev->u.ap.ssid)) goto nla_put_failure; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->u.client.ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->u.client.ssid_len, wdev->u.client.ssid)) goto nla_put_failure; break; case NL80211_IFTYPE_ADHOC: if (wdev->u.ibss.ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->u.ibss.ssid_len, wdev->u.ibss.ssid)) goto nla_put_failure; break; default: /* nothing */ break; } if (rdev->ops->get_txq_stats) { struct cfg80211_txq_stats txqstats = {}; int ret = rdev_get_txq_stats(rdev, wdev, &txqstats); if (ret == 0 && !nl80211_put_txq_stats(msg, &txqstats, NL80211_ATTR_TXQ_STATS)) goto nla_put_failure; } if (wdev->valid_links) { unsigned int link_id; struct nlattr *links = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS); if (!links) goto nla_put_failure; for_each_valid_link(wdev, link_id) { struct nlattr *link = nla_nest_start(msg, link_id + 1); struct cfg80211_chan_def chandef = {}; int ret; if (!link) goto nla_put_failure; if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)) goto nla_put_failure; if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, wdev->links[link_id].addr)) goto nla_put_failure; ret = rdev_get_channel(rdev, wdev, link_id, &chandef); if (ret == 0 && nl80211_send_chandef(msg, &chandef)) goto nla_put_failure; nla_nest_end(msg, link); } nla_nest_end(msg, links); } genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb) { int wp_idx = 0; int if_idx = 0; int wp_start = cb->args[0]; int if_start = cb->args[1]; int filter_wiphy = -1; struct cfg80211_registered_device *rdev; struct 
wireless_dev *wdev; int ret; rtnl_lock(); if (!cb->args[2]) { struct nl80211_dump_wiphy_state state = { .filter_wiphy = -1, }; ret = nl80211_dump_wiphy_parse(skb, cb, &state); if (ret) goto out_unlock; filter_wiphy = state.filter_wiphy; /* * if filtering, set cb->args[2] to +1 since 0 is the default * value needed to determine that parsing is necessary. */ if (filter_wiphy >= 0) cb->args[2] = filter_wiphy + 1; else cb->args[2] = -1; } else if (cb->args[2] > 0) { filter_wiphy = cb->args[2] - 1; } for_each_rdev(rdev) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; if (wp_idx < wp_start) { wp_idx++; continue; } if (filter_wiphy >= 0 && filter_wiphy != rdev->wiphy_idx) continue; if_idx = 0; wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (if_idx < if_start) { if_idx++; continue; } if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 0) { wiphy_unlock(&rdev->wiphy); goto out; } if_idx++; } wiphy_unlock(&rdev->wiphy); wp_idx++; } out: cb->args[0] = wp_idx; cb->args[1] = if_idx; ret = skb->len; out_unlock: rtnl_unlock(); return ret; } static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 0) { nlmsg_free(msg); return -ENOBUFS; } return genlmsg_reply(msg, info); } static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = { [NL80211_MNTR_FLAG_FCSFAIL] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_PLCPFAIL] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_CONTROL] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_OTHER_BSS] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_COOK_FRAMES] = { .type = NLA_FLAG }, 
[NL80211_MNTR_FLAG_ACTIVE] = { .type = NLA_FLAG }, }; static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) { struct nlattr *flags[NL80211_MNTR_FLAG_MAX + 1]; int flag; *mntrflags = 0; if (!nla) return -EINVAL; if (nla_parse_nested_deprecated(flags, NL80211_MNTR_FLAG_MAX, nla, mntr_flags_policy, NULL)) return -EINVAL; for (flag = 1; flag <= NL80211_MNTR_FLAG_MAX; flag++) if (flags[flag]) *mntrflags |= (1<<flag); *mntrflags |= MONITOR_FLAG_CHANGED; return 0; } static int nl80211_parse_mon_options(struct cfg80211_registered_device *rdev, enum nl80211_iftype type, struct genl_info *info, struct vif_params *params) { bool change = false; int err; if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) { if (type != NL80211_IFTYPE_MONITOR) return -EINVAL; err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS], ¶ms->flags); if (err) return err; change = true; } if (params->flags & MONITOR_FLAG_ACTIVE && !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; if (info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]) { const u8 *mumimo_groups; u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER; if (type != NL80211_IFTYPE_MONITOR) return -EINVAL; if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag)) return -EOPNOTSUPP; mumimo_groups = nla_data(info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]); /* bits 0 and 63 are reserved and must be zero */ if ((mumimo_groups[0] & BIT(0)) || (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(7))) return -EINVAL; params->vht_mumimo_groups = mumimo_groups; change = true; } if (info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]) { u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER; if (type != NL80211_IFTYPE_MONITOR) return -EINVAL; if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag)) return -EOPNOTSUPP; params->vht_mumimo_follow_addr = nla_data(info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]); change = true; } return change ? 
1 : 0; } static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev, struct net_device *netdev, u8 use_4addr, enum nl80211_iftype iftype) { if (!use_4addr) { if (netdev && netif_is_bridge_port(netdev)) return -EBUSY; return 0; } switch (iftype) { case NL80211_IFTYPE_AP_VLAN: if (rdev->wiphy.flags & WIPHY_FLAG_4ADDR_AP) return 0; break; case NL80211_IFTYPE_STATION: if (rdev->wiphy.flags & WIPHY_FLAG_4ADDR_STATION) return 0; break; default: break; } return -EOPNOTSUPP; } static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct vif_params params; int err; enum nl80211_iftype otype, ntype; struct net_device *dev = info->user_ptr[1]; bool change = false; memset(¶ms, 0, sizeof(params)); otype = ntype = dev->ieee80211_ptr->iftype; if (info->attrs[NL80211_ATTR_IFTYPE]) { ntype = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); if (otype != ntype) change = true; } if (info->attrs[NL80211_ATTR_MESH_ID]) { struct wireless_dev *wdev = dev->ieee80211_ptr; if (ntype != NL80211_IFTYPE_MESH_POINT) return -EINVAL; if (netif_running(dev)) return -EBUSY; BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); wdev->u.mesh.id_up_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); memcpy(wdev->u.mesh.id, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), wdev->u.mesh.id_up_len); } if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); change = true; err = nl80211_valid_4addr(rdev, dev, params.use_4addr, ntype); if (err) return err; } else { params.use_4addr = -1; } err = nl80211_parse_mon_options(rdev, ntype, info, ¶ms); if (err < 0) return err; if (err > 0) change = true; if (change) err = cfg80211_change_iface(rdev, dev, ntype, ¶ms); else err = 0; if (!err && params.use_4addr != -1) dev->ieee80211_ptr->use_4addr = params.use_4addr; if (change && !err) { struct wireless_dev *wdev = dev->ieee80211_ptr; nl80211_notify_iface(rdev, wdev, 
NL80211_CMD_SET_INTERFACE); } return err; } static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct vif_params params; struct wireless_dev *wdev; struct sk_buff *msg; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; memset(¶ms, 0, sizeof(params)); if (!info->attrs[NL80211_ATTR_IFNAME]) return -EINVAL; if (info->attrs[NL80211_ATTR_IFTYPE]) type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); if (!rdev->ops->add_virtual_intf) return -EOPNOTSUPP; if ((type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN || rdev->wiphy.features & NL80211_FEATURE_MAC_ON_CREATE) && info->attrs[NL80211_ATTR_MAC]) { nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC], ETH_ALEN); if (!is_valid_ether_addr(params.macaddr)) return -EADDRNOTAVAIL; } if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type); if (err) return err; } if (!cfg80211_iftype_allowed(&rdev->wiphy, type, params.use_4addr, 0)) return -EOPNOTSUPP; err = nl80211_parse_mon_options(rdev, type, info, ¶ms); if (err < 0) return err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; wdev = rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL80211_ATTR_IFNAME]), NET_NAME_USER, type, ¶ms); if (WARN_ON(!wdev)) { nlmsg_free(msg); return -EPROTO; } else if (IS_ERR(wdev)) { nlmsg_free(msg); return PTR_ERR(wdev); } if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) wdev->owner_nlportid = info->snd_portid; switch (type) { case NL80211_IFTYPE_MESH_POINT: if (!info->attrs[NL80211_ATTR_MESH_ID]) break; BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); wdev->u.mesh.id_up_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); memcpy(wdev->u.mesh.id, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), wdev->u.mesh.id_up_len); break; case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_P2P_DEVICE: /* 
* P2P Device and NAN do not have a netdev, so don't go * through the netdev notifier and must be added here */ cfg80211_init_wdev(wdev); cfg80211_register_wdev(rdev, wdev); break; default: break; } if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 0) { nlmsg_free(msg); return -ENOBUFS; } return genlmsg_reply(msg, info); } static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int ret; /* to avoid failing a new interface creation due to pending removal */ cfg80211_destroy_ifaces(rdev); wiphy_lock(&rdev->wiphy); ret = _nl80211_new_interface(skb, info); wiphy_unlock(&rdev->wiphy); return ret; } static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; if (!rdev->ops->del_virtual_intf) return -EOPNOTSUPP; /* * We hold RTNL, so this is safe, without RTNL opencount cannot * reach 0, and thus the rdev cannot be deleted. * * We need to do it for the dev_close(), since that will call * the netdev notifiers, and we need to acquire the mutex there * but don't know if we get there from here or from some other * place (e.g. "ip link set ... down"). */ mutex_unlock(&rdev->wiphy.mtx); /* * If we remove a wireless device without a netdev then clear * user_ptr[1] so that nl80211_post_doit won't dereference it * to check if it needs to do dev_put(). Otherwise it crashes * since the wdev has been freed, unlike with a netdev where * we need the dev_put() for the netdev to really be freed. 
*/ if (!wdev->netdev) info->user_ptr[1] = NULL; else dev_close(wdev->netdev); mutex_lock(&rdev->wiphy.mtx); return cfg80211_remove_virtual_intf(rdev, wdev); } static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; u16 noack_map; if (!info->attrs[NL80211_ATTR_NOACK_MAP]) return -EINVAL; if (!rdev->ops->set_noack_map) return -EOPNOTSUPP; noack_map = nla_get_u16(info->attrs[NL80211_ATTR_NOACK_MAP]); return rdev_set_noack_map(rdev, dev, noack_map); } static int nl80211_validate_key_link_id(struct genl_info *info, struct wireless_dev *wdev, int link_id, bool pairwise) { if (pairwise) { if (link_id != -1) { GENL_SET_ERR_MSG(info, "link ID not allowed for pairwise key"); return -EINVAL; } return 0; } if (wdev->valid_links) { if (link_id == -1) { GENL_SET_ERR_MSG(info, "link ID must for MLO group key"); return -EINVAL; } if (!(wdev->valid_links & BIT(link_id))) { GENL_SET_ERR_MSG(info, "invalid link ID for MLO group key"); return -EINVAL; } } else if (link_id != -1) { GENL_SET_ERR_MSG(info, "link ID not allowed for non-MLO group key"); return -EINVAL; } return 0; } struct get_key_cookie { struct sk_buff *msg; int error; int idx; }; static void get_key_callback(void *c, struct key_params *params) { struct nlattr *key; struct get_key_cookie *cookie = c; if ((params->key && nla_put(cookie->msg, NL80211_ATTR_KEY_DATA, params->key_len, params->key)) || (params->seq && nla_put(cookie->msg, NL80211_ATTR_KEY_SEQ, params->seq_len, params->seq)) || (params->cipher && nla_put_u32(cookie->msg, NL80211_ATTR_KEY_CIPHER, params->cipher))) goto nla_put_failure; key = nla_nest_start_noflag(cookie->msg, NL80211_ATTR_KEY); if (!key) goto nla_put_failure; if ((params->key && nla_put(cookie->msg, NL80211_KEY_DATA, params->key_len, params->key)) || (params->seq && nla_put(cookie->msg, NL80211_KEY_SEQ, params->seq_len, params->seq)) || (params->cipher && 
nla_put_u32(cookie->msg, NL80211_KEY_CIPHER, params->cipher))) goto nla_put_failure; if (nla_put_u8(cookie->msg, NL80211_KEY_IDX, cookie->idx)) goto nla_put_failure; nla_nest_end(cookie->msg, key); return; nla_put_failure: cookie->error = 1; } static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; struct net_device *dev = info->user_ptr[1]; u8 key_idx = 0; const u8 *mac_addr = NULL; bool pairwise; struct get_key_cookie cookie = { .error = 0, }; void *hdr; struct sk_buff *msg; bool bigtk_support = false; int link_id = nl80211_link_id_or_invalid(info->attrs); struct wireless_dev *wdev = dev->ieee80211_ptr; if (wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_PROTECTION)) bigtk_support = true; if ((wdev->iftype == NL80211_IFTYPE_STATION || wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) bigtk_support = true; if (info->attrs[NL80211_ATTR_KEY_IDX]) { key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); if (key_idx >= 6 && key_idx <= 7 && !bigtk_support) { GENL_SET_ERR_MSG(info, "BIGTK not supported"); return -EINVAL; } } if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); pairwise = !!mac_addr; if (info->attrs[NL80211_ATTR_KEY_TYPE]) { u32 kt = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]); if (kt != NL80211_KEYTYPE_GROUP && kt != NL80211_KEYTYPE_PAIRWISE) return -EINVAL; pairwise = kt == NL80211_KEYTYPE_PAIRWISE; } if (!rdev->ops->get_key) return -EOPNOTSUPP; if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) return -ENOENT; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_NEW_KEY); if (!hdr) goto nla_put_failure; cookie.msg = msg; cookie.idx = key_idx; if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || 
nla_put_u8(msg, NL80211_ATTR_KEY_IDX, key_idx)) goto nla_put_failure; if (mac_addr && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr)) goto nla_put_failure; err = nl80211_validate_key_link_id(info, wdev, link_id, pairwise); if (err) goto free_msg; err = rdev_get_key(rdev, dev, link_id, key_idx, pairwise, mac_addr, &cookie, get_key_callback); if (err) goto free_msg; if (cookie.error) goto nla_put_failure; genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: err = -ENOBUFS; free_msg: nlmsg_free(msg); return err; } static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct key_parse key; int err; struct net_device *dev = info->user_ptr[1]; int link_id = nl80211_link_id_or_invalid(info->attrs); struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) return err; if (key.idx < 0) return -EINVAL; /* Only support setting default key and * Extended Key ID action NL80211_KEY_SET_TX. 
*/ if (!key.def && !key.defmgmt && !key.defbeacon && !(key.p.mode == NL80211_KEY_SET_TX)) return -EINVAL; if (key.def) { if (!rdev->ops->set_default_key) return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (err) return err; err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) return err; err = rdev_set_default_key(rdev, dev, link_id, key.idx, key.def_uni, key.def_multi); if (err) return err; #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_key = key.idx; #endif return 0; } else if (key.defmgmt) { if (key.def_uni || !key.def_multi) return -EINVAL; if (!rdev->ops->set_default_mgmt_key) return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (err) return err; err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) return err; err = rdev_set_default_mgmt_key(rdev, dev, link_id, key.idx); if (err) return err; #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_mgmt_key = key.idx; #endif return 0; } else if (key.defbeacon) { if (key.def_uni || !key.def_multi) return -EINVAL; if (!rdev->ops->set_default_beacon_key) return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (err) return err; err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) return err; return rdev_set_default_beacon_key(rdev, dev, link_id, key.idx); } else if (key.p.mode == NL80211_KEY_SET_TX && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID)) { u8 *mac_addr = NULL; if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); if (!mac_addr || key.idx < 0 || key.idx > 1) return -EINVAL; err = nl80211_validate_key_link_id(info, wdev, link_id, true); if (err) return err; return rdev_add_key(rdev, dev, link_id, key.idx, NL80211_KEYTYPE_PAIRWISE, mac_addr, &key.p); } return -EINVAL; } static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; struct net_device *dev = info->user_ptr[1]; struct key_parse key; const u8 *mac_addr = 
NULL; int link_id = nl80211_link_id_or_invalid(info->attrs); struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) return err; if (!key.p.key) { GENL_SET_ERR_MSG(info, "no key"); return -EINVAL; } if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); if (key.type == -1) { if (mac_addr) key.type = NL80211_KEYTYPE_PAIRWISE; else key.type = NL80211_KEYTYPE_GROUP; } /* for now */ if (key.type != NL80211_KEYTYPE_PAIRWISE && key.type != NL80211_KEYTYPE_GROUP) { GENL_SET_ERR_MSG(info, "key type not pairwise or group"); return -EINVAL; } if (key.type == NL80211_KEYTYPE_GROUP && info->attrs[NL80211_ATTR_VLAN_ID]) key.p.vlan_id = nla_get_u16(info->attrs[NL80211_ATTR_VLAN_ID]); if (!rdev->ops->add_key) return -EOPNOTSUPP; if (cfg80211_validate_key_settings(rdev, &key.p, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr)) { GENL_SET_ERR_MSG(info, "key setting validation failed"); return -EINVAL; } err = nl80211_key_allowed(wdev); if (err) GENL_SET_ERR_MSG(info, "key not allowed"); if (!err) err = nl80211_validate_key_link_id(info, wdev, link_id, key.type == NL80211_KEYTYPE_PAIRWISE); if (!err) { err = rdev_add_key(rdev, dev, link_id, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr, &key.p); if (err) GENL_SET_ERR_MSG(info, "key addition failed"); } return err; } static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; struct net_device *dev = info->user_ptr[1]; u8 *mac_addr = NULL; struct key_parse key; int link_id = nl80211_link_id_or_invalid(info->attrs); struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) return err; if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); if (key.type == -1) { if (mac_addr) key.type = NL80211_KEYTYPE_PAIRWISE; else key.type = NL80211_KEYTYPE_GROUP; } /* for now */ if (key.type != 
NL80211_KEYTYPE_PAIRWISE && key.type != NL80211_KEYTYPE_GROUP) return -EINVAL; if (!cfg80211_valid_key_idx(rdev, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE)) return -EINVAL; if (!rdev->ops->del_key) return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (key.type == NL80211_KEYTYPE_GROUP && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) err = -ENOENT; if (!err) err = nl80211_validate_key_link_id(info, wdev, link_id, key.type == NL80211_KEYTYPE_PAIRWISE); if (!err) err = rdev_del_key(rdev, dev, link_id, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr); #ifdef CONFIG_CFG80211_WEXT if (!err) { if (key.idx == wdev->wext.default_key) wdev->wext.default_key = -1; else if (key.idx == wdev->wext.default_mgmt_key) wdev->wext.default_mgmt_key = -1; } #endif return err; } /* This function returns an error or the number of nested attributes */ static int validate_acl_mac_addrs(struct nlattr *nl_attr) { struct nlattr *attr; int n_entries = 0, tmp; nla_for_each_nested(attr, nl_attr, tmp) { if (nla_len(attr) != ETH_ALEN) return -EINVAL; n_entries++; } return n_entries; } /* * This function parses ACL information and allocates memory for ACL data. * On successful return, the calling function is responsible to free the * ACL buffer returned by this function. 
*/ static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy, struct genl_info *info) { enum nl80211_acl_policy acl_policy; struct nlattr *attr; struct cfg80211_acl_data *acl; int i = 0, n_entries, tmp; if (!wiphy->max_acl_mac_addrs) return ERR_PTR(-EOPNOTSUPP); if (!info->attrs[NL80211_ATTR_ACL_POLICY]) return ERR_PTR(-EINVAL); acl_policy = nla_get_u32(info->attrs[NL80211_ATTR_ACL_POLICY]); if (acl_policy != NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED && acl_policy != NL80211_ACL_POLICY_DENY_UNLESS_LISTED) return ERR_PTR(-EINVAL); if (!info->attrs[NL80211_ATTR_MAC_ADDRS]) return ERR_PTR(-EINVAL); n_entries = validate_acl_mac_addrs(info->attrs[NL80211_ATTR_MAC_ADDRS]); if (n_entries < 0) return ERR_PTR(n_entries); if (n_entries > wiphy->max_acl_mac_addrs) return ERR_PTR(-ENOTSUPP); acl = kzalloc(struct_size(acl, mac_addrs, n_entries), GFP_KERNEL); if (!acl) return ERR_PTR(-ENOMEM); acl->n_acl_entries = n_entries; nla_for_each_nested(attr, info->attrs[NL80211_ATTR_MAC_ADDRS], tmp) { memcpy(acl->mac_addrs[i].addr, nla_data(attr), ETH_ALEN); i++; } acl->acl_policy = acl_policy; return acl; } static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct cfg80211_acl_data *acl; int err; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; if (!dev->ieee80211_ptr->links[0].ap.beacon_interval) return -EINVAL; acl = parse_acl_data(&rdev->wiphy, info); if (IS_ERR(acl)) return PTR_ERR(acl); err = rdev_set_mac_acl(rdev, dev, acl); kfree(acl); return err; } static u32 rateset_to_mask(struct ieee80211_supported_band *sband, u8 *rates, u8 rates_len) { u8 i; u32 mask = 0; for (i = 0; i < rates_len; i++) { int rate = (rates[i] & 0x7f) * 5; int ridx; for (ridx = 0; ridx < sband->n_bitrates; ridx++) { struct ieee80211_rate *srate = &sband->bitrates[ridx]; if (rate == 
srate->bitrate) { mask |= 1 << ridx; break; } } if (ridx == sband->n_bitrates) return 0; /* rate not found */ } return mask; } static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, u8 *rates, u8 rates_len, u8 mcs[IEEE80211_HT_MCS_MASK_LEN]) { u8 i; memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN); for (i = 0; i < rates_len; i++) { int ridx, rbit; ridx = rates[i] / 8; rbit = BIT(rates[i] % 8); /* check validity */ if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN)) return false; /* check availability */ ridx = array_index_nospec(ridx, IEEE80211_HT_MCS_MASK_LEN); if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) mcs[ridx] |= rbit; else return false; } return true; } static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map) { u16 mcs_mask = 0; switch (vht_mcs_map) { case IEEE80211_VHT_MCS_NOT_SUPPORTED: break; case IEEE80211_VHT_MCS_SUPPORT_0_7: mcs_mask = 0x00FF; break; case IEEE80211_VHT_MCS_SUPPORT_0_8: mcs_mask = 0x01FF; break; case IEEE80211_VHT_MCS_SUPPORT_0_9: mcs_mask = 0x03FF; break; default: break; } return mcs_mask; } static void vht_build_mcs_mask(u16 vht_mcs_map, u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) { u8 nss; for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03); vht_mcs_map >>= 2; } } static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, struct nl80211_txrate_vht *txrate, u16 mcs[NL80211_VHT_NSS_MAX]) { u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {}; u8 i; if (!sband->vht_cap.vht_supported) return false; memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX); /* Build vht_mcs_mask from VHT capabilities */ vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask); for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) mcs[i] = txrate->mcs[i]; else return false; } return true; } static u16 he_mcs_map_to_mcs_mask(u8 he_mcs_map) { switch (he_mcs_map) { case IEEE80211_HE_MCS_NOT_SUPPORTED: return 
0; case IEEE80211_HE_MCS_SUPPORT_0_7: return 0x00FF; case IEEE80211_HE_MCS_SUPPORT_0_9: return 0x03FF; case IEEE80211_HE_MCS_SUPPORT_0_11: return 0xFFF; default: break; } return 0; } static void he_build_mcs_mask(u16 he_mcs_map, u16 he_mcs_mask[NL80211_HE_NSS_MAX]) { u8 nss; for (nss = 0; nss < NL80211_HE_NSS_MAX; nss++) { he_mcs_mask[nss] = he_mcs_map_to_mcs_mask(he_mcs_map & 0x03); he_mcs_map >>= 2; } } static u16 he_get_txmcsmap(struct genl_info *info, unsigned int link_id, const struct ieee80211_sta_he_cap *he_cap) { struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_chan_def *chandef; __le16 tx_mcs; chandef = wdev_chandef(wdev, link_id); if (!chandef) { /* * This is probably broken, but we never maintained * a chandef in these cases, so it always was. */ return le16_to_cpu(he_cap->he_mcs_nss_supp.tx_mcs_80); } switch (chandef->width) { case NL80211_CHAN_WIDTH_80P80: tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80p80; break; case NL80211_CHAN_WIDTH_160: tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_160; break; default: tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80; break; } return le16_to_cpu(tx_mcs); } static bool he_set_mcs_mask(struct genl_info *info, struct wireless_dev *wdev, struct ieee80211_supported_band *sband, struct nl80211_txrate_he *txrate, u16 mcs[NL80211_HE_NSS_MAX], unsigned int link_id) { const struct ieee80211_sta_he_cap *he_cap; u16 tx_mcs_mask[NL80211_HE_NSS_MAX] = {}; u16 tx_mcs_map = 0; u8 i; he_cap = ieee80211_get_he_iftype_cap(sband, wdev->iftype); if (!he_cap) return false; memset(mcs, 0, sizeof(u16) * NL80211_HE_NSS_MAX); tx_mcs_map = he_get_txmcsmap(info, link_id, he_cap); /* Build he_mcs_mask from HE capabilities */ he_build_mcs_mask(tx_mcs_map, tx_mcs_mask); for (i = 0; i < NL80211_HE_NSS_MAX; i++) { if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) mcs[i] = txrate->mcs[i]; else return false; } return true; } static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, struct 
nlattr *attrs[], enum nl80211_attrs attr, struct cfg80211_bitrate_mask *mask, struct net_device *dev, bool default_all_enabled, unsigned int link_id) { struct nlattr *tb[NL80211_TXRATE_MAX + 1]; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = dev->ieee80211_ptr; int rem, i; struct nlattr *tx_rates; struct ieee80211_supported_band *sband; u16 vht_tx_mcs_map, he_tx_mcs_map; memset(mask, 0, sizeof(*mask)); /* Default to all rates enabled */ for (i = 0; i < NUM_NL80211_BANDS; i++) { const struct ieee80211_sta_he_cap *he_cap; if (!default_all_enabled) break; sband = rdev->wiphy.bands[i]; if (!sband) continue; mask->control[i].legacy = (1 << sband->n_bitrates) - 1; memcpy(mask->control[i].ht_mcs, sband->ht_cap.mcs.rx_mask, sizeof(mask->control[i].ht_mcs)); if (sband->vht_cap.vht_supported) { vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs); } he_cap = ieee80211_get_he_iftype_cap(sband, wdev->iftype); if (!he_cap) continue; he_tx_mcs_map = he_get_txmcsmap(info, link_id, he_cap); he_build_mcs_mask(he_tx_mcs_map, mask->control[i].he_mcs); mask->control[i].he_gi = 0xFF; mask->control[i].he_ltf = 0xFF; } /* if no rates are given set it back to the defaults */ if (!attrs[attr]) goto out; /* The nested attribute uses enum nl80211_band as the index. This maps * directly to the enum nl80211_band values used in cfg80211. 
*/ BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); nla_for_each_nested(tx_rates, attrs[attr], rem) { enum nl80211_band band = nla_type(tx_rates); int err; if (band < 0 || band >= NUM_NL80211_BANDS) return -EINVAL; sband = rdev->wiphy.bands[band]; if (sband == NULL) return -EINVAL; err = nla_parse_nested_deprecated(tb, NL80211_TXRATE_MAX, tx_rates, nl80211_txattr_policy, info->extack); if (err) return err; if (tb[NL80211_TXRATE_LEGACY]) { mask->control[band].legacy = rateset_to_mask( sband, nla_data(tb[NL80211_TXRATE_LEGACY]), nla_len(tb[NL80211_TXRATE_LEGACY])); if ((mask->control[band].legacy == 0) && nla_len(tb[NL80211_TXRATE_LEGACY])) return -EINVAL; } if (tb[NL80211_TXRATE_HT]) { if (!ht_rateset_to_mask( sband, nla_data(tb[NL80211_TXRATE_HT]), nla_len(tb[NL80211_TXRATE_HT]), mask->control[band].ht_mcs)) return -EINVAL; } if (tb[NL80211_TXRATE_VHT]) { if (!vht_set_mcs_mask( sband, nla_data(tb[NL80211_TXRATE_VHT]), mask->control[band].vht_mcs)) return -EINVAL; } if (tb[NL80211_TXRATE_GI]) { mask->control[band].gi = nla_get_u8(tb[NL80211_TXRATE_GI]); if (mask->control[band].gi > NL80211_TXRATE_FORCE_LGI) return -EINVAL; } if (tb[NL80211_TXRATE_HE] && !he_set_mcs_mask(info, wdev, sband, nla_data(tb[NL80211_TXRATE_HE]), mask->control[band].he_mcs, link_id)) return -EINVAL; if (tb[NL80211_TXRATE_HE_GI]) mask->control[band].he_gi = nla_get_u8(tb[NL80211_TXRATE_HE_GI]); if (tb[NL80211_TXRATE_HE_LTF]) mask->control[band].he_ltf = nla_get_u8(tb[NL80211_TXRATE_HE_LTF]); if (mask->control[band].legacy == 0) { /* don't allow empty legacy rates if HT, VHT or HE * are not even supported. 
*/ if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported || rdev->wiphy.bands[band]->vht_cap.vht_supported || ieee80211_get_he_iftype_cap(sband, wdev->iftype))) return -EINVAL; for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) if (mask->control[band].ht_mcs[i]) goto out; for (i = 0; i < NL80211_VHT_NSS_MAX; i++) if (mask->control[band].vht_mcs[i]) goto out; for (i = 0; i < NL80211_HE_NSS_MAX; i++) if (mask->control[band].he_mcs[i]) goto out; /* legacy and mcs rates may not be both empty */ return -EINVAL; } } out: return 0; } static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev, enum nl80211_band band, struct cfg80211_bitrate_mask *beacon_rate) { u32 count_ht, count_vht, count_he, i; u32 rate = beacon_rate->control[band].legacy; /* Allow only one rate */ if (hweight32(rate) > 1) return -EINVAL; count_ht = 0; for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) { if (hweight8(beacon_rate->control[band].ht_mcs[i]) > 1) { return -EINVAL; } else if (beacon_rate->control[band].ht_mcs[i]) { count_ht++; if (count_ht > 1) return -EINVAL; } if (count_ht && rate) return -EINVAL; } count_vht = 0; for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { if (hweight16(beacon_rate->control[band].vht_mcs[i]) > 1) { return -EINVAL; } else if (beacon_rate->control[band].vht_mcs[i]) { count_vht++; if (count_vht > 1) return -EINVAL; } if (count_vht && rate) return -EINVAL; } count_he = 0; for (i = 0; i < NL80211_HE_NSS_MAX; i++) { if (hweight16(beacon_rate->control[band].he_mcs[i]) > 1) { return -EINVAL; } else if (beacon_rate->control[band].he_mcs[i]) { count_he++; if (count_he > 1) return -EINVAL; } if (count_he && rate) return -EINVAL; } if ((count_ht && count_vht && count_he) || (!rate && !count_ht && !count_vht && !count_he)) return -EINVAL; if (rate && !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) return -EINVAL; if (count_ht && !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_RATE_HT)) return -EINVAL; if (count_vht && 
!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_RATE_VHT)) return -EINVAL; if (count_he && !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_RATE_HE)) return -EINVAL; return 0; } static int nl80211_parse_mbssid_config(struct wiphy *wiphy, struct net_device *dev, struct nlattr *attrs, struct cfg80211_mbssid_config *config, u8 num_elems) { struct nlattr *tb[NL80211_MBSSID_CONFIG_ATTR_MAX + 1]; if (!wiphy->mbssid_max_interfaces) return -EOPNOTSUPP; if (nla_parse_nested(tb, NL80211_MBSSID_CONFIG_ATTR_MAX, attrs, NULL, NULL) || !tb[NL80211_MBSSID_CONFIG_ATTR_INDEX]) return -EINVAL; config->ema = nla_get_flag(tb[NL80211_MBSSID_CONFIG_ATTR_EMA]); if (config->ema) { if (!wiphy->ema_max_profile_periodicity) return -EOPNOTSUPP; if (num_elems > wiphy->ema_max_profile_periodicity) return -EINVAL; } config->index = nla_get_u8(tb[NL80211_MBSSID_CONFIG_ATTR_INDEX]); if (config->index >= wiphy->mbssid_max_interfaces || (!config->index && !num_elems)) return -EINVAL; if (tb[NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX]) { u32 tx_ifindex = nla_get_u32(tb[NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX]); if ((!config->index && tx_ifindex != dev->ifindex) || (config->index && tx_ifindex == dev->ifindex)) return -EINVAL; if (tx_ifindex != dev->ifindex) { struct net_device *tx_netdev = dev_get_by_index(wiphy_net(wiphy), tx_ifindex); if (!tx_netdev || !tx_netdev->ieee80211_ptr || tx_netdev->ieee80211_ptr->wiphy != wiphy || tx_netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) { dev_put(tx_netdev); return -EINVAL; } config->tx_wdev = tx_netdev->ieee80211_ptr; } else { config->tx_wdev = dev->ieee80211_ptr; } } else if (!config->index) { config->tx_wdev = dev->ieee80211_ptr; } else { return -EINVAL; } return 0; } static struct cfg80211_mbssid_elems * nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs) { struct nlattr *nl_elems; struct cfg80211_mbssid_elems *elems; int rem_elems; u8 i = 0, num_elems = 0; if (!wiphy->mbssid_max_interfaces) return 
ERR_PTR(-EINVAL); nla_for_each_nested(nl_elems, attrs, rem_elems) { if (num_elems >= 255) return ERR_PTR(-EINVAL); num_elems++; } elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL); if (!elems) return ERR_PTR(-ENOMEM); elems->cnt = num_elems; nla_for_each_nested(nl_elems, attrs, rem_elems) { elems->elem[i].data = nla_data(nl_elems); elems->elem[i].len = nla_len(nl_elems); i++; } return elems; } static struct cfg80211_rnr_elems * nl80211_parse_rnr_elems(struct wiphy *wiphy, struct nlattr *attrs, struct netlink_ext_ack *extack) { struct nlattr *nl_elems; struct cfg80211_rnr_elems *elems; int rem_elems; u8 i = 0, num_elems = 0; nla_for_each_nested(nl_elems, attrs, rem_elems) { int ret; ret = validate_ie_attr(nl_elems, extack); if (ret) return ERR_PTR(ret); num_elems++; } elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL); if (!elems) return ERR_PTR(-ENOMEM); elems->cnt = num_elems; nla_for_each_nested(nl_elems, attrs, rem_elems) { elems->elem[i].data = nla_data(nl_elems); elems->elem[i].len = nla_len(nl_elems); i++; } return elems; } static int nl80211_parse_he_bss_color(struct nlattr *attrs, struct cfg80211_he_bss_color *he_bss_color) { struct nlattr *tb[NL80211_HE_BSS_COLOR_ATTR_MAX + 1]; int err; err = nla_parse_nested(tb, NL80211_HE_BSS_COLOR_ATTR_MAX, attrs, he_bss_color_policy, NULL); if (err) return err; if (!tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]) return -EINVAL; he_bss_color->color = nla_get_u8(tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]); he_bss_color->enabled = !nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]); he_bss_color->partial = nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_PARTIAL]); return 0; } static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, struct nlattr *attrs[], struct cfg80211_beacon_data *bcn, struct netlink_ext_ack *extack) { bool haveinfo = false; int err; memset(bcn, 0, sizeof(*bcn)); bcn->link_id = nl80211_link_id(attrs); if (attrs[NL80211_ATTR_BEACON_HEAD]) { bcn->head = 
nla_data(attrs[NL80211_ATTR_BEACON_HEAD]); bcn->head_len = nla_len(attrs[NL80211_ATTR_BEACON_HEAD]); if (!bcn->head_len) return -EINVAL; haveinfo = true; } if (attrs[NL80211_ATTR_BEACON_TAIL]) { bcn->tail = nla_data(attrs[NL80211_ATTR_BEACON_TAIL]); bcn->tail_len = nla_len(attrs[NL80211_ATTR_BEACON_TAIL]); haveinfo = true; } if (!haveinfo) return -EINVAL; if (attrs[NL80211_ATTR_IE]) { bcn->beacon_ies = nla_data(attrs[NL80211_ATTR_IE]); bcn->beacon_ies_len = nla_len(attrs[NL80211_ATTR_IE]); } if (attrs[NL80211_ATTR_IE_PROBE_RESP]) { bcn->proberesp_ies = nla_data(attrs[NL80211_ATTR_IE_PROBE_RESP]); bcn->proberesp_ies_len = nla_len(attrs[NL80211_ATTR_IE_PROBE_RESP]); } if (attrs[NL80211_ATTR_IE_ASSOC_RESP]) { bcn->assocresp_ies = nla_data(attrs[NL80211_ATTR_IE_ASSOC_RESP]); bcn->assocresp_ies_len = nla_len(attrs[NL80211_ATTR_IE_ASSOC_RESP]); } if (attrs[NL80211_ATTR_PROBE_RESP]) { bcn->probe_resp = nla_data(attrs[NL80211_ATTR_PROBE_RESP]); bcn->probe_resp_len = nla_len(attrs[NL80211_ATTR_PROBE_RESP]); } if (attrs[NL80211_ATTR_FTM_RESPONDER]) { struct nlattr *tb[NL80211_FTM_RESP_ATTR_MAX + 1]; err = nla_parse_nested_deprecated(tb, NL80211_FTM_RESP_ATTR_MAX, attrs[NL80211_ATTR_FTM_RESPONDER], NULL, NULL); if (err) return err; if (tb[NL80211_FTM_RESP_ATTR_ENABLED] && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER)) bcn->ftm_responder = 1; else return -EOPNOTSUPP; if (tb[NL80211_FTM_RESP_ATTR_LCI]) { bcn->lci = nla_data(tb[NL80211_FTM_RESP_ATTR_LCI]); bcn->lci_len = nla_len(tb[NL80211_FTM_RESP_ATTR_LCI]); } if (tb[NL80211_FTM_RESP_ATTR_CIVICLOC]) { bcn->civicloc = nla_data(tb[NL80211_FTM_RESP_ATTR_CIVICLOC]); bcn->civicloc_len = nla_len(tb[NL80211_FTM_RESP_ATTR_CIVICLOC]); } } else { bcn->ftm_responder = -1; } if (attrs[NL80211_ATTR_HE_BSS_COLOR]) { err = nl80211_parse_he_bss_color(attrs[NL80211_ATTR_HE_BSS_COLOR], &bcn->he_bss_color); if (err) return err; bcn->he_bss_color_valid = true; } if (attrs[NL80211_ATTR_MBSSID_ELEMS]) { struct 
cfg80211_mbssid_elems *mbssid = nl80211_parse_mbssid_elems(&rdev->wiphy, attrs[NL80211_ATTR_MBSSID_ELEMS]); if (IS_ERR(mbssid)) return PTR_ERR(mbssid); bcn->mbssid_ies = mbssid; if (bcn->mbssid_ies && attrs[NL80211_ATTR_EMA_RNR_ELEMS]) { struct cfg80211_rnr_elems *rnr = nl80211_parse_rnr_elems(&rdev->wiphy, attrs[NL80211_ATTR_EMA_RNR_ELEMS], extack); if (IS_ERR(rnr)) return PTR_ERR(rnr); if (rnr && rnr->cnt < bcn->mbssid_ies->cnt) return -EINVAL; bcn->rnr_ies = rnr; } } return 0; } static int nl80211_parse_he_obss_pd(struct nlattr *attrs, struct ieee80211_he_obss_pd *he_obss_pd) { struct nlattr *tb[NL80211_HE_OBSS_PD_ATTR_MAX + 1]; int err; err = nla_parse_nested(tb, NL80211_HE_OBSS_PD_ATTR_MAX, attrs, he_obss_pd_policy, NULL); if (err) return err; if (!tb[NL80211_HE_OBSS_PD_ATTR_SR_CTRL]) return -EINVAL; he_obss_pd->sr_ctrl = nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_SR_CTRL]); if (tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET]) he_obss_pd->min_offset = nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET]); if (tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET]) he_obss_pd->max_offset = nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET]); if (tb[NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET]) he_obss_pd->non_srg_max_offset = nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET]); if (he_obss_pd->min_offset > he_obss_pd->max_offset) return -EINVAL; if (tb[NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP]) memcpy(he_obss_pd->bss_color_bitmap, nla_data(tb[NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP]), sizeof(he_obss_pd->bss_color_bitmap)); if (tb[NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP]) memcpy(he_obss_pd->partial_bssid_bitmap, nla_data(tb[NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP]), sizeof(he_obss_pd->partial_bssid_bitmap)); he_obss_pd->enable = true; return 0; } static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev, struct nlattr *attrs, struct cfg80211_fils_discovery *fd) { struct nlattr *tb[NL80211_FILS_DISCOVERY_ATTR_MAX + 1]; int ret; if 
(!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_FILS_DISCOVERY)) return -EINVAL; ret = nla_parse_nested(tb, NL80211_FILS_DISCOVERY_ATTR_MAX, attrs, NULL, NULL); if (ret) return ret; if (!tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN] && !tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX] && !tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]) { fd->update = true; return 0; } if (!tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN] || !tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX] || !tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]) return -EINVAL; fd->tmpl_len = nla_len(tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]); fd->tmpl = nla_data(tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]); fd->min_interval = nla_get_u32(tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN]); fd->max_interval = nla_get_u32(tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX]); fd->update = true; return 0; } static int nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev, struct nlattr *attrs, struct cfg80211_unsol_bcast_probe_resp *presp) { struct nlattr *tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX + 1]; int ret; if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP)) return -EINVAL; ret = nla_parse_nested(tb, NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX, attrs, NULL, NULL); if (ret) return ret; if (!tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] && !tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]) { presp->update = true; return 0; } if (!tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] || !tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]) return -EINVAL; presp->tmpl = nla_data(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]); presp->tmpl_len = nla_len(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]); presp->interval = nla_get_u32(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT]); presp->update = true; return 0; } static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params, const struct element *rates) { int i; if (!rates) return; for (i = 0; i < rates->datalen; i++) { if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_HT_PHY) 
params->ht_required = true; if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_VHT_PHY) params->vht_required = true; if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_HE_PHY) params->he_required = true; if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_SAE_H2E) params->sae_h2e_required = true; } } /* * Since the nl80211 API didn't include, from the beginning, attributes about * HT/VHT requirements/capabilities, we parse them out of the IEs for the * benefit of drivers that rebuild IEs in the firmware. */ static int nl80211_calculate_ap_params(struct cfg80211_ap_settings *params) { const struct cfg80211_beacon_data *bcn = ¶ms->beacon; size_t ies_len = bcn->tail_len; const u8 *ies = bcn->tail; const struct element *rates; const struct element *cap; rates = cfg80211_find_elem(WLAN_EID_SUPP_RATES, ies, ies_len); nl80211_check_ap_rate_selectors(params, rates); rates = cfg80211_find_elem(WLAN_EID_EXT_SUPP_RATES, ies, ies_len); nl80211_check_ap_rate_selectors(params, rates); cap = cfg80211_find_elem(WLAN_EID_HT_CAPABILITY, ies, ies_len); if (cap && cap->datalen >= sizeof(*params->ht_cap)) params->ht_cap = (void *)cap->data; cap = cfg80211_find_elem(WLAN_EID_VHT_CAPABILITY, ies, ies_len); if (cap && cap->datalen >= sizeof(*params->vht_cap)) params->vht_cap = (void *)cap->data; cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ies, ies_len); if (cap && cap->datalen >= sizeof(*params->he_cap) + 1) params->he_cap = (void *)(cap->data + 1); cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ies, ies_len); if (cap && cap->datalen >= sizeof(*params->he_oper) + 1) params->he_oper = (void *)(cap->data + 1); cap = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_CAPABILITY, ies, ies_len); if (cap) { if (!cap->datalen) return -EINVAL; params->eht_cap = (void *)(cap->data + 1); if (!ieee80211_eht_capa_size_ok((const u8 *)params->he_cap, (const u8 *)params->eht_cap, cap->datalen - 1, true)) return -EINVAL; } cap = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_OPERATION, ies, ies_len); if (cap) { if 
(!cap->datalen) return -EINVAL; params->eht_oper = (void *)(cap->data + 1); if (!ieee80211_eht_oper_size_ok((const u8 *)params->eht_oper, cap->datalen - 1)) return -EINVAL; } return 0; } static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, struct cfg80211_ap_settings *params) { struct wireless_dev *wdev; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO) continue; if (!wdev->u.ap.preset_chandef.chan) continue; params->chandef = wdev->u.ap.preset_chandef; return true; } return false; } static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, enum nl80211_auth_type auth_type, enum nl80211_commands cmd) { if (auth_type > NL80211_AUTHTYPE_MAX) return false; switch (cmd) { case NL80211_CMD_AUTHENTICATE: if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) && auth_type == NL80211_AUTHTYPE_SAE) return false; if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_FILS_STA) && (auth_type == NL80211_AUTHTYPE_FILS_SK || auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || auth_type == NL80211_AUTHTYPE_FILS_PK)) return false; return true; case NL80211_CMD_CONNECT: if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) && !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_SAE_OFFLOAD) && auth_type == NL80211_AUTHTYPE_SAE) return false; /* FILS with SK PFS or PK not supported yet */ if (auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || auth_type == NL80211_AUTHTYPE_FILS_PK) return false; if (!wiphy_ext_feature_isset( &rdev->wiphy, NL80211_EXT_FEATURE_FILS_SK_OFFLOAD) && auth_type == NL80211_AUTHTYPE_FILS_SK) return false; return true; case NL80211_CMD_START_AP: if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_SAE_OFFLOAD_AP) && auth_type == NL80211_AUTHTYPE_SAE) return false; /* FILS not supported yet */ if (auth_type == NL80211_AUTHTYPE_FILS_SK || auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || auth_type == NL80211_AUTHTYPE_FILS_PK) return 
false; return true; default: return false; } } static void nl80211_send_ap_started(struct wireless_dev *wdev, unsigned int link_id) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_START_AP); if (!hdr) goto out; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD) || (wdev->u.ap.ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->u.ap.ssid_len, wdev->u.ap.ssid)) || (wdev->valid_links && nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id))) goto out; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); return; out: nlmsg_free(msg); } static int nl80211_validate_ap_phy_operation(struct cfg80211_ap_settings *params) { struct ieee80211_channel *channel = params->chandef.chan; if ((params->he_cap || params->he_oper) && (channel->flags & IEEE80211_CHAN_NO_HE)) return -EOPNOTSUPP; if ((params->eht_cap || params->eht_oper) && (channel->flags & IEEE80211_CHAN_NO_EHT)) return -EOPNOTSUPP; return 0; } static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings *params; int err; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; if (!rdev->ops->start_ap) return -EOPNOTSUPP; if (wdev->links[link_id].ap.beacon_interval) return -EALREADY; /* these are required for START_AP */ if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] || 
!info->attrs[NL80211_ATTR_DTIM_PERIOD] || !info->attrs[NL80211_ATTR_BEACON_HEAD]) return -EINVAL; params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; err = nl80211_parse_beacon(rdev, info->attrs, ¶ms->beacon, info->extack); if (err) goto out; params->beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); params->dtim_period = nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); err = cfg80211_validate_beacon_int(rdev, dev->ieee80211_ptr->iftype, params->beacon_interval); if (err) goto out; /* * In theory, some of these attributes should be required here * but since they were not used when the command was originally * added, keep them optional for old user space programs to let * them continue to work with drivers that do not need the * additional information -- drivers must check! */ if (info->attrs[NL80211_ATTR_SSID]) { params->ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); params->ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); if (params->ssid_len == 0) { err = -EINVAL; goto out; } if (wdev->u.ap.ssid_len && (wdev->u.ap.ssid_len != params->ssid_len || memcmp(wdev->u.ap.ssid, params->ssid, params->ssid_len))) { /* require identical SSID for MLO */ err = -EINVAL; goto out; } } else if (wdev->valid_links) { /* require SSID for MLO */ err = -EINVAL; goto out; } if (info->attrs[NL80211_ATTR_HIDDEN_SSID]) params->hidden_ssid = nla_get_u32( info->attrs[NL80211_ATTR_HIDDEN_SSID]); params->privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; if (info->attrs[NL80211_ATTR_AUTH_TYPE]) { params->auth_type = nla_get_u32( info->attrs[NL80211_ATTR_AUTH_TYPE]); if (!nl80211_valid_auth_type(rdev, params->auth_type, NL80211_CMD_START_AP)) { err = -EINVAL; goto out; } } else params->auth_type = NL80211_AUTHTYPE_AUTOMATIC; err = nl80211_crypto_settings(rdev, info, ¶ms->crypto, NL80211_MAX_NR_CIPHER_SUITES); if (err) goto out; if (info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]) { if (!(rdev->wiphy.features & NL80211_FEATURE_INACTIVITY_TIMER)) { 
err = -EOPNOTSUPP; goto out; } params->inactivity_timeout = nla_get_u16( info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]); } if (info->attrs[NL80211_ATTR_P2P_CTWINDOW]) { if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { err = -EINVAL; goto out; } params->p2p_ctwindow = nla_get_u8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]); if (params->p2p_ctwindow != 0 && !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN)) { err = -EINVAL; goto out; } } if (info->attrs[NL80211_ATTR_P2P_OPPPS]) { u8 tmp; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { err = -EINVAL; goto out; } tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]); params->p2p_opp_ps = tmp; if (params->p2p_opp_ps != 0 && !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS)) { err = -EINVAL; goto out; } } if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { err = nl80211_parse_chandef(rdev, info, ¶ms->chandef); if (err) goto out; } else if (wdev->valid_links) { /* with MLD need to specify the channel configuration */ err = -EINVAL; goto out; } else if (wdev->u.ap.preset_chandef.chan) { params->chandef = wdev->u.ap.preset_chandef; } else if (!nl80211_get_ap_channel(rdev, params)) { err = -EINVAL; goto out; } if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) { err = nl80211_parse_punct_bitmap(rdev, info, ¶ms->chandef, ¶ms->punct_bitmap); if (err) goto out; } if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms->chandef, wdev->iftype)) { err = -EINVAL; goto out; } if (info->attrs[NL80211_ATTR_TX_RATES]) { err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, ¶ms->beacon_rate, dev, false, link_id); if (err) goto out; err = validate_beacon_tx_rate(rdev, params->chandef.chan->band, ¶ms->beacon_rate); if (err) goto out; } if (info->attrs[NL80211_ATTR_SMPS_MODE]) { params->smps_mode = nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]); switch (params->smps_mode) { case NL80211_SMPS_OFF: break; case NL80211_SMPS_STATIC: if (!(rdev->wiphy.features & NL80211_FEATURE_STATIC_SMPS)) { err = -EINVAL; goto 
out; } break; case NL80211_SMPS_DYNAMIC: if (!(rdev->wiphy.features & NL80211_FEATURE_DYNAMIC_SMPS)) { err = -EINVAL; goto out; } break; default: err = -EINVAL; goto out; } } else { params->smps_mode = NL80211_SMPS_OFF; } params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) { err = -EOPNOTSUPP; goto out; } if (info->attrs[NL80211_ATTR_ACL_POLICY]) { params->acl = parse_acl_data(&rdev->wiphy, info); if (IS_ERR(params->acl)) { err = PTR_ERR(params->acl); params->acl = NULL; goto out; } } params->twt_responder = nla_get_flag(info->attrs[NL80211_ATTR_TWT_RESPONDER]); if (info->attrs[NL80211_ATTR_HE_OBSS_PD]) { err = nl80211_parse_he_obss_pd( info->attrs[NL80211_ATTR_HE_OBSS_PD], ¶ms->he_obss_pd); if (err) goto out; } if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) { err = nl80211_parse_fils_discovery(rdev, info->attrs[NL80211_ATTR_FILS_DISCOVERY], ¶ms->fils_discovery); if (err) goto out; } if (info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) { err = nl80211_parse_unsol_bcast_probe_resp( rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP], ¶ms->unsol_bcast_probe_resp); if (err) goto out; } if (info->attrs[NL80211_ATTR_MBSSID_CONFIG]) { err = nl80211_parse_mbssid_config(&rdev->wiphy, dev, info->attrs[NL80211_ATTR_MBSSID_CONFIG], ¶ms->mbssid_config, params->beacon.mbssid_ies ? 
params->beacon.mbssid_ies->cnt : 0); if (err) goto out; } if (!params->mbssid_config.ema && params->beacon.rnr_ies) { err = -EINVAL; goto out; } err = nl80211_calculate_ap_params(params); if (err) goto out; err = nl80211_validate_ap_phy_operation(params); if (err) goto out; if (info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]) params->flags = nla_get_u32( info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]); else if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) params->flags |= NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT; if (wdev->conn_owner_nlportid && info->attrs[NL80211_ATTR_SOCKET_OWNER] && wdev->conn_owner_nlportid != info->snd_portid) { err = -EINVAL; goto out; } /* FIXME: validate MLO/link-id against driver capabilities */ err = rdev_start_ap(rdev, dev, params); if (!err) { wdev->links[link_id].ap.beacon_interval = params->beacon_interval; wdev->links[link_id].ap.chandef = params->chandef; wdev->u.ap.ssid_len = params->ssid_len; memcpy(wdev->u.ap.ssid, params->ssid, params->ssid_len); if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) wdev->conn_owner_nlportid = info->snd_portid; nl80211_send_ap_started(wdev, link_id); } out: kfree(params->acl); kfree(params->beacon.mbssid_ies); if (params->mbssid_config.tx_wdev && params->mbssid_config.tx_wdev->netdev && params->mbssid_config.tx_wdev->netdev != dev) dev_put(params->mbssid_config.tx_wdev->netdev); kfree(params->beacon.rnr_ies); kfree(params); return err; } static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_update *params; struct nlattr *attr; int err; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; if (!rdev->ops->change_beacon) return -EOPNOTSUPP; if (!wdev->links[link_id].ap.beacon_interval) return 
-EINVAL; params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; err = nl80211_parse_beacon(rdev, info->attrs, ¶ms->beacon, info->extack); if (err) goto out; attr = info->attrs[NL80211_ATTR_FILS_DISCOVERY]; if (attr) { err = nl80211_parse_fils_discovery(rdev, attr, ¶ms->fils_discovery); if (err) goto out; } attr = info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]; if (attr) { err = nl80211_parse_unsol_bcast_probe_resp(rdev, attr, ¶ms->unsol_bcast_probe_resp); if (err) goto out; } err = rdev_change_beacon(rdev, dev, params); out: kfree(params->beacon.mbssid_ies); kfree(params->beacon.rnr_ies); kfree(params); return err; } static int nl80211_stop_ap(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; return cfg80211_stop_ap(rdev, dev, link_id, false); } static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = { [NL80211_STA_FLAG_AUTHORIZED] = { .type = NLA_FLAG }, [NL80211_STA_FLAG_SHORT_PREAMBLE] = { .type = NLA_FLAG }, [NL80211_STA_FLAG_WME] = { .type = NLA_FLAG }, [NL80211_STA_FLAG_MFP] = { .type = NLA_FLAG }, [NL80211_STA_FLAG_AUTHENTICATED] = { .type = NLA_FLAG }, [NL80211_STA_FLAG_TDLS_PEER] = { .type = NLA_FLAG }, }; static int parse_station_flags(struct genl_info *info, enum nl80211_iftype iftype, struct station_parameters *params) { struct nlattr *flags[NL80211_STA_FLAG_MAX + 1]; struct nlattr *nla; int flag; /* * Try parsing the new attribute first so userspace * can specify both for older kernels. 
*/ nla = info->attrs[NL80211_ATTR_STA_FLAGS2]; if (nla) { struct nl80211_sta_flag_update *sta_flags; sta_flags = nla_data(nla); params->sta_flags_mask = sta_flags->mask; params->sta_flags_set = sta_flags->set; params->sta_flags_set &= params->sta_flags_mask; if ((params->sta_flags_mask | params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID)) return -EINVAL; return 0; } /* if present, parse the old attribute */ nla = info->attrs[NL80211_ATTR_STA_FLAGS]; if (!nla) return 0; if (nla_parse_nested_deprecated(flags, NL80211_STA_FLAG_MAX, nla, sta_flags_policy, info->extack)) return -EINVAL; /* * Only allow certain flags for interface types so that * other attributes are silently ignored. Remember that * this is backward compatibility code with old userspace * and shouldn't be hit in other cases anyway. */ switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP); break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | BIT(NL80211_STA_FLAG_TDLS_PEER); break; case NL80211_IFTYPE_MESH_POINT: params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHORIZED); break; default: return -EINVAL; } for (flag = 1; flag <= NL80211_STA_FLAG_MAX; flag++) { if (flags[flag]) { params->sta_flags_set |= (1<<flag); /* no longer support new API additions in old API */ if (flag > NL80211_STA_FLAG_MAX_OLD_API) return -EINVAL; } } return 0; } bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, int attr) { struct nlattr *rate; u32 bitrate; u16 bitrate_compat; enum nl80211_rate_info rate_flg; rate = nla_nest_start_noflag(msg, attr); if (!rate) return false; /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */ bitrate = 
cfg80211_calculate_bitrate(info); /* report 16-bit bitrate only if we can */ bitrate_compat = bitrate < (1UL << 16) ? bitrate : 0; if (bitrate > 0 && nla_put_u32(msg, NL80211_RATE_INFO_BITRATE32, bitrate)) return false; if (bitrate_compat > 0 && nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate_compat)) return false; switch (info->bw) { case RATE_INFO_BW_1: rate_flg = NL80211_RATE_INFO_1_MHZ_WIDTH; break; case RATE_INFO_BW_2: rate_flg = NL80211_RATE_INFO_2_MHZ_WIDTH; break; case RATE_INFO_BW_4: rate_flg = NL80211_RATE_INFO_4_MHZ_WIDTH; break; case RATE_INFO_BW_5: rate_flg = NL80211_RATE_INFO_5_MHZ_WIDTH; break; case RATE_INFO_BW_8: rate_flg = NL80211_RATE_INFO_8_MHZ_WIDTH; break; case RATE_INFO_BW_10: rate_flg = NL80211_RATE_INFO_10_MHZ_WIDTH; break; case RATE_INFO_BW_16: rate_flg = NL80211_RATE_INFO_16_MHZ_WIDTH; break; default: WARN_ON(1); fallthrough; case RATE_INFO_BW_20: rate_flg = 0; break; case RATE_INFO_BW_40: rate_flg = NL80211_RATE_INFO_40_MHZ_WIDTH; break; case RATE_INFO_BW_80: rate_flg = NL80211_RATE_INFO_80_MHZ_WIDTH; break; case RATE_INFO_BW_160: rate_flg = NL80211_RATE_INFO_160_MHZ_WIDTH; break; case RATE_INFO_BW_HE_RU: rate_flg = 0; WARN_ON(!(info->flags & RATE_INFO_FLAGS_HE_MCS)); break; case RATE_INFO_BW_320: rate_flg = NL80211_RATE_INFO_320_MHZ_WIDTH; break; case RATE_INFO_BW_EHT_RU: rate_flg = 0; WARN_ON(!(info->flags & RATE_INFO_FLAGS_EHT_MCS)); break; } if (rate_flg && nla_put_flag(msg, rate_flg)) return false; if (info->flags & RATE_INFO_FLAGS_MCS) { if (nla_put_u8(msg, NL80211_RATE_INFO_MCS, info->mcs)) return false; if (info->flags & RATE_INFO_FLAGS_SHORT_GI && nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) return false; } else if (info->flags & RATE_INFO_FLAGS_VHT_MCS) { if (nla_put_u8(msg, NL80211_RATE_INFO_VHT_MCS, info->mcs)) return false; if (nla_put_u8(msg, NL80211_RATE_INFO_VHT_NSS, info->nss)) return false; if (info->flags & RATE_INFO_FLAGS_SHORT_GI && nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) return false; } else if 
(info->flags & RATE_INFO_FLAGS_HE_MCS) { if (nla_put_u8(msg, NL80211_RATE_INFO_HE_MCS, info->mcs)) return false; if (nla_put_u8(msg, NL80211_RATE_INFO_HE_NSS, info->nss)) return false; if (nla_put_u8(msg, NL80211_RATE_INFO_HE_GI, info->he_gi)) return false; if (nla_put_u8(msg, NL80211_RATE_INFO_HE_DCM, info->he_dcm)) return false; if (info->bw == RATE_INFO_BW_HE_RU && nla_put_u8(msg, NL80211_RATE_INFO_HE_RU_ALLOC, info->he_ru_alloc)) return false; } else if (info->flags & RATE_INFO_FLAGS_S1G_MCS) { if (nla_put_u8(msg, NL80211_RATE_INFO_S1G_MCS, info->mcs)) return false; if (nla_put_u8(msg, NL80211_RATE_INFO_S1G_NSS, info->nss)) return false; if (info->flags & RATE_INFO_FLAGS_SHORT_GI && nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) return false; } else if (info->flags & RATE_INFO_FLAGS_EHT_MCS) { if (nla_put_u8(msg, NL80211_RATE_INFO_EHT_MCS, info->mcs)) return false; if (nla_put_u8(msg, NL80211_RATE_INFO_EHT_NSS, info->nss)) return false; if (nla_put_u8(msg, NL80211_RATE_INFO_EHT_GI, info->eht_gi)) return false; if (info->bw == RATE_INFO_BW_EHT_RU && nla_put_u8(msg, NL80211_RATE_INFO_EHT_RU_ALLOC, info->eht_ru_alloc)) return false; } nla_nest_end(msg, rate); return true; } static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal, int id) { void *attr; int i = 0; if (!mask) return true; attr = nla_nest_start_noflag(msg, id); if (!attr) return false; for (i = 0; i < IEEE80211_MAX_CHAINS; i++) { if (!(mask & BIT(i))) continue; if (nla_put_u8(msg, i, signal[i])) return false; } nla_nest_end(msg, attr); return true; } static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo) { void *hdr; struct nlattr *sinfoattr, *bss_param; hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) { cfg80211_sinfo_release_content(sinfo); return -1; } if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || 
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr) || nla_put_u32(msg, NL80211_ATTR_GENERATION, sinfo->generation)) goto nla_put_failure; sinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_STA_INFO); if (!sinfoattr) goto nla_put_failure; #define PUT_SINFO(attr, memb, type) do { \ BUILD_BUG_ON(sizeof(type) == sizeof(u64)); \ if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) && \ nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr, \ sinfo->memb)) \ goto nla_put_failure; \ } while (0) #define PUT_SINFO_U64(attr, memb) do { \ if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) && \ nla_put_u64_64bit(msg, NL80211_STA_INFO_ ## attr, \ sinfo->memb, NL80211_STA_INFO_PAD)) \ goto nla_put_failure; \ } while (0) PUT_SINFO(CONNECTED_TIME, connected_time, u32); PUT_SINFO(INACTIVE_TIME, inactive_time, u32); PUT_SINFO_U64(ASSOC_AT_BOOTTIME, assoc_at); if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES) | BIT_ULL(NL80211_STA_INFO_RX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES, (u32)sinfo->rx_bytes)) goto nla_put_failure; if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES) | BIT_ULL(NL80211_STA_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, (u32)sinfo->tx_bytes)) goto nla_put_failure; PUT_SINFO_U64(RX_BYTES64, rx_bytes); PUT_SINFO_U64(TX_BYTES64, tx_bytes); PUT_SINFO(LLID, llid, u16); PUT_SINFO(PLID, plid, u16); PUT_SINFO(PLINK_STATE, plink_state, u8); PUT_SINFO_U64(RX_DURATION, rx_duration); PUT_SINFO_U64(TX_DURATION, tx_duration); if (wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) PUT_SINFO(AIRTIME_WEIGHT, airtime_weight, u16); switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: PUT_SINFO(SIGNAL, signal, u8); PUT_SINFO(SIGNAL_AVG, signal_avg, u8); break; default: break; } if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) { if (!nl80211_put_signal(msg, sinfo->chains, sinfo->chain_signal, NL80211_STA_INFO_CHAIN_SIGNAL)) goto nla_put_failure; } if (sinfo->filled & 
BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) { if (!nl80211_put_signal(msg, sinfo->chains, sinfo->chain_signal_avg, NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) goto nla_put_failure; } if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) { if (!nl80211_put_sta_rate(msg, &sinfo->txrate, NL80211_STA_INFO_TX_BITRATE)) goto nla_put_failure; } if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) { if (!nl80211_put_sta_rate(msg, &sinfo->rxrate, NL80211_STA_INFO_RX_BITRATE)) goto nla_put_failure; } PUT_SINFO(RX_PACKETS, rx_packets, u32); PUT_SINFO(TX_PACKETS, tx_packets, u32); PUT_SINFO(TX_RETRIES, tx_retries, u32); PUT_SINFO(TX_FAILED, tx_failed, u32); PUT_SINFO(EXPECTED_THROUGHPUT, expected_throughput, u32); PUT_SINFO(AIRTIME_LINK_METRIC, airtime_link_metric, u32); PUT_SINFO(BEACON_LOSS, beacon_loss_count, u32); PUT_SINFO(LOCAL_PM, local_pm, u32); PUT_SINFO(PEER_PM, peer_pm, u32); PUT_SINFO(NONPEER_PM, nonpeer_pm, u32); PUT_SINFO(CONNECTED_TO_GATE, connected_to_gate, u8); PUT_SINFO(CONNECTED_TO_AS, connected_to_as, u8); if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) { bss_param = nla_nest_start_noflag(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) goto nla_put_failure; if (((sinfo->bss_param.flags & BSS_PARAM_FLAGS_CTS_PROT) && nla_put_flag(msg, NL80211_STA_BSS_PARAM_CTS_PROT)) || ((sinfo->bss_param.flags & BSS_PARAM_FLAGS_SHORT_PREAMBLE) && nla_put_flag(msg, NL80211_STA_BSS_PARAM_SHORT_PREAMBLE)) || ((sinfo->bss_param.flags & BSS_PARAM_FLAGS_SHORT_SLOT_TIME) && nla_put_flag(msg, NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME)) || nla_put_u8(msg, NL80211_STA_BSS_PARAM_DTIM_PERIOD, sinfo->bss_param.dtim_period) || nla_put_u16(msg, NL80211_STA_BSS_PARAM_BEACON_INTERVAL, sinfo->bss_param.beacon_interval)) goto nla_put_failure; nla_nest_end(msg, bss_param); } if ((sinfo->filled & BIT_ULL(NL80211_STA_INFO_STA_FLAGS)) && nla_put(msg, NL80211_STA_INFO_STA_FLAGS, sizeof(struct nl80211_sta_flag_update), &sinfo->sta_flags)) goto nla_put_failure; PUT_SINFO_U64(T_OFFSET, 
t_offset); PUT_SINFO_U64(RX_DROP_MISC, rx_dropped_misc); PUT_SINFO_U64(BEACON_RX, rx_beacon); PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8); PUT_SINFO(RX_MPDUS, rx_mpdu_count, u32); PUT_SINFO(FCS_ERROR_COUNT, fcs_err_count, u32); if (wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT)) { PUT_SINFO(ACK_SIGNAL, ack_signal, u8); PUT_SINFO(ACK_SIGNAL_AVG, avg_ack_signal, s8); } #undef PUT_SINFO #undef PUT_SINFO_U64 if (sinfo->pertid) { struct nlattr *tidsattr; int tid; tidsattr = nla_nest_start_noflag(msg, NL80211_STA_INFO_TID_STATS); if (!tidsattr) goto nla_put_failure; for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) { struct cfg80211_tid_stats *tidstats; struct nlattr *tidattr; tidstats = &sinfo->pertid[tid]; if (!tidstats->filled) continue; tidattr = nla_nest_start_noflag(msg, tid + 1); if (!tidattr) goto nla_put_failure; #define PUT_TIDVAL_U64(attr, memb) do { \ if (tidstats->filled & BIT(NL80211_TID_STATS_ ## attr) && \ nla_put_u64_64bit(msg, NL80211_TID_STATS_ ## attr, \ tidstats->memb, NL80211_TID_STATS_PAD)) \ goto nla_put_failure; \ } while (0) PUT_TIDVAL_U64(RX_MSDU, rx_msdu); PUT_TIDVAL_U64(TX_MSDU, tx_msdu); PUT_TIDVAL_U64(TX_MSDU_RETRIES, tx_msdu_retries); PUT_TIDVAL_U64(TX_MSDU_FAILED, tx_msdu_failed); #undef PUT_TIDVAL_U64 if ((tidstats->filled & BIT(NL80211_TID_STATS_TXQ_STATS)) && !nl80211_put_txq_stats(msg, &tidstats->txq_stats, NL80211_TID_STATS_TXQ_STATS)) goto nla_put_failure; nla_nest_end(msg, tidattr); } nla_nest_end(msg, tidsattr); } nla_nest_end(msg, sinfoattr); if (sinfo->assoc_req_ies_len && nla_put(msg, NL80211_ATTR_IE, sinfo->assoc_req_ies_len, sinfo->assoc_req_ies)) goto nla_put_failure; if (sinfo->assoc_resp_ies_len && nla_put(msg, NL80211_ATTR_RESP_IE, sinfo->assoc_resp_ies_len, sinfo->assoc_resp_ies)) goto nla_put_failure; if (sinfo->mlo_params_valid) { if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, sinfo->assoc_link_id)) goto nla_put_failure; if (!is_zero_ether_addr(sinfo->mld_addr) && nla_put(msg, 
NL80211_ATTR_MLD_ADDR, ETH_ALEN, sinfo->mld_addr)) goto nla_put_failure; } cfg80211_sinfo_release_content(sinfo); genlmsg_end(msg, hdr); return 0; nla_put_failure: cfg80211_sinfo_release_content(sinfo); genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nl80211_dump_station(struct sk_buff *skb, struct netlink_callback *cb) { struct station_info sinfo; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; u8 mac_addr[ETH_ALEN]; int sta_idx = cb->args[2]; int err; err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL); if (err) return err; /* nl80211_prepare_wdev_dump acquired it in the successful case */ __acquire(&rdev->wiphy.mtx); if (!wdev->netdev) { err = -EINVAL; goto out_err; } if (!rdev->ops->dump_station) { err = -EOPNOTSUPP; goto out_err; } while (1) { memset(&sinfo, 0, sizeof(sinfo)); err = rdev_dump_station(rdev, wdev->netdev, sta_idx, mac_addr, &sinfo); if (err == -ENOENT) break; if (err) goto out_err; if (nl80211_send_station(skb, NL80211_CMD_NEW_STATION, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev->netdev, mac_addr, &sinfo) < 0) goto out; sta_idx++; } out: cb->args[2] = sta_idx; err = skb->len; out_err: wiphy_unlock(&rdev->wiphy); return err; } static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct station_info sinfo; struct sk_buff *msg; u8 *mac_addr = NULL; int err; memset(&sinfo, 0, sizeof(sinfo)); if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); if (!rdev->ops->get_station) return -EOPNOTSUPP; err = rdev_get_station(rdev, dev, mac_addr, &sinfo); if (err) return err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { cfg80211_sinfo_release_content(&sinfo); return -ENOMEM; } if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, info->snd_portid, info->snd_seq, 0, rdev, dev, mac_addr, &sinfo) < 0) { 
nlmsg_free(msg);
		return -ENOBUFS;
	}

	return genlmsg_reply(msg, info);
}

/*
 * cfg80211_check_station_change - validate a station-change request
 * @wiphy: wiphy whose feature flags decide whether the AUTHENTICATED and
 *	ASSOCIATED flags may be touched for AP clients
 * @params: requested changes; a few fields are normalised in place
 * @statype: kind of station being changed, selects the rule set applied
 *
 * Checks that @params only asks for modifications that are permitted for a
 * station of type @statype. Besides validating, the function clears the
 * TDLS_PEER bit from params->sta_flags_mask where it is merely noise, and
 * resets link_sta_params.opmode_notif_used for every type other than
 * AP_CLIENT_UNASSOC and TDLS_PEER_SETUP.
 *
 * Return: 0 when the change is acceptable, -EINVAL when a parameter may not
 * be changed for this station type, -EOPNOTSUPP for an AP MLME client when
 * the AUTHORIZED flag is not part of the mask.
 */
int cfg80211_check_station_change(struct wiphy *wiphy,
				  struct station_parameters *params,
				  enum cfg80211_station_type statype)
{
	/*
	 * A value other than -1 means the attribute was supplied; these
	 * properties are only accepted for a not-yet-associated AP client.
	 */
	if (params->listen_interval != -1 &&
	    statype != CFG80211_STA_AP_CLIENT_UNASSOC)
		return -EINVAL;

	if (params->support_p2p_ps != -1 &&
	    statype != CFG80211_STA_AP_CLIENT_UNASSOC)
		return -EINVAL;

	/* an AID is tolerated for TDLS peers and unassociated AP clients only */
	if (params->aid &&
	    !(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) &&
	    statype != CFG80211_STA_AP_CLIENT_UNASSOC)
		return -EINVAL;

	/* When you run into this, adjust the code below for the new flag */
	BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);

	switch (statype) {
	case CFG80211_STA_MESH_PEER_KERNEL:
	case CFG80211_STA_MESH_PEER_USER:
		/*
		 * No ignoring the TDLS flag here -- the userspace mesh
		 * code doesn't have the bug of including TDLS in the
		 * mask everywhere.
		 */
		if (params->sta_flags_mask &
				~(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
				  BIT(NL80211_STA_FLAG_MFP) |
				  BIT(NL80211_STA_FLAG_AUTHORIZED)))
			return -EINVAL;
		break;
	case CFG80211_STA_TDLS_PEER_SETUP:
	case CFG80211_STA_TDLS_PEER_ACTIVE:
		if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
			return -EINVAL;
		/* ignore since it can't change */
		params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
		break;
	default:
		/* disallow mesh-specific things */
		if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION)
			return -EINVAL;
		if (params->local_pm)
			return -EINVAL;
		if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
			return -EINVAL;
	}

	if (statype != CFG80211_STA_TDLS_PEER_SETUP &&
	    statype != CFG80211_STA_TDLS_PEER_ACTIVE) {
		/* TDLS can't be set, ... */
		if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
			return -EINVAL;
		/*
		 * ... but don't bother the driver with it. This works around
		 * a hostapd/wpa_supplicant issue -- it always includes the
		 * TDLS_PEER flag in the mask even for AP mode.
		 */
		params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
	}

	if (statype != CFG80211_STA_TDLS_PEER_SETUP &&
	    statype != CFG80211_STA_AP_CLIENT_UNASSOC) {
		/* reject other things that can't change */
		if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD)
			return -EINVAL;
		if (params->sta_modify_mask & STATION_PARAM_APPLY_CAPABILITY)
			return -EINVAL;
		if (params->link_sta_params.supported_rates)
			return -EINVAL;
		if (params->ext_capab || params->link_sta_params.ht_capa ||
		    params->link_sta_params.vht_capa ||
		    params->link_sta_params.he_capa ||
		    params->link_sta_params.eht_capa)
			return -EINVAL;
	}

	/* a VLAN may only be given for AP clients (associated or not) */
	if (statype != CFG80211_STA_AP_CLIENT &&
	    statype != CFG80211_STA_AP_CLIENT_UNASSOC) {
		if (params->vlan)
			return -EINVAL;
	}

	/* second pass: per-type restrictions on the station flag mask */
	switch (statype) {
	case CFG80211_STA_AP_MLME_CLIENT:
		/* Use this only for authorizing/unauthorizing a station */
		if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)))
			return -EOPNOTSUPP;
		break;
	case CFG80211_STA_AP_CLIENT:
	case CFG80211_STA_AP_CLIENT_UNASSOC:
		/* accept only the listed bits */
		if (params->sta_flags_mask &
				~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
				  BIT(NL80211_STA_FLAG_AUTHENTICATED) |
				  BIT(NL80211_STA_FLAG_ASSOCIATED) |
				  BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
				  BIT(NL80211_STA_FLAG_WME) |
				  BIT(NL80211_STA_FLAG_MFP)))
			return -EINVAL;

		/* but authenticated/associated only if driver handles it */
		if (!(wiphy->features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) &&
		    params->sta_flags_mask &
				(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
				 BIT(NL80211_STA_FLAG_ASSOCIATED)))
			return -EINVAL;
		break;
	case CFG80211_STA_IBSS:
	case CFG80211_STA_AP_STA:
		/* reject any changes other than AUTHORIZED */
		if (params->sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED))
			return -EINVAL;
		break;
	case CFG80211_STA_TDLS_PEER_SETUP:
		/* reject any changes other than AUTHORIZED or WME */
		if (params->sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
					       BIT(NL80211_STA_FLAG_WME)))
			return -EINVAL;
		/* force (at least) rates when authorizing */
		if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED) &&
		    !params->link_sta_params.supported_rates)
			return -EINVAL;
		break;
	case CFG80211_STA_TDLS_PEER_ACTIVE:
		/* reject any changes */
		return -EINVAL;
	case CFG80211_STA_MESH_PEER_KERNEL:
		if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
			return -EINVAL;
		break;
	case CFG80211_STA_MESH_PEER_USER:
		if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION &&
		    params->plink_action != NL80211_PLINK_ACTION_BLOCK)
			return -EINVAL;
		break;
	}

	/*
	 * Older kernel versions ignored this attribute entirely, so don't
	 * reject attempts to update it but mark it as unused instead so the
	 * driver won't look at the data.
	 */
	if (statype != CFG80211_STA_AP_CLIENT_UNASSOC &&
	    statype != CFG80211_STA_TDLS_PEER_SETUP)
		params->link_sta_params.opmode_notif_used = false;

	return 0;
}
EXPORT_SYMBOL(cfg80211_check_station_change);

/*
 * Get vlan interface making sure it is running and on the right wiphy.
 *
 * Returns NULL when the request carries no NL80211_ATTR_STA_VLAN attribute,
 * the net_device on success, or an ERR_PTR():
 *   -ENODEV   no interface with the given ifindex in the request's netns
 *   -EINVAL   not a wireless interface of @rdev, or not AP_VLAN/AP/P2P_GO
 *   -ENETDOWN interface is not running
 * The device returned by dev_get_by_index() is released with dev_put() on
 * every error path; on success it is handed to the caller, and the callers
 * in this file release it with dev_put().
 */
static struct net_device *get_vlan(struct genl_info *info,
				   struct cfg80211_registered_device *rdev)
{
	struct nlattr *vlanattr = info->attrs[NL80211_ATTR_STA_VLAN];
	struct net_device *v;
	int ret;

	if (!vlanattr)
		return NULL;

	v = dev_get_by_index(genl_info_net(info), nla_get_u32(vlanattr));
	if (!v)
		return ERR_PTR(-ENODEV);

	if (!v->ieee80211_ptr || v->ieee80211_ptr->wiphy != &rdev->wiphy) {
		ret = -EINVAL;
		goto error;
	}

	if (v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
	    v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
	    v->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
		ret = -EINVAL;
		goto error;
	}

	if (!netif_running(v)) {
		ret = -ENETDOWN;
		goto error;
	}

	return v;
 error:
	dev_put(v);
	return ERR_PTR(ret);
}

/*
 * Parse the optional nested NL80211_ATTR_STA_WME attribute into @params
 * (uapsd_queues and max_sp) and flag the U-APSD settings for application.
 *
 * Returns 0 when the attribute is absent or valid, the nla parse error, or
 * -EINVAL when a value has bits outside the corresponding QoS-info mask.
 */
static int nl80211_parse_sta_wme(struct genl_info *info,
				 struct station_parameters *params)
{
	struct nlattr *tb[NL80211_STA_WME_MAX + 1];
	struct nlattr *nla;
	int err;

	/* parse WME attributes if present */
	if (!info->attrs[NL80211_ATTR_STA_WME])
		return 0;

	nla = info->attrs[NL80211_ATTR_STA_WME];
	err = nla_parse_nested_deprecated(tb, NL80211_STA_WME_MAX, nla,
nl80211_sta_wme_policy, info->extack);
	if (err)
		return err;

	if (tb[NL80211_STA_WME_UAPSD_QUEUES])
		params->uapsd_queues = nla_get_u8(
			tb[NL80211_STA_WME_UAPSD_QUEUES]);
	/* only bits inside the AC mask are acceptable */
	if (params->uapsd_queues & ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK)
		return -EINVAL;

	if (tb[NL80211_STA_WME_MAX_SP])
		params->max_sp = nla_get_u8(tb[NL80211_STA_WME_MAX_SP]);

	/* likewise, only bits inside the SP mask are acceptable */
	if (params->max_sp & ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK)
		return -EINVAL;

	/* tell the consumer of @params that the U-APSD fields are valid */
	params->sta_modify_mask |= STATION_PARAM_APPLY_UAPSD;

	return 0;
}

/*
 * Copy the optional supported-channels and supported-operating-classes
 * attributes into @params. Only pointers into the netlink message and the
 * attribute lengths are stored; no data is duplicated.
 *
 * Returns 0 on success or -EINVAL when the supported-channels payload has an
 * odd length (it must consist of two-byte tuples).
 */
static int nl80211_parse_sta_channel_info(struct genl_info *info,
				      struct station_parameters *params)
{
	if (info->attrs[NL80211_ATTR_STA_SUPPORTED_CHANNELS]) {
		params->supported_channels =
		     nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_CHANNELS]);
		params->supported_channels_len =
		     nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_CHANNELS]);
		/*
		 * Need to include at least one (first channel, number of
		 * channels) tuple for each subband (checked in policy),
		 * and must have proper tuples for the rest of the data as well.
		 */
		if (params->supported_channels_len % 2)
			return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES]) {
		params->supported_oper_classes =
		 nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES]);
		params->supported_oper_classes_len =
		  nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES]);
	}
	return 0;
}

/*
 * Collect the attributes that may accompany a TDLS peer update (peer AID,
 * HT/VHT/HE/EHT capabilities, channel info and WME) into @params. Whether
 * they are permitted for the station at hand is not decided here.
 *
 * Returns 0 on success, -EINVAL when the EHT capability length does not fit
 * the HE capabilities, or the error from the channel-info/WME parsers.
 */
static int nl80211_set_station_tdls(struct genl_info *info,
				    struct station_parameters *params)
{
	int err;
	/* Dummy STA entry gets updated once the peer capabilities are known */
	if (info->attrs[NL80211_ATTR_PEER_AID])
		params->aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]);
	if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
		params->link_sta_params.ht_capa =
			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
		params->link_sta_params.vht_capa =
			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
	if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
		params->link_sta_params.he_capa =
nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]); params->link_sta_params.he_capa_len = nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]); if (info->attrs[NL80211_ATTR_EHT_CAPABILITY]) { params->link_sta_params.eht_capa = nla_data(info->attrs[NL80211_ATTR_EHT_CAPABILITY]); params->link_sta_params.eht_capa_len = nla_len(info->attrs[NL80211_ATTR_EHT_CAPABILITY]); if (!ieee80211_eht_capa_size_ok((const u8 *)params->link_sta_params.he_capa, (const u8 *)params->link_sta_params.eht_capa, params->link_sta_params.eht_capa_len, false)) return -EINVAL; } } err = nl80211_parse_sta_channel_info(info, params); if (err) return err; return nl80211_parse_sta_wme(info, params); } static int nl80211_parse_sta_txpower_setting(struct genl_info *info, struct sta_txpwr *txpwr, bool *txpwr_set) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int idx; if (info->attrs[NL80211_ATTR_STA_TX_POWER_SETTING]) { if (!rdev->ops->set_tx_power || !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_STA_TX_PWR)) return -EOPNOTSUPP; idx = NL80211_ATTR_STA_TX_POWER_SETTING; txpwr->type = nla_get_u8(info->attrs[idx]); if (txpwr->type == NL80211_TX_POWER_LIMITED) { idx = NL80211_ATTR_STA_TX_POWER; if (info->attrs[idx]) txpwr->power = nla_get_s16(info->attrs[idx]); else return -EINVAL; } *txpwr_set = true; } else { *txpwr_set = false; } return 0; } static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct station_parameters params; u8 *mac_addr; int err; memset(¶ms, 0, sizeof(params)); if (!rdev->ops->change_station) return -EOPNOTSUPP; /* * AID and listen_interval properties can be set only for unassociated * station. Include these parameters here and will check them in * cfg80211_check_station_change(). 
*/ if (info->attrs[NL80211_ATTR_STA_AID]) params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); if (info->attrs[NL80211_ATTR_VLAN_ID]) params.vlan_id = nla_get_u16(info->attrs[NL80211_ATTR_VLAN_ID]); if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); else params.listen_interval = -1; if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]) params.support_p2p_ps = nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]); else params.support_p2p_ps = -1; if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; params.link_sta_params.link_id = nl80211_link_id_or_invalid(info->attrs); if (info->attrs[NL80211_ATTR_MLD_ADDR]) { /* If MLD_ADDR attribute is set then this is an MLD station * and the MLD_ADDR attribute holds the MLD address and the * MAC attribute holds for the LINK address. * In that case, the link_id is also expected to be valid. */ if (params.link_sta_params.link_id < 0) return -EINVAL; mac_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); params.link_sta_params.mld_mac = mac_addr; params.link_sta_params.link_mac = nla_data(info->attrs[NL80211_ATTR_MAC]); if (!is_valid_ether_addr(params.link_sta_params.link_mac)) return -EINVAL; } else { mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); } if (info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) { params.link_sta_params.supported_rates = nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); params.link_sta_params.supported_rates_len = nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); } if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { params.capability = nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]); params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY; } if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) { params.ext_capab = nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); params.ext_capab_len = nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); } if (parse_station_flags(info, 
dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) { params.plink_state = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); if (info->attrs[NL80211_ATTR_MESH_PEER_AID]) params.peer_aid = nla_get_u16( info->attrs[NL80211_ATTR_MESH_PEER_AID]); params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE; } if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) params.local_pm = nla_get_u32( info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]); if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) { params.link_sta_params.opmode_notif_used = true; params.link_sta_params.opmode_notif = nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]); } if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]) params.link_sta_params.he_6ghz_capa = nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]); if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]) params.airtime_weight = nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]); if (params.airtime_weight && !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) return -EOPNOTSUPP; err = nl80211_parse_sta_txpower_setting(info, ¶ms.link_sta_params.txpwr, ¶ms.link_sta_params.txpwr_set); if (err) return err; /* Include parameters for TDLS peer (will check later) */ err = nl80211_set_station_tdls(info, ¶ms); if (err) return err; params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) return PTR_ERR(params.vlan); switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: break; default: err = -EOPNOTSUPP; goto out_put_vlan; } /* driver will call cfg80211_check_station_change() */ err = rdev_change_station(rdev, dev, mac_addr, ¶ms); out_put_vlan: dev_put(params.vlan); return err; } static int 
nl80211_new_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct station_parameters params; u8 *mac_addr = NULL; u32 auth_assoc = BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED); memset(¶ms, 0, sizeof(params)); if (!rdev->ops->add_station) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; if (!info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) return -EINVAL; if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) return -EINVAL; if (!info->attrs[NL80211_ATTR_STA_AID] && !info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; params.link_sta_params.link_id = nl80211_link_id_or_invalid(info->attrs); if (info->attrs[NL80211_ATTR_MLD_ADDR]) { mac_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); params.link_sta_params.mld_mac = mac_addr; params.link_sta_params.link_mac = nla_data(info->attrs[NL80211_ATTR_MAC]); if (!is_valid_ether_addr(params.link_sta_params.link_mac)) return -EINVAL; } else { mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); } params.link_sta_params.supported_rates = nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); params.link_sta_params.supported_rates_len = nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); if (info->attrs[NL80211_ATTR_VLAN_ID]) params.vlan_id = nla_get_u16(info->attrs[NL80211_ATTR_VLAN_ID]); if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]) { params.support_p2p_ps = nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]); } else { /* * if not specified, assume it's supported for P2P GO interface, * and is NOT supported for AP interface */ params.support_p2p_ps = dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO; } if (info->attrs[NL80211_ATTR_PEER_AID]) params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); else 
params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { params.capability = nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]); params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY; } if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) { params.ext_capab = nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); params.ext_capab_len = nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); } if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params.link_sta_params.ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) params.link_sta_params.vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) { params.link_sta_params.he_capa = nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]); params.link_sta_params.he_capa_len = nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]); if (info->attrs[NL80211_ATTR_EHT_CAPABILITY]) { params.link_sta_params.eht_capa = nla_data(info->attrs[NL80211_ATTR_EHT_CAPABILITY]); params.link_sta_params.eht_capa_len = nla_len(info->attrs[NL80211_ATTR_EHT_CAPABILITY]); if (!ieee80211_eht_capa_size_ok((const u8 *)params.link_sta_params.he_capa, (const u8 *)params.link_sta_params.eht_capa, params.link_sta_params.eht_capa_len, false)) return -EINVAL; } } if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]) params.link_sta_params.he_6ghz_capa = nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]); if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) { params.link_sta_params.opmode_notif_used = true; params.link_sta_params.opmode_notif = nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]); } if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]) params.airtime_weight = nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]); if (params.airtime_weight && !wiphy_ext_feature_isset(&rdev->wiphy, 
NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) return -EOPNOTSUPP; err = nl80211_parse_sta_txpower_setting(info, ¶ms.link_sta_params.txpwr, ¶ms.link_sta_params.txpwr_set); if (err) return err; err = nl80211_parse_sta_channel_info(info, ¶ms); if (err) return err; err = nl80211_parse_sta_wme(info, ¶ms); if (err) return err; if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; /* HT/VHT requires QoS, but if we don't have that just ignore HT/VHT * as userspace might just pass through the capabilities from the IEs * directly, rather than enforcing this restriction and returning an * error in this case. */ if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) { params.link_sta_params.ht_capa = NULL; params.link_sta_params.vht_capa = NULL; /* HE and EHT require WME */ if (params.link_sta_params.he_capa_len || params.link_sta_params.he_6ghz_capa || params.link_sta_params.eht_capa_len) return -EINVAL; } /* Ensure that HT/VHT capabilities are not set for 6 GHz HE STA */ if (params.link_sta_params.he_6ghz_capa && (params.link_sta_params.ht_capa || params.link_sta_params.vht_capa)) return -EINVAL; /* When you run into this, adjust the code below for the new flag */ BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: /* ignore WME attributes if iface/sta is not capable */ if (!(rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) || !(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; /* TDLS peers cannot be added */ if ((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) || info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; /* but don't bother the driver with it */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); /* allow authenticated/associated only if driver handles it */ if (!(rdev->wiphy.features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) && params.sta_flags_mask & auth_assoc) return 
-EINVAL; /* Older userspace, or userspace wanting to be compatible with * !NL80211_FEATURE_FULL_AP_CLIENT_STATE, will not set the auth * and assoc flags in the mask, but assumes the station will be * added as associated anyway since this was the required driver * behaviour before NL80211_FEATURE_FULL_AP_CLIENT_STATE was * introduced. * In order to not bother drivers with this quirk in the API * set the flags in both the mask and set for new stations in * this case. */ if (!(params.sta_flags_mask & auth_assoc)) { params.sta_flags_mask |= auth_assoc; params.sta_flags_set |= auth_assoc; } /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) return PTR_ERR(params.vlan); break; case NL80211_IFTYPE_MESH_POINT: /* ignore uAPSD data */ params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; /* associated is disallowed */ if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) return -EINVAL; /* TDLS peers cannot be added */ if ((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) || info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: /* ignore uAPSD data */ params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; /* these are disallowed */ if (params.sta_flags_mask & (BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_AUTHENTICATED))) return -EINVAL; /* Only TDLS peers can be added */ if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) return -EINVAL; /* Can only add if TDLS ... */ if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS)) return -EOPNOTSUPP; /* ... with external setup is supported */ if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP)) return -EOPNOTSUPP; /* * Older wpa_supplicant versions always mark the TDLS peer * as authorized, but it shouldn't yet be. 
*/ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_AUTHORIZED); break; default: return -EOPNOTSUPP; } /* be aware of params.vlan when changing code here */ if (wdev->valid_links) { if (params.link_sta_params.link_id < 0) { err = -EINVAL; goto out; } if (!(wdev->valid_links & BIT(params.link_sta_params.link_id))) { err = -ENOLINK; goto out; } } else { if (params.link_sta_params.link_id >= 0) { err = -EINVAL; goto out; } } err = rdev_add_station(rdev, dev, mac_addr, ¶ms); out: dev_put(params.vlan); return err; } static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct station_del_parameters params; memset(¶ms, 0, sizeof(params)); if (info->attrs[NL80211_ATTR_MAC]) params.mac = nla_data(info->attrs[NL80211_ATTR_MAC]); switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: /* always accept these */ break; case NL80211_IFTYPE_ADHOC: /* conditionally accept */ if (wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_DEL_IBSS_STA)) break; return -EINVAL; default: return -EINVAL; } if (!rdev->ops->del_station) return -EOPNOTSUPP; if (info->attrs[NL80211_ATTR_MGMT_SUBTYPE]) { params.subtype = nla_get_u8(info->attrs[NL80211_ATTR_MGMT_SUBTYPE]); if (params.subtype != IEEE80211_STYPE_DISASSOC >> 4 && params.subtype != IEEE80211_STYPE_DEAUTH >> 4) return -EINVAL; } else { /* Default to Deauthentication frame */ params.subtype = IEEE80211_STYPE_DEAUTH >> 4; } if (info->attrs[NL80211_ATTR_REASON_CODE]) { params.reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); if (params.reason_code == 0) return -EINVAL; /* 0 is reserved */ } else { /* Default to reason code 2 */ params.reason_code = WLAN_REASON_PREV_AUTH_NOT_VALID; } return rdev_del_station(rdev, dev, ¶ms); } static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, int 
flags, struct net_device *dev, u8 *dst, u8 *next_hop,
				struct mpath_info *pinfo)
{
	void *hdr;
	struct nlattr *pinfoattr;

	hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_MPATH);
	if (!hdr)
		return -1;

	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, dst) ||
	    nla_put(msg, NL80211_ATTR_MPATH_NEXT_HOP, ETH_ALEN, next_hop) ||
	    nla_put_u32(msg, NL80211_ATTR_GENERATION, pinfo->generation))
		goto nla_put_failure;

	pinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_MPATH_INFO);
	if (!pinfoattr)
		goto nla_put_failure;
	/* each field is emitted only if its bit is set in pinfo->filled */
	if ((pinfo->filled & MPATH_INFO_FRAME_QLEN) &&
	    nla_put_u32(msg, NL80211_MPATH_INFO_FRAME_QLEN,
			pinfo->frame_qlen))
		goto nla_put_failure;
	if (((pinfo->filled & MPATH_INFO_SN) &&
	     nla_put_u32(msg, NL80211_MPATH_INFO_SN, pinfo->sn)) ||
	    ((pinfo->filled & MPATH_INFO_METRIC) &&
	     nla_put_u32(msg, NL80211_MPATH_INFO_METRIC,
			 pinfo->metric)) ||
	    ((pinfo->filled & MPATH_INFO_EXPTIME) &&
	     nla_put_u32(msg, NL80211_MPATH_INFO_EXPTIME,
			 pinfo->exptime)) ||
	    ((pinfo->filled & MPATH_INFO_FLAGS) &&
	     nla_put_u8(msg, NL80211_MPATH_INFO_FLAGS,
			pinfo->flags)) ||
	    ((pinfo->filled & MPATH_INFO_DISCOVERY_TIMEOUT) &&
	     nla_put_u32(msg, NL80211_MPATH_INFO_DISCOVERY_TIMEOUT,
			 pinfo->discovery_timeout)) ||
	    ((pinfo->filled & MPATH_INFO_DISCOVERY_RETRIES) &&
	     nla_put_u8(msg, NL80211_MPATH_INFO_DISCOVERY_RETRIES,
			pinfo->discovery_retries)) ||
	    ((pinfo->filled & MPATH_INFO_HOP_COUNT) &&
	     nla_put_u8(msg, NL80211_MPATH_INFO_HOP_COUNT,
			pinfo->hop_count)) ||
	    ((pinfo->filled & MPATH_INFO_PATH_CHANGE) &&
	     nla_put_u32(msg, NL80211_MPATH_INFO_PATH_CHANGE,
			 pinfo->path_change_count)))
		goto nla_put_failure;

	nla_nest_end(msg, pinfoattr);

	genlmsg_end(msg, hdr);
	return 0;

 nla_put_failure:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

/*
 * Netlink dump callback listing the mesh paths of a mesh-point interface.
 *
 * cb->args[2] holds the index of the next path to report, so the dump can
 * resume where the previous call stopped. Paths are fetched one at a time
 * with rdev_dump_mpath() until it reports -ENOENT or the skb is full.
 *
 * Returns skb->len after a regular pass, or a negative errno
 * (-EOPNOTSUPP without a dump_mpath op or on a non-mesh interface, or the
 * driver's error). The wiphy lock taken by nl80211_prepare_wdev_dump() is
 * released on every path after that call succeeded.
 */
static int nl80211_dump_mpath(struct sk_buff *skb,
			      struct netlink_callback *cb)
{
	struct mpath_info pinfo;
	struct cfg80211_registered_device *rdev;
	struct wireless_dev *wdev;
	u8 dst[ETH_ALEN];
	u8 next_hop[ETH_ALEN];
	int path_idx = cb->args[2];
	int err;

	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
	if (err)
		return err;
	/* nl80211_prepare_wdev_dump acquired it in the successful case */
	__acquire(&rdev->wiphy.mtx);

	if (!rdev->ops->dump_mpath) {
		err = -EOPNOTSUPP;
		goto out_err;
	}

	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) {
		err = -EOPNOTSUPP;
		goto out_err;
	}

	while (1) {
		err = rdev_dump_mpath(rdev, wdev->netdev, path_idx, dst,
				      next_hop, &pinfo);
		/* -ENOENT: no path at this index, the dump is complete */
		if (err == -ENOENT)
			break;
		if (err)
			goto out_err;

		/* a failure here leaves path_idx pointing at the unsent path */
		if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, NLM_F_MULTI,
				       wdev->netdev, dst, next_hop,
				       &pinfo) < 0)
			goto out;

		path_idx++;
	}

 out:
	cb->args[2] = path_idx;
	err = skb->len;
 out_err:
	wiphy_unlock(&rdev->wiphy);
	return err;
}

/*
 * Netlink handler that looks up a single mesh path by destination MAC and
 * replies with one NL80211_CMD_NEW_MPATH message.
 *
 * Returns -EINVAL without a MAC attribute, -EOPNOTSUPP without a get_mpath
 * op or on a non-mesh interface, the driver's error, -ENOMEM when the reply
 * cannot be allocated, -ENOBUFS when it cannot be filled, otherwise the
 * result of genlmsg_reply().
 */
static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	int err;
	struct net_device *dev = info->user_ptr[1];
	struct mpath_info pinfo;
	struct sk_buff *msg;
	u8 *dst = NULL;
	u8 next_hop[ETH_ALEN];

	memset(&pinfo, 0, sizeof(pinfo));

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	dst = nla_data(info->attrs[NL80211_ATTR_MAC]);

	if (!rdev->ops->get_mpath)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
		return -EOPNOTSUPP;

	err = rdev_get_mpath(rdev, dev, dst, next_hop, &pinfo);
	if (err)
		return err;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	if (nl80211_send_mpath(msg, info->snd_portid, info->snd_seq, 0,
				 dev, dst, next_hop, &pinfo) < 0) {
		nlmsg_free(msg);
		return -ENOBUFS;
	}

	return genlmsg_reply(msg, info);
}

/*
 * Netlink handler that changes the next hop of an existing mesh path.
 * Both the destination MAC and the next-hop attribute are required.
 *
 * Returns -EINVAL when either attribute is missing, -EOPNOTSUPP without a
 * change_mpath op or on a non-mesh interface, otherwise the driver result.
 */
static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	u8 *dst = NULL;
	u8 *next_hop = NULL;

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_MPATH_NEXT_HOP])
		return -EINVAL;

	dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
	next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]);

	if (!rdev->ops->change_mpath)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
		return -EOPNOTSUPP;

	return rdev_change_mpath(rdev, dev, dst, next_hop);
}

/*
 * Netlink handler that adds a mesh path with the given destination MAC and
 * next hop. Both attributes are required.
 *
 * Returns -EINVAL when either attribute is missing, -EOPNOTSUPP without an
 * add_mpath op or on a non-mesh interface, otherwise the driver result.
 */
static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	u8 *dst = NULL;
	u8 *next_hop = NULL;

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_MPATH_NEXT_HOP])
		return -EINVAL;

	dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
	next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]);

	if (!rdev->ops->add_mpath)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
		return -EOPNOTSUPP;

	return rdev_add_mpath(rdev, dev, dst, next_hop);
}

/*
 * Netlink handler that deletes a mesh path. The MAC attribute is optional;
 * without it @dst is passed to the driver as NULL.
 *
 * Returns -EOPNOTSUPP without a del_mpath op or on a non-mesh interface,
 * otherwise the driver result.
 */
static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	u8 *dst = NULL;

	if (info->attrs[NL80211_ATTR_MAC])
		dst = nla_data(info->attrs[NL80211_ATTR_MAC]);

	if (!rdev->ops->del_mpath)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
		return -EOPNOTSUPP;

	return rdev_del_mpath(rdev, dev, dst);
}

/*
 * Netlink handler that looks up a single mesh proxy path (MPP) by
 * destination MAC. The reply reuses the mesh-path message layout, with the
 * proxy address placed in the next-hop attribute.
 *
 * Returns -EINVAL without a MAC attribute, -EOPNOTSUPP without a get_mpp op
 * or on a non-mesh interface, the driver's error, -ENOMEM when the reply
 * cannot be allocated, -ENOBUFS when it cannot be filled, otherwise the
 * result of genlmsg_reply().
 */
static int nl80211_get_mpp(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	int err;
	struct net_device *dev = info->user_ptr[1];
	struct mpath_info pinfo;
	struct sk_buff *msg;
	u8 *dst = NULL;
	u8 mpp[ETH_ALEN];

	memset(&pinfo, 0, sizeof(pinfo));

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	dst = nla_data(info->attrs[NL80211_ATTR_MAC]);

	if (!rdev->ops->get_mpp)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
		return -EOPNOTSUPP;

	err = rdev_get_mpp(rdev, dev, dst, mpp, &pinfo);
	if (err)
		return err;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	if
(nl80211_send_mpath(msg, info->snd_portid, info->snd_seq, 0, dev, dst, mpp, &pinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; } return genlmsg_reply(msg, info); } static int nl80211_dump_mpp(struct sk_buff *skb, struct netlink_callback *cb) { struct mpath_info pinfo; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; u8 dst[ETH_ALEN]; u8 mpp[ETH_ALEN]; int path_idx = cb->args[2]; int err; err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL); if (err) return err; /* nl80211_prepare_wdev_dump acquired it in the successful case */ __acquire(&rdev->wiphy.mtx); if (!rdev->ops->dump_mpp) { err = -EOPNOTSUPP; goto out_err; } if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) { err = -EOPNOTSUPP; goto out_err; } while (1) { err = rdev_dump_mpp(rdev, wdev->netdev, path_idx, dst, mpp, &pinfo); if (err == -ENOENT) break; if (err) goto out_err; if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, wdev->netdev, dst, mpp, &pinfo) < 0) goto out; path_idx++; } out: cb->args[2] = path_idx; err = skb->len; out_err: wiphy_unlock(&rdev->wiphy); return err; } static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct bss_parameters params; memset(¶ms, 0, sizeof(params)); params.link_id = nl80211_link_id_or_invalid(info->attrs); /* default to not changing parameters */ params.use_cts_prot = -1; params.use_short_preamble = -1; params.use_short_slot_time = -1; params.ap_isolate = -1; params.ht_opmode = -1; params.p2p_ctwindow = -1; params.p2p_opp_ps = -1; if (info->attrs[NL80211_ATTR_BSS_CTS_PROT]) params.use_cts_prot = nla_get_u8(info->attrs[NL80211_ATTR_BSS_CTS_PROT]); if (info->attrs[NL80211_ATTR_BSS_SHORT_PREAMBLE]) params.use_short_preamble = nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_PREAMBLE]); if (info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]) params.use_short_slot_time = 
nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]); if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) { params.basic_rates = nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); params.basic_rates_len = nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); } if (info->attrs[NL80211_ATTR_AP_ISOLATE]) params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]); if (info->attrs[NL80211_ATTR_BSS_HT_OPMODE]) params.ht_opmode = nla_get_u16(info->attrs[NL80211_ATTR_BSS_HT_OPMODE]); if (info->attrs[NL80211_ATTR_P2P_CTWINDOW]) { if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EINVAL; params.p2p_ctwindow = nla_get_u8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]); if (params.p2p_ctwindow != 0 && !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN)) return -EINVAL; } if (info->attrs[NL80211_ATTR_P2P_OPPPS]) { u8 tmp; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EINVAL; tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]); params.p2p_opp_ps = tmp; if (params.p2p_opp_ps && !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS)) return -EINVAL; } if (!rdev->ops->change_bss) return -EOPNOTSUPP; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; return rdev_change_bss(rdev, dev, ¶ms); } static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) { char *data = NULL; bool is_indoor; enum nl80211_user_reg_hint_type user_reg_hint_type; u32 owner_nlportid; /* * You should only get this when cfg80211 hasn't yet initialized * completely when built-in to the kernel right between the time * window between nl80211_init() and regulatory_init(), if that is * even possible. 
*/ if (unlikely(!rcu_access_pointer(cfg80211_regdomain))) return -EINPROGRESS; if (info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]) user_reg_hint_type = nla_get_u32(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]); else user_reg_hint_type = NL80211_USER_REG_HINT_USER; switch (user_reg_hint_type) { case NL80211_USER_REG_HINT_USER: case NL80211_USER_REG_HINT_CELL_BASE: if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) return -EINVAL; data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); return regulatory_hint_user(data, user_reg_hint_type); case NL80211_USER_REG_HINT_INDOOR: if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) { owner_nlportid = info->snd_portid; is_indoor = !!info->attrs[NL80211_ATTR_REG_INDOOR]; } else { owner_nlportid = 0; is_indoor = true; } return regulatory_hint_indoor(is_indoor, owner_nlportid); default: return -EINVAL; } } static int nl80211_reload_regdb(struct sk_buff *skb, struct genl_info *info) { return reg_reload_regdb(); } static int nl80211_get_mesh_config(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct mesh_config cur_params; int err = 0; void *hdr; struct nlattr *pinfoattr; struct sk_buff *msg; if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; if (!rdev->ops->get_mesh_config) return -EOPNOTSUPP; /* If not connected, get default parameters */ if (!wdev->u.mesh.id_len) memcpy(&cur_params, &default_mesh_config, sizeof(cur_params)); else err = rdev_get_mesh_config(rdev, dev, &cur_params); if (err) return err; /* Draw up a netlink message to send back */ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_MESH_CONFIG); if (!hdr) goto out; pinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_MESH_CONFIG); if (!pinfoattr) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, 
dev->ifindex) ||
	    nla_put_u16(msg, NL80211_MESHCONF_RETRY_TIMEOUT,
			cur_params.dot11MeshRetryTimeout) ||
	    nla_put_u16(msg, NL80211_MESHCONF_CONFIRM_TIMEOUT,
			cur_params.dot11MeshConfirmTimeout) ||
	    nla_put_u16(msg, NL80211_MESHCONF_HOLDING_TIMEOUT,
			cur_params.dot11MeshHoldingTimeout) ||
	    nla_put_u16(msg, NL80211_MESHCONF_MAX_PEER_LINKS,
			cur_params.dot11MeshMaxPeerLinks) ||
	    nla_put_u8(msg, NL80211_MESHCONF_MAX_RETRIES,
		       cur_params.dot11MeshMaxRetries) ||
	    nla_put_u8(msg, NL80211_MESHCONF_TTL,
		       cur_params.dot11MeshTTL) ||
	    nla_put_u8(msg, NL80211_MESHCONF_ELEMENT_TTL,
		       cur_params.element_ttl) ||
	    nla_put_u8(msg, NL80211_MESHCONF_AUTO_OPEN_PLINKS,
		       cur_params.auto_open_plinks) ||
	    nla_put_u32(msg, NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR,
			cur_params.dot11MeshNbrOffsetMaxNeighbor) ||
	    nla_put_u8(msg, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
		       cur_params.dot11MeshHWMPmaxPREQretries) ||
	    nla_put_u32(msg, NL80211_MESHCONF_PATH_REFRESH_TIME,
			cur_params.path_refresh_time) ||
	    nla_put_u16(msg, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT,
			cur_params.min_discovery_timeout) ||
	    nla_put_u32(msg, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT,
			cur_params.dot11MeshHWMPactivePathTimeout) ||
	    nla_put_u16(msg, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
			cur_params.dot11MeshHWMPpreqMinInterval) ||
	    nla_put_u16(msg, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL,
			cur_params.dot11MeshHWMPperrMinInterval) ||
	    nla_put_u16(msg, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
			cur_params.dot11MeshHWMPnetDiameterTraversalTime) ||
	    nla_put_u8(msg, NL80211_MESHCONF_HWMP_ROOTMODE,
		       cur_params.dot11MeshHWMPRootMode) ||
	    nla_put_u16(msg, NL80211_MESHCONF_HWMP_RANN_INTERVAL,
			cur_params.dot11MeshHWMPRannInterval) ||
	    nla_put_u8(msg, NL80211_MESHCONF_GATE_ANNOUNCEMENTS,
		       cur_params.dot11MeshGateAnnouncementProtocol) ||
	    nla_put_u8(msg, NL80211_MESHCONF_FORWARDING,
		       cur_params.dot11MeshForwarding) ||
	    nla_put_s32(msg, NL80211_MESHCONF_RSSI_THRESHOLD,
			cur_params.rssi_threshold) ||
	    nla_put_u32(msg, NL80211_MESHCONF_HT_OPMODE,
			cur_params.ht_opmode) ||
	    nla_put_u32(msg, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
			cur_params.dot11MeshHWMPactivePathToRootTimeout) ||
	    nla_put_u16(msg, NL80211_MESHCONF_HWMP_ROOT_INTERVAL,
			cur_params.dot11MeshHWMProotInterval) ||
	    nla_put_u16(msg, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL,
			cur_params.dot11MeshHWMPconfirmationInterval) ||
	    nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE,
			cur_params.power_mode) ||
	    nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW,
			cur_params.dot11MeshAwakeWindowDuration) ||
	    nla_put_u32(msg, NL80211_MESHCONF_PLINK_TIMEOUT,
			cur_params.plink_timeout) ||
	    nla_put_u8(msg, NL80211_MESHCONF_CONNECTED_TO_GATE,
		       cur_params.dot11MeshConnectedToMeshGate) ||
	    nla_put_u8(msg, NL80211_MESHCONF_NOLEARN,
		       cur_params.dot11MeshNolearn) ||
	    nla_put_u8(msg, NL80211_MESHCONF_CONNECTED_TO_AS,
		       cur_params.dot11MeshConnectedToAuthServer))
		goto nla_put_failure;
	nla_nest_end(msg, pinfoattr);
	genlmsg_end(msg, hdr);
	return genlmsg_reply(msg, info);

	/* both failure labels share the same cleanup: drop the reply */
 nla_put_failure:
 out:
	nlmsg_free(msg);
	return -ENOBUFS;
}

/*
 * Validation policy for the attributes nested inside a mesh configuration,
 * indexed by NL80211_MESHCONF_* id. Entries built with the
 * NLA_POLICY_RANGE/MIN/MAX macros carry value bounds; plain { .type = ... }
 * entries only fix the attribute type.
 */
static const struct nla_policy
nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = {
	[NL80211_MESHCONF_RETRY_TIMEOUT] =
		NLA_POLICY_RANGE(NLA_U16, 1, 255),
	[NL80211_MESHCONF_CONFIRM_TIMEOUT] =
		NLA_POLICY_RANGE(NLA_U16, 1, 255),
	[NL80211_MESHCONF_HOLDING_TIMEOUT] =
		NLA_POLICY_RANGE(NLA_U16, 1, 255),
	[NL80211_MESHCONF_MAX_PEER_LINKS] =
		NLA_POLICY_RANGE(NLA_U16, 0, 255),
	[NL80211_MESHCONF_MAX_RETRIES] = NLA_POLICY_MAX(NLA_U8, 16),
	[NL80211_MESHCONF_TTL] = NLA_POLICY_MIN(NLA_U8, 1),
	[NL80211_MESHCONF_ELEMENT_TTL] = NLA_POLICY_MIN(NLA_U8, 1),
	[NL80211_MESHCONF_AUTO_OPEN_PLINKS] = NLA_POLICY_MAX(NLA_U8, 1),
	[NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR] =
		NLA_POLICY_RANGE(NLA_U32, 1, 255),
	[NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES] = { .type = NLA_U8 },
	[NL80211_MESHCONF_PATH_REFRESH_TIME] = { .type = NLA_U32 },
	[NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = NLA_POLICY_MIN(NLA_U16, 1),
	[NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT] = { .type = NLA_U32 },
	[NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL] =
		NLA_POLICY_MIN(NLA_U16, 1),
	[NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL] =
		NLA_POLICY_MIN(NLA_U16, 1),
	[NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] =
		NLA_POLICY_MIN(NLA_U16, 1),
	[NL80211_MESHCONF_HWMP_ROOTMODE] = NLA_POLICY_MAX(NLA_U8, 4),
	[NL80211_MESHCONF_HWMP_RANN_INTERVAL] =
		NLA_POLICY_MIN(NLA_U16, 1),
	[NL80211_MESHCONF_GATE_ANNOUNCEMENTS] = NLA_POLICY_MAX(NLA_U8, 1),
	[NL80211_MESHCONF_FORWARDING] = NLA_POLICY_MAX(NLA_U8, 1),
	[NL80211_MESHCONF_RSSI_THRESHOLD] =
		NLA_POLICY_RANGE(NLA_S32, -255, 0),
	[NL80211_MESHCONF_HT_OPMODE] = { .type = NLA_U16 },
	[NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 },
	[NL80211_MESHCONF_HWMP_ROOT_INTERVAL] =
		NLA_POLICY_MIN(NLA_U16, 1),
	[NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] =
		NLA_POLICY_MIN(NLA_U16, 1),
	[NL80211_MESHCONF_POWER_MODE] =
		NLA_POLICY_RANGE(NLA_U32,
				 NL80211_MESH_POWER_ACTIVE,
				 NL80211_MESH_POWER_MAX),
	[NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 },
	[NL80211_MESHCONF_PLINK_TIMEOUT] = { .type = NLA_U32 },
	[NL80211_MESHCONF_CONNECTED_TO_GATE] = NLA_POLICY_RANGE(NLA_U8, 0, 1),
	[NL80211_MESHCONF_NOLEARN] = NLA_POLICY_RANGE(NLA_U8, 0, 1),
	[NL80211_MESHCONF_CONNECTED_TO_AS] = NLA_POLICY_RANGE(NLA_U8, 0, 1),
};

/*
 * Validation policy for the attributes nested inside a mesh setup request,
 * indexed by NL80211_MESH_SETUP_* id. The IE blob is length-limited to
 * IEEE80211_MAX_DATA_LEN and additionally checked by validate_ie_attr().
 */
static const struct nla_policy
	nl80211_mesh_setup_params_policy[NL80211_MESH_SETUP_ATTR_MAX+1] = {
	[NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC] = { .type = NLA_U8 },
	[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 },
	[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 },
	[NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG },
	[NL80211_MESH_SETUP_AUTH_PROTOCOL] = { .type = NLA_U8 },
	[NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG },
	[NL80211_MESH_SETUP_IE] =
		NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr,
				       IEEE80211_MAX_DATA_LEN),
	[NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG },
};

static int nl80211_parse_mesh_config(struct genl_info *info,
				     struct mesh_config *cfg,
				     u32 *mask_out)
{
	struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1];
	u32 mask = 0;
	u16
ht_opmode; #define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, mask, attr, fn) \ do { \ if (tb[attr]) { \ cfg->param = fn(tb[attr]); \ mask |= BIT((attr) - 1); \ } \ } while (0) if (!info->attrs[NL80211_ATTR_MESH_CONFIG]) return -EINVAL; if (nla_parse_nested_deprecated(tb, NL80211_MESHCONF_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_CONFIG], nl80211_meshconf_params_policy, info->extack)) return -EINVAL; /* This makes sure that there aren't more than 32 mesh config * parameters (otherwise our bitfield scheme would not work.) */ BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32); /* Fill in the params struct */ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, mask, NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, mask, NL80211_MESHCONF_CONFIRM_TIMEOUT, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, mask, NL80211_MESHCONF_HOLDING_TIMEOUT, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, mask, NL80211_MESHCONF_MAX_PEER_LINKS, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, mask, NL80211_MESHCONF_TTL, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, mask, NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, mask, NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, mask, NL80211_MESHCONF_PATH_REFRESH_TIME, nla_get_u32); if (mask & BIT(NL80211_MESHCONF_PATH_REFRESH_TIME) && (cfg->path_refresh_time < 1 || cfg->path_refresh_time > 65535)) return -EINVAL; FILL_IN_MESH_PARAM_IF_SET(tb, cfg, 
min_discovery_timeout, mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, mask, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, nla_get_u32); if (mask & BIT(NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT) && (cfg->dot11MeshHWMPactivePathTimeout < 1 || cfg->dot11MeshHWMPactivePathTimeout > 65535)) return -EINVAL; FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval, mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval, mask, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPnetDiameterTraversalTime, mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, mask, NL80211_MESHCONF_HWMP_ROOTMODE, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, mask, NL80211_MESHCONF_HWMP_RANN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshGateAnnouncementProtocol, mask, NL80211_MESHCONF_GATE_ANNOUNCEMENTS, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, mask, NL80211_MESHCONF_FORWARDING, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, mask, NL80211_MESHCONF_RSSI_THRESHOLD, nla_get_s32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConnectedToMeshGate, mask, NL80211_MESHCONF_CONNECTED_TO_GATE, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConnectedToAuthServer, mask, NL80211_MESHCONF_CONNECTED_TO_AS, nla_get_u8); /* * Check HT operation mode based on * IEEE 802.11-2016 9.4.2.57 HT Operation element. 
*/ if (tb[NL80211_MESHCONF_HT_OPMODE]) { ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]); if (ht_opmode & ~(IEEE80211_HT_OP_MODE_PROTECTION | IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT | IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) return -EINVAL; /* NON_HT_STA bit is reserved, but some programs set it */ ht_opmode &= ~IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT; cfg->ht_opmode = ht_opmode; mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1)); } FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout, mask, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, nla_get_u32); if (mask & BIT(NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT) && (cfg->dot11MeshHWMPactivePathToRootTimeout < 1 || cfg->dot11MeshHWMPactivePathToRootTimeout > 65535)) return -EINVAL; FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPconfirmationInterval, mask, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode, mask, NL80211_MESHCONF_POWER_MODE, nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, mask, NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, mask, NL80211_MESHCONF_PLINK_TIMEOUT, nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNolearn, mask, NL80211_MESHCONF_NOLEARN, nla_get_u8); if (mask_out) *mask_out = mask; return 0; #undef FILL_IN_MESH_PARAM_IF_SET } static int nl80211_parse_mesh_setup(struct genl_info *info, struct mesh_setup *setup) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1]; if (!info->attrs[NL80211_ATTR_MESH_SETUP]) return -EINVAL; if (nla_parse_nested_deprecated(tb, NL80211_MESH_SETUP_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_SETUP], nl80211_mesh_setup_params_policy, info->extack)) return -EINVAL; if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC]) setup->sync_method = 
(nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC])) ? IEEE80211_SYNC_METHOD_VENDOR : IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET; if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL]) setup->path_sel_proto = (nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL])) ? IEEE80211_PATH_PROTOCOL_VENDOR : IEEE80211_PATH_PROTOCOL_HWMP; if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC]) setup->path_metric = (nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC])) ? IEEE80211_PATH_METRIC_VENDOR : IEEE80211_PATH_METRIC_AIRTIME; if (tb[NL80211_MESH_SETUP_IE]) { struct nlattr *ieattr = tb[NL80211_MESH_SETUP_IE]; setup->ie = nla_data(ieattr); setup->ie_len = nla_len(ieattr); } if (tb[NL80211_MESH_SETUP_USERSPACE_MPM] && !(rdev->wiphy.features & NL80211_FEATURE_USERSPACE_MPM)) return -EINVAL; setup->user_mpm = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_MPM]); setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]); setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]); if (setup->is_secure) setup->user_mpm = true; if (tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]) { if (!setup->user_mpm) return -EINVAL; setup->auth_id = nla_get_u8(tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]); } return 0; } static int nl80211_update_mesh_config(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct mesh_config cfg = {}; u32 mask; int err; if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; if (!rdev->ops->update_mesh_config) return -EOPNOTSUPP; err = nl80211_parse_mesh_config(info, &cfg, &mask); if (err) return err; if (!wdev->u.mesh.id_len) err = -ENOLINK; if (!err) err = rdev_update_mesh_config(rdev, dev, mask, &cfg); return err; } static int nl80211_put_regdom(const struct ieee80211_regdomain *regdom, struct sk_buff *msg) { struct nlattr *nl_reg_rules; unsigned int i; if (nla_put_string(msg, 
NL80211_ATTR_REG_ALPHA2, regdom->alpha2) || (regdom->dfs_region && nla_put_u8(msg, NL80211_ATTR_DFS_REGION, regdom->dfs_region))) goto nla_put_failure; nl_reg_rules = nla_nest_start_noflag(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) goto nla_put_failure; for (i = 0; i < regdom->n_reg_rules; i++) { struct nlattr *nl_reg_rule; const struct ieee80211_reg_rule *reg_rule; const struct ieee80211_freq_range *freq_range; const struct ieee80211_power_rule *power_rule; unsigned int max_bandwidth_khz; reg_rule = ®dom->reg_rules[i]; freq_range = ®_rule->freq_range; power_rule = ®_rule->power_rule; nl_reg_rule = nla_nest_start_noflag(msg, i); if (!nl_reg_rule) goto nla_put_failure; max_bandwidth_khz = freq_range->max_bandwidth_khz; if (!max_bandwidth_khz) max_bandwidth_khz = reg_get_max_bandwidth(regdom, reg_rule); if (nla_put_u32(msg, NL80211_ATTR_REG_RULE_FLAGS, reg_rule->flags) || nla_put_u32(msg, NL80211_ATTR_FREQ_RANGE_START, freq_range->start_freq_khz) || nla_put_u32(msg, NL80211_ATTR_FREQ_RANGE_END, freq_range->end_freq_khz) || nla_put_u32(msg, NL80211_ATTR_FREQ_RANGE_MAX_BW, max_bandwidth_khz) || nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN, power_rule->max_antenna_gain) || nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP, power_rule->max_eirp) || nla_put_u32(msg, NL80211_ATTR_DFS_CAC_TIME, reg_rule->dfs_cac_ms)) goto nla_put_failure; if ((reg_rule->flags & NL80211_RRF_PSD) && nla_put_s8(msg, NL80211_ATTR_POWER_RULE_PSD, reg_rule->psd)) goto nla_put_failure; nla_nest_end(msg, nl_reg_rule); } nla_nest_end(msg, nl_reg_rules); return 0; nla_put_failure: return -EMSGSIZE; } static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info) { const struct ieee80211_regdomain *regdom = NULL; struct cfg80211_registered_device *rdev; struct wiphy *wiphy = NULL; struct sk_buff *msg; int err = -EMSGSIZE; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOBUFS; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, 
NL80211_CMD_GET_REG); if (!hdr) goto put_failure; rtnl_lock(); if (info->attrs[NL80211_ATTR_WIPHY]) { bool self_managed; rdev = cfg80211_get_dev_from_info(genl_info_net(info), info); if (IS_ERR(rdev)) { err = PTR_ERR(rdev); goto nla_put_failure; } wiphy = &rdev->wiphy; self_managed = wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED; rcu_read_lock(); regdom = get_wiphy_regdom(wiphy); /* a self-managed-reg device must have a private regdom */ if (WARN_ON(!regdom && self_managed)) { err = -EINVAL; goto nla_put_failure_rcu; } if (regdom && nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) goto nla_put_failure_rcu; } else { rcu_read_lock(); } if (!wiphy && reg_last_request_cell_base() && nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, NL80211_USER_REG_HINT_CELL_BASE)) goto nla_put_failure_rcu; if (!regdom) regdom = rcu_dereference(cfg80211_regdomain); if (nl80211_put_regdom(regdom, msg)) goto nla_put_failure_rcu; rcu_read_unlock(); genlmsg_end(msg, hdr); rtnl_unlock(); return genlmsg_reply(msg, info); nla_put_failure_rcu: rcu_read_unlock(); nla_put_failure: rtnl_unlock(); put_failure: nlmsg_free(msg); return err; } static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb, u32 seq, int flags, struct wiphy *wiphy, const struct ieee80211_regdomain *regdom) { void *hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, NL80211_CMD_GET_REG); if (!hdr) return -1; genl_dump_check_consistent(cb, hdr); if (nl80211_put_regdom(regdom, msg)) goto nla_put_failure; if (!wiphy && reg_last_request_cell_base() && nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, NL80211_USER_REG_HINT_CELL_BASE)) goto nla_put_failure; if (wiphy && nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) goto nla_put_failure; if (wiphy && wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); 
return -EMSGSIZE; } static int nl80211_get_reg_dump(struct sk_buff *skb, struct netlink_callback *cb) { const struct ieee80211_regdomain *regdom = NULL; struct cfg80211_registered_device *rdev; int err, reg_idx, start = cb->args[2]; rcu_read_lock(); if (cfg80211_regdomain && start == 0) { err = nl80211_send_regdom(skb, cb, cb->nlh->nlmsg_seq, NLM_F_MULTI, NULL, rcu_dereference(cfg80211_regdomain)); if (err < 0) goto out_err; } /* the global regdom is idx 0 */ reg_idx = 1; list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { regdom = get_wiphy_regdom(&rdev->wiphy); if (!regdom) continue; if (++reg_idx <= start) continue; err = nl80211_send_regdom(skb, cb, cb->nlh->nlmsg_seq, NLM_F_MULTI, &rdev->wiphy, regdom); if (err < 0) { reg_idx--; break; } } cb->args[2] = reg_idx; err = skb->len; out_err: rcu_read_unlock(); return err; } #ifdef CONFIG_CFG80211_CRDA_SUPPORT static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { [NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 }, [NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 }, [NL80211_ATTR_FREQ_RANGE_MAX_BW] = { .type = NLA_U32 }, [NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN] = { .type = NLA_U32 }, [NL80211_ATTR_POWER_RULE_MAX_EIRP] = { .type = NLA_U32 }, [NL80211_ATTR_DFS_CAC_TIME] = { .type = NLA_U32 }, }; static int parse_reg_rule(struct nlattr *tb[], struct ieee80211_reg_rule *reg_rule) { struct ieee80211_freq_range *freq_range = ®_rule->freq_range; struct ieee80211_power_rule *power_rule = ®_rule->power_rule; if (!tb[NL80211_ATTR_REG_RULE_FLAGS]) return -EINVAL; if (!tb[NL80211_ATTR_FREQ_RANGE_START]) return -EINVAL; if (!tb[NL80211_ATTR_FREQ_RANGE_END]) return -EINVAL; if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]) return -EINVAL; if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]) return -EINVAL; reg_rule->flags = nla_get_u32(tb[NL80211_ATTR_REG_RULE_FLAGS]); freq_range->start_freq_khz = nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]); 
freq_range->end_freq_khz = nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]); freq_range->max_bandwidth_khz = nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]); power_rule->max_eirp = nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]); if (tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]) power_rule->max_antenna_gain = nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]); if (tb[NL80211_ATTR_DFS_CAC_TIME]) reg_rule->dfs_cac_ms = nla_get_u32(tb[NL80211_ATTR_DFS_CAC_TIME]); return 0; } static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; struct nlattr *nl_reg_rule; char *alpha2; int rem_reg_rules, r; u32 num_rules = 0, rule_idx = 0; enum nl80211_dfs_regions dfs_region = NL80211_DFS_UNSET; struct ieee80211_regdomain *rd; if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) return -EINVAL; if (!info->attrs[NL80211_ATTR_REG_RULES]) return -EINVAL; alpha2 = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); if (info->attrs[NL80211_ATTR_DFS_REGION]) dfs_region = nla_get_u8(info->attrs[NL80211_ATTR_DFS_REGION]); nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], rem_reg_rules) { num_rules++; if (num_rules > NL80211_MAX_SUPP_REG_RULES) return -EINVAL; } rtnl_lock(); if (!reg_is_valid_request(alpha2)) { r = -EINVAL; goto out; } rd = kzalloc(struct_size(rd, reg_rules, num_rules), GFP_KERNEL); if (!rd) { r = -ENOMEM; goto out; } rd->n_reg_rules = num_rules; rd->alpha2[0] = alpha2[0]; rd->alpha2[1] = alpha2[1]; /* * Disable DFS master mode if the DFS region was * not supported or known on this kernel. 
*/ if (reg_supported_dfs_region(dfs_region)) rd->dfs_region = dfs_region; nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], rem_reg_rules) { r = nla_parse_nested_deprecated(tb, NL80211_REG_RULE_ATTR_MAX, nl_reg_rule, reg_rule_policy, info->extack); if (r) goto bad_reg; r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); if (r) goto bad_reg; rule_idx++; if (rule_idx > NL80211_MAX_SUPP_REG_RULES) { r = -EINVAL; goto bad_reg; } } r = set_regdom(rd, REGD_SOURCE_CRDA); /* set_regdom takes ownership of rd */ rd = NULL; bad_reg: kfree(rd); out: rtnl_unlock(); return r; } #endif /* CONFIG_CFG80211_CRDA_SUPPORT */ static int validate_scan_freqs(struct nlattr *freqs) { struct nlattr *attr1, *attr2; int n_channels = 0, tmp1, tmp2; nla_for_each_nested(attr1, freqs, tmp1) if (nla_len(attr1) != sizeof(u32)) return 0; nla_for_each_nested(attr1, freqs, tmp1) { n_channels++; /* * Some hardware has a limited channel list for * scanning, and it is pretty much nonsensical * to scan for a channel twice, so disallow that * and don't require drivers to check that the * channel list they get isn't longer than what * they can scan, as long as they can scan all * the channels they registered at once. 
*/
		nla_for_each_nested(attr2, freqs, tmp2)
			if (attr1 != attr2 &&
			    nla_get_u32(attr1) == nla_get_u32(attr2))
				return 0;
	}

	return n_channels;
}

/* True when @b is below NUM_NL80211_BANDS and @wiphy has that band set. */
static bool is_band_valid(struct wiphy *wiphy, enum nl80211_band b)
{
	return b < NUM_NL80211_BANDS && wiphy->bands[b];
}

/*
 * Parse a BSS selection attribute into @bss_select.
 *
 * Exactly one behaviour attribute must be present inside the nested
 * attribute, any band it names must pass is_band_valid(), and the behaviour
 * must be advertised in wiphy->bss_select_support. Returns 0 on success,
 * the parse error, or -EINVAL when one of those conditions fails.
 */
static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy,
			    struct cfg80211_bss_selection *bss_select)
{
	struct nlattr *attr[NL80211_BSS_SELECT_ATTR_MAX + 1];
	struct nlattr *nest;
	int err;
	bool found = false;
	int i;

	/* only process one nested attribute */
	nest = nla_data(nla);
	if (!nla_ok(nest, nla_len(nest)))
		return -EINVAL;

	err = nla_parse_nested_deprecated(attr, NL80211_BSS_SELECT_ATTR_MAX,
					  nest, nl80211_bss_select_policy,
					  NULL);
	if (err)
		return err;

	/* only one attribute may be given */
	for (i = 0; i <= NL80211_BSS_SELECT_ATTR_MAX; i++) {
		if (attr[i]) {
			if (found)
				return -EINVAL;
			found = true;
		}
	}

	/* Start from the invalid marker so a missing attribute is detected. */
	bss_select->behaviour = __NL80211_BSS_SELECT_ATTR_INVALID;

	if (attr[NL80211_BSS_SELECT_ATTR_RSSI])
		bss_select->behaviour = NL80211_BSS_SELECT_ATTR_RSSI;

	if (attr[NL80211_BSS_SELECT_ATTR_BAND_PREF]) {
		bss_select->behaviour = NL80211_BSS_SELECT_ATTR_BAND_PREF;
		bss_select->param.band_pref =
			nla_get_u32(attr[NL80211_BSS_SELECT_ATTR_BAND_PREF]);
		if (!is_band_valid(wiphy, bss_select->param.band_pref))
			return -EINVAL;
	}

	if (attr[NL80211_BSS_SELECT_ATTR_RSSI_ADJUST]) {
		struct nl80211_bss_select_rssi_adjust *adj_param;

		adj_param = nla_data(attr[NL80211_BSS_SELECT_ATTR_RSSI_ADJUST]);
		bss_select->behaviour = NL80211_BSS_SELECT_ATTR_RSSI_ADJUST;
		bss_select->param.adjust.band = adj_param->band;
		bss_select->param.adjust.delta = adj_param->delta;
		if (!is_band_valid(wiphy, bss_select->param.adjust.band))
			return -EINVAL;
	}

	/* user-space did not provide behaviour attribute */
	if (bss_select->behaviour == __NL80211_BSS_SELECT_ATTR_INVALID)
		return -EINVAL;

	if (!(wiphy->bss_select_support & BIT(bss_select->behaviour)))
		return -EINVAL;

	return 0;
}

/*
 * Fill @mac_addr and @mac_addr_mask from NL80211_ATTR_MAC and
 * NL80211_ATTR_MAC_MASK.
 *
 * With neither attribute present, the address is zeroed with first byte 0x2
 * and the mask is zeroed with first byte 0x3. Supplying only one of the two
 * is an error. Address bits outside the mask are cleared before returning.
 * Returns 0 on success or -EINVAL.
 */
int nl80211_parse_random_mac(struct nlattr **attrs,
			     u8 *mac_addr, u8 *mac_addr_mask)
{
	int i;

	if (!attrs[NL80211_ATTR_MAC] && !attrs[NL80211_ATTR_MAC_MASK]) {
		eth_zero_addr(mac_addr);
		eth_zero_addr(mac_addr_mask);
		mac_addr[0] = 0x2;
		mac_addr_mask[0] = 0x3;

		return 0;
	}

	/* need both or none */
	if (!attrs[NL80211_ATTR_MAC] || !attrs[NL80211_ATTR_MAC_MASK])
		return -EINVAL;

	memcpy(mac_addr, nla_data(attrs[NL80211_ATTR_MAC]), ETH_ALEN);
	memcpy(mac_addr_mask, nla_data(attrs[NL80211_ATTR_MAC_MASK]), ETH_ALEN);

	/* don't allow or configure an mcast address */
	if (!is_multicast_ether_addr(mac_addr_mask) ||
	    is_multicast_ether_addr(mac_addr))
		return -EINVAL;

	/*
	 * allow users to pass a MAC address that has bits set outside
	 * of the mask, but don't bother drivers with having to deal
	 * with such bits
	 */
	for (i = 0; i < ETH_ALEN; i++)
		mac_addr[i] &= mac_addr_mask[i];

	return 0;
}

/*
 * Decide whether @wdev may go off-channel.
 *
 * Returns true when cfg80211_beaconing_iface_active() is false for @wdev or
 * when none of its valid links has a channel flagged IEEE80211_CHAN_RADAR;
 * otherwise the answer is regulatory_pre_cac_allowed(). @chan is not yet
 * consulted (see the FIXMEs below). Must be called with the wiphy lock held.
 */
static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev,
					      struct ieee80211_channel *chan)
{
	unsigned int link_id;
	bool all_ok = true;

	lockdep_assert_wiphy(wdev->wiphy);

	if (!cfg80211_beaconing_iface_active(wdev))
		return true;

	/*
	 * FIXME: check if we have a free HW resource/link for chan
	 *
	 * This, as well as the FIXME below, requires knowing the link
	 * capabilities of the hardware.
	 */

	/* we cannot leave radar channels */
	for_each_valid_link(wdev, link_id) {
		struct cfg80211_chan_def *chandef;

		chandef = wdev_chandef(wdev, link_id);
		if (!chandef || !chandef->chan)
			continue;

		/*
		 * FIXME: don't require all_ok, but rather check only the
		 *	  correct HW resource/link onto which 'chan' falls,
		 *	  as only that link leaves the channel for doing
		 *	  the off-channel operation.
		 */

		if (chandef->chan->flags & IEEE80211_CHAN_RADAR)
			all_ok = false;
	}

	if (all_ok)
		return true;

	return regulatory_pre_cac_allowed(wdev->wiphy);
}

/*
 * Returns true unless scan flag @flag is set in @flags while extended
 * feature @feat is not set on @wiphy.
 */
static bool nl80211_check_scan_feat(struct wiphy *wiphy, u32 flags, u32 flag,
				    enum nl80211_ext_feature_index feat)
{
	if (!(flags & flag))
		return true;
	if (wiphy_ext_feature_isset(wiphy, feat))
		return true;
	return false;
}

static int
nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
			 void *request, struct nlattr **attrs,
			 bool is_sched_scan)
{
	u8 *mac_addr, *mac_addr_mask;
	u32 *flags;
	enum nl80211_feature_flags randomness_flag;

	if (!attrs[NL80211_ATTR_SCAN_FLAGS])
		return 0;

	/* @request is interpreted according to @is_sched_scan. */
	if (is_sched_scan) {
		struct cfg80211_sched_scan_request *req = request;

		randomness_flag = wdev ?
				  NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR :
				  NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
		flags = &req->flags;
		mac_addr = req->mac_addr;
		mac_addr_mask = req->mac_addr_mask;
	} else {
		struct cfg80211_scan_request *req = request;

		randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
		flags = &req->flags;
		mac_addr = req->mac_addr;
		mac_addr_mask = req->mac_addr_mask;
	}

	*flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);

	if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
	     !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) ||
	    !nl80211_check_scan_feat(wiphy, *flags,
				     NL80211_SCAN_FLAG_LOW_SPAN,
				     NL80211_EXT_FEATURE_LOW_SPAN_SCAN) ||
	    !nl80211_check_scan_feat(wiphy, *flags,
				     NL80211_SCAN_FLAG_LOW_POWER,
				     NL80211_EXT_FEATURE_LOW_POWER_SCAN) ||
	    !nl80211_check_scan_feat(wiphy, *flags,
				     NL80211_SCAN_FLAG_HIGH_ACCURACY,
				     NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN) ||
	    !nl80211_check_scan_feat(wiphy, *flags,
				     NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME,
				     NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME) ||
	    !nl80211_check_scan_feat(wiphy, *flags,
				     NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP,
				     NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP) ||
	    !nl80211_check_scan_feat(wiphy, *flags,
				     NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION,
NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) || !nl80211_check_scan_feat(wiphy, *flags, NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE, NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE) || !nl80211_check_scan_feat(wiphy, *flags, NL80211_SCAN_FLAG_RANDOM_SN, NL80211_EXT_FEATURE_SCAN_RANDOM_SN) || !nl80211_check_scan_feat(wiphy, *flags, NL80211_SCAN_FLAG_MIN_PREQ_CONTENT, NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT)) return -EOPNOTSUPP; if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { int err; if (!(wiphy->features & randomness_flag) || (wdev && wdev->connected)) return -EOPNOTSUPP; err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask); if (err) return err; } return 0; } static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; struct cfg80211_scan_request *request; struct nlattr *scan_freqs = NULL; bool scan_freqs_khz = false; struct nlattr *attr; struct wiphy *wiphy; int err, tmp, n_ssids = 0, n_channels, i; size_t ie_len, size; wiphy = &rdev->wiphy; if (wdev->iftype == NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!rdev->ops->scan) return -EOPNOTSUPP; if (rdev->scan_req || rdev->scan_msg) return -EBUSY; if (info->attrs[NL80211_ATTR_SCAN_FREQ_KHZ]) { if (!wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_SCAN_FREQ_KHZ)) return -EOPNOTSUPP; scan_freqs = info->attrs[NL80211_ATTR_SCAN_FREQ_KHZ]; scan_freqs_khz = true; } else if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) scan_freqs = info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]; if (scan_freqs) { n_channels = validate_scan_freqs(scan_freqs); if (!n_channels) return -EINVAL; } else { n_channels = ieee80211_get_num_supported_channels(wiphy); } if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) n_ssids++; if (n_ssids > wiphy->max_scan_ssids) return -EINVAL; if (info->attrs[NL80211_ATTR_IE]) ie_len = 
nla_len(info->attrs[NL80211_ATTR_IE]); else ie_len = 0; if (ie_len > wiphy->max_scan_ie_len) return -EINVAL; size = struct_size(request, channels, n_channels); size = size_add(size, array_size(sizeof(*request->ssids), n_ssids)); size = size_add(size, ie_len); request = kzalloc(size, GFP_KERNEL); if (!request) return -ENOMEM; if (n_ssids) request->ssids = (void *)&request->channels[n_channels]; request->n_ssids = n_ssids; if (ie_len) { if (n_ssids) request->ie = (void *)(request->ssids + n_ssids); else request->ie = (void *)(request->channels + n_channels); } i = 0; if (scan_freqs) { /* user specified, bail out if channel not found */ nla_for_each_nested(attr, scan_freqs, tmp) { struct ieee80211_channel *chan; int freq = nla_get_u32(attr); if (!scan_freqs_khz) freq = MHZ_TO_KHZ(freq); chan = ieee80211_get_channel_khz(wiphy, freq); if (!chan) { err = -EINVAL; goto out_free; } /* ignore disabled channels */ if (chan->flags & IEEE80211_CHAN_DISABLED) continue; request->channels[i] = chan; i++; } } else { enum nl80211_band band; /* all channels */ for (band = 0; band < NUM_NL80211_BANDS; band++) { int j; if (!wiphy->bands[band]) continue; for (j = 0; j < wiphy->bands[band]->n_channels; j++) { struct ieee80211_channel *chan; chan = &wiphy->bands[band]->channels[j]; if (chan->flags & IEEE80211_CHAN_DISABLED) continue; request->channels[i] = chan; i++; } } } if (!i) { err = -EINVAL; goto out_free; } request->n_channels = i; for (i = 0; i < request->n_channels; i++) { struct ieee80211_channel *chan = request->channels[i]; /* if we can go off-channel to the target channel we're good */ if (cfg80211_off_channel_oper_allowed(wdev, chan)) continue; if (!cfg80211_wdev_on_sub_chan(wdev, chan, true)) { err = -EBUSY; goto out_free; } } i = 0; if (n_ssids) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; goto out_free; } request->ssids[i].ssid_len = nla_len(attr); memcpy(request->ssids[i].ssid, 
nla_data(attr), nla_len(attr)); i++; } } if (info->attrs[NL80211_ATTR_IE]) { request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); memcpy((void *)request->ie, nla_data(info->attrs[NL80211_ATTR_IE]), request->ie_len); } for (i = 0; i < NUM_NL80211_BANDS; i++) if (wiphy->bands[i]) request->rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1; if (info->attrs[NL80211_ATTR_SCAN_SUPP_RATES]) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SUPP_RATES], tmp) { enum nl80211_band band = nla_type(attr); if (band < 0 || band >= NUM_NL80211_BANDS) { err = -EINVAL; goto out_free; } if (!wiphy->bands[band]) continue; err = ieee80211_get_ratemask(wiphy->bands[band], nla_data(attr), nla_len(attr), &request->rates[band]); if (err) goto out_free; } } if (info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]) { request->duration = nla_get_u16(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]); request->duration_mandatory = nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]); } err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs, false); if (err) goto out_free; request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); /* Initial implementation used NL80211_ATTR_MAC to set the specific * BSSID to scan for. This was problematic because that same attribute * was already used for another purpose (local random MAC address). The * NL80211_ATTR_BSSID attribute was added to fix this. For backwards * compatibility with older userspace components, also use the * NL80211_ATTR_MAC value here if it can be determined to be used for * the specific BSSID use case instead of the random MAC address * (NL80211_ATTR_SCAN_FLAGS is used to enable random MAC address use). 
*/
	if (info->attrs[NL80211_ATTR_BSSID])
		memcpy(request->bssid,
		       nla_data(info->attrs[NL80211_ATTR_BSSID]), ETH_ALEN);
	else if (!(request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) &&
		 info->attrs[NL80211_ATTR_MAC])
		memcpy(request->bssid, nla_data(info->attrs[NL80211_ATTR_MAC]),
		       ETH_ALEN);
	else
		eth_broadcast_addr(request->bssid);

	request->wdev = wdev;
	request->wiphy = &rdev->wiphy;
	request->scan_start = jiffies;

	rdev->scan_req = request;
	err = cfg80211_scan(rdev);

	if (err)
		goto out_free;

	nl80211_send_scan_start(rdev, wdev);
	dev_hold(wdev->netdev);

	return 0;

out_free:
	/* error path: clear the recorded request before freeing it */
	rdev->scan_req = NULL;
	kfree(request);

	return err;
}

/*
 * Handle a request to abort the scan recorded on this device.
 *
 * Returns -EOPNOTSUPP when the driver provides no abort_scan op, 0 without
 * calling the driver when rdev->scan_msg is set, -ENOENT when no request is
 * recorded in rdev->scan_req, and otherwise 0 after rdev_abort_scan().
 */
static int nl80211_abort_scan(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];

	if (!rdev->ops->abort_scan)
		return -EOPNOTSUPP;

	/*
	 * NOTE(review): presumably scan_msg means the scan already finished
	 * and only its notification is pending, so nothing is left to abort
	 * - confirm against where scan_msg is set.
	 */
	if (rdev->scan_msg)
		return 0;

	if (!rdev->scan_req)
		return -ENOENT;

	rdev_abort_scan(rdev, wdev);
	return 0;
}

/*
 * Fill request->scan_plans[] from the netlink attributes.
 *
 * @wiphy:   supplies max_sched_scan_plan_interval and
 *           max_sched_scan_plan_iterations, used as limits below
 * @n_plans: number of plans expected; also used to index the last plan
 * @request: scheduled scan request whose scan_plans array is written
 * @attrs:   top-level attribute table
 *
 * Without NL80211_ATTR_SCHED_SCAN_PLANS, exactly one plan is built from
 * NL80211_ATTR_SCHED_SCAN_INTERVAL, which is read here without a presence
 * check. Otherwise every nested plan is parsed and validated.
 *
 * Returns 0 on success, -EINVAL on an invalid value, or the error reported
 * by nla_parse_nested_deprecated().
 */
static int
nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans,
			       struct cfg80211_sched_scan_request *request,
			       struct nlattr **attrs)
{
	int tmp, err, i = 0;
	struct nlattr *attr;

	if (!attrs[NL80211_ATTR_SCHED_SCAN_PLANS]) {
		u32 interval;

		/*
		 * If scan plans are not specified,
		 * %NL80211_ATTR_SCHED_SCAN_INTERVAL will be specified. In this
		 * case one scan plan will be set with the specified scan
		 * interval and infinite number of iterations.
		 */
		interval = nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]);
		if (!interval)
			return -EINVAL;

		/*
		 * The attribute value is divided by MSEC_PER_SEC, rounding up
		 * (presumably milliseconds to seconds - confirm units).
		 */
		request->scan_plans[0].interval =
			DIV_ROUND_UP(interval, MSEC_PER_SEC);
		if (!request->scan_plans[0].interval)
			return -EINVAL;

		/* legacy path clamps to the maximum instead of rejecting */
		if (request->scan_plans[0].interval >
		    wiphy->max_sched_scan_plan_interval)
			request->scan_plans[0].interval =
				wiphy->max_sched_scan_plan_interval;

		return 0;
	}

	nla_for_each_nested(attr, attrs[NL80211_ATTR_SCHED_SCAN_PLANS], tmp) {
		struct nlattr *plan[NL80211_SCHED_SCAN_PLAN_MAX + 1];

		/* more nested plans than the caller announced */
		if (WARN_ON(i >= n_plans))
			return -EINVAL;

		err = nla_parse_nested_deprecated(plan,
						  NL80211_SCHED_SCAN_PLAN_MAX,
						  attr, nl80211_plan_policy,
						  NULL);
		if (err)
			return err;

		if (!plan[NL80211_SCHED_SCAN_PLAN_INTERVAL])
			return -EINVAL;

		/* unlike the legacy path, out-of-range intervals are rejected */
		request->scan_plans[i].interval =
			nla_get_u32(plan[NL80211_SCHED_SCAN_PLAN_INTERVAL]);
		if (!request->scan_plans[i].interval ||
		    request->scan_plans[i].interval >
		    wiphy->max_sched_scan_plan_interval)
			return -EINVAL;

		if (plan[NL80211_SCHED_SCAN_PLAN_ITERATIONS]) {
			request->scan_plans[i].iterations =
				nla_get_u32(plan[NL80211_SCHED_SCAN_PLAN_ITERATIONS]);
			if (!request->scan_plans[i].iterations ||
			    (request->scan_plans[i].iterations >
			     wiphy->max_sched_scan_plan_iterations))
				return -EINVAL;
		} else if (i < n_plans - 1) {
			/*
			 * All scan plans but the last one must specify
			 * a finite number of iterations
			 */
			return -EINVAL;
		}

		i++;
	}

	/*
	 * The last scan plan must not specify the number of
	 * iterations, it is supposed to run infinitely
	 */
	if (request->scan_plans[n_plans - 1].iterations)
		return -EINVAL;

	return 0;
}

/*
 * Fill match_sets->per_band_rssi_thold[] for one match set.
 *
 * @wiphy:        checked for
 *                NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD
 * @match_sets:   the single match set to update
 * @tb_band_rssi: nested per-band RSSI attribute (tested for NULL only on
 *                the feature-unsupported path)
 * @rssi_thold:   value every band starts from when the feature is supported
 *
 * Without the feature: -EOPNOTSUPP if the attribute was supplied, otherwise
 * all bands are set to NL80211_SCAN_RSSI_THOLD_OFF and 0 is returned.
 * With the feature: all bands start at @rssi_thold and each nested entry
 * overrides the band given by its attribute type; an out-of-range band
 * yields -EINVAL.
 */
static int
nl80211_parse_sched_scan_per_band_rssi(struct wiphy *wiphy,
				       struct cfg80211_match_set *match_sets,
				       struct nlattr *tb_band_rssi,
				       s32 rssi_thold)
{
	struct nlattr *attr;
	int i, tmp, ret = 0;

	if (!wiphy_ext_feature_isset(wiphy,
		    NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD)) {
		if (tb_band_rssi)
			ret = -EOPNOTSUPP;
		else
			for (i = 0; i < NUM_NL80211_BANDS; i++)
				match_sets->per_band_rssi_thold[i] =
					NL80211_SCAN_RSSI_THOLD_OFF;
		return
ret;
	}

	/* every band starts from the match set's overall threshold */
	for (i = 0; i < NUM_NL80211_BANDS; i++)
		match_sets->per_band_rssi_thold[i] = rssi_thold;

	nla_for_each_nested(attr, tb_band_rssi, tmp) {
		/* the nested attribute's type is used as the band index */
		enum nl80211_band band = nla_type(attr);

		if (band < 0 || band >= NUM_NL80211_BANDS)
			return -EINVAL;

		match_sets->per_band_rssi_thold[band] =	nla_get_s32(attr);
	}

	return 0;
}

/*
 * Build a scheduled scan request from netlink attributes.
 *
 * @wiphy:          device whose limits (max_sched_scan_ssids,
 *                  max_sched_scan_ie_len, max_sched_scan_plans, ...) and
 *                  bands/channels are consulted
 * @wdev:           passed through to nl80211_check_scan_flags()
 * @attrs:          top-level attribute table
 * @max_match_sets: upper bound on the number of match sets accepted
 *
 * The function first counts channels, SSIDs, match sets and plans, then
 * makes a single kzalloc() sized for the request plus all trailing arrays
 * and the IE buffer, carves that allocation up, and finally fills it in.
 *
 * Returns the new request (the caller owns it and must kfree() it) or an
 * ERR_PTR(): -EINVAL for invalid input, -ENOMEM on allocation failure, or
 * an error propagated from one of the parsing helpers.
 */
static struct cfg80211_sched_scan_request *
nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
			 struct nlattr **attrs, int max_match_sets)
{
	struct cfg80211_sched_scan_request *request;
	struct nlattr *attr;
	int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i, n_plans = 0;
	enum nl80211_band band;
	size_t ie_len, size;
	struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1];
	s32 default_match_rssi = NL80211_SCAN_RSSI_THOLD_OFF;

	/* --- pass 1: count everything so the allocation can be sized --- */

	if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
		n_channels = validate_scan_freqs(
				attrs[NL80211_ATTR_SCAN_FREQUENCIES]);
		if (!n_channels)
			return ERR_PTR(-EINVAL);
	} else {
		n_channels = ieee80211_get_num_supported_channels(wiphy);
	}

	if (attrs[NL80211_ATTR_SCAN_SSIDS])
		nla_for_each_nested(attr, attrs[NL80211_ATTR_SCAN_SSIDS],
				    tmp)
			n_ssids++;

	if (n_ssids > wiphy->max_sched_scan_ssids)
		return ERR_PTR(-EINVAL);

	/*
	 * First, count the number of 'real' matchsets. Due to an issue with
	 * the old implementation, matchsets containing only the RSSI attribute
	 * (NL80211_SCHED_SCAN_MATCH_ATTR_RSSI) are considered as the 'default'
	 * RSSI for all matchsets, rather than their own matchset for reporting
	 * all APs with a strong RSSI. This is needed to be compatible with
	 * older userspace that treated a matchset with only the RSSI as the
	 * global RSSI for all other matchsets - if there are other matchsets.
	 */
	if (attrs[NL80211_ATTR_SCHED_SCAN_MATCH]) {
		nla_for_each_nested(attr,
				    attrs[NL80211_ATTR_SCHED_SCAN_MATCH],
				    tmp) {
			struct nlattr *rssi;

			err = nla_parse_nested_deprecated(tb,
							  NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
							  attr,
							  nl80211_match_policy,
							  NULL);
			if (err)
				return ERR_PTR(err);

			/* SSID and BSSID are mutually exclusive */
			if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] &&
			    tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID])
				return ERR_PTR(-EINVAL);

			/* add other standalone attributes here */
			if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] ||
			    tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID]) {
				n_match_sets++;
				continue;
			}
			/* RSSI-only entry: remember it as the default threshold */
			rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI];
			if (rssi)
				default_match_rssi = nla_get_s32(rssi);
		}
	}

	/* However, if there's no other matchset, add the RSSI one */
	if (!n_match_sets && default_match_rssi != NL80211_SCAN_RSSI_THOLD_OFF)
		n_match_sets = 1;

	if (n_match_sets > max_match_sets)
		return ERR_PTR(-EINVAL);

	if (attrs[NL80211_ATTR_IE])
		ie_len = nla_len(attrs[NL80211_ATTR_IE]);
	else
		ie_len = 0;

	if (ie_len > wiphy->max_sched_scan_ie_len)
		return ERR_PTR(-EINVAL);

	if (attrs[NL80211_ATTR_SCHED_SCAN_PLANS]) {
		/*
		 * NL80211_ATTR_SCHED_SCAN_INTERVAL must not be specified since
		 * each scan plan already specifies its own interval
		 */
		if (attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL])
			return ERR_PTR(-EINVAL);

		nla_for_each_nested(attr,
				    attrs[NL80211_ATTR_SCHED_SCAN_PLANS], tmp)
			n_plans++;
	} else {
		/*
		 * The scan interval attribute is kept for backward
		 * compatibility. If no scan plans are specified and sched scan
		 * interval is specified, one scan plan will be set with this
		 * scan interval and infinite number of iterations.
		 */
		if (!attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL])
			return ERR_PTR(-EINVAL);

		n_plans = 1;
	}

	if (!n_plans || n_plans > wiphy->max_sched_scan_plans)
		return ERR_PTR(-EINVAL);

	/* relative-RSSI attributes are only accepted with the ext feature */
	if (!wiphy_ext_feature_isset(
		    wiphy, NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI) &&
	    (attrs[NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI] ||
	     attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST]))
		return ERR_PTR(-EINVAL);

	/*
	 * --- single allocation: request + channels[] + ssids[] +
	 * match_sets[] + scan_plans[] + IE bytes; the size is built with the
	 * struct_size()/array_size()/size_add() helpers ---
	 */
	size = struct_size(request, channels, n_channels);
	size = size_add(size, array_size(sizeof(*request->ssids), n_ssids));
	size = size_add(size, array_size(sizeof(*request->match_sets),
					 n_match_sets));
	size = size_add(size, array_size(sizeof(*request->scan_plans),
					 n_plans));
	size = size_add(size, ie_len);
	request = kzalloc(size, GFP_KERNEL);
	if (!request)
		return ERR_PTR(-ENOMEM);

	/*
	 * Carve the trailing memory. The order used below is
	 * channels, ssids, ie, match_sets, scan_plans; each pointer is placed
	 * directly behind the last region that is actually present.
	 *
	 * NOTE(review): match_sets and scan_plans may be placed at
	 * request->ie + ie_len, so their alignment appears to depend on
	 * ie_len - confirm this is acceptable for these struct types.
	 */
	if (n_ssids)
		request->ssids = (void *)&request->channels[n_channels];
	request->n_ssids = n_ssids;
	if (ie_len) {
		if (n_ssids)
			request->ie = (void *)(request->ssids + n_ssids);
		else
			request->ie = (void *)(request->channels + n_channels);
	}

	if (n_match_sets) {
		if (request->ie)
			request->match_sets = (void *)(request->ie + ie_len);
		else if (n_ssids)
			request->match_sets =
				(void *)(request->ssids + n_ssids);
		else
			request->match_sets =
				(void *)(request->channels + n_channels);
	}
	request->n_match_sets = n_match_sets;

	if (n_match_sets)
		request->scan_plans = (void *)(request->match_sets +
					       n_match_sets);
	else if (request->ie)
		request->scan_plans = (void *)(request->ie + ie_len);
	else if (n_ssids)
		request->scan_plans = (void *)(request->ssids + n_ssids);
	else
		request->scan_plans = (void *)(request->channels + n_channels);

	request->n_scan_plans = n_plans;

	/* --- pass 2: fill in channels --- */
	i = 0;
	if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
		/* user specified, bail out if channel not found */
		nla_for_each_nested(attr,
				    attrs[NL80211_ATTR_SCAN_FREQUENCIES],
				    tmp) {
			struct ieee80211_channel *chan;

			chan = ieee80211_get_channel(wiphy, nla_get_u32(attr));

			if (!chan) {
				err = -EINVAL;
				goto out_free;
			}

			/* ignore disabled channels */
			if (chan->flags & IEEE80211_CHAN_DISABLED)
				continue;

			request->channels[i] = chan;
			i++;
		}
	} else {
		/* all channels */
		for (band = 0; band < NUM_NL80211_BANDS; band++) {
			int j;

			if (!wiphy->bands[band])
				continue;
			for (j = 0; j < wiphy->bands[band]->n_channels; j++) {
				struct ieee80211_channel *chan;

				chan = &wiphy->bands[band]->channels[j];

				if (chan->flags & IEEE80211_CHAN_DISABLED)
					continue;

				request->channels[i] = chan;
				i++;
			}
		}
	}

	/* nothing usable left after dropping disabled channels */
	if (!i) {
		err = -EINVAL;
		goto out_free;
	}

	request->n_channels = i;

	/* --- fill in SSIDs --- */
	i = 0;
	if (n_ssids) {
		nla_for_each_nested(attr, attrs[NL80211_ATTR_SCAN_SSIDS],
				    tmp) {
			if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) {
				err = -EINVAL;
				goto out_free;
			}
			request->ssids[i].ssid_len = nla_len(attr);
			memcpy(request->ssids[i].ssid, nla_data(attr),
			       nla_len(attr));
			i++;
		}
	}

	/* --- fill in match sets --- */
	i = 0;
	if (attrs[NL80211_ATTR_SCHED_SCAN_MATCH]) {
		nla_for_each_nested(attr,
				    attrs[NL80211_ATTR_SCHED_SCAN_MATCH],
				    tmp) {
			struct nlattr *ssid, *bssid, *rssi;

			err = nla_parse_nested_deprecated(tb,
							  NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
							  attr,
							  nl80211_match_policy,
							  NULL);
			if (err)
				goto out_free;
			ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID];
			bssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID];

			/*
			 * NOTE(review): i is advanced here for an entry the
			 * counting pass above did not add to n_match_sets;
			 * a later SSID/BSSID entry could then hit the
			 * WARN_ON() below, and the "i == 0" check after the
			 * loop no longer holds for an RSSI-only list -
			 * confirm this is intended.
			 */
			if (!ssid && !bssid) {
				i++;
				continue;
			}

			if (WARN_ON(i >= n_match_sets)) {
				/* this indicates a programming error,
				 * the loop above should have verified
				 * things properly
				 */
				err = -EINVAL;
				goto out_free;
			}

			if (ssid) {
				memcpy(request->match_sets[i].ssid.ssid,
				       nla_data(ssid), nla_len(ssid));
				request->match_sets[i].ssid.ssid_len =
					nla_len(ssid);
			}
			if (bssid)
				memcpy(request->match_sets[i].bssid,
				       nla_data(bssid), ETH_ALEN);

			/* special attribute - old implementation w/a */
			request->match_sets[i].rssi_thold = default_match_rssi;
			rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI];
			if (rssi)
				request->match_sets[i].rssi_thold =
					nla_get_s32(rssi);

			/* Parse per band RSSI attribute */
			err = nl80211_parse_sched_scan_per_band_rssi(wiphy,
				&request->match_sets[i],
				tb[NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI],
				request->match_sets[i].rssi_thold);
			if (err)
				goto out_free;

			i++;
		}

		/* there was no other matchset, so the RSSI one is alone */
		if (i == 0 && n_match_sets)
			request->match_sets[0].rssi_thold = default_match_rssi;

		/* min_rssi_thold is the smallest threshold over all sets */
		request->min_rssi_thold = INT_MAX;
		for (i = 0; i < n_match_sets; i++)
			request->min_rssi_thold =
				min(request->match_sets[i].rssi_thold,
				    request->min_rssi_thold);
	} else {
		request->min_rssi_thold = NL80211_SCAN_RSSI_THOLD_OFF;
	}

	if (ie_len) {
		request->ie_len = ie_len;
		memcpy((void *)request->ie,
		       nla_data(attrs[NL80211_ATTR_IE]),
		       request->ie_len);
	}

	err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true);
	if (err)
		goto out_free;

	if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY])
		request->delay =
			nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_DELAY]);

	if (attrs[NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI]) {
		request->relative_rssi = nla_get_s8(
			attrs[NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI]);
		request->relative_rssi_set = true;
	}

	/* the RSSI adjustment is only honoured together with relative RSSI */
	if (request->relative_rssi_set &&
	    attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST]) {
		struct nl80211_bss_select_rssi_adjust *rssi_adjust;

		rssi_adjust = nla_data(
			attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST]);
		request->rssi_adjust.band = rssi_adjust->band;
		request->rssi_adjust.delta = rssi_adjust->delta;
		if (!is_band_valid(wiphy, request->rssi_adjust.band)) {
			err = -EINVAL;
			goto out_free;
		}
	}

	err = nl80211_parse_sched_scan_plans(wiphy, n_plans, request, attrs);
	if (err)
		goto out_free;

	request->scan_start = jiffies;

	return request;

out_free:
	kfree(request);
	return ERR_PTR(err);
}

/*
 * Handle a request to start a scheduled scan.
 *
 * Returns -EOPNOTSUPP when the device advertises no scheduled scan requests
 * or the driver has no sched_scan_start op, the error from
 * cfg80211_sched_scan_req_possible() or from parsing, the driver's error
 * from rdev_sched_scan_start(), or 0 once the request has been handed to
 * cfg80211_add_sched_scan_req() and announced.
 */
static int nl80211_start_sched_scan(struct sk_buff *skb,
				    struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_sched_scan_request *sched_scan_req;
	bool want_multi;
	int err;

	if (!rdev->wiphy.max_sched_scan_reqs || !rdev->ops->sched_scan_start)
		return -EOPNOTSUPP;

	/* presence of the attribute is what matters, not its payload */
	want_multi = info->attrs[NL80211_ATTR_SCHED_SCAN_MULTI];
	err = cfg80211_sched_scan_req_possible(rdev, want_multi);
	if (err)
		return err;

	sched_scan_req =
nl80211_parse_sched_scan(&rdev->wiphy, wdev,
						  info->attrs,
						  rdev->wiphy.max_match_sets);

	/* parse returns either a request or an ERR_PTR() */
	err = PTR_ERR_OR_ZERO(sched_scan_req);
	if (err)
		goto out_err;

	/* leave request id zero for legacy request
	 * or if driver does not support multi-scheduled scan
	 */
	if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1)
		sched_scan_req->reqid = cfg80211_assign_cookie(rdev);

	err = rdev_sched_scan_start(rdev, dev, sched_scan_req);
	if (err)
		goto out_free;

	sched_scan_req->dev = dev;
	sched_scan_req->wiphy = &rdev->wiphy;

	/* remember the requesting socket when SOCKET_OWNER was supplied */
	if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
		sched_scan_req->owner_nlportid = info->snd_portid;

	cfg80211_add_sched_scan_req(rdev, sched_scan_req);

	nl80211_send_sched_scan(sched_scan_req, NL80211_CMD_START_SCHED_SCAN);
	return 0;

out_free:
	kfree(sched_scan_req);
out_err:
	return err;
}

/*
 * Handle a request to stop a scheduled scan.
 *
 * With NL80211_ATTR_COOKIE the request is looked up and stopped by cookie
 * via __cfg80211_stop_sched_scan(). Without it, only the first entry of
 * rdev->sched_scan_req_list is considered, and only if its reqid is zero
 * and it is either unowned or owned by the sending port; otherwise -ENOENT.
 *
 * Returns -EOPNOTSUPP when the device advertises no scheduled scan requests
 * or the driver has no sched_scan_stop op.
 */
static int nl80211_stop_sched_scan(struct sk_buff *skb,
				   struct genl_info *info)
{
	struct cfg80211_sched_scan_request *req;
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	u64 cookie;

	if (!rdev->wiphy.max_sched_scan_reqs || !rdev->ops->sched_scan_stop)
		return -EOPNOTSUPP;

	if (info->attrs[NL80211_ATTR_COOKIE]) {
		cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]);
		return __cfg80211_stop_sched_scan(rdev, cookie, false);
	}

	req = list_first_or_null_rcu(&rdev->sched_scan_req_list,
				     struct cfg80211_sched_scan_request,
				     list);
	/* a non-zero reqid means the request must be stopped by cookie */
	if (!req || req->reqid ||
	    (req->owner_nlportid &&
	     req->owner_nlportid != info->snd_portid))
		return -ENOENT;

	return cfg80211_stop_sched_scan_req(rdev, req, false);
}

/*
 * Handle a request to start radar detection on the channel definition
 * given in the message.
 *
 * The body runs under wiphy_lock(); every exit after the lock is taken goes
 * through the "unlock" label. err starts at -EINVAL, which is what is
 * returned when the DFS region is unset. On success the chandef and the
 * CAC bookkeeping fields of the wdev are updated.
 */
static int nl80211_start_radar_detection(struct sk_buff *skb,
					 struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_chan_def chandef;
	enum nl80211_dfs_regions dfs_region;
	unsigned int cac_time_ms;
	int err = -EINVAL;

	/* let any pending DFS channel update work finish before locking */
	flush_delayed_work(&rdev->dfs_update_channels_wk);

	wiphy_lock(wiphy);

	dfs_region = reg_get_dfs_region(wiphy);
	if
(dfs_region == NL80211_DFS_UNSET) goto unlock; err = nl80211_parse_chandef(rdev, info, &chandef); if (err) goto unlock; err = cfg80211_chandef_dfs_required(wiphy, &chandef, wdev->iftype); if (err < 0) goto unlock; if (err == 0) { err = -EINVAL; goto unlock; } if (!cfg80211_chandef_dfs_usable(wiphy, &chandef)) { err = -EINVAL; goto unlock; } if (nla_get_flag(info->attrs[NL80211_ATTR_RADAR_BACKGROUND])) { err = cfg80211_start_background_radar_detection(rdev, wdev, &chandef); goto unlock; } if (netif_carrier_ok(dev)) { err = -EBUSY; goto unlock; } if (wdev->cac_started) { err = -EBUSY; goto unlock; } /* CAC start is offloaded to HW and can't be started manually */ if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD)) { err = -EOPNOTSUPP; goto unlock; } if (!rdev->ops->start_radar_detection) { err = -EOPNOTSUPP; goto unlock; } cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, &chandef); if (WARN_ON(!cac_time_ms)) cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms); if (!err) { wdev->links[0].ap.chandef = chandef; wdev->cac_started = true; wdev->cac_start_time = jiffies; wdev->cac_time_ms = cac_time_ms; } unlock: wiphy_unlock(wiphy); return err; } static int nl80211_notify_radar_detection(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_chan_def chandef; enum nl80211_dfs_regions dfs_region; int err; dfs_region = reg_get_dfs_region(wiphy); if (dfs_region == NL80211_DFS_UNSET) { GENL_SET_ERR_MSG(info, "DFS Region is not set. 
Unexpected Radar indication"); return -EINVAL; } err = nl80211_parse_chandef(rdev, info, &chandef); if (err) { GENL_SET_ERR_MSG(info, "Unable to extract chandef info"); return err; } err = cfg80211_chandef_dfs_required(wiphy, &chandef, wdev->iftype); if (err < 0) { GENL_SET_ERR_MSG(info, "chandef is invalid"); return err; } if (err == 0) { GENL_SET_ERR_MSG(info, "Unexpected Radar indication for chandef/iftype"); return -EINVAL; } /* Do not process this notification if radar is already detected * by kernel on this channel, and return success. */ if (chandef.chan->dfs_state == NL80211_DFS_UNAVAILABLE) return 0; cfg80211_set_dfs_state(wiphy, &chandef, NL80211_DFS_UNAVAILABLE); cfg80211_sched_dfs_chan_update(rdev); rdev->radar_chandef = chandef; /* Propagate this notification to other radios as well */ queue_work(cfg80211_wq, &rdev->propagate_radar_detect_wk); return 0; } static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_csa_settings params; struct nlattr **csa_attrs = NULL; int err; bool need_new_beacon = false; bool need_handle_dfs_flag = true; int len, i; u32 cs_count; if (!rdev->ops->channel_switch || !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) return -EOPNOTSUPP; switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: need_new_beacon = true; /* For all modes except AP the handle_dfs flag needs to be * supplied to tell the kernel that userspace will handle radar * events when they happen. Otherwise a switch to a channel * requiring DFS will be rejected. 
*/ need_handle_dfs_flag = false; /* useless if AP is not running */ if (!wdev->links[link_id].ap.beacon_interval) return -ENOTCONN; break; case NL80211_IFTYPE_ADHOC: if (!wdev->u.ibss.ssid_len) return -ENOTCONN; break; case NL80211_IFTYPE_MESH_POINT: if (!wdev->u.mesh.id_len) return -ENOTCONN; break; default: return -EOPNOTSUPP; } memset(¶ms, 0, sizeof(params)); params.beacon_csa.ftm_responder = -1; if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] || !info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]) return -EINVAL; /* only important for AP, IBSS and mesh create IEs internally */ if (need_new_beacon && !info->attrs[NL80211_ATTR_CSA_IES]) return -EINVAL; /* Even though the attribute is u32, the specification says * u8, so let's make sure we don't overflow. */ cs_count = nla_get_u32(info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]); if (cs_count > 255) return -EINVAL; params.count = cs_count; if (!need_new_beacon) goto skip_beacons; err = nl80211_parse_beacon(rdev, info->attrs, ¶ms.beacon_after, info->extack); if (err) goto free; csa_attrs = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*csa_attrs), GFP_KERNEL); if (!csa_attrs) { err = -ENOMEM; goto free; } err = nla_parse_nested_deprecated(csa_attrs, NL80211_ATTR_MAX, info->attrs[NL80211_ATTR_CSA_IES], nl80211_policy, info->extack); if (err) goto free; err = nl80211_parse_beacon(rdev, csa_attrs, ¶ms.beacon_csa, info->extack); if (err) goto free; if (!csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]) { err = -EINVAL; goto free; } len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]); if (!len || (len % sizeof(u16))) { err = -EINVAL; goto free; } params.n_counter_offsets_beacon = len / sizeof(u16); if (rdev->wiphy.max_num_csa_counters && (params.n_counter_offsets_beacon > rdev->wiphy.max_num_csa_counters)) { err = -EINVAL; goto free; } params.counter_offsets_beacon = nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]); /* sanity checks - counters should fit and be the same */ for (i = 0; i < params.n_counter_offsets_beacon; i++) { u16 offset = 
params.counter_offsets_beacon[i]; if (offset >= params.beacon_csa.tail_len) { err = -EINVAL; goto free; } if (params.beacon_csa.tail[offset] != params.count) { err = -EINVAL; goto free; } } if (csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]) { len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]); if (!len || (len % sizeof(u16))) { err = -EINVAL; goto free; } params.n_counter_offsets_presp = len / sizeof(u16); if (rdev->wiphy.max_num_csa_counters && (params.n_counter_offsets_presp > rdev->wiphy.max_num_csa_counters)) { err = -EINVAL; goto free; } params.counter_offsets_presp = nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]); /* sanity checks - counters should fit and be the same */ for (i = 0; i < params.n_counter_offsets_presp; i++) { u16 offset = params.counter_offsets_presp[i]; if (offset >= params.beacon_csa.probe_resp_len) { err = -EINVAL; goto free; } if (params.beacon_csa.probe_resp[offset] != params.count) { err = -EINVAL; goto free; } } } skip_beacons: err = nl80211_parse_chandef(rdev, info, ¶ms.chandef); if (err) goto free; if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms.chandef, wdev->iftype)) { err = -EINVAL; goto free; } err = cfg80211_chandef_dfs_required(wdev->wiphy, ¶ms.chandef, wdev->iftype); if (err < 0) goto free; if (err > 0) { params.radar_required = true; if (need_handle_dfs_flag && !nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS])) { err = -EINVAL; goto free; } } if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX]) params.block_tx = true; if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) { err = nl80211_parse_punct_bitmap(rdev, info, ¶ms.chandef, ¶ms.punct_bitmap); if (err) goto free; } err = rdev_channel_switch(rdev, dev, ¶ms); free: kfree(params.beacon_after.mbssid_ies); kfree(params.beacon_csa.mbssid_ies); kfree(params.beacon_after.rnr_ies); kfree(params.beacon_csa.rnr_ies); kfree(csa_attrs); return err; } static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, u32 seq, int flags, struct cfg80211_registered_device 
*rdev, struct wireless_dev *wdev,
			    struct cfg80211_internal_bss *intbss)
{
	struct cfg80211_bss *res = &intbss->pub;
	const struct cfg80211_bss_ies *ies;
	unsigned int link_id;
	void *hdr;
	struct nlattr *bss;

	lockdep_assert_wiphy(wdev->wiphy);

	hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags,
			     NL80211_CMD_NEW_SCAN_RESULTS);
	if (!hdr)
		return -1;

	genl_dump_check_consistent(cb, hdr);

	if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation))
		goto nla_put_failure;

	/* the interface index is only emitted when a netdev exists */
	if (wdev->netdev &&
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex))
		goto nla_put_failure;
	if (nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
			      NL80211_ATTR_PAD))
		goto nla_put_failure;

	/* everything from here to nla_nest_end() is nested in ATTR_BSS */
	bss = nla_nest_start_noflag(msg, NL80211_ATTR_BSS);
	if (!bss)
		goto nla_put_failure;
	/* an all-zero BSSID is not reported */
	if ((!is_zero_ether_addr(res->bssid) &&
	     nla_put(msg, NL80211_BSS_BSSID, ETH_ALEN, res->bssid)))
		goto nla_put_failure;

	/*
	 * The IE pointers are RCU-protected; failures inside this section
	 * must leave through fail_unlock_rcu so the read lock is dropped.
	 */
	rcu_read_lock();
	/* indicate whether we have probe response data or not */
	if (rcu_access_pointer(res->proberesp_ies) &&
	    nla_put_flag(msg, NL80211_BSS_PRESP_DATA))
		goto fail_unlock_rcu;

	/* this pointer prefers to be pointed to probe response data
	 * but is always valid
	 */
	ies = rcu_dereference(res->ies);
	if (ies) {
		if (nla_put_u64_64bit(msg, NL80211_BSS_TSF, ies->tsf,
				      NL80211_BSS_PAD))
			goto fail_unlock_rcu;
		if (ies->len && nla_put(msg, NL80211_BSS_INFORMATION_ELEMENTS,
					ies->len, ies->data))
			goto fail_unlock_rcu;
	}

	/* and this pointer is always (unless driver didn't know) beacon data */
	ies = rcu_dereference(res->beacon_ies);
	if (ies && ies->from_beacon) {
		if (nla_put_u64_64bit(msg, NL80211_BSS_BEACON_TSF, ies->tsf,
				      NL80211_BSS_PAD))
			goto fail_unlock_rcu;
		if (ies->len && nla_put(msg, NL80211_BSS_BEACON_IES,
					ies->len, ies->data))
			goto fail_unlock_rcu;
	}
	rcu_read_unlock();

	/* a zero beacon interval is not reported */
	if (res->beacon_interval &&
	    nla_put_u16(msg, NL80211_BSS_BEACON_INTERVAL, res->beacon_interval))
		goto nla_put_failure;
	if (nla_put_u16(msg, NL80211_BSS_CAPABILITY, res->capability) ||
	    nla_put_u32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq) ||
	    nla_put_u32(msg, NL80211_BSS_FREQUENCY_OFFSET,
			res->channel->freq_offset) ||
	    nla_put_u32(msg, NL80211_BSS_SEEN_MS_AGO,
			jiffies_to_msecs(jiffies - intbss->ts)))
		goto nla_put_failure;

	/* parent TSF and parent BSSID are emitted together or not at all */
	if (intbss->parent_tsf &&
	    (nla_put_u64_64bit(msg, NL80211_BSS_PARENT_TSF,
			       intbss->parent_tsf, NL80211_BSS_PAD) ||
	     nla_put(msg, NL80211_BSS_PARENT_BSSID, ETH_ALEN,
		     intbss->parent_bssid)))
		goto nla_put_failure;

	if (intbss->ts_boottime &&
	    nla_put_u64_64bit(msg, NL80211_BSS_LAST_SEEN_BOOTTIME,
			      intbss->ts_boottime, NL80211_BSS_PAD))
		goto nla_put_failure;

	if (!nl80211_put_signal(msg, intbss->pub.chains,
				intbss->pub.chain_signal,
				NL80211_BSS_CHAIN_SIGNAL))
		goto nla_put_failure;

	/* the signal attribute and its width depend on the device's type */
	switch (rdev->wiphy.signal_type) {
	case CFG80211_SIGNAL_TYPE_MBM:
		if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM, res->signal))
			goto nla_put_failure;
		break;
	case CFG80211_SIGNAL_TYPE_UNSPEC:
		if (nla_put_u8(msg, NL80211_BSS_SIGNAL_UNSPEC, res->signal))
			goto nla_put_failure;
		break;
	default:
		break;
	}

	/* report a status only when this BSS is the interface's current one */
	switch (wdev->iftype) {
	case NL80211_IFTYPE_P2P_CLIENT:
	case NL80211_IFTYPE_STATION:
		for_each_valid_link(wdev, link_id) {
			/*
			 * link id and MLD address are added only when
			 * wdev->valid_links is non-zero
			 */
			if (intbss == wdev->links[link_id].client.current_bss &&
			    (nla_put_u32(msg, NL80211_BSS_STATUS,
					 NL80211_BSS_STATUS_ASSOCIATED) ||
			     (wdev->valid_links &&
			      (nla_put_u8(msg, NL80211_BSS_MLO_LINK_ID,
					  link_id) ||
			       nla_put(msg, NL80211_BSS_MLD_ADDR, ETH_ALEN,
				       wdev->u.client.connected_addr)))))
				goto nla_put_failure;
		}
		break;
	case NL80211_IFTYPE_ADHOC:
		if (intbss == wdev->u.ibss.current_bss &&
		    nla_put_u32(msg, NL80211_BSS_STATUS,
				NL80211_BSS_STATUS_IBSS_JOINED))
			goto nla_put_failure;
		break;
	default:
		break;
	}

	nla_nest_end(msg, bss);

	genlmsg_end(msg, hdr);
	return 0;

fail_unlock_rcu:
	rcu_read_unlock();
nla_put_failure:
	/* drop the partially built message */
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

/*
 * Netlink dump callback for scan results.
 *
 * cb->args[2] carries the number of BSS entries already sent, so that a
 * following invocation resumes after them. Returns skb->len, or the error
 * from nl80211_prepare_wdev_dump().
 */
static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct cfg80211_registered_device *rdev;
	struct cfg80211_internal_bss *scan;
	struct wireless_dev *wdev;
	int start = cb->args[2], idx = 0;
	int err;

	err =
nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
	if (err)
		return err;
	/* nl80211_prepare_wdev_dump acquired it in the successful case */
	__acquire(&rdev->wiphy.mtx);

	spin_lock_bh(&rdev->bss_lock);

	/*
	 * dump_scan will be called multiple times to break up the scan results
	 * into multiple messages.  It is unlikely that any more bss-es will be
	 * expired after the first call, so only call this on the
	 * first dump_scan invocation.
	 */
	if (start == 0)
		cfg80211_bss_expire(rdev);

	cb->seq = rdev->bss_generation;

	list_for_each_entry(scan, &rdev->bss_list, list) {
		/* skip the entries sent by earlier invocations */
		if (++idx <= start)
			continue;
		if (nl80211_send_bss(skb, cb,
				cb->nlh->nlmsg_seq, NLM_F_MULTI,
				rdev, wdev, scan) < 0) {
			/* this entry was not sent; retry it next time */
			idx--;
			break;
		}
	}

	spin_unlock_bh(&rdev->bss_lock);

	cb->args[2] = idx;
	wiphy_unlock(&rdev->wiphy);

	return skb->len;
}

/*
 * Emit one NL80211_CMD_NEW_SURVEY_RESULTS message for @survey.
 *
 * @allow_radio_stats: when false, entries without a channel are skipped
 *                     and 0 is returned without adding anything to @msg.
 *
 * Optional fields are emitted only when the matching SURVEY_INFO_* bit is
 * set in survey->filled. Returns 0 on success (or skip), -ENOMEM when the
 * header cannot be added, and -EMSGSIZE (after cancelling the partial
 * message) when an attribute does not fit.
 */
static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
			       int flags, struct net_device *dev,
			       bool allow_radio_stats,
			       struct survey_info *survey)
{
	void *hdr;
	struct nlattr *infoattr;

	/* skip radio stats if userspace didn't request them */
	if (!survey->channel && !allow_radio_stats)
		return 0;

	hdr = nl80211hdr_put(msg, portid, seq, flags,
			     NL80211_CMD_NEW_SURVEY_RESULTS);
	if (!hdr)
		return -ENOMEM;

	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
		goto nla_put_failure;

	infoattr = nla_nest_start_noflag(msg, NL80211_ATTR_SURVEY_INFO);
	if (!infoattr)
		goto nla_put_failure;

	if (survey->channel &&
	    nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY,
			survey->channel->center_freq))
		goto nla_put_failure;

	/* the frequency offset is only reported when non-zero */
	if (survey->channel && survey->channel->freq_offset &&
	    nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY_OFFSET,
			survey->channel->freq_offset))
		goto nla_put_failure;

	if ((survey->filled & SURVEY_INFO_NOISE_DBM) &&
	    nla_put_u8(msg, NL80211_SURVEY_INFO_NOISE, survey->noise))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_IN_USE) &&
	    nla_put_flag(msg, NL80211_SURVEY_INFO_IN_USE))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME) &&
	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME,
			survey->time, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME_BUSY) &&
	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_BUSY,
			      survey->time_busy, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME_EXT_BUSY) &&
	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_EXT_BUSY,
			      survey->time_ext_busy, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME_RX) &&
	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_RX,
			      survey->time_rx, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME_TX) &&
	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_TX,
			      survey->time_tx, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME_SCAN) &&
	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_SCAN,
			      survey->time_scan, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME_BSS_RX) &&
	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_BSS_RX,
			      survey->time_bss_rx, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;

	nla_nest_end(msg, infoattr);

	genlmsg_end(msg, hdr);
	return 0;

nla_put_failure:
	/* drop the partially built message */
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

/*
 * Netlink dump callback for survey data.
 *
 * cb->args[2] carries the index of the next survey entry to request from
 * the driver. Returns skb->len when entries were processed, or a negative
 * error; attrbuf is freed on every exit path.
 */
static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlattr **attrbuf;
	struct survey_info survey;
	struct cfg80211_registered_device *rdev;
	struct wireless_dev *wdev;
	int survey_idx = cb->args[2];
	int res;
	bool radio_stats;

	attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL);
	if (!attrbuf)
		return -ENOMEM;

	res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, attrbuf);
	if (res) {
		/* the wiphy lock is not held on this path */
		kfree(attrbuf);
		return res;
	}
	/* nl80211_prepare_wdev_dump acquired it in the successful case */
	__acquire(&rdev->wiphy.mtx);

	/* prepare_wdev_dump parsed the attributes */
	radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];

	if (!wdev->netdev) {
		res = -EINVAL;
		goto out_err;
	}

	if (!rdev->ops->dump_survey) {
		res = -EOPNOTSUPP;
		goto
out_err;
	}

	/* ask the driver for entries until it reports -ENOENT */
	while (1) {
		res = rdev_dump_survey(rdev, wdev->netdev, survey_idx, &survey);
		if (res == -ENOENT)
			break;
		if (res)
			goto out_err;

		/* don't send disabled channels, but do send non-channel data */
		if (survey.channel &&
		    survey.channel->flags & IEEE80211_CHAN_DISABLED) {
			survey_idx++;
			continue;
		}

		/*
		 * On failure the index is not advanced, so the same entry is
		 * requested again on the next invocation.
		 */
		if (nl80211_send_survey(skb,
				NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq, NLM_F_MULTI,
				wdev->netdev, radio_stats, &survey) < 0)
			goto out;
		survey_idx++;
	}

out:
	cb->args[2] = survey_idx;
	res = skb->len;
out_err:
	kfree(attrbuf);
	wiphy_unlock(&rdev->wiphy);
	return res;
}

/*
 * Return true when @wpa_versions has no bits set other than
 * NL80211_WPA_VERSION_1, NL80211_WPA_VERSION_2 and NL80211_WPA_VERSION_3
 * (zero is therefore accepted as well).
 */
static bool nl80211_valid_wpa_versions(u32 wpa_versions)
{
	return !(wpa_versions & ~(NL80211_WPA_VERSION_1 |
				  NL80211_WPA_VERSION_2 |
				  NL80211_WPA_VERSION_3));
}

/*
 * Handle an authentication request.
 *
 * Requires the MAC, AUTH_TYPE, SSID and WIPHY_FREQ attributes. An optional
 * key is accepted only if it is a WEP40/WEP104 key of matching length with
 * index 0..3 whose cipher the device lists in wiphy.cipher_suites.
 *
 * Returns -EINVAL for missing or invalid attributes, -EOPNOTSUPP when the
 * driver has no auth op or the interface is neither station nor P2P client,
 * 0 without contacting the driver when NL80211_ATTR_LOCAL_STATE_CHANGE is
 * present, -ENOENT when no matching BSS is found, and otherwise the result
 * of cfg80211_mlme_auth().
 */
static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct ieee80211_channel *chan;
	const u8 *bssid, *ssid;
	int err, ssid_len;
	enum nl80211_auth_type auth_type;
	struct key_parse key;
	bool local_state_change;
	struct cfg80211_auth_request req = {};
	u32 freq;

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_AUTH_TYPE])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_SSID])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_WIPHY_FREQ])
		return -EINVAL;

	err = nl80211_parse_key(info, &key);
	if (err)
		return err;

	/* a non-negative index means a key was supplied */
	if (key.idx >= 0) {
		if (key.type != -1 && key.type != NL80211_KEYTYPE_GROUP)
			return -EINVAL;
		if (!key.p.key || !key.p.key_len)
			return -EINVAL;
		/* cipher and key length must both match WEP40 or WEP104 */
		if ((key.p.cipher != WLAN_CIPHER_SUITE_WEP40 ||
		     key.p.key_len != WLAN_KEY_LEN_WEP40) &&
		    (key.p.cipher != WLAN_CIPHER_SUITE_WEP104 ||
		     key.p.key_len != WLAN_KEY_LEN_WEP104))
			return -EINVAL;
		if (key.idx > 3)
			return -EINVAL;
	} else {
		key.p.key_len = 0;
		key.p.key = NULL;
	}

	/* the key's cipher must be one the device advertises */
	if (key.idx >= 0) {
		int i;
		bool ok = false;

		for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) {
			if (key.p.cipher == rdev->wiphy.cipher_suites[i]) {
				ok = true;
				break;
			}
		}
		if (!ok)
			return -EINVAL;
	}

	if (!rdev->ops->auth)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
	/* frequency is handled in kHz: MHz attribute plus optional offset */
	freq = MHZ_TO_KHZ(nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
	if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
		freq +=
		    nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);

	chan = nl80211_get_valid_chan(&rdev->wiphy, freq);
	if (!chan)
		return -EINVAL;

	ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
	ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);

	/* req.ie points into the message; nothing is copied here */
	if (info->attrs[NL80211_ATTR_IE]) {
		req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
		req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
	}

	auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]);
	if (!nl80211_valid_auth_type(rdev, auth_type, NL80211_CMD_AUTHENTICATE))
		return -EINVAL;

	/* SAE and the FILS types require auth data ... */
	if ((auth_type == NL80211_AUTHTYPE_SAE ||
	     auth_type == NL80211_AUTHTYPE_FILS_SK ||
	     auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
	     auth_type == NL80211_AUTHTYPE_FILS_PK) &&
	    !info->attrs[NL80211_ATTR_AUTH_DATA])
		return -EINVAL;

	/* ... and auth data is rejected for every other type */
	if (info->attrs[NL80211_ATTR_AUTH_DATA]) {
		if (auth_type != NL80211_AUTHTYPE_SAE &&
		    auth_type != NL80211_AUTHTYPE_FILS_SK &&
		    auth_type != NL80211_AUTHTYPE_FILS_SK_PFS &&
		    auth_type != NL80211_AUTHTYPE_FILS_PK)
			return -EINVAL;
		req.auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]);
		req.auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]);
	}

	local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];

	/*
	 * Since we no longer track auth state, ignore
	 * requests to only change local state.
	 */
	if (local_state_change)
		return 0;

	req.auth_type = auth_type;
	req.key = key.p.key;
	req.key_len = key.p.key_len;
	req.key_idx = key.idx;
	req.link_id = nl80211_link_id_or_invalid(info->attrs);
	/* a non-negative link id requires MLO support and an MLD address */
	if (req.link_id >= 0) {
		if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO))
			return -EINVAL;
		if (!info->attrs[NL80211_ATTR_MLD_ADDR])
			return -EINVAL;
		req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
		if (!is_valid_ether_addr(req.ap_mld_addr))
			return -EINVAL;
	}

	req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
				   IEEE80211_BSS_TYPE_ESS,
				   IEEE80211_PRIVACY_ANY);
	if (!req.bss)
		return -ENOENT;

	err = cfg80211_mlme_auth(rdev, dev, &req);

	/* release the BSS obtained from cfg80211_get_bss() above */
	cfg80211_put_bss(&rdev->wiphy, req.bss);

	return err;
}

/*
 * Check the preconditions for control port frames over nl80211.
 *
 * Returns -EINVAL (with an extended-ack message) when
 * NL80211_ATTR_SOCKET_OWNER is missing, -EOPNOTSUPP when the driver has no
 * tx_control_port op or the device lacks
 * NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211, and 0 otherwise.
 */
static int validate_pae_over_nl80211(struct cfg80211_registered_device *rdev,
				     struct genl_info *info)
{
	if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
		GENL_SET_ERR_MSG(info, "SOCKET_OWNER not set");
		return -EINVAL;
	}

	if (!rdev->ops->tx_control_port ||
	    !wiphy_ext_feature_isset(&rdev->wiphy,
				     NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211))
		return -EOPNOTSUPP;

	return 0;
}

static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
				   struct genl_info *info,
				   struct cfg80211_crypto_settings *settings,
				   int cipher_limit)
{
	memset(settings, 0, sizeof(*settings));

	settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT];

	if (info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) {
		u16 proto;

		proto = nla_get_u16(
			info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]);
		settings->control_port_ethertype = cpu_to_be16(proto);
		if (!(rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
		    proto != ETH_P_PAE)
			return -EINVAL;
		if (info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT])
			settings->control_port_no_encrypt = true;
	} else
		settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE);

	if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) {
		int r = validate_pae_over_nl80211(rdev, info);

		if (r < 0)
			return r;

		settings->control_port_over_nl80211 = true;

		if
(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_PREAUTH])
			settings->control_port_no_preauth = true;
	}

	/*
	 * Pairwise ciphers arrive as an array of u32: the payload length must
	 * be a multiple of sizeof(u32), the count must not exceed
	 * cipher_limit, and every suite must be supported by the wiphy.
	 */
	if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) {
		void *data;
		int len, i;

		data = nla_data(info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]);
		len = nla_len(info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]);
		settings->n_ciphers_pairwise = len / sizeof(u32);

		if (len % sizeof(u32))
			return -EINVAL;

		if (settings->n_ciphers_pairwise > cipher_limit)
			return -EINVAL;

		memcpy(settings->ciphers_pairwise, data, len);

		for (i = 0; i < settings->n_ciphers_pairwise; i++)
			if (!cfg80211_supported_cipher_suite(
					&rdev->wiphy,
					settings->ciphers_pairwise[i]))
				return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_CIPHER_SUITE_GROUP]) {
		settings->cipher_group =
			nla_get_u32(info->attrs[NL80211_ATTR_CIPHER_SUITE_GROUP]);
		if (!cfg80211_supported_cipher_suite(&rdev->wiphy,
						     settings->cipher_group))
			return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_WPA_VERSIONS]) {
		settings->wpa_versions =
			nla_get_u32(info->attrs[NL80211_ATTR_WPA_VERSIONS]);
		if (!nl80211_valid_wpa_versions(settings->wpa_versions))
			return -EINVAL;
	}

	/*
	 * AKM suites are also an array of u32; the count is limited by the
	 * wiphy's max_num_akm_suites.
	 */
	if (info->attrs[NL80211_ATTR_AKM_SUITES]) {
		void *data;
		int len;

		data = nla_data(info->attrs[NL80211_ATTR_AKM_SUITES]);
		len = nla_len(info->attrs[NL80211_ATTR_AKM_SUITES]);
		settings->n_akm_suites = len / sizeof(u32);

		if (len % sizeof(u32))
			return -EINVAL;

		if (settings->n_akm_suites > rdev->wiphy.max_num_akm_suites)
			return -EINVAL;

		memcpy(settings->akm_suites, data, len);
	}

	/*
	 * A PMK must be exactly WLAN_PMK_LEN bytes and is accepted only when
	 * one of the 4-way-handshake PSK offload features is set.
	 */
	if (info->attrs[NL80211_ATTR_PMK]) {
		if (nla_len(info->attrs[NL80211_ATTR_PMK]) != WLAN_PMK_LEN)
			return -EINVAL;
		if (!wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK) &&
		    !wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK))
			return -EINVAL;
		settings->psk = nla_data(info->attrs[NL80211_ATTR_PMK]);
	}

	if (info->attrs[NL80211_ATTR_SAE_PASSWORD]) {
		if (!wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_SAE_OFFLOAD) &&
		    !wiphy_ext_feature_isset(&rdev->wiphy,
NL80211_EXT_FEATURE_SAE_OFFLOAD_AP))
			return -EINVAL;
		/* The password is referenced in place, not copied. */
		settings->sae_pwd =
			nla_data(info->attrs[NL80211_ATTR_SAE_PASSWORD]);
		settings->sae_pwd_len =
			nla_len(info->attrs[NL80211_ATTR_SAE_PASSWORD]);
	}

	if (info->attrs[NL80211_ATTR_SAE_PWE])
		settings->sae_pwe =
			nla_get_u8(info->attrs[NL80211_ATTR_SAE_PWE]);
	else
		settings->sae_pwe = NL80211_SAE_PWE_UNSPECIFIED;

	return 0;
}

/*
 * Look up the BSS described by the MAC and WIPHY_FREQ (plus optional
 * WIPHY_FREQ_OFFSET) entries of @attrs for the given SSID.
 *
 * Returns the BSS from cfg80211_get_bss(), ERR_PTR(-EINVAL) when an
 * attribute is missing or the channel is not valid, or ERR_PTR(-ENOENT)
 * when no matching BSS is found. Never returns NULL.
 */
static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device *rdev,
					      const u8 *ssid, int ssid_len,
					      struct nlattr **attrs)
{
	struct ieee80211_channel *chan;
	struct cfg80211_bss *bss;
	const u8 *bssid;
	u32 freq;

	if (!attrs[NL80211_ATTR_MAC] || !attrs[NL80211_ATTR_WIPHY_FREQ])
		return ERR_PTR(-EINVAL);

	bssid = nla_data(attrs[NL80211_ATTR_MAC]);

	freq = MHZ_TO_KHZ(nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ]));
	if (attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
		freq += nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);

	chan = nl80211_get_valid_chan(&rdev->wiphy, freq);
	if (!chan)
		return ERR_PTR(-EINVAL);

	bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid,
			       ssid, ssid_len,
			       IEEE80211_BSS_TYPE_ESS,
			       IEEE80211_PRIVACY_ANY);
	if (!bss)
		return ERR_PTR(-ENOENT);

	return bss;
}

/*
 * Netlink handler that validates an association request and passes it to
 * cfg80211_mlme_assoc().
 *
 * Returns 0 or a negative errno; -EPERM when the connection is owned by a
 * different netlink port than the sender.
 */
static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct cfg80211_assoc_request req = {};
	struct nlattr **attrs = NULL;
	const u8 *ap_addr, *ssid;
	unsigned int link_id;
	int err, ssid_len;

	/* Only the recorded connection owner (if any) may associate. */
	if (dev->ieee80211_ptr->conn_owner_nlportid &&
	    dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
		return -EPERM;

	if (!info->attrs[NL80211_ATTR_SSID])
		return -EINVAL;

	if (!rdev->ops->assoc)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
	ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);

	if (info->attrs[NL80211_ATTR_IE]) {
		req.ie =
nla_data(info->attrs[NL80211_ATTR_IE]);
		req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);

		/* A non-inheritance element in the IEs is rejected. */
		if (cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
					   req.ie, req.ie_len)) {
			NL_SET_ERR_MSG_ATTR(info->extack,
					    info->attrs[NL80211_ATTR_IE],
					    "non-inheritance makes no sense");
			return -EINVAL;
		}
	}

	/* Only NL80211_MFP_REQUIRED and NL80211_MFP_NO are accepted here. */
	if (info->attrs[NL80211_ATTR_USE_MFP]) {
		enum nl80211_mfp mfp =
			nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
		if (mfp == NL80211_MFP_REQUIRED)
			req.use_mfp = true;
		else if (mfp != NL80211_MFP_NO)
			return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_PREV_BSSID])
		req.prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT]))
		req.flags |= ASSOC_REQ_DISABLE_HT;

	if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
		memcpy(&req.ht_capa_mask,
		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
		       sizeof(req.ht_capa_mask));

	/* HT capabilities are only accepted together with their mask. */
	if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
		if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
			return -EINVAL;
		memcpy(&req.ht_capa,
		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
		       sizeof(req.ht_capa));
	}

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
		req.flags |= ASSOC_REQ_DISABLE_VHT;

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE]))
		req.flags |= ASSOC_REQ_DISABLE_HE;

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_EHT]))
		req.flags |= ASSOC_REQ_DISABLE_EHT;

	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
		memcpy(&req.vht_capa_mask,
		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
		       sizeof(req.vht_capa_mask));

	/* VHT capabilities are only accepted together with their mask. */
	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) {
		if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
			return -EINVAL;
		memcpy(&req.vht_capa,
		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]),
		       sizeof(req.vht_capa));
	}

	/*
	 * RRM needs either both the DS_PARAM_SET_IE_IN_PROBES and QUIET
	 * feature bits, or the RRM extended feature.
	 */
	if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
		if (!((rdev->wiphy.features &
			NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) &&
		       (rdev->wiphy.features & NL80211_FEATURE_QUIET)) &&
		    !wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_RRM))
			return -EINVAL;
req.flags |= ASSOC_REQ_USE_RRM;
	}

	/* A FILS KEK is only accepted together with the FILS nonces. */
	if (info->attrs[NL80211_ATTR_FILS_KEK]) {
		req.fils_kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]);
		req.fils_kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]);
		if (!info->attrs[NL80211_ATTR_FILS_NONCES])
			return -EINVAL;
		req.fils_nonces =
			nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]);
	}

	/* S1G capabilities and their mask must be supplied as a pair. */
	if (info->attrs[NL80211_ATTR_S1G_CAPABILITY_MASK]) {
		if (!info->attrs[NL80211_ATTR_S1G_CAPABILITY])
			return -EINVAL;
		memcpy(&req.s1g_capa_mask,
		       nla_data(info->attrs[NL80211_ATTR_S1G_CAPABILITY_MASK]),
		       sizeof(req.s1g_capa_mask));
	}

	if (info->attrs[NL80211_ATTR_S1G_CAPABILITY]) {
		if (!info->attrs[NL80211_ATTR_S1G_CAPABILITY_MASK])
			return -EINVAL;
		memcpy(&req.s1g_capa,
		       nla_data(info->attrs[NL80211_ATTR_S1G_CAPABILITY]),
		       sizeof(req.s1g_capa));
	}

	req.link_id = nl80211_link_id_or_invalid(info->attrs);

	if (info->attrs[NL80211_ATTR_MLO_LINKS]) {
		unsigned int attrsize = NUM_NL80211_ATTR * sizeof(*attrs);
		struct nlattr *link;
		int rem = 0;

		/*
		 * Multi-link request: an assoc link ID, MLO support and the
		 * MLD address are required, while the top-level MAC and
		 * WIPHY_FREQ attributes must be absent.
		 */
		if (req.link_id < 0)
			return -EINVAL;

		if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO))
			return -EINVAL;

		if (info->attrs[NL80211_ATTR_MAC] ||
		    info->attrs[NL80211_ATTR_WIPHY_FREQ] ||
		    !info->attrs[NL80211_ATTR_MLD_ADDR])
			return -EINVAL;

		req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
		ap_addr = req.ap_mld_addr;

		/* Scratch attribute table, re-used for every nested link. */
		attrs = kzalloc(attrsize, GFP_KERNEL);
		if (!attrs)
			return -ENOMEM;

		nla_for_each_nested(link,
				    info->attrs[NL80211_ATTR_MLO_LINKS],
				    rem) {
			memset(attrs, 0, attrsize);

			nla_parse_nested(attrs, NL80211_ATTR_MAX,
					 link, NULL, NULL);

			if (!attrs[NL80211_ATTR_MLO_LINK_ID]) {
				err = -EINVAL;
				NL_SET_BAD_ATTR(info->extack, link);
				goto free;
			}

			link_id = nla_get_u8(attrs[NL80211_ATTR_MLO_LINK_ID]);
			/*
			 * The nested attributes were parsed without a policy
			 * above, so bound the ID here before it is used as
			 * an index into req.links[].
			 */
			if (link_id >= ARRAY_SIZE(req.links)) {
				err = -EINVAL;
				NL_SET_BAD_ATTR(info->extack, link);
				goto free;
			}

			/* cannot use the same link ID again */
			if (req.links[link_id].bss) {
				err = -EINVAL;
				NL_SET_BAD_ATTR(info->extack, link);
				goto free;
			}
			req.links[link_id].bss =
				nl80211_assoc_bss(rdev, ssid, ssid_len, attrs);
			if (IS_ERR(req.links[link_id].bss)) {
				err = PTR_ERR(req.links[link_id].bss);
				/* Do not leave an ERR_PTR behind in the slot. */
				req.links[link_id].bss = NULL;
NL_SET_ERR_MSG_ATTR(info->extack, link,
						    "Error fetching BSS for link");
				goto free;
			}

			/*
			 * Optional per-link elements: fragment elements and
			 * non-inheritance elements are both rejected.
			 */
			if (attrs[NL80211_ATTR_IE]) {
				req.links[link_id].elems =
					nla_data(attrs[NL80211_ATTR_IE]);
				req.links[link_id].elems_len =
					nla_len(attrs[NL80211_ATTR_IE]);

				if (cfg80211_find_elem(WLAN_EID_FRAGMENT,
						       req.links[link_id].elems,
						       req.links[link_id].elems_len)) {
					NL_SET_ERR_MSG_ATTR(info->extack,
							    attrs[NL80211_ATTR_IE],
							    "cannot deal with fragmentation");
					err = -EINVAL;
					goto free;
				}

				if (cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
							   req.links[link_id].elems,
							   req.links[link_id].elems_len)) {
					NL_SET_ERR_MSG_ATTR(info->extack,
							    attrs[NL80211_ATTR_IE],
							    "cannot deal with non-inheritance");
					err = -EINVAL;
					goto free;
				}
			}

			req.links[link_id].disabled =
				nla_get_flag(attrs[NL80211_ATTR_MLO_LINK_DISABLED]);
		}

		/*
		 * The assoc link itself must have been listed, must not carry
		 * per-link elements and must not be disabled.
		 */
		if (!req.links[req.link_id].bss) {
			err = -EINVAL;
			goto free;
		}

		if (req.links[req.link_id].elems_len) {
			GENL_SET_ERR_MSG(info,
					 "cannot have per-link elems on assoc link");
			err = -EINVAL;
			goto free;
		}

		if (req.links[req.link_id].disabled) {
			GENL_SET_ERR_MSG(info,
					 "cannot have assoc link disabled");
			err = -EINVAL;
			goto free;
		}

		/* Done with the scratch table; NULL it so the exit path's kfree() is a no-op. */
		kfree(attrs);
		attrs = NULL;
	} else {
		/* Non-MLO request: a link ID is not allowed. */
		if (req.link_id >= 0)
			return -EINVAL;

		req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs);
		if (IS_ERR(req.bss))
			return PTR_ERR(req.bss);
		ap_addr = req.bss->bssid;
	}

	/* At most one pairwise cipher is accepted for association. */
	err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
	if (!err) {
		struct nlattr *link;
		int rem = 0;

		err = cfg80211_mlme_assoc(rdev, dev, &req);

		/* On success, record the sender as connection owner if requested. */
		if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
			dev->ieee80211_ptr->conn_owner_nlportid =
				info->snd_portid;
			memcpy(dev->ieee80211_ptr->disconnect_bssid,
			       ap_addr, ETH_ALEN);
		}

		/* Report error from first problematic link */
		if (info->attrs[NL80211_ATTR_MLO_LINKS]) {
			nla_for_each_nested(link,
					    info->attrs[NL80211_ATTR_MLO_LINKS],
					    rem) {
				struct nlattr *link_id_attr =
					nla_find_nested(link, NL80211_ATTR_MLO_LINK_ID);

				if (!link_id_attr)
					continue;

				link_id = nla_get_u8(link_id_attr);

				/* The assoc link is skipped in this scan. */
				if (link_id == req.link_id)
					continue;

if (!req.links[link_id].error ||
				    WARN_ON(req.links[link_id].error > 0))
					continue;

				/* A per-link error is expected only when err is negative. */
				WARN_ON(err >= 0);

				NL_SET_BAD_ATTR(info->extack, link);
				err = req.links[link_id].error;
				break;
			}
		}
	}

free:
	/* Drop every BSS reference taken above (per-link and non-MLO). */
	for (link_id = 0; link_id < ARRAY_SIZE(req.links); link_id++)
		cfg80211_put_bss(&rdev->wiphy, req.links[link_id].bss);
	cfg80211_put_bss(&rdev->wiphy, req.bss);
	kfree(attrs);

	return err;
}

/*
 * Netlink handler that validates a deauthentication request and passes it
 * to cfg80211_mlme_deauth().
 *
 * Requires the MAC and REASON_CODE attributes; reason code 0 is rejected.
 * Returns -EPERM when the connection is owned by a different netlink port.
 */
static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	const u8 *ie = NULL, *bssid;
	int ie_len = 0;
	u16 reason_code;
	bool local_state_change;

	if (dev->ieee80211_ptr->conn_owner_nlportid &&
	    dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
		return -EPERM;

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_REASON_CODE])
		return -EINVAL;

	if (!rdev->ops->deauth)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);

	reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
	if (reason_code == 0) {
		/* Reason Code 0 is reserved */
		return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_IE]) {
		ie = nla_data(info->attrs[NL80211_ATTR_IE]);
		ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
	}

	local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];

	return cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code,
				    local_state_change);
}

/*
 * Netlink handler that validates a disassociation request and passes it to
 * cfg80211_mlme_disassoc().
 *
 * Requires the MAC and REASON_CODE attributes; reason code 0 is rejected.
 * Returns -EPERM when the connection is owned by a different netlink port.
 */
static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	const u8 *ie = NULL, *bssid;
	int ie_len = 0;
	u16 reason_code;
	bool local_state_change;

	if (dev->ieee80211_ptr->conn_owner_nlportid &&
	    dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
		return -EPERM;

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	if
(!info->attrs[NL80211_ATTR_REASON_CODE])
		return -EINVAL;

	if (!rdev->ops->disassoc)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);

	reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
	if (reason_code == 0) {
		/* Reason Code 0 is reserved */
		return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_IE]) {
		ie = nla_data(info->attrs[NL80211_ATTR_IE]);
		ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
	}

	local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];

	return cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code,
				      local_state_change);
}

/*
 * Translate the bitrate value @rateval into per-band rate indices.
 *
 * For every band the wiphy has, the first entry of the band's bitrate table
 * whose bitrate equals @rateval is stored in @mcast_rate[band] as index + 1.
 * Entries of bands without a match are left untouched, so callers pass in a
 * zeroed array. Returns true when at least one band matched.
 */
static bool
nl80211_parse_mcast_rate(struct cfg80211_registered_device *rdev,
			 int mcast_rate[NUM_NL80211_BANDS],
			 int rateval)
{
	struct wiphy *wiphy = &rdev->wiphy;
	bool found = false;
	int band, i;

	for (band = 0; band < NUM_NL80211_BANDS; band++) {
		struct ieee80211_supported_band *sband;

		sband = wiphy->bands[band];
		if (!sband)
			continue;

		for (i = 0; i < sband->n_bitrates; i++) {
			if (sband->bitrates[i].bitrate == rateval) {
				/* Stored as index + 1; 0 stays "no match". */
				mcast_rate[band] = i + 1;
				found = true;
				break;
			}
		}
	}

	return found;
}

/*
 * Netlink handler that validates an IBSS join request and passes it to
 * __cfg80211_join_ibss(). Returns 0 or a negative errno.
 */
static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct cfg80211_ibss_params ibss;
	struct wiphy *wiphy;
	struct cfg80211_cached_keys *connkeys = NULL;
	int err;

	memset(&ibss, 0, sizeof(ibss));

	/* A non-empty SSID is mandatory. */
	if (!info->attrs[NL80211_ATTR_SSID] ||
	    !nla_len(info->attrs[NL80211_ATTR_SSID]))
		return -EINVAL;

	/* Default beacon interval, used when the attribute is absent. */
	ibss.beacon_interval = 100;

	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL])
		ibss.beacon_interval =
			nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);

	err = cfg80211_validate_beacon_int(rdev, NL80211_IFTYPE_ADHOC,
					   ibss.beacon_interval);
	if (err)
		return err;

	if (!rdev->ops->join_ibss)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype !=
NL80211_IFTYPE_ADHOC)
		return -EOPNOTSUPP;

	wiphy = &rdev->wiphy;

	/* An explicit BSSID is optional but must be a valid address. */
	if (info->attrs[NL80211_ATTR_MAC]) {
		ibss.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);

		if (!is_valid_ether_addr(ibss.bssid))
			return -EINVAL;
	}
	ibss.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
	ibss.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);

	if (info->attrs[NL80211_ATTR_IE]) {
		ibss.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
		ibss.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
	}

	err = nl80211_parse_chandef(rdev, info, &ibss.chandef);
	if (err)
		return err;

	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef,
				     NL80211_IFTYPE_ADHOC))
		return -EINVAL;

	/*
	 * Channel-width gating: 5/10/20-NOHT are always allowed, 20/40 need
	 * NL80211_FEATURE_HT_IBSS, 80/80+80/160 additionally need
	 * NL80211_EXT_FEATURE_VHT_IBSS, anything else is rejected.
	 */
	switch (ibss.chandef.width) {
	case NL80211_CHAN_WIDTH_5:
	case NL80211_CHAN_WIDTH_10:
	case NL80211_CHAN_WIDTH_20_NOHT:
		break;
	case NL80211_CHAN_WIDTH_20:
	case NL80211_CHAN_WIDTH_40:
		if (!(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS))
			return -EINVAL;
		break;
	case NL80211_CHAN_WIDTH_80:
	case NL80211_CHAN_WIDTH_80P80:
	case NL80211_CHAN_WIDTH_160:
		if (!(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS))
			return -EINVAL;
		if (!wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_VHT_IBSS))
			return -EINVAL;
		break;
	case NL80211_CHAN_WIDTH_320:
		return -EINVAL;
	default:
		return -EINVAL;
	}

	ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED];
	ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY];

	/* Basic rates are converted to a mask for the chandef's band. */
	if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) {
		u8 *rates =
			nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
		int n_rates =
			nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
		struct ieee80211_supported_band *sband =
			wiphy->bands[ibss.chandef.chan->band];

		err = ieee80211_get_ratemask(sband, rates, n_rates,
					     &ibss.basic_rates);
		if (err)
			return err;
	}

	if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
		memcpy(&ibss.ht_capa_mask,
		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
		       sizeof(ibss.ht_capa_mask));

	/* HT capabilities are only accepted together with their mask. */
	if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
		if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
			return -EINVAL;
		memcpy(&ibss.ht_capa,
nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
		       sizeof(ibss.ht_capa));
	}

	/* A multicast rate that matches no band's bitrate table is rejected. */
	if (info->attrs[NL80211_ATTR_MCAST_RATE] &&
	    !nl80211_parse_mcast_rate(rdev, ibss.mcast_rate,
			nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE])))
		return -EINVAL;

	if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) {
		bool no_ht = false;

		connkeys = nl80211_parse_connkeys(rdev, info, &no_ht);
		if (IS_ERR(connkeys))
			return PTR_ERR(connkeys);

		/* Keys flagged no_ht are only allowed on a 20 MHz no-HT channel. */
		if ((ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) &&
		    no_ht) {
			kfree_sensitive(connkeys);
			return -EINVAL;
		}
	}

	ibss.control_port =
		nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT]);

	if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) {
		int r = validate_pae_over_nl80211(rdev, info);

		if (r < 0) {
			kfree_sensitive(connkeys);
			return r;
		}

		ibss.control_port_over_nl80211 = true;
	}

	ibss.userspace_handles_dfs =
		nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]);

	/*
	 * connkeys is freed here only when the join fails; on success the
	 * sender is recorded as connection owner if SOCKET_OWNER was given.
	 */
	err = __cfg80211_join_ibss(rdev, dev, &ibss, connkeys);
	if (err)
		kfree_sensitive(connkeys);
	else if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
		dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid;

	return err;
}

/*
 * Netlink handler for leaving an IBSS; calls cfg80211_leave_ibss().
 * Returns -EOPNOTSUPP when the driver lacks leave_ibss or the interface is
 * not of type NL80211_IFTYPE_ADHOC.
 */
static int nl80211_leave_ibss(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];

	if (!rdev->ops->leave_ibss)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC)
		return -EOPNOTSUPP;

	return cfg80211_leave_ibss(rdev, dev, false);
}

/*
 * Netlink handler that sets the multicast rate through
 * rdev_set_mcast_rate(). Only ADHOC, MESH_POINT and OCB interfaces are
 * accepted.
 */
static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	int mcast_rate[NUM_NL80211_BANDS];
	u32 nla_rate;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB)
		return -EOPNOTSUPP;

	if (!rdev->ops->set_mcast_rate)
		return -EOPNOTSUPP;

	/* Zero the table; nl80211_parse_mcast_rate() only fills matches. */
	memset(mcast_rate, 0, sizeof(mcast_rate));

	if
(!info->attrs[NL80211_ATTR_MCAST_RATE])
		return -EINVAL;

	nla_rate = nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]);
	if (!nl80211_parse_mcast_rate(rdev, mcast_rate, nla_rate))
		return -EINVAL;

	return rdev_set_mcast_rate(rdev, dev, mcast_rate);
}

/*
 * Allocate and pre-fill a netlink message for a vendor/testmode command
 * reply or event.
 *
 * The message gets the nl80211 header for @cmd, the wiphy index, the vendor
 * ID and subcommand when @info is given, the wdev ID (and ifindex if the
 * wdev has a netdev) when @wdev is given, and an opened nest of type @attr.
 * @rdev, the header and the nest are stashed in skb->cb slots 0..2.
 *
 * Returns the skb, or NULL on failure (the skb is freed in that case).
 */
static struct sk_buff *
__cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev,
			    struct wireless_dev *wdev, int approxlen,
			    u32 portid, u32 seq, enum nl80211_commands cmd,
			    enum nl80211_attrs attr,
			    const struct nl80211_vendor_cmd_info *info,
			    gfp_t gfp)
{
	struct sk_buff *skb;
	void *hdr;
	struct nlattr *data;

	/* 100 extra bytes are allocated on top of the caller's estimate. */
	skb = nlmsg_new(approxlen + 100, gfp);
	if (!skb)
		return NULL;

	hdr = nl80211hdr_put(skb, portid, seq, 0, cmd);
	if (!hdr) {
		kfree_skb(skb);
		return NULL;
	}

	if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
		goto nla_put_failure;

	if (info) {
		if (nla_put_u32(skb, NL80211_ATTR_VENDOR_ID,
				info->vendor_id))
			goto nla_put_failure;
		if (nla_put_u32(skb, NL80211_ATTR_VENDOR_SUBCMD,
				info->subcmd))
			goto nla_put_failure;
	}

	if (wdev) {
		if (nla_put_u64_64bit(skb, NL80211_ATTR_WDEV,
				      wdev_id(wdev), NL80211_ATTR_PAD))
			goto nla_put_failure;
		if (wdev->netdev &&
		    nla_put_u32(skb, NL80211_ATTR_IFINDEX,
				wdev->netdev->ifindex))
			goto nla_put_failure;
	}

	data = nla_nest_start_noflag(skb, attr);
	if (!data)
		goto nla_put_failure;

	/* Stash context in skb->cb; __cfg80211_send_event_skb() reads it back. */
	((void **)skb->cb)[0] = rdev;
	((void **)skb->cb)[1] = hdr;
	((void **)skb->cb)[2] = data;

	return skb;

 nla_put_failure:
	kfree_skb(skb);
	return NULL;
}

/*
 * Allocate an event skb for NL80211_CMD_TESTMODE or NL80211_CMD_VENDOR.
 *
 * For testmode, @vendor_event_idx must be -1; for vendor events it must
 * index wiphy->vendor_events. Any other @cmd, or an index violation, warns
 * and returns NULL.
 */
struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy,
					   struct wireless_dev *wdev,
					   enum nl80211_commands cmd,
					   enum nl80211_attrs attr,
					   unsigned int portid,
					   int vendor_event_idx,
					   int approxlen, gfp_t gfp)
{
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	const struct nl80211_vendor_cmd_info *info;

	switch (cmd) {
	case NL80211_CMD_TESTMODE:
		if (WARN_ON(vendor_event_idx != -1))
			return NULL;
		info = NULL;
		break;
	case NL80211_CMD_VENDOR:
		if (WARN_ON(vendor_event_idx < 0 ||
			    vendor_event_idx >= wiphy->n_vendor_events))
			return NULL;
		info =
&wiphy->vendor_events[vendor_event_idx];
		break;
	default:
		WARN_ON(1);
		return NULL;
	}

	/* Events are built with sequence number 0. */
	return __cfg80211_alloc_vendor_skb(rdev, wdev, approxlen, portid, 0,
					   cmd, attr, info, gfp);
}
EXPORT_SYMBOL(__cfg80211_alloc_event_skb);

/*
 * Finalize and send an event skb whose skb->cb slots 0..2 hold the rdev,
 * the message header and the open data nest.
 *
 * The message is unicast when its netlink header carries a non-zero pid;
 * otherwise it is multicast to the vendor group when the nest type is
 * NL80211_ATTR_VENDOR_DATA and to the testmode group in all other cases.
 */
void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp)
{
	struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0];
	void *hdr = ((void **)skb->cb)[1];
	struct nlmsghdr *nlhdr = nlmsg_hdr(skb);
	struct nlattr *data = ((void **)skb->cb)[2];
	enum nl80211_multicast_groups mcgrp = NL80211_MCGRP_TESTMODE;

	/* clear CB data for netlink core to own from now on */
	memset(skb->cb, 0, sizeof(skb->cb));

	nla_nest_end(skb, data);
	genlmsg_end(skb, hdr);

	if (nlhdr->nlmsg_pid) {
		genlmsg_unicast(wiphy_net(&rdev->wiphy), skb,
				nlhdr->nlmsg_pid);
	} else {
		if (data->nla_type == NL80211_ATTR_VENDOR_DATA)
			mcgrp = NL80211_MCGRP_VENDOR;

		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
					skb, 0, mcgrp, gfp);
	}
}
EXPORT_SYMBOL(__cfg80211_send_event_skb);

#ifdef CONFIG_NL80211_TESTMODE
/*
 * Netlink handler that forwards NL80211_ATTR_TESTDATA to the driver's
 * testmode_cmd op. Asserts that the wiphy mutex is held.
 *
 * A wdev is optional: a lookup failure of -EINVAL is treated as "no wdev",
 * any other lookup error is returned, and a wdev that belongs to a
 * different wiphy is rejected with -EINVAL.
 */
static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev;
	int err;

	lockdep_assert_held(&rdev->wiphy.mtx);

	wdev = __cfg80211_wdev_from_attrs(rdev, genl_info_net(info),
					  info->attrs);

	if (!rdev->ops->testmode_cmd)
		return -EOPNOTSUPP;

	if (IS_ERR(wdev)) {
		err = PTR_ERR(wdev);
		if (err != -EINVAL)
			return err;
		wdev = NULL;
	} else if (wdev->wiphy != &rdev->wiphy) {
		return -EINVAL;
	}

	if (!info->attrs[NL80211_ATTR_TESTDATA])
		return -EINVAL;

	/* cur_cmd_info is set only for the duration of the driver call. */
	rdev->cur_cmd_info = info;
	err = rdev_testmode_cmd(rdev, wdev,
				nla_data(info->attrs[NL80211_ATTR_TESTDATA]),
				nla_len(info->attrs[NL80211_ATTR_TESTDATA]));
	rdev->cur_cmd_info = NULL;

	return err;
}

/*
 * Netlink dump callback for testmode data, run under the RTNL lock.
 *
 * cb->args[0] holds the wiphy index + 1 once the device is known, and
 * cb->args[1] holds a pointer to the TESTDATA attribute if one was given.
 * Returns skb->len or a negative errno.
 */
static int nl80211_testmode_dump(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct cfg80211_registered_device *rdev;
	struct nlattr **attrbuf = NULL;
	int err;
	long phy_idx;
	void *data = NULL;
	int data_len = 0;

	rtnl_lock();

	if (cb->args[0]) {
/*
		 * 0 is a valid index, but not valid for args[0],
		 * so we need to offset by 1.
		 */
		phy_idx = cb->args[0] - 1;

		rdev = cfg80211_rdev_by_wiphy_idx(phy_idx);
		if (!rdev) {
			err = -ENOENT;
			goto out_err;
		}
	} else {
		/* No device recorded yet: parse the request to find it. */
		attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf),
				  GFP_KERNEL);
		if (!attrbuf) {
			err = -ENOMEM;
			goto out_err;
		}

		err = nlmsg_parse_deprecated(cb->nlh,
					     GENL_HDRLEN + nl80211_fam.hdrsize,
					     attrbuf, nl80211_fam.maxattr,
					     nl80211_policy, NULL);
		if (err)
			goto out_err;

		rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
		if (IS_ERR(rdev)) {
			err = PTR_ERR(rdev);
			goto out_err;
		}
		phy_idx = rdev->wiphy_idx;

		/* Keep a pointer to the TESTDATA attribute in cb->args[1]. */
		if (attrbuf[NL80211_ATTR_TESTDATA])
			cb->args[1] = (long)attrbuf[NL80211_ATTR_TESTDATA];
	}

	if (cb->args[1]) {
		data = nla_data((void *)cb->args[1]);
		data_len = nla_len((void *)cb->args[1]);
	}

	if (!rdev->ops->testmode_dump) {
		err = -EOPNOTSUPP;
		goto out_err;
	}

	/*
	 * Emit one message per driver call until a message cannot be started
	 * or the driver returns -ENOBUFS / -ENOENT; other driver errors abort
	 * the dump.
	 */
	while (1) {
		void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   NL80211_CMD_TESTMODE);
		struct nlattr *tmdata;

		if (!hdr)
			break;

		if (nla_put_u32(skb, NL80211_ATTR_WIPHY, phy_idx)) {
			genlmsg_cancel(skb, hdr);
			break;
		}

		tmdata = nla_nest_start_noflag(skb, NL80211_ATTR_TESTDATA);
		if (!tmdata) {
			genlmsg_cancel(skb, hdr);
			break;
		}
		err = rdev_testmode_dump(rdev, skb, cb, data, data_len);
		nla_nest_end(skb, tmdata);

		if (err == -ENOBUFS || err == -ENOENT) {
			genlmsg_cancel(skb, hdr);
			break;
		} else if (err) {
			genlmsg_cancel(skb, hdr);
			goto out_err;
		}

		genlmsg_end(skb, hdr);
	}

	err = skb->len;
	/* see above */
	cb->args[0] = phy_idx + 1;
 out_err:
	kfree(attrbuf);
	rtnl_unlock();
	return err;
}
#endif

/*
 * Netlink handler that validates a connect request and passes it to
 * cfg80211_connect(). Returns 0 or a negative errno.
 */
static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct cfg80211_connect_params connect;
	struct wiphy *wiphy;
	struct cfg80211_cached_keys *connkeys = NULL;
	u32 freq = 0;
	int err;

	memset(&connect, 0, sizeof(connect));

	/* A non-empty SSID is mandatory. */
	if (!info->attrs[NL80211_ATTR_SSID] ||
!nla_len(info->attrs[NL80211_ATTR_SSID]))
		return -EINVAL;

	/* Without an explicit auth type, automatic selection is requested. */
	if (info->attrs[NL80211_ATTR_AUTH_TYPE]) {
		connect.auth_type =
			nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]);
		if (!nl80211_valid_auth_type(rdev, connect.auth_type,
					     NL80211_CMD_CONNECT))
			return -EINVAL;
	} else
		connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;

	connect.privacy = info->attrs[NL80211_ATTR_PRIVACY];

	if (info->attrs[NL80211_ATTR_WANT_1X_4WAY_HS] &&
	    !wiphy_ext_feature_isset(&rdev->wiphy,
				     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X))
		return -EINVAL;
	connect.want_1x = info->attrs[NL80211_ATTR_WANT_1X_4WAY_HS];

	err = nl80211_crypto_settings(rdev, info, &connect.crypto,
				      NL80211_MAX_NR_CIPHER_SUITES);
	if (err)
		return err;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	wiphy = &rdev->wiphy;

	/*
	 * bg_scan_period defaults to -1; the attribute is honoured only when
	 * the wiphy sets WIPHY_FLAG_SUPPORTS_FW_ROAM.
	 */
	connect.bg_scan_period = -1;
	if (info->attrs[NL80211_ATTR_BG_SCAN_PERIOD] &&
		(wiphy->flags & WIPHY_FLAG_SUPPORTS_FW_ROAM)) {
		connect.bg_scan_period =
			nla_get_u16(info->attrs[NL80211_ATTR_BG_SCAN_PERIOD]);
	}

	/* MAC takes precedence over MAC_HINT. */
	if (info->attrs[NL80211_ATTR_MAC])
		connect.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
	else if (info->attrs[NL80211_ATTR_MAC_HINT])
		connect.bssid_hint =
			nla_data(info->attrs[NL80211_ATTR_MAC_HINT]);
	connect.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
	connect.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);

	if (info->attrs[NL80211_ATTR_IE]) {
		connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
		connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
	}

	/* NL80211_MFP_OPTIONAL needs the matching extended feature. */
	if (info->attrs[NL80211_ATTR_USE_MFP]) {
		connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
		if (connect.mfp == NL80211_MFP_OPTIONAL &&
		    !wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_MFP_OPTIONAL))
			return -EOPNOTSUPP;
	} else {
		connect.mfp = NL80211_MFP_NO;
	}

	if (info->attrs[NL80211_ATTR_PREV_BSSID])
		connect.prev_bssid =
			nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);

	if (info->attrs[NL80211_ATTR_WIPHY_FREQ])
		freq = MHZ_TO_KHZ(nla_get_u32(
info->attrs[NL80211_ATTR_WIPHY_FREQ]));
	if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
		freq +=
		    nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);

	/*
	 * A non-zero frequency selects a fixed channel; the frequency hint
	 * is considered only when freq is zero.
	 */
	if (freq) {
		connect.channel = nl80211_get_valid_chan(wiphy, freq);
		if (!connect.channel)
			return -EINVAL;
	} else if (info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]) {
		freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]);
		freq = MHZ_TO_KHZ(freq);
		connect.channel_hint = nl80211_get_valid_chan(wiphy, freq);
		if (!connect.channel_hint)
			return -EINVAL;
	}

	/* The EDMG bandwidth config is read only if EDMG channels are given. */
	if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) {
		connect.edmg.channels =
		      nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]);

		if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG])
			connect.edmg.bw_config =
				nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]);
	}

	/*
	 * connkeys is allocated here; every error return after this point
	 * frees it with kfree_sensitive().
	 */
	if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) {
		connkeys = nl80211_parse_connkeys(rdev, info, NULL);
		if (IS_ERR(connkeys))
			return PTR_ERR(connkeys);
	}

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT]))
		connect.flags |= ASSOC_REQ_DISABLE_HT;

	if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
		memcpy(&connect.ht_capa_mask,
		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
		       sizeof(connect.ht_capa_mask));

	/* HT capabilities are only accepted together with their mask. */
	if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
		if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) {
			kfree_sensitive(connkeys);
			return -EINVAL;
		}
		memcpy(&connect.ht_capa,
		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
		       sizeof(connect.ht_capa));
	}

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
		connect.flags |= ASSOC_REQ_DISABLE_VHT;

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE]))
		connect.flags |= ASSOC_REQ_DISABLE_HE;

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_EHT]))
		connect.flags |= ASSOC_REQ_DISABLE_EHT;

	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
		memcpy(&connect.vht_capa_mask,
		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
		       sizeof(connect.vht_capa_mask));

	/* VHT capabilities are only accepted together with their mask. */
	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) {
		if
(!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) {
			kfree_sensitive(connkeys);
			return -EINVAL;
		}
		memcpy(&connect.vht_capa,
		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]),
		       sizeof(connect.vht_capa));
	}

	/*
	 * RRM needs either both the DS_PARAM_SET_IE_IN_PROBES and QUIET
	 * feature bits, or the RRM extended feature.
	 */
	if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
		if (!((rdev->wiphy.features &
			NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) &&
		       (rdev->wiphy.features & NL80211_FEATURE_QUIET)) &&
		    !wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_RRM)) {
			kfree_sensitive(connkeys);
			return -EINVAL;
		}
		connect.flags |= ASSOC_REQ_USE_RRM;
	}

	/* PBSS is refused when the wiphy has no 60 GHz band. */
	connect.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
	if (connect.pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) {
		kfree_sensitive(connkeys);
		return -EOPNOTSUPP;
	}

	if (info->attrs[NL80211_ATTR_BSS_SELECT]) {
		/* bss selection makes no sense if bssid is set */
		if (connect.bssid) {
			kfree_sensitive(connkeys);
			return -EINVAL;
		}

		err = parse_bss_select(info->attrs[NL80211_ATTR_BSS_SELECT],
				       wiphy, &connect.bss_select);
		if (err) {
			kfree_sensitive(connkeys);
			return err;
		}
	}

	/*
	 * FILS ERP parameters are taken only when the FILS_SK_OFFLOAD feature
	 * is set and all four attributes are present; a partial set (or any
	 * of them without the feature) is an error.
	 */
	if (wiphy_ext_feature_isset(&rdev->wiphy,
				    NL80211_EXT_FEATURE_FILS_SK_OFFLOAD) &&
	    info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] &&
	    info->attrs[NL80211_ATTR_FILS_ERP_REALM] &&
	    info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] &&
	    info->attrs[NL80211_ATTR_FILS_ERP_RRK]) {
		connect.fils_erp_username =
			nla_data(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]);
		connect.fils_erp_username_len =
			nla_len(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]);
		connect.fils_erp_realm =
			nla_data(info->attrs[NL80211_ATTR_FILS_ERP_REALM]);
		connect.fils_erp_realm_len =
			nla_len(info->attrs[NL80211_ATTR_FILS_ERP_REALM]);
		connect.fils_erp_next_seq_num =
			nla_get_u16(
			   info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM]);
		connect.fils_erp_rrk =
			nla_data(info->attrs[NL80211_ATTR_FILS_ERP_RRK]);
		connect.fils_erp_rrk_len =
			nla_len(info->attrs[NL80211_ATTR_FILS_ERP_RRK]);
	} else if (info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] ||
		   info->attrs[NL80211_ATTR_FILS_ERP_REALM] ||
		   info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM]
|| info->attrs[NL80211_ATTR_FILS_ERP_RRK]) {
		kfree_sensitive(connkeys);
		return -EINVAL;
	}

	/* External auth support requires the SOCKET_OWNER attribute. */
	if (nla_get_flag(info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])) {
		if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
			kfree_sensitive(connkeys);
			GENL_SET_ERR_MSG(info,
					 "external auth requires connection ownership");
			return -EINVAL;
		}
		connect.flags |= CONNECT_REQ_EXTERNAL_AUTH_SUPPORT;
	}

	if (nla_get_flag(info->attrs[NL80211_ATTR_MLO_SUPPORT]))
		connect.flags |= CONNECT_REQ_MLO_SUPPORT;

	/* connkeys is freed here only when the connect call fails. */
	err = cfg80211_connect(rdev, dev, &connect, connkeys,
			       connect.prev_bssid);
	if (err)
		kfree_sensitive(connkeys);

	/*
	 * On success with SOCKET_OWNER, record the sender as connection
	 * owner and remember the BSSID (zeroed when none was given).
	 */
	if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
		dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid;
		if (connect.bssid)
			memcpy(dev->ieee80211_ptr->disconnect_bssid,
			       connect.bssid, ETH_ALEN);
		else
			eth_zero_addr(dev->ieee80211_ptr->disconnect_bssid);
	}

	return err;
}

/*
 * Netlink handler that updates parameters of an existing connection via
 * rdev_update_connect_params().
 *
 * The bits collected in "changed" (UPDATE_ASSOC_IES, UPDATE_FILS_ERP_INFO,
 * UPDATE_AUTH_TYPE) tell the driver which fields of the request are set.
 * Returns -EOPNOTSUPP without the driver op, -ENOLINK when the wdev is not
 * connected, otherwise 0 or a negative errno.
 */
static int nl80211_update_connect_params(struct sk_buff *skb,
					 struct genl_info *info)
{
	struct cfg80211_connect_params connect = {};
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	bool fils_sk_offload;
	u32 auth_type;
	u32 changed = 0;

	if (!rdev->ops->update_connect_params)
		return -EOPNOTSUPP;

	if (info->attrs[NL80211_ATTR_IE]) {
		connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
		connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
		changed |= UPDATE_ASSOC_IES;
	}

	fils_sk_offload = wiphy_ext_feature_isset(&rdev->wiphy,
						  NL80211_EXT_FEATURE_FILS_SK_OFFLOAD);

	/*
	 * when driver supports fils-sk offload all attributes must be
	 * provided. So the else covers "fils-sk-not-all" and
	 * "no-fils-sk-any".
	 */
	if (fils_sk_offload &&
	    info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] &&
	    info->attrs[NL80211_ATTR_FILS_ERP_REALM] &&
	    info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] &&
	    info->attrs[NL80211_ATTR_FILS_ERP_RRK]) {
		connect.fils_erp_username =
			nla_data(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]);
		connect.fils_erp_username_len =
			nla_len(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]);
		connect.fils_erp_realm =
			nla_data(info->attrs[NL80211_ATTR_FILS_ERP_REALM]);
		connect.fils_erp_realm_len =
			nla_len(info->attrs[NL80211_ATTR_FILS_ERP_REALM]);
		connect.fils_erp_next_seq_num =
			nla_get_u16(
			   info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM]);
		connect.fils_erp_rrk =
			nla_data(info->attrs[NL80211_ATTR_FILS_ERP_RRK]);
		connect.fils_erp_rrk_len =
			nla_len(info->attrs[NL80211_ATTR_FILS_ERP_RRK]);
		changed |= UPDATE_FILS_ERP_INFO;
	} else if (info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] ||
		   info->attrs[NL80211_ATTR_FILS_ERP_REALM] ||
		   info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] ||
		   info->attrs[NL80211_ATTR_FILS_ERP_RRK]) {
		return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_AUTH_TYPE]) {
		auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]);
		if (!nl80211_valid_auth_type(rdev, auth_type,
					     NL80211_CMD_CONNECT))
			return -EINVAL;

		/* With offload, FILS_SK also requires the ERP info above. */
		if (auth_type == NL80211_AUTHTYPE_FILS_SK &&
		    fils_sk_offload && !(changed & UPDATE_FILS_ERP_INFO))
			return -EINVAL;

		connect.auth_type = auth_type;
		changed |= UPDATE_AUTH_TYPE;
	}

	if (!wdev->connected)
		return -ENOLINK;

	return rdev_update_connect_params(rdev, dev, &connect, changed);
}

/*
 * Netlink handler for disconnecting; calls cfg80211_disconnect().
 *
 * The reason code defaults to WLAN_REASON_DEAUTH_LEAVING when the attribute
 * is absent; an explicit reason of 0 is rejected. Returns -EPERM when the
 * connection is owned by a different netlink port.
 */
static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	u16 reason;

	if (dev->ieee80211_ptr->conn_owner_nlportid &&
	    dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
		return -EPERM;

	if (!info->attrs[NL80211_ATTR_REASON_CODE])
		reason = WLAN_REASON_DEAUTH_LEAVING;
	else
		reason = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);

	if (reason == 0)
		return -EINVAL;

	if
(dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; return cfg80211_disconnect(rdev, dev, reason, true); } static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net *net; int err; if (info->attrs[NL80211_ATTR_PID]) { u32 pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]); net = get_net_ns_by_pid(pid); } else if (info->attrs[NL80211_ATTR_NETNS_FD]) { u32 fd = nla_get_u32(info->attrs[NL80211_ATTR_NETNS_FD]); net = get_net_ns_by_fd(fd); } else { return -EINVAL; } if (IS_ERR(net)) return PTR_ERR(net); err = 0; /* check if anything to do */ if (!net_eq(wiphy_net(&rdev->wiphy), net)) err = cfg80211_switch_netns(rdev, net); put_net(net); return err; } static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int (*rdev_ops)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_pmksa *pmksa) = NULL; struct net_device *dev = info->user_ptr[1]; struct cfg80211_pmksa pmksa; memset(&pmksa, 0, sizeof(struct cfg80211_pmksa)); if (!info->attrs[NL80211_ATTR_PMKID]) return -EINVAL; pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]); if (info->attrs[NL80211_ATTR_MAC]) { pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); } else if (info->attrs[NL80211_ATTR_SSID] && info->attrs[NL80211_ATTR_FILS_CACHE_ID] && (info->genlhdr->cmd == NL80211_CMD_DEL_PMKSA || info->attrs[NL80211_ATTR_PMK])) { pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); pmksa.cache_id = nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]); } else { return -EINVAL; } if (info->attrs[NL80211_ATTR_PMK]) { pmksa.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]); pmksa.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]); } if (info->attrs[NL80211_ATTR_PMK_LIFETIME]) pmksa.pmk_lifetime = 
nla_get_u32(info->attrs[NL80211_ATTR_PMK_LIFETIME]); if (info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]) pmksa.pmk_reauth_threshold = nla_get_u8( info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]); if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && !(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_AP_PMKSA_CACHING))) return -EOPNOTSUPP; switch (info->genlhdr->cmd) { case NL80211_CMD_SET_PMKSA: rdev_ops = rdev->ops->set_pmksa; break; case NL80211_CMD_DEL_PMKSA: rdev_ops = rdev->ops->del_pmksa; break; default: WARN_ON(1); break; } if (!rdev_ops) return -EOPNOTSUPP; return rdev_ops(&rdev->wiphy, dev, &pmksa); } static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; if (!rdev->ops->flush_pmksa) return -EOPNOTSUPP; return rdev_flush_pmksa(rdev, dev); } static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; u8 action_code, dialog_token; u32 peer_capability = 0; u16 status_code; u8 *peer; int link_id; bool initiator; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) || !rdev->ops->tdls_mgmt) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_TDLS_ACTION] || !info->attrs[NL80211_ATTR_STATUS_CODE] || !info->attrs[NL80211_ATTR_TDLS_DIALOG_TOKEN] || !info->attrs[NL80211_ATTR_IE] || !info->attrs[NL80211_ATTR_MAC]) return -EINVAL; peer = nla_data(info->attrs[NL80211_ATTR_MAC]); action_code = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_ACTION]); status_code = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]); dialog_token = 
nla_get_u8(info->attrs[NL80211_ATTR_TDLS_DIALOG_TOKEN]); initiator = nla_get_flag(info->attrs[NL80211_ATTR_TDLS_INITIATOR]); if (info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]) peer_capability = nla_get_u32(info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]); link_id = nl80211_link_id_or_invalid(info->attrs); return rdev_tdls_mgmt(rdev, dev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, nla_data(info->attrs[NL80211_ATTR_IE]), nla_len(info->attrs[NL80211_ATTR_IE])); } static int nl80211_tdls_oper(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; enum nl80211_tdls_operation operation; u8 *peer; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) || !rdev->ops->tdls_oper) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_TDLS_OPERATION] || !info->attrs[NL80211_ATTR_MAC]) return -EINVAL; operation = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_OPERATION]); peer = nla_data(info->attrs[NL80211_ATTR_MAC]); return rdev_tdls_oper(rdev, dev, peer, operation); } static int nl80211_remain_on_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; unsigned int link_id = nl80211_link_id(info->attrs); struct wireless_dev *wdev = info->user_ptr[1]; struct cfg80211_chan_def chandef; struct sk_buff *msg; void *hdr; u64 cookie; u32 duration; int err; if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] || !info->attrs[NL80211_ATTR_DURATION]) return -EINVAL; duration = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); if (!rdev->ops->remain_on_channel || !(rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)) return -EOPNOTSUPP; /* * We should be on that channel for at least a minimum amount of * time (10ms) but no longer than the driver supports. 
*/ if (duration < NL80211_MIN_REMAIN_ON_CHANNEL_TIME || duration > rdev->wiphy.max_remain_on_channel_duration) return -EINVAL; err = nl80211_parse_chandef(rdev, info, &chandef); if (err) return err; if (!cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) { const struct cfg80211_chan_def *oper_chandef, *compat_chandef; oper_chandef = wdev_chandef(wdev, link_id); if (WARN_ON(!oper_chandef)) { /* cannot happen since we must beacon to get here */ WARN_ON(1); return -EBUSY; } /* note: returns first one if identical chandefs */ compat_chandef = cfg80211_chandef_compatible(&chandef, oper_chandef); if (compat_chandef != &chandef) return -EBUSY; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_REMAIN_ON_CHANNEL); if (!hdr) { err = -ENOBUFS; goto free_msg; } err = rdev_remain_on_channel(rdev, wdev, chandef.chan, duration, &cookie); if (err) goto free_msg; if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: err = -ENOBUFS; free_msg: nlmsg_free(msg); return err; } static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; u64 cookie; if (!info->attrs[NL80211_ATTR_COOKIE]) return -EINVAL; if (!rdev->ops->cancel_remain_on_channel) return -EOPNOTSUPP; cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); return rdev_cancel_remain_on_channel(rdev, wdev, cookie); } static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_bitrate_mask mask; unsigned int link_id = nl80211_link_id(info->attrs); struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; int err; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; err = 
nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &mask, dev, true, link_id); if (err) return err; return rdev_set_bitrate_mask(rdev, dev, link_id, NULL, &mask); } static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION; if (!info->attrs[NL80211_ATTR_FRAME_MATCH]) return -EINVAL; if (info->attrs[NL80211_ATTR_FRAME_TYPE]) frame_type = nla_get_u16(info->attrs[NL80211_ATTR_FRAME_TYPE]); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: break; case NL80211_IFTYPE_NAN: if (!wiphy_ext_feature_isset(wdev->wiphy, NL80211_EXT_FEATURE_SECURE_NAN)) return -EOPNOTSUPP; break; default: return -EOPNOTSUPP; } /* not much point in registering if we can't reply */ if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; if (info->attrs[NL80211_ATTR_RECEIVE_MULTICAST] && !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS)) { GENL_SET_ERR_MSG(info, "multicast RX registrations are not supported"); return -EOPNOTSUPP; } return cfg80211_mlme_register_mgmt(wdev, info->snd_portid, frame_type, nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]), info->attrs[NL80211_ATTR_RECEIVE_MULTICAST], info->extack); } static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; struct cfg80211_chan_def chandef; int err; void *hdr = NULL; u64 cookie; struct sk_buff *msg = NULL; struct cfg80211_mgmt_tx_params params = { .dont_wait_for_ack = info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK], }; if 
(!info->attrs[NL80211_ATTR_FRAME]) return -EINVAL; if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: if (!info->attrs[NL80211_ATTR_WIPHY_FREQ]) return -EINVAL; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: break; case NL80211_IFTYPE_NAN: if (!wiphy_ext_feature_isset(wdev->wiphy, NL80211_EXT_FEATURE_SECURE_NAN)) return -EOPNOTSUPP; break; default: return -EOPNOTSUPP; } if (info->attrs[NL80211_ATTR_DURATION]) { if (!(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX)) return -EINVAL; params.wait = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); /* * We should wait on the channel for at least a minimum amount * of time (10ms) but no longer than the driver supports. */ if (params.wait < NL80211_MIN_REMAIN_ON_CHANNEL_TIME || params.wait > rdev->wiphy.max_remain_on_channel_duration) return -EINVAL; } params.offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK]; if (params.offchan && !(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX)) return -EINVAL; params.no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); /* get the channel if any has been specified, otherwise pass NULL to * the driver. The latter will use the current one */ chandef.chan = NULL; if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { err = nl80211_parse_chandef(rdev, info, &chandef); if (err) return err; } if (!chandef.chan && params.offchan) return -EINVAL; if (params.offchan && !cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) return -EBUSY; params.link_id = nl80211_link_id_or_invalid(info->attrs); /* * This now races due to the unlock, but we cannot check * the valid links for the _station_ anyway, so that's up * to the driver. 
*/ if (params.link_id >= 0 && !(wdev->valid_links & BIT(params.link_id))) return -EINVAL; params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); if (info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]) { int len = nla_len(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]); int i; if (len % sizeof(u16)) return -EINVAL; params.n_csa_offsets = len / sizeof(u16); params.csa_offsets = nla_data(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]); /* check that all the offsets fit the frame */ for (i = 0; i < params.n_csa_offsets; i++) { if (params.csa_offsets[i] >= params.len) return -EINVAL; } } if (!params.dont_wait_for_ack) { msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_FRAME); if (!hdr) { err = -ENOBUFS; goto free_msg; } } params.chan = chandef.chan; err = cfg80211_mlme_mgmt_tx(rdev, wdev, ¶ms, &cookie); if (err) goto free_msg; if (msg) { if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); } return 0; nla_put_failure: err = -ENOBUFS; free_msg: nlmsg_free(msg); return err; } static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; u64 cookie; if (!info->attrs[NL80211_ATTR_COOKIE]) return -EINVAL; if (!rdev->ops->mgmt_tx_cancel_wait) return -EOPNOTSUPP; switch (wdev->iftype) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: break; case NL80211_IFTYPE_NAN: if (!wiphy_ext_feature_isset(wdev->wiphy, NL80211_EXT_FEATURE_SECURE_NAN)) return -EOPNOTSUPP; break; default: return -EOPNOTSUPP; } cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); return 
rdev_mgmt_tx_cancel_wait(rdev, wdev, cookie); } static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev; struct net_device *dev = info->user_ptr[1]; u8 ps_state; bool state; int err; if (!info->attrs[NL80211_ATTR_PS_STATE]) return -EINVAL; ps_state = nla_get_u32(info->attrs[NL80211_ATTR_PS_STATE]); wdev = dev->ieee80211_ptr; if (!rdev->ops->set_power_mgmt) return -EOPNOTSUPP; state = (ps_state == NL80211_PS_ENABLED) ? true : false; if (state == wdev->ps) return 0; err = rdev_set_power_mgmt(rdev, dev, state, wdev->ps_timeout); if (!err) wdev->ps = state; return err; } static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; enum nl80211_ps_state ps_state; struct wireless_dev *wdev; struct net_device *dev = info->user_ptr[1]; struct sk_buff *msg; void *hdr; int err; wdev = dev->ieee80211_ptr; if (!rdev->ops->set_power_mgmt) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_POWER_SAVE); if (!hdr) { err = -ENOBUFS; goto free_msg; } if (wdev->ps) ps_state = NL80211_PS_ENABLED; else ps_state = NL80211_PS_DISABLED; if (nla_put_u32(msg, NL80211_ATTR_PS_STATE, ps_state)) goto nla_put_failure; genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: err = -ENOBUFS; free_msg: nlmsg_free(msg); return err; } static const struct nla_policy nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] = { [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_BINARY }, [NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_TXE_RATE] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_TXE_PKTS] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_TXE_INTVL] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_RSSI_LEVEL] = { 
.type = NLA_S32 }, }; static int nl80211_set_cqm_txe(struct genl_info *info, u32 rate, u32 pkts, u32 intvl) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; if (rate > 100 || intvl > NL80211_CQM_TXE_MAX_INTVL) return -EINVAL; if (!rdev->ops->set_cqm_txe_config) return -EOPNOTSUPP; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; return rdev_set_cqm_txe_config(rdev, dev, rate, pkts, intvl); } static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_cqm_config *cqm_config) { struct wireless_dev *wdev = dev->ieee80211_ptr; s32 last, low, high; u32 hyst; int i, n, low_index; int err; /* * Obtain current RSSI value if possible, if not and no RSSI threshold * event has been received yet, we should receive an event after a * connection is established and enough beacons received to calculate * the average. 
*/
	if (!cqm_config->last_rssi_event_value &&
	    wdev->links[0].client.current_bss &&
	    rdev->ops->get_station) {
		struct station_info sinfo = {};
		u8 *mac_addr;

		mac_addr = wdev->links[0].client.current_bss->pub.bssid;

		err = rdev_get_station(rdev, dev, mac_addr, &sinfo);
		if (err)
			return err;

		cfg80211_sinfo_release_content(&sinfo);
		/* only take the value if the driver actually filled it in */
		if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG))
			cqm_config->last_rssi_event_value =
				(s8) sinfo.rx_beacon_signal_avg;
	}

	last = cqm_config->last_rssi_event_value;
	hyst = cqm_config->rssi_hyst;
	n = cqm_config->n_rssi_thresholds;

	/* find the first threshold that lies above the last RSSI value */
	for (i = 0; i < n; i++) {
		i = array_index_nospec(i, n);
		if (last < cqm_config->rssi_thresholds[i])
			break;
	}

	/* lower bound: the threshold just below, or open-ended if none */
	low_index = i - 1;
	if (low_index >= 0) {
		low_index = array_index_nospec(low_index, n);
		low = cqm_config->rssi_thresholds[low_index] - hyst;
	} else {
		low = S32_MIN;
	}
	/* upper bound: the threshold found above, or open-ended if none */
	if (i < n) {
		i = array_index_nospec(i, n);
		high = cqm_config->rssi_thresholds[i] + hyst - 1;
	} else {
		high = S32_MAX;
	}

	return rdev_set_cqm_rssi_range_config(rdev, dev, low, high);
}

/*
 * Install, replace or remove the RSSI thresholds used for connection
 * quality monitoring.
 *
 * @thresholds must be strictly increasing and non-positive. A single
 * threshold of 0 (or an empty list) disables monitoring. More than one
 * threshold requires NL80211_EXT_FEATURE_CQM_RSSI_LIST and the driver's
 * set_cqm_rssi_range_config op; otherwise set_cqm_rssi_config is required.
 *
 * The new configuration is published in wdev->cqm_config before the driver
 * is called; if the driver call fails the previous configuration is put
 * back and the new one released, otherwise the previous one is released.
 *
 * Returns -EINVAL for a bad threshold list, -EOPNOTSUPP for an unsupported
 * interface type or missing driver support, -ENOMEM on allocation failure,
 * otherwise the driver's result.
 */
static int nl80211_set_cqm_rssi(struct genl_info *info,
				const s32 *thresholds, int n_thresholds,
				u32 hysteresis)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct cfg80211_cqm_config *cqm_config = NULL, *old;
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	s32 prev = S32_MIN;
	int i, err;

	/* Check all values are non-positive and strictly increasing */
	for (i = 0; i < n_thresholds; i++) {
		if (thresholds[i] > 0 || thresholds[i] <= prev)
			return -EINVAL;

		prev = thresholds[i];
	}

	if (wdev->iftype != NL80211_IFTYPE_STATION &&
	    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */
		n_thresholds = 0;

	old = wiphy_dereference(wdev->wiphy, wdev->cqm_config);

	/* if already disabled just succeed */
	if (!n_thresholds && !old)
		return 0;

	if (n_thresholds > 1) {
		if (!wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_CQM_RSSI_LIST) ||
		    !rdev->ops->set_cqm_rssi_range_config)
			return -EOPNOTSUPP;
	} else {
		if (!rdev->ops->set_cqm_rssi_config)
			return -EOPNOTSUPP;
	}

	if (n_thresholds) {
		cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds,
						 n_thresholds),
				     GFP_KERNEL);
		if (!cqm_config)
			return -ENOMEM;

		cqm_config->rssi_hyst = hysteresis;
		cqm_config->n_rssi_thresholds = n_thresholds;
		memcpy(cqm_config->rssi_thresholds, thresholds,
		       flex_array_size(cqm_config, rssi_thresholds,
				       n_thresholds));
		cqm_config->use_range_api = n_thresholds > 1 ||
					    !rdev->ops->set_cqm_rssi_config;

		/* publish the new config before talking to the driver */
		rcu_assign_pointer(wdev->cqm_config, cqm_config);

		if (cqm_config->use_range_api)
			err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config);
		else
			err = rdev_set_cqm_rssi_config(rdev, dev,
						       thresholds[0],
						       hysteresis);
	} else {
		/* old is non-NULL here: the !n_thresholds && !old case returned above */
		RCU_INIT_POINTER(wdev->cqm_config, NULL);
		/* if enabled as range also disable via range */
		if (old->use_range_api)
			err = rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0);
		else
			err = rdev_set_cqm_rssi_config(rdev, dev, 0, 0);
	}

	if (err) {
		/* roll back to the previous config and drop the new one */
		rcu_assign_pointer(wdev->cqm_config, old);
		kfree_rcu(cqm_config, rcu_head);
	} else {
		kfree_rcu(old, rcu_head);
	}

	return err;
}

/*
 * Netlink handler: parse the nested NL80211_ATTR_CQM attribute and dispatch
 * to the RSSI threshold configuration (threshold list plus hysteresis) or
 * to the TX error configuration (rate, packets and interval). Returns
 * -EINVAL when the attribute is missing or matches neither form.
 */
static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *attrs[NL80211_ATTR_CQM_MAX + 1];
	struct nlattr *cqm;
	int err;

	cqm = info->attrs[NL80211_ATTR_CQM];
	if (!cqm)
		return -EINVAL;

	err = nla_parse_nested_deprecated(attrs, NL80211_ATTR_CQM_MAX, cqm,
					  nl80211_attr_cqm_policy,
					  info->extack);
	if (err)
		return err;

	if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] &&
	    attrs[NL80211_ATTR_CQM_RSSI_HYST]) {
		const s32 *thresholds =
			nla_data(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
		int len = nla_len(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
		u32 hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);

		/* the payload is an array of 4-byte thresholds */
		if (len % 4)
			return -EINVAL;

		return nl80211_set_cqm_rssi(info, thresholds, len / 4,
					    hysteresis);
	}

	if (attrs[NL80211_ATTR_CQM_TXE_RATE] &&
	    attrs[NL80211_ATTR_CQM_TXE_PKTS] &&
	    attrs[NL80211_ATTR_CQM_TXE_INTVL]) {
		u32 rate =
nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_RATE]); u32 pkts = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_PKTS]); u32 intvl = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_INTVL]); return nl80211_set_cqm_txe(info, rate, pkts, intvl); } return -EINVAL; } static int nl80211_join_ocb(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct ocb_setup setup = {}; int err; err = nl80211_parse_chandef(rdev, info, &setup.chandef); if (err) return err; return cfg80211_join_ocb(rdev, dev, &setup); } static int nl80211_leave_ocb(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; return cfg80211_leave_ocb(rdev, dev); } static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct mesh_config cfg; struct mesh_setup setup; int err; /* start with default */ memcpy(&cfg, &default_mesh_config, sizeof(cfg)); memcpy(&setup, &default_mesh_setup, sizeof(setup)); if (info->attrs[NL80211_ATTR_MESH_CONFIG]) { /* and parse parameters if given */ err = nl80211_parse_mesh_config(info, &cfg, NULL); if (err) return err; } if (!info->attrs[NL80211_ATTR_MESH_ID] || !nla_len(info->attrs[NL80211_ATTR_MESH_ID])) return -EINVAL; setup.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); setup.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); if (info->attrs[NL80211_ATTR_MCAST_RATE] && !nl80211_parse_mcast_rate(rdev, setup.mcast_rate, nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]))) return -EINVAL; if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { setup.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); err = cfg80211_validate_beacon_int(rdev, NL80211_IFTYPE_MESH_POINT, setup.beacon_interval); if (err) return err; } if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) 
{ setup.dtim_period = nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); if (setup.dtim_period < 1 || setup.dtim_period > 100) return -EINVAL; } if (info->attrs[NL80211_ATTR_MESH_SETUP]) { /* parse additional setup parameters if given */ err = nl80211_parse_mesh_setup(info, &setup); if (err) return err; } if (setup.user_mpm) cfg.auto_open_plinks = false; if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { err = nl80211_parse_chandef(rdev, info, &setup.chandef); if (err) return err; } else { /* __cfg80211_join_mesh() will sort it out */ setup.chandef.chan = NULL; } if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) { u8 *rates = nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); int n_rates = nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); struct ieee80211_supported_band *sband; if (!setup.chandef.chan) return -EINVAL; sband = rdev->wiphy.bands[setup.chandef.chan->band]; err = ieee80211_get_ratemask(sband, rates, n_rates, &setup.basic_rates); if (err) return err; } if (info->attrs[NL80211_ATTR_TX_RATES]) { err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &setup.beacon_rate, dev, false, 0); if (err) return err; if (!setup.chandef.chan) return -EINVAL; err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band, &setup.beacon_rate); if (err) return err; } setup.userspace_handles_dfs = nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]); if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) { int r = validate_pae_over_nl80211(rdev, info); if (r < 0) return r; setup.control_port_over_nl80211 = true; } err = __cfg80211_join_mesh(rdev, dev, &setup, &cfg); if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid; return err; } static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; return cfg80211_leave_mesh(rdev, dev); } #ifdef CONFIG_PM static int 
nl80211_send_wowlan_patterns(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { struct cfg80211_wowlan *wowlan = rdev->wiphy.wowlan_config; struct nlattr *nl_pats, *nl_pat; int i, pat_len; if (!wowlan->n_patterns) return 0; nl_pats = nla_nest_start_noflag(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); if (!nl_pats) return -ENOBUFS; for (i = 0; i < wowlan->n_patterns; i++) { nl_pat = nla_nest_start_noflag(msg, i + 1); if (!nl_pat) return -ENOBUFS; pat_len = wowlan->patterns[i].pattern_len; if (nla_put(msg, NL80211_PKTPAT_MASK, DIV_ROUND_UP(pat_len, 8), wowlan->patterns[i].mask) || nla_put(msg, NL80211_PKTPAT_PATTERN, pat_len, wowlan->patterns[i].pattern) || nla_put_u32(msg, NL80211_PKTPAT_OFFSET, wowlan->patterns[i].pkt_offset)) return -ENOBUFS; nla_nest_end(msg, nl_pat); } nla_nest_end(msg, nl_pats); return 0; } static int nl80211_send_wowlan_tcp(struct sk_buff *msg, struct cfg80211_wowlan_tcp *tcp) { struct nlattr *nl_tcp; if (!tcp) return 0; nl_tcp = nla_nest_start_noflag(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); if (!nl_tcp) return -ENOBUFS; if (nla_put_in_addr(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) || nla_put_in_addr(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) || nla_put(msg, NL80211_WOWLAN_TCP_DST_MAC, ETH_ALEN, tcp->dst_mac) || nla_put_u16(msg, NL80211_WOWLAN_TCP_SRC_PORT, tcp->src_port) || nla_put_u16(msg, NL80211_WOWLAN_TCP_DST_PORT, tcp->dst_port) || nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, tcp->payload_len, tcp->payload) || nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, tcp->data_interval) || nla_put(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, tcp->wake_len, tcp->wake_data) || nla_put(msg, NL80211_WOWLAN_TCP_WAKE_MASK, DIV_ROUND_UP(tcp->wake_len, 8), tcp->wake_mask)) return -ENOBUFS; if (tcp->payload_seq.len && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ, sizeof(tcp->payload_seq), &tcp->payload_seq)) return -ENOBUFS; if (tcp->payload_tok.len && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, sizeof(tcp->payload_tok) + 
tcp->tokens_size, &tcp->payload_tok)) return -ENOBUFS; nla_nest_end(msg, nl_tcp); return 0; } static int nl80211_send_wowlan_nd(struct sk_buff *msg, struct cfg80211_sched_scan_request *req) { struct nlattr *nd, *freqs, *matches, *match, *scan_plans, *scan_plan; int i; if (!req) return 0; nd = nla_nest_start_noflag(msg, NL80211_WOWLAN_TRIG_NET_DETECT); if (!nd) return -ENOBUFS; if (req->n_scan_plans == 1 && nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL, req->scan_plans[0].interval * 1000)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_DELAY, req->delay)) return -ENOBUFS; if (req->relative_rssi_set) { struct nl80211_bss_select_rssi_adjust rssi_adjust; if (nla_put_s8(msg, NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI, req->relative_rssi)) return -ENOBUFS; rssi_adjust.band = req->rssi_adjust.band; rssi_adjust.delta = req->rssi_adjust.delta; if (nla_put(msg, NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST, sizeof(rssi_adjust), &rssi_adjust)) return -ENOBUFS; } freqs = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_FREQUENCIES); if (!freqs) return -ENOBUFS; for (i = 0; i < req->n_channels; i++) { if (nla_put_u32(msg, i, req->channels[i]->center_freq)) return -ENOBUFS; } nla_nest_end(msg, freqs); if (req->n_match_sets) { matches = nla_nest_start_noflag(msg, NL80211_ATTR_SCHED_SCAN_MATCH); if (!matches) return -ENOBUFS; for (i = 0; i < req->n_match_sets; i++) { match = nla_nest_start_noflag(msg, i); if (!match) return -ENOBUFS; if (nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, req->match_sets[i].ssid.ssid_len, req->match_sets[i].ssid.ssid)) return -ENOBUFS; nla_nest_end(msg, match); } nla_nest_end(msg, matches); } scan_plans = nla_nest_start_noflag(msg, NL80211_ATTR_SCHED_SCAN_PLANS); if (!scan_plans) return -ENOBUFS; for (i = 0; i < req->n_scan_plans; i++) { scan_plan = nla_nest_start_noflag(msg, i + 1); if (!scan_plan) return -ENOBUFS; if (nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL, req->scan_plans[i].interval) || (req->scan_plans[i].iterations && 
nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_ITERATIONS, req->scan_plans[i].iterations))) return -ENOBUFS; nla_nest_end(msg, scan_plan); } nla_nest_end(msg, scan_plans); nla_nest_end(msg, nd); return 0; } static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct sk_buff *msg; void *hdr; u32 size = NLMSG_DEFAULT_SIZE; if (!rdev->wiphy.wowlan) return -EOPNOTSUPP; if (rdev->wiphy.wowlan_config && rdev->wiphy.wowlan_config->tcp) { /* adjust size to have room for all the data */ size += rdev->wiphy.wowlan_config->tcp->tokens_size + rdev->wiphy.wowlan_config->tcp->payload_len + rdev->wiphy.wowlan_config->tcp->wake_len + rdev->wiphy.wowlan_config->tcp->wake_len / 8; } msg = nlmsg_new(size, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_WOWLAN); if (!hdr) goto nla_put_failure; if (rdev->wiphy.wowlan_config) { struct nlattr *nl_wowlan; nl_wowlan = nla_nest_start_noflag(msg, NL80211_ATTR_WOWLAN_TRIGGERS); if (!nl_wowlan) goto nla_put_failure; if ((rdev->wiphy.wowlan_config->any && nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || (rdev->wiphy.wowlan_config->disconnect && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || (rdev->wiphy.wowlan_config->magic_pkt && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || (rdev->wiphy.wowlan_config->gtk_rekey_failure && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || (rdev->wiphy.wowlan_config->eap_identity_req && nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || (rdev->wiphy.wowlan_config->four_way_handshake && nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || (rdev->wiphy.wowlan_config->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; if (nl80211_send_wowlan_patterns(msg, rdev)) goto nla_put_failure; if (nl80211_send_wowlan_tcp(msg, rdev->wiphy.wowlan_config->tcp)) goto nla_put_failure; if 
(nl80211_send_wowlan_nd( msg, rdev->wiphy.wowlan_config->nd_config)) goto nla_put_failure; nla_nest_end(msg, nl_wowlan); } genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: nlmsg_free(msg); return -ENOBUFS; } static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, struct nlattr *attr, struct cfg80211_wowlan *trig) { struct nlattr *tb[NUM_NL80211_WOWLAN_TCP]; struct cfg80211_wowlan_tcp *cfg; struct nl80211_wowlan_tcp_data_token *tok = NULL; struct nl80211_wowlan_tcp_data_seq *seq = NULL; u32 size; u32 data_size, wake_size, tokens_size = 0, wake_mask_size; int err, port; if (!rdev->wiphy.wowlan->tcp) return -EINVAL; err = nla_parse_nested_deprecated(tb, MAX_NL80211_WOWLAN_TCP, attr, nl80211_wowlan_tcp_policy, NULL); if (err) return err; if (!tb[NL80211_WOWLAN_TCP_SRC_IPV4] || !tb[NL80211_WOWLAN_TCP_DST_IPV4] || !tb[NL80211_WOWLAN_TCP_DST_MAC] || !tb[NL80211_WOWLAN_TCP_DST_PORT] || !tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD] || !tb[NL80211_WOWLAN_TCP_DATA_INTERVAL] || !tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] || !tb[NL80211_WOWLAN_TCP_WAKE_MASK]) return -EINVAL; data_size = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]); if (data_size > rdev->wiphy.wowlan->tcp->data_payload_max) return -EINVAL; if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > rdev->wiphy.wowlan->tcp->data_interval_max || nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) == 0) return -EINVAL; wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); if (wake_size > rdev->wiphy.wowlan->tcp->wake_payload_max) return -EINVAL; wake_mask_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_MASK]); if (wake_mask_size != DIV_ROUND_UP(wake_size, 8)) return -EINVAL; if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]) { u32 tokln = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); tok = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); tokens_size = tokln - sizeof(*tok); if (!tok->len || tokens_size % tok->len) return -EINVAL; if (!rdev->wiphy.wowlan->tcp->tok) return -EINVAL; if 
(tok->len > rdev->wiphy.wowlan->tcp->tok->max_len) return -EINVAL; if (tok->len < rdev->wiphy.wowlan->tcp->tok->min_len) return -EINVAL; if (tokens_size > rdev->wiphy.wowlan->tcp->tok->bufsize) return -EINVAL; if (tok->offset + tok->len > data_size) return -EINVAL; } if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]) { seq = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]); if (!rdev->wiphy.wowlan->tcp->seq) return -EINVAL; if (seq->len == 0 || seq->len > 4) return -EINVAL; if (seq->len + seq->offset > data_size) return -EINVAL; } size = sizeof(*cfg); size += data_size; size += wake_size + wake_mask_size; size += tokens_size; cfg = kzalloc(size, GFP_KERNEL); if (!cfg) return -ENOMEM; cfg->src = nla_get_in_addr(tb[NL80211_WOWLAN_TCP_SRC_IPV4]); cfg->dst = nla_get_in_addr(tb[NL80211_WOWLAN_TCP_DST_IPV4]); memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]), ETH_ALEN); if (tb[NL80211_WOWLAN_TCP_SRC_PORT]) port = nla_get_u16(tb[NL80211_WOWLAN_TCP_SRC_PORT]); else port = 0; #ifdef CONFIG_INET /* allocate a socket and port for it and use it */ err = __sock_create(wiphy_net(&rdev->wiphy), PF_INET, SOCK_STREAM, IPPROTO_TCP, &cfg->sock, 1); if (err) { kfree(cfg); return err; } if (inet_csk_get_port(cfg->sock->sk, port)) { sock_release(cfg->sock); kfree(cfg); return -EADDRINUSE; } cfg->src_port = inet_sk(cfg->sock->sk)->inet_num; #else if (!port) { kfree(cfg); return -EINVAL; } cfg->src_port = port; #endif cfg->dst_port = nla_get_u16(tb[NL80211_WOWLAN_TCP_DST_PORT]); cfg->payload_len = data_size; cfg->payload = (u8 *)cfg + sizeof(*cfg) + tokens_size; memcpy((void *)cfg->payload, nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]), data_size); if (seq) cfg->payload_seq = *seq; cfg->data_interval = nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]); cfg->wake_len = wake_size; cfg->wake_data = (u8 *)cfg + sizeof(*cfg) + tokens_size + data_size; memcpy((void *)cfg->wake_data, nla_data(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]), wake_size); cfg->wake_mask = (u8 *)cfg + sizeof(*cfg) + 
tokens_size + data_size + wake_size; memcpy((void *)cfg->wake_mask, nla_data(tb[NL80211_WOWLAN_TCP_WAKE_MASK]), wake_mask_size); if (tok) { cfg->tokens_size = tokens_size; cfg->payload_tok = *tok; memcpy(cfg->payload_tok.token_stream, tok->token_stream, tokens_size); } trig->tcp = cfg; return 0; } static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev, const struct wiphy_wowlan_support *wowlan, struct nlattr *attr, struct cfg80211_wowlan *trig) { struct nlattr **tb; int err; tb = kcalloc(NUM_NL80211_ATTR, sizeof(*tb), GFP_KERNEL); if (!tb) return -ENOMEM; if (!(wowlan->flags & WIPHY_WOWLAN_NET_DETECT)) { err = -EOPNOTSUPP; goto out; } err = nla_parse_nested_deprecated(tb, NL80211_ATTR_MAX, attr, nl80211_policy, NULL); if (err) goto out; trig->nd_config = nl80211_parse_sched_scan(&rdev->wiphy, NULL, tb, wowlan->max_nd_match_sets); err = PTR_ERR_OR_ZERO(trig->nd_config); if (err) trig->nd_config = NULL; out: kfree(tb); return err; } static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *tb[NUM_NL80211_WOWLAN_TRIG]; struct cfg80211_wowlan new_triggers = {}; struct cfg80211_wowlan *ntrig; const struct wiphy_wowlan_support *wowlan = rdev->wiphy.wowlan; int err, i; bool prev_enabled = rdev->wiphy.wowlan_config; bool regular = false; if (!wowlan) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) { cfg80211_rdev_free_wowlan(rdev); rdev->wiphy.wowlan_config = NULL; goto set_wakeup; } err = nla_parse_nested_deprecated(tb, MAX_NL80211_WOWLAN_TRIG, info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS], nl80211_wowlan_policy, info->extack); if (err) return err; if (tb[NL80211_WOWLAN_TRIG_ANY]) { if (!(wowlan->flags & WIPHY_WOWLAN_ANY)) return -EINVAL; new_triggers.any = true; } if (tb[NL80211_WOWLAN_TRIG_DISCONNECT]) { if (!(wowlan->flags & WIPHY_WOWLAN_DISCONNECT)) return -EINVAL; new_triggers.disconnect = true; regular = true; } if 
(tb[NL80211_WOWLAN_TRIG_MAGIC_PKT]) { if (!(wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT)) return -EINVAL; new_triggers.magic_pkt = true; regular = true; } if (tb[NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED]) return -EINVAL; if (tb[NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE]) { if (!(wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE)) return -EINVAL; new_triggers.gtk_rekey_failure = true; regular = true; } if (tb[NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST]) { if (!(wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ)) return -EINVAL; new_triggers.eap_identity_req = true; regular = true; } if (tb[NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE]) { if (!(wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE)) return -EINVAL; new_triggers.four_way_handshake = true; regular = true; } if (tb[NL80211_WOWLAN_TRIG_RFKILL_RELEASE]) { if (!(wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE)) return -EINVAL; new_triggers.rfkill_release = true; regular = true; } if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) { struct nlattr *pat; int n_patterns = 0; int rem, pat_len, mask_len, pkt_offset; struct nlattr *pat_tb[NUM_NL80211_PKTPAT]; regular = true; nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], rem) n_patterns++; if (n_patterns > wowlan->n_patterns) return -EINVAL; new_triggers.patterns = kcalloc(n_patterns, sizeof(new_triggers.patterns[0]), GFP_KERNEL); if (!new_triggers.patterns) return -ENOMEM; new_triggers.n_patterns = n_patterns; i = 0; nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], rem) { u8 *mask_pat; err = nla_parse_nested_deprecated(pat_tb, MAX_NL80211_PKTPAT, pat, nl80211_packet_pattern_policy, info->extack); if (err) goto error; err = -EINVAL; if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) goto error; pat_len = nla_len(pat_tb[NL80211_PKTPAT_PATTERN]); mask_len = DIV_ROUND_UP(pat_len, 8); if (nla_len(pat_tb[NL80211_PKTPAT_MASK]) != mask_len) goto error; if (pat_len > wowlan->pattern_max_len || pat_len < wowlan->pattern_min_len) goto error; if (!pat_tb[NL80211_PKTPAT_OFFSET]) 
pkt_offset = 0; else pkt_offset = nla_get_u32( pat_tb[NL80211_PKTPAT_OFFSET]); if (pkt_offset > wowlan->max_pkt_offset) goto error; new_triggers.patterns[i].pkt_offset = pkt_offset; mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL); if (!mask_pat) { err = -ENOMEM; goto error; } new_triggers.patterns[i].mask = mask_pat; memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len); mask_pat += mask_len; new_triggers.patterns[i].pattern = mask_pat; new_triggers.patterns[i].pattern_len = pat_len; memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len); i++; } } if (tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION]) { regular = true; err = nl80211_parse_wowlan_tcp( rdev, tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION], &new_triggers); if (err) goto error; } if (tb[NL80211_WOWLAN_TRIG_NET_DETECT]) { regular = true; err = nl80211_parse_wowlan_nd( rdev, wowlan, tb[NL80211_WOWLAN_TRIG_NET_DETECT], &new_triggers); if (err) goto error; } /* The 'any' trigger means the device continues operating more or less * as in its normal operation mode and wakes up the host on most of the * normal interrupts (like packet RX, ...) * It therefore makes little sense to combine with the more constrained * wakeup trigger modes. 
*/ if (new_triggers.any && regular) { err = -EINVAL; goto error; } ntrig = kmemdup(&new_triggers, sizeof(new_triggers), GFP_KERNEL); if (!ntrig) { err = -ENOMEM; goto error; } cfg80211_rdev_free_wowlan(rdev); rdev->wiphy.wowlan_config = ntrig; set_wakeup: if (rdev->ops->set_wakeup && prev_enabled != !!rdev->wiphy.wowlan_config) rdev_set_wakeup(rdev, rdev->wiphy.wowlan_config); return 0; error: for (i = 0; i < new_triggers.n_patterns; i++) kfree(new_triggers.patterns[i].mask); kfree(new_triggers.patterns); if (new_triggers.tcp && new_triggers.tcp->sock) sock_release(new_triggers.tcp->sock); kfree(new_triggers.tcp); kfree(new_triggers.nd_config); return err; } #endif static int nl80211_send_coalesce_rules(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { struct nlattr *nl_pats, *nl_pat, *nl_rule, *nl_rules; int i, j, pat_len; struct cfg80211_coalesce_rules *rule; if (!rdev->coalesce->n_rules) return 0; nl_rules = nla_nest_start_noflag(msg, NL80211_ATTR_COALESCE_RULE); if (!nl_rules) return -ENOBUFS; for (i = 0; i < rdev->coalesce->n_rules; i++) { nl_rule = nla_nest_start_noflag(msg, i + 1); if (!nl_rule) return -ENOBUFS; rule = &rdev->coalesce->rules[i]; if (nla_put_u32(msg, NL80211_ATTR_COALESCE_RULE_DELAY, rule->delay)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_ATTR_COALESCE_RULE_CONDITION, rule->condition)) return -ENOBUFS; nl_pats = nla_nest_start_noflag(msg, NL80211_ATTR_COALESCE_RULE_PKT_PATTERN); if (!nl_pats) return -ENOBUFS; for (j = 0; j < rule->n_patterns; j++) { nl_pat = nla_nest_start_noflag(msg, j + 1); if (!nl_pat) return -ENOBUFS; pat_len = rule->patterns[j].pattern_len; if (nla_put(msg, NL80211_PKTPAT_MASK, DIV_ROUND_UP(pat_len, 8), rule->patterns[j].mask) || nla_put(msg, NL80211_PKTPAT_PATTERN, pat_len, rule->patterns[j].pattern) || nla_put_u32(msg, NL80211_PKTPAT_OFFSET, rule->patterns[j].pkt_offset)) return -ENOBUFS; nla_nest_end(msg, nl_pat); } nla_nest_end(msg, nl_pats); nla_nest_end(msg, nl_rule); } nla_nest_end(msg, 
nl_rules); return 0; } static int nl80211_get_coalesce(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct sk_buff *msg; void *hdr; if (!rdev->wiphy.coalesce) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_COALESCE); if (!hdr) goto nla_put_failure; if (rdev->coalesce && nl80211_send_coalesce_rules(msg, rdev)) goto nla_put_failure; genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: nlmsg_free(msg); return -ENOBUFS; } void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev) { struct cfg80211_coalesce *coalesce = rdev->coalesce; int i, j; struct cfg80211_coalesce_rules *rule; if (!coalesce) return; for (i = 0; i < coalesce->n_rules; i++) { rule = &coalesce->rules[i]; for (j = 0; j < rule->n_patterns; j++) kfree(rule->patterns[j].mask); kfree(rule->patterns); } kfree(coalesce->rules); kfree(coalesce); rdev->coalesce = NULL; } static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, struct nlattr *rule, struct cfg80211_coalesce_rules *new_rule) { int err, i; const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce; struct nlattr *tb[NUM_NL80211_ATTR_COALESCE_RULE], *pat; int rem, pat_len, mask_len, pkt_offset, n_patterns = 0; struct nlattr *pat_tb[NUM_NL80211_PKTPAT]; err = nla_parse_nested_deprecated(tb, NL80211_ATTR_COALESCE_RULE_MAX, rule, nl80211_coalesce_policy, NULL); if (err) return err; if (tb[NL80211_ATTR_COALESCE_RULE_DELAY]) new_rule->delay = nla_get_u32(tb[NL80211_ATTR_COALESCE_RULE_DELAY]); if (new_rule->delay > coalesce->max_delay) return -EINVAL; if (tb[NL80211_ATTR_COALESCE_RULE_CONDITION]) new_rule->condition = nla_get_u32(tb[NL80211_ATTR_COALESCE_RULE_CONDITION]); if (!tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN]) return -EINVAL; nla_for_each_nested(pat, 
tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN], rem) n_patterns++; if (n_patterns > coalesce->n_patterns) return -EINVAL; new_rule->patterns = kcalloc(n_patterns, sizeof(new_rule->patterns[0]), GFP_KERNEL); if (!new_rule->patterns) return -ENOMEM; new_rule->n_patterns = n_patterns; i = 0; nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN], rem) { u8 *mask_pat; err = nla_parse_nested_deprecated(pat_tb, MAX_NL80211_PKTPAT, pat, nl80211_packet_pattern_policy, NULL); if (err) return err; if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) return -EINVAL; pat_len = nla_len(pat_tb[NL80211_PKTPAT_PATTERN]); mask_len = DIV_ROUND_UP(pat_len, 8); if (nla_len(pat_tb[NL80211_PKTPAT_MASK]) != mask_len) return -EINVAL; if (pat_len > coalesce->pattern_max_len || pat_len < coalesce->pattern_min_len) return -EINVAL; if (!pat_tb[NL80211_PKTPAT_OFFSET]) pkt_offset = 0; else pkt_offset = nla_get_u32(pat_tb[NL80211_PKTPAT_OFFSET]); if (pkt_offset > coalesce->max_pkt_offset) return -EINVAL; new_rule->patterns[i].pkt_offset = pkt_offset; mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL); if (!mask_pat) return -ENOMEM; new_rule->patterns[i].mask = mask_pat; memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len); mask_pat += mask_len; new_rule->patterns[i].pattern = mask_pat; new_rule->patterns[i].pattern_len = pat_len; memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len); i++; } return 0; } static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce; struct cfg80211_coalesce new_coalesce = {}; struct cfg80211_coalesce *n_coalesce; int err, rem_rule, n_rules = 0, i, j; struct nlattr *rule; struct cfg80211_coalesce_rules *tmp_rule; if (!rdev->wiphy.coalesce || !rdev->ops->set_coalesce) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_COALESCE_RULE]) { cfg80211_rdev_free_coalesce(rdev); 
rdev_set_coalesce(rdev, NULL); return 0; } nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE], rem_rule) n_rules++; if (n_rules > coalesce->n_rules) return -EINVAL; new_coalesce.rules = kcalloc(n_rules, sizeof(new_coalesce.rules[0]), GFP_KERNEL); if (!new_coalesce.rules) return -ENOMEM; new_coalesce.n_rules = n_rules; i = 0; nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE], rem_rule) { err = nl80211_parse_coalesce_rule(rdev, rule, &new_coalesce.rules[i]); if (err) goto error; i++; } err = rdev_set_coalesce(rdev, &new_coalesce); if (err) goto error; n_coalesce = kmemdup(&new_coalesce, sizeof(new_coalesce), GFP_KERNEL); if (!n_coalesce) { err = -ENOMEM; goto error; } cfg80211_rdev_free_coalesce(rdev); rdev->coalesce = n_coalesce; return 0; error: for (i = 0; i < new_coalesce.n_rules; i++) { tmp_rule = &new_coalesce.rules[i]; for (j = 0; j < tmp_rule->n_patterns; j++) kfree(tmp_rule->patterns[j].mask); kfree(tmp_rule->patterns); } kfree(new_coalesce.rules); return err; } static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct nlattr *tb[NUM_NL80211_REKEY_DATA]; struct cfg80211_gtk_rekey_data rekey_data = {}; int err; if (!info->attrs[NL80211_ATTR_REKEY_DATA]) return -EINVAL; err = nla_parse_nested_deprecated(tb, MAX_NL80211_REKEY_DATA, info->attrs[NL80211_ATTR_REKEY_DATA], nl80211_rekey_policy, info->extack); if (err) return err; if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] || !tb[NL80211_REKEY_DATA_KCK]) return -EINVAL; if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN && !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK && nla_len(tb[NL80211_REKEY_DATA_KEK]) == NL80211_KEK_EXT_LEN)) return -ERANGE; if (nla_len(tb[NL80211_REKEY_DATA_KCK]) != NL80211_KCK_LEN && !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK && 
nla_len(tb[NL80211_REKEY_DATA_KCK]) == NL80211_KCK_EXT_LEN) && !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KCK_32 && nla_len(tb[NL80211_REKEY_DATA_KCK]) == NL80211_KCK_EXT_LEN_32)) return -ERANGE; rekey_data.kek = nla_data(tb[NL80211_REKEY_DATA_KEK]); rekey_data.kck = nla_data(tb[NL80211_REKEY_DATA_KCK]); rekey_data.replay_ctr = nla_data(tb[NL80211_REKEY_DATA_REPLAY_CTR]); rekey_data.kek_len = nla_len(tb[NL80211_REKEY_DATA_KEK]); rekey_data.kck_len = nla_len(tb[NL80211_REKEY_DATA_KCK]); if (tb[NL80211_REKEY_DATA_AKM]) rekey_data.akm = nla_get_u32(tb[NL80211_REKEY_DATA_AKM]); if (!wdev->connected) return -ENOTCONN; if (!rdev->ops->set_rekey_data) return -EOPNOTSUPP; return rdev_set_rekey_data(rdev, dev, &rekey_data); } static int nl80211_register_unexpected_frame(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; if (wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO) return -EINVAL; if (wdev->ap_unexpected_nlportid) return -EBUSY; wdev->ap_unexpected_nlportid = info->snd_portid; return 0; } static int nl80211_probe_client(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct sk_buff *msg; void *hdr; const u8 *addr; u64 cookie; int err; if (wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; if (!rdev->ops->probe_client) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_PROBE_CLIENT); if (!hdr) { err = -ENOBUFS; goto free_msg; } addr = nla_data(info->attrs[NL80211_ATTR_MAC]); err = rdev_probe_client(rdev, dev, addr, &cookie); if (err) goto free_msg; if (nla_put_u64_64bit(msg, 
NL80211_ATTR_COOKIE, cookie, NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: err = -ENOBUFS; free_msg: nlmsg_free(msg); return err; } static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct cfg80211_beacon_registration *reg, *nreg; int rv; if (!(rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS)) return -EOPNOTSUPP; nreg = kzalloc(sizeof(*nreg), GFP_KERNEL); if (!nreg) return -ENOMEM; /* First, check if already registered. */ spin_lock_bh(&rdev->beacon_registrations_lock); list_for_each_entry(reg, &rdev->beacon_registrations, list) { if (reg->nlportid == info->snd_portid) { rv = -EALREADY; goto out_err; } } /* Add it to the list */ nreg->nlportid = info->snd_portid; list_add(&nreg->list, &rdev->beacon_registrations); spin_unlock_bh(&rdev->beacon_registrations_lock); return 0; out_err: spin_unlock_bh(&rdev->beacon_registrations_lock); kfree(nreg); return rv; } static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; int err; if (!rdev->ops->start_p2p_device) return -EOPNOTSUPP; if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE) return -EOPNOTSUPP; if (wdev_running(wdev)) return 0; if (rfkill_blocked(rdev->wiphy.rfkill)) return -ERFKILL; err = rdev_start_p2p_device(rdev, wdev); if (err) return err; wdev->is_running = true; rdev->opencount++; return 0; } static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE) return -EOPNOTSUPP; if (!rdev->ops->stop_p2p_device) return -EOPNOTSUPP; cfg80211_stop_p2p_device(rdev, wdev); return 0; } static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) { struct 
cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; struct cfg80211_nan_conf conf = {}; int err; if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (wdev_running(wdev)) return -EEXIST; if (rfkill_blocked(rdev->wiphy.rfkill)) return -ERFKILL; if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) return -EINVAL; conf.master_pref = nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]); if (info->attrs[NL80211_ATTR_BANDS]) { u32 bands = nla_get_u32(info->attrs[NL80211_ATTR_BANDS]); if (bands & ~(u32)wdev->wiphy->nan_supported_bands) return -EOPNOTSUPP; if (bands && !(bands & BIT(NL80211_BAND_2GHZ))) return -EINVAL; conf.bands = bands; } err = rdev_start_nan(rdev, wdev, &conf); if (err) return err; wdev->is_running = true; rdev->opencount++; return 0; } static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; cfg80211_stop_nan(rdev, wdev); return 0; } static int validate_nan_filter(struct nlattr *filter_attr) { struct nlattr *attr; int len = 0, n_entries = 0, rem; nla_for_each_nested(attr, filter_attr, rem) { len += nla_len(attr); n_entries++; } if (len >= U8_MAX) return -EINVAL; return n_entries; } static int handle_nan_filter(struct nlattr *attr_filter, struct cfg80211_nan_func *func, bool tx) { struct nlattr *attr; int n_entries, rem, i; struct cfg80211_nan_func_filter *filter; n_entries = validate_nan_filter(attr_filter); if (n_entries < 0) return n_entries; BUILD_BUG_ON(sizeof(*func->rx_filters) != sizeof(*func->tx_filters)); filter = kcalloc(n_entries, sizeof(*func->rx_filters), GFP_KERNEL); if (!filter) return -ENOMEM; i = 0; nla_for_each_nested(attr, attr_filter, rem) { filter[i].filter = nla_memdup(attr, GFP_KERNEL); if (!filter[i].filter) goto err; filter[i].len = nla_len(attr); i++; } if (tx) { func->num_tx_filters = 
n_entries; func->tx_filters = filter; } else { func->num_rx_filters = n_entries; func->rx_filters = filter; } return 0; err: i = 0; nla_for_each_nested(attr, attr_filter, rem) { kfree(filter[i].filter); i++; } kfree(filter); return -ENOMEM; } static int nl80211_nan_add_func(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; struct nlattr *tb[NUM_NL80211_NAN_FUNC_ATTR], *func_attr; struct cfg80211_nan_func *func; struct sk_buff *msg = NULL; void *hdr = NULL; int err = 0; if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!wdev_running(wdev)) return -ENOTCONN; if (!info->attrs[NL80211_ATTR_NAN_FUNC]) return -EINVAL; err = nla_parse_nested_deprecated(tb, NL80211_NAN_FUNC_ATTR_MAX, info->attrs[NL80211_ATTR_NAN_FUNC], nl80211_nan_func_policy, info->extack); if (err) return err; func = kzalloc(sizeof(*func), GFP_KERNEL); if (!func) return -ENOMEM; func->cookie = cfg80211_assign_cookie(rdev); if (!tb[NL80211_NAN_FUNC_TYPE]) { err = -EINVAL; goto out; } func->type = nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]); if (!tb[NL80211_NAN_FUNC_SERVICE_ID]) { err = -EINVAL; goto out; } memcpy(func->service_id, nla_data(tb[NL80211_NAN_FUNC_SERVICE_ID]), sizeof(func->service_id)); func->close_range = nla_get_flag(tb[NL80211_NAN_FUNC_CLOSE_RANGE]); if (tb[NL80211_NAN_FUNC_SERVICE_INFO]) { func->serv_spec_info_len = nla_len(tb[NL80211_NAN_FUNC_SERVICE_INFO]); func->serv_spec_info = kmemdup(nla_data(tb[NL80211_NAN_FUNC_SERVICE_INFO]), func->serv_spec_info_len, GFP_KERNEL); if (!func->serv_spec_info) { err = -ENOMEM; goto out; } } if (tb[NL80211_NAN_FUNC_TTL]) func->ttl = nla_get_u32(tb[NL80211_NAN_FUNC_TTL]); switch (func->type) { case NL80211_NAN_FUNC_PUBLISH: if (!tb[NL80211_NAN_FUNC_PUBLISH_TYPE]) { err = -EINVAL; goto out; } func->publish_type = nla_get_u8(tb[NL80211_NAN_FUNC_PUBLISH_TYPE]); func->publish_bcast = nla_get_flag(tb[NL80211_NAN_FUNC_PUBLISH_BCAST]); if 
((!(func->publish_type & NL80211_NAN_SOLICITED_PUBLISH)) && func->publish_bcast) { err = -EINVAL; goto out; } break; case NL80211_NAN_FUNC_SUBSCRIBE: func->subscribe_active = nla_get_flag(tb[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE]); break; case NL80211_NAN_FUNC_FOLLOW_UP: if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] || !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] || !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) { err = -EINVAL; goto out; } func->followup_id = nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_ID]); func->followup_reqid = nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]); memcpy(func->followup_dest.addr, nla_data(tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]), sizeof(func->followup_dest.addr)); if (func->ttl) { err = -EINVAL; goto out; } break; default: err = -EINVAL; goto out; } if (tb[NL80211_NAN_FUNC_SRF]) { struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR]; err = nla_parse_nested_deprecated(srf_tb, NL80211_NAN_SRF_ATTR_MAX, tb[NL80211_NAN_FUNC_SRF], nl80211_nan_srf_policy, info->extack); if (err) goto out; func->srf_include = nla_get_flag(srf_tb[NL80211_NAN_SRF_INCLUDE]); if (srf_tb[NL80211_NAN_SRF_BF]) { if (srf_tb[NL80211_NAN_SRF_MAC_ADDRS] || !srf_tb[NL80211_NAN_SRF_BF_IDX]) { err = -EINVAL; goto out; } func->srf_bf_len = nla_len(srf_tb[NL80211_NAN_SRF_BF]); func->srf_bf = kmemdup(nla_data(srf_tb[NL80211_NAN_SRF_BF]), func->srf_bf_len, GFP_KERNEL); if (!func->srf_bf) { err = -ENOMEM; goto out; } func->srf_bf_idx = nla_get_u8(srf_tb[NL80211_NAN_SRF_BF_IDX]); } else { struct nlattr *attr, *mac_attr = srf_tb[NL80211_NAN_SRF_MAC_ADDRS]; int n_entries, rem, i = 0; if (!mac_attr) { err = -EINVAL; goto out; } n_entries = validate_acl_mac_addrs(mac_attr); if (n_entries <= 0) { err = -EINVAL; goto out; } func->srf_num_macs = n_entries; func->srf_macs = kcalloc(n_entries, sizeof(*func->srf_macs), GFP_KERNEL); if (!func->srf_macs) { err = -ENOMEM; goto out; } nla_for_each_nested(attr, mac_attr, rem) memcpy(func->srf_macs[i++].addr, nla_data(attr), sizeof(*func->srf_macs)); } } if 
(tb[NL80211_NAN_FUNC_TX_MATCH_FILTER]) { err = handle_nan_filter(tb[NL80211_NAN_FUNC_TX_MATCH_FILTER], func, true); if (err) goto out; } if (tb[NL80211_NAN_FUNC_RX_MATCH_FILTER]) { err = handle_nan_filter(tb[NL80211_NAN_FUNC_RX_MATCH_FILTER], func, false); if (err) goto out; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { err = -ENOMEM; goto out; } hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_ADD_NAN_FUNCTION); /* This can't really happen - we just allocated 4KB */ if (WARN_ON(!hdr)) { err = -ENOMEM; goto out; } err = rdev_add_nan_func(rdev, wdev, func); out: if (err < 0) { cfg80211_free_nan_func(func); nlmsg_free(msg); return err; } /* propagate the instance id and cookie to userspace */ if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, func->cookie, NL80211_ATTR_PAD)) goto nla_put_failure; func_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_FUNC); if (!func_attr) goto nla_put_failure; if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, func->instance_id)) goto nla_put_failure; nla_nest_end(msg, func_attr); genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: nlmsg_free(msg); return -ENOBUFS; } static int nl80211_nan_del_func(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; u64 cookie; if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!wdev_running(wdev)) return -ENOTCONN; if (!info->attrs[NL80211_ATTR_COOKIE]) return -EINVAL; cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); rdev_del_nan_func(rdev, wdev, cookie); return 0; } static int nl80211_nan_change_config(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; struct cfg80211_nan_conf conf = {}; u32 changed = 0; if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!wdev_running(wdev)) return -ENOTCONN; if 
(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) { conf.master_pref = nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]); if (conf.master_pref <= 1 || conf.master_pref == 255) return -EINVAL; changed |= CFG80211_NAN_CONF_CHANGED_PREF; } if (info->attrs[NL80211_ATTR_BANDS]) { u32 bands = nla_get_u32(info->attrs[NL80211_ATTR_BANDS]); if (bands & ~(u32)wdev->wiphy->nan_supported_bands) return -EOPNOTSUPP; if (bands && !(bands & BIT(NL80211_BAND_2GHZ))) return -EINVAL; conf.bands = bands; changed |= CFG80211_NAN_CONF_CHANGED_BANDS; } if (!changed) return -EINVAL; return rdev_nan_change_conf(rdev, wdev, &conf, changed); } void cfg80211_nan_match(struct wireless_dev *wdev, struct cfg80211_nan_match_params *match, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct nlattr *match_attr, *local_func_attr, *peer_func_attr; struct sk_buff *msg; void *hdr; if (WARN_ON(!match->inst_id || !match->peer_inst_id || !match->addr)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_MATCH); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, match->cookie, NL80211_ATTR_PAD) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, match->addr)) goto nla_put_failure; match_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_MATCH); if (!match_attr) goto nla_put_failure; local_func_attr = nla_nest_start_noflag(msg, NL80211_NAN_MATCH_FUNC_LOCAL); if (!local_func_attr) goto nla_put_failure; if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->inst_id)) goto nla_put_failure; nla_nest_end(msg, local_func_attr); peer_func_attr = nla_nest_start_noflag(msg, NL80211_NAN_MATCH_FUNC_PEER); if 
(!peer_func_attr) goto nla_put_failure; if (nla_put_u8(msg, NL80211_NAN_FUNC_TYPE, match->type) || nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->peer_inst_id)) goto nla_put_failure; if (match->info && match->info_len && nla_put(msg, NL80211_NAN_FUNC_SERVICE_INFO, match->info_len, match->info)) goto nla_put_failure; nla_nest_end(msg, peer_func_attr); nla_nest_end(msg, match_attr); genlmsg_end(msg, hdr); if (!wdev->owner_nlportid) genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_NAN, gfp); else genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, wdev->owner_nlportid); return; nla_put_failure: nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_nan_match); void cfg80211_nan_func_terminated(struct wireless_dev *wdev, u8 inst_id, enum nl80211_nan_func_term_reason reason, u64 cookie, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct nlattr *func_attr; void *hdr; if (WARN_ON(!inst_id)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_NAN_FUNCTION); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, NL80211_ATTR_PAD)) goto nla_put_failure; func_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_FUNC); if (!func_attr) goto nla_put_failure; if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, inst_id) || nla_put_u8(msg, NL80211_NAN_FUNC_TERM_REASON, reason)) goto nla_put_failure; nla_nest_end(msg, func_attr); genlmsg_end(msg, hdr); if (!wdev->owner_nlportid) genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_NAN, gfp); else genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, 
wdev->owner_nlportid);

	return;

nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_nan_func_terminated);

/*
 * Reply to NL80211_CMD_GET_PROTOCOL_FEATURES with a single
 * NL80211_ATTR_PROTOCOL_FEATURES attribute advertising
 * NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP.
 *
 * Returns -ENOMEM if the reply cannot be allocated, -ENOBUFS if it cannot
 * be filled, otherwise the result of genlmsg_reply().
 */
static int nl80211_get_protocol_features(struct sk_buff *skb,
					 struct genl_info *info)
{
	void *hdr;
	struct sk_buff *msg;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
			     NL80211_CMD_GET_PROTOCOL_FEATURES);
	if (!hdr)
		goto nla_put_failure;

	if (nla_put_u32(msg, NL80211_ATTR_PROTOCOL_FEATURES,
			NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);
	return genlmsg_reply(msg, info);

 nla_put_failure:
	kfree_skb(msg);
	return -ENOBUFS;
}

/*
 * Pass the mobility domain id (NL80211_ATTR_MDID) and the IE blob
 * (NL80211_ATTR_IE) to the driver's update_ft_ies operation.  Both
 * attributes are mandatory.  The IE pointer refers directly into the
 * request attribute; it is not copied here.
 */
static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct cfg80211_update_ft_ies_params ft_params;
	struct net_device *dev = info->user_ptr[1];

	if (!rdev->ops->update_ft_ies)
		return -EOPNOTSUPP;

	if (!info->attrs[NL80211_ATTR_MDID] ||
	    !info->attrs[NL80211_ATTR_IE])
		return -EINVAL;

	memset(&ft_params, 0, sizeof(ft_params));
	ft_params.md = nla_get_u16(info->attrs[NL80211_ATTR_MDID]);
	ft_params.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
	ft_params.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);

	return rdev_update_ft_ies(rdev, dev, &ft_params);
}

/*
 * Start a critical-protocol session on behalf of the requesting socket.
 *
 * Only one session may be active per device: a non-zero
 * rdev->crit_proto_nlportid yields -EBUSY.  On success the sender's port
 * id is recorded in rdev->crit_proto_nlportid.
 */
static int nl80211_crit_protocol_start(struct sk_buff *skb,
				       struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];
	enum nl80211_crit_proto_id proto = NL80211_CRIT_PROTO_UNSPEC;
	u16 duration;
	int ret;

	if (!rdev->ops->crit_proto_start)
		return -EOPNOTSUPP;

	/* a driver providing start without stop is a driver bug */
	if (WARN_ON(!rdev->ops->crit_proto_stop))
		return -EINVAL;

	if (rdev->crit_proto_nlportid)
		return -EBUSY;

	/* determine protocol if provided */
	if (info->attrs[NL80211_ATTR_CRIT_PROT_ID])
		proto = nla_get_u16(info->attrs[NL80211_ATTR_CRIT_PROT_ID]);

	if (proto >= NUM_NL80211_CRIT_PROTO)
		return -EINVAL;

	/* timeout must be provided */
	if (!info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION])
		return -EINVAL;

	duration =
		nla_get_u16(info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]);

	ret = rdev_crit_proto_start(rdev, wdev, proto, duration);
	if (!ret)
		rdev->crit_proto_nlportid = info->snd_portid;

	return ret;
}

/*
 * Stop the critical-protocol session, if one is recorded.  The recorded
 * port id is cleared before the driver is called.  Returns 0 even when no
 * session was active.
 */
static int nl80211_crit_protocol_stop(struct sk_buff *skb,
				      struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];

	if (!rdev->ops->crit_proto_stop)
		return -EOPNOTSUPP;

	if (rdev->crit_proto_nlportid) {
		rdev->crit_proto_nlportid = 0;
		rdev_crit_proto_stop(rdev, wdev);
	}
	return 0;
}

/*
 * Validate the vendor data attribute against the vendor command's policy.
 *
 * With VENDOR_CMD_RAW_DATA the attribute must not carry NLA_F_NESTED and
 * is accepted without further checks.  Otherwise it must be nested and is
 * validated with nla_validate_nested() against vcmd->policy.
 */
static int nl80211_vendor_check_policy(const struct wiphy_vendor_command *vcmd,
				       struct nlattr *attr,
				       struct netlink_ext_ack *extack)
{
	if (vcmd->policy == VENDOR_CMD_RAW_DATA) {
		if (attr->nla_type & NLA_F_NESTED) {
			NL_SET_ERR_MSG_ATTR(extack, attr,
					    "unexpected nested data");
			return -EINVAL;
		}

		return 0;
	}

	if (!(attr->nla_type & NLA_F_NESTED)) {
		NL_SET_ERR_MSG_ATTR(extack, attr, "expected nested data");
		return -EINVAL;
	}

	return nla_validate_nested(attr, vcmd->maxattr, vcmd->policy, extack);
}

/*
 * Dispatch NL80211_CMD_VENDOR to the matching vendor command's doit
 * handler, selected by (vendor id, subcommand).
 *
 * The wdev lookup may fail with -EINVAL, in which case the command runs
 * without a wdev unless its flags require one.  rdev->cur_cmd_info is set
 * only for the duration of the doit call.
 */
static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev =
		__cfg80211_wdev_from_attrs(rdev, genl_info_net(info),
					   info->attrs);
	int i, err;
	u32 vid, subcmd;

	if (!rdev->wiphy.vendor_commands)
		return -EOPNOTSUPP;

	if (IS_ERR(wdev)) {
		err = PTR_ERR(wdev);
		if (err != -EINVAL)
			return err;
		wdev = NULL;
	} else if (wdev->wiphy != &rdev->wiphy) {
		return -EINVAL;
	}

	if (!info->attrs[NL80211_ATTR_VENDOR_ID] ||
	    !info->attrs[NL80211_ATTR_VENDOR_SUBCMD])
		return -EINVAL;

	vid = nla_get_u32(info->attrs[NL80211_ATTR_VENDOR_ID]);
	subcmd = nla_get_u32(info->attrs[NL80211_ATTR_VENDOR_SUBCMD]);
	for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) {
		const struct wiphy_vendor_command *vcmd;
		void *data = NULL;
		int len = 0;

		vcmd = &rdev->wiphy.vendor_commands[i];

		if (vcmd->info.vendor_id
!= vid || vcmd->info.subcmd != subcmd)
			continue;

		if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV |
				   WIPHY_VENDOR_CMD_NEED_NETDEV)) {
			if (!wdev)
				return -EINVAL;
			if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV &&
			    !wdev->netdev)
				return -EINVAL;

			if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
				if (!wdev_running(wdev))
					return -ENETDOWN;
			}
		} else {
			/* command did not ask for a wdev: do not pass one */
			wdev = NULL;
		}

		if (!vcmd->doit)
			return -EOPNOTSUPP;

		if (info->attrs[NL80211_ATTR_VENDOR_DATA]) {
			data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]);
			len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]);

			err = nl80211_vendor_check_policy(vcmd,
					info->attrs[NL80211_ATTR_VENDOR_DATA],
					info->extack);
			if (err)
				return err;
		}

		/* expose the request to reply helpers while doit runs */
		rdev->cur_cmd_info = info;
		err = vcmd->doit(&rdev->wiphy, wdev, data, len);
		rdev->cur_cmd_info = NULL;
		return err;
	}

	return -EOPNOTSUPP;
}

/*
 * Resolve the device, wdev and vendor command for a vendor dump.
 *
 * On the first call the request is parsed and the result is cached in
 * cb->args[]:
 *   args[0] = wiphy index + 1 (0 means "not parsed yet")
 *   args[1] = wdev identifier + 1 (0 means "no wdev")
 *   args[2] = index into wiphy.vendor_commands
 *   args[3] = pointer to the vendor data payload
 *   args[4] = length of the vendor data payload
 * Subsequent calls rebuild *rdev and *wdev from args[0] and args[1].
 *
 * Returns 0 on success or a negative errno.
 */
static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
				       struct netlink_callback *cb,
				       struct cfg80211_registered_device **rdev,
				       struct wireless_dev **wdev)
{
	struct nlattr **attrbuf;
	u32 vid, subcmd;
	unsigned int i;
	int vcmd_idx = -1;
	int err;
	void *data = NULL;
	unsigned int data_len = 0;

	if (cb->args[0]) {
		/* subtract the 1 again here */
		struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
		struct wireless_dev *tmp;

		if (!wiphy)
			return -ENODEV;
		*rdev = wiphy_to_rdev(wiphy);
		*wdev = NULL;

		if (cb->args[1]) {
			list_for_each_entry(tmp, &wiphy->wdev_list, list) {
				if (tmp->identifier == cb->args[1] - 1) {
					*wdev = tmp;
					break;
				}
			}
		}

		/* keep rtnl locked in successful case */
		return 0;
	}

	attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL);
	if (!attrbuf)
		return -ENOMEM;

	err = nlmsg_parse_deprecated(cb->nlh,
				     GENL_HDRLEN + nl80211_fam.hdrsize,
				     attrbuf, nl80211_fam.maxattr,
				     nl80211_policy, NULL);
	if (err)
		goto out;

	if (!attrbuf[NL80211_ATTR_VENDOR_ID] ||
	    !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
		err = -EINVAL;
		goto out;
	}

	/* a failed wdev lookup is not fatal here; the dump runs without one */
	*wdev = __cfg80211_wdev_from_attrs(NULL, sock_net(skb->sk), attrbuf);
	if (IS_ERR(*wdev))
		*wdev = NULL;

	*rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
	if (IS_ERR(*rdev)) {
		err = PTR_ERR(*rdev);
		goto out;
	}

	vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]);
	subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);

	for (i = 0; i < (*rdev)->wiphy.n_vendor_commands; i++) {
		const struct wiphy_vendor_command *vcmd;

		vcmd = &(*rdev)->wiphy.vendor_commands[i];

		if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd)
			continue;

		if (!vcmd->dumpit) {
			err = -EOPNOTSUPP;
			goto out;
		}

		vcmd_idx = i;
		break;
	}

	if (vcmd_idx < 0) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (attrbuf[NL80211_ATTR_VENDOR_DATA]) {
		data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]);
		data_len = nla_len(attrbuf[NL80211_ATTR_VENDOR_DATA]);

		err = nl80211_vendor_check_policy(
				&(*rdev)->wiphy.vendor_commands[vcmd_idx],
				attrbuf[NL80211_ATTR_VENDOR_DATA],
				cb->extack);
		if (err)
			goto out;
	}

	/* 0 is the first index - add 1 to parse only once */
	cb->args[0] = (*rdev)->wiphy_idx + 1;
	/* add 1 to know if it was NULL */
	cb->args[1] = *wdev ? (*wdev)->identifier + 1 : 0;
	cb->args[2] = vcmd_idx;
	cb->args[3] = (unsigned long)data;
	cb->args[4] = data_len;

	/* keep rtnl locked in successful case */
	err = 0;
out:
	kfree(attrbuf);
	return err;
}

/*
 * Dump callback for NL80211_CMD_VENDOR.
 *
 * Runs under rtnl_lock().  After resolving the command it enforces the
 * command's NEED_WDEV / NEED_NETDEV / NEED_RUNNING flags, then repeatedly
 * emits one NLM_F_MULTI message per successful dumpit() call.  The
 * driver's own iteration state lives in cb->args[5].
 *
 * dumpit() results: -ENOBUFS or -ENOENT cancel the current message and end
 * this pass, returning skb->len; any other value <= 0 cancels the message
 * and is returned as-is; a positive value finalizes the message and loops.
 */
static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	struct cfg80211_registered_device *rdev;
	struct wireless_dev *wdev;
	unsigned int vcmd_idx;
	const struct wiphy_vendor_command *vcmd;
	void *data;
	int data_len;
	int err;
	struct nlattr *vendor_data;

	rtnl_lock();
	err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev);
	if (err)
		goto out;

	vcmd_idx = cb->args[2];
	data = (void *)cb->args[3];
	data_len = cb->args[4];
	vcmd = &rdev->wiphy.vendor_commands[vcmd_idx];

	if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV |
			   WIPHY_VENDOR_CMD_NEED_NETDEV)) {
		if (!wdev) {
			err = -EINVAL;
			goto out;
		}
		if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV &&
		    !wdev->netdev) {
			err = -EINVAL;
			goto out;
		}

		if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
			if (!wdev_running(wdev)) {
				err = -ENETDOWN;
				goto out;
			}
		}
	}

	while (1) {
		void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   NL80211_CMD_VENDOR);
		if (!hdr)
			break;

		if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
		    (wdev && nla_put_u64_64bit(skb, NL80211_ATTR_WDEV,
					       wdev_id(wdev),
					       NL80211_ATTR_PAD))) {
			genlmsg_cancel(skb, hdr);
			break;
		}

		vendor_data = nla_nest_start_noflag(skb,
						    NL80211_ATTR_VENDOR_DATA);
		if (!vendor_data) {
			genlmsg_cancel(skb, hdr);
			break;
		}

		err = vcmd->dumpit(&rdev->wiphy, wdev, skb, data, data_len,
				   (unsigned long *)&cb->args[5]);
		nla_nest_end(skb, vendor_data);

		if (err == -ENOBUFS || err == -ENOENT) {
			genlmsg_cancel(skb, hdr);
			break;
		} else if (err <= 0) {
			genlmsg_cancel(skb, hdr);
			goto out;
		}

		genlmsg_end(skb, hdr);
	}

	err = skb->len;
 out:
	rtnl_unlock();
	return err;
}

/*
 * Allocate a reply skb for the vendor command currently being executed.
 * Valid only while rdev->cur_cmd_info is set; otherwise warns and returns
 * NULL.
 */
struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy,
					   enum nl80211_commands cmd,
					   enum nl80211_attrs attr,
					   int approxlen)
{
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
if (WARN_ON(!rdev->cur_cmd_info))
		return NULL;

	return __cfg80211_alloc_vendor_skb(rdev, NULL, approxlen,
					   rdev->cur_cmd_info->snd_portid,
					   rdev->cur_cmd_info->snd_seq,
					   cmd, attr, NULL, GFP_KERNEL);
}
EXPORT_SYMBOL(__cfg80211_alloc_reply_skb);

/*
 * Finalize and send a vendor command reply.
 *
 * The skb control buffer is expected to hold, in order, the rdev pointer,
 * the genetlink header pointer and the open nested data attribute.  The
 * control buffer is cleared before the skb is sent.  If no command is in
 * progress the skb is freed and -EINVAL is returned.
 */
int cfg80211_vendor_cmd_reply(struct sk_buff *skb)
{
	struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0];
	void *hdr = ((void **)skb->cb)[1];
	struct nlattr *data = ((void **)skb->cb)[2];

	/* clear CB data for netlink core to own from now on */
	memset(skb->cb, 0, sizeof(skb->cb));

	if (WARN_ON(!rdev->cur_cmd_info)) {
		kfree_skb(skb);
		return -EINVAL;
	}

	nla_nest_end(skb, data);
	genlmsg_end(skb, hdr);
	return genlmsg_reply(skb, rdev->cur_cmd_info);
}
EXPORT_SYMBOL_GPL(cfg80211_vendor_cmd_reply);

/*
 * Return the netlink port id of the sender of the vendor command that is
 * currently executing, or 0 (with a warning) if none is in progress.
 */
unsigned int cfg80211_vendor_cmd_get_sender(struct wiphy *wiphy)
{
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);

	if (WARN_ON(!rdev->cur_cmd_info))
		return 0;

	return rdev->cur_cmd_info->snd_portid;
}
EXPORT_SYMBOL_GPL(cfg80211_vendor_cmd_get_sender);

/*
 * Parse NL80211_ATTR_QOS_MAP (if present) and hand it to the driver.
 *
 * The attribute holds zero or more 2-byte DSCP exceptions followed by
 * IEEE80211_QOS_MAP_LEN_MIN bytes copied into qos_map->up.  An odd length
 * or an exception with up > 7 is rejected.  Without the attribute the
 * driver is called with a NULL map.
 *
 * NOTE(review): the exception count is derived from
 * len - IEEE80211_QOS_MAP_LEN_MIN with no lower-bound check here;
 * presumably the attribute policy enforces the length range - confirm.
 */
static int nl80211_set_qos_map(struct sk_buff *skb,
			       struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct cfg80211_qos_map *qos_map = NULL;
	struct net_device *dev = info->user_ptr[1];
	u8 *pos, len, num_des, des_len, des;
	int ret;

	if (!rdev->ops->set_qos_map)
		return -EOPNOTSUPP;

	if (info->attrs[NL80211_ATTR_QOS_MAP]) {
		pos = nla_data(info->attrs[NL80211_ATTR_QOS_MAP]);
		len = nla_len(info->attrs[NL80211_ATTR_QOS_MAP]);

		if (len % 2)
			return -EINVAL;

		qos_map = kzalloc(sizeof(struct cfg80211_qos_map), GFP_KERNEL);
		if (!qos_map)
			return -ENOMEM;

		/* each exception occupies two bytes */
		num_des = (len - IEEE80211_QOS_MAP_LEN_MIN) >> 1;
		if (num_des) {
			des_len = num_des *
				sizeof(struct cfg80211_dscp_exception);
			memcpy(qos_map->dscp_exception, pos, des_len);
			qos_map->num_des = num_des;
			for (des = 0; des < num_des; des++) {
				if (qos_map->dscp_exception[des].up > 7) {
					kfree(qos_map);
					return -EINVAL;
				}
			}
			pos += des_len;
		}
		memcpy(qos_map->up, pos, IEEE80211_QOS_MAP_LEN_MIN);
	}

	ret = nl80211_key_allowed(dev->ieee80211_ptr);
	if (!ret)
		ret = rdev_set_qos_map(rdev, dev, qos_map);

	/* kfree(NULL) is a no-op when no map was supplied */
	kfree(qos_map);
	return ret;
}

/*
 * Add a TX traffic stream (TSID, peer, user priority, optional admitted
 * time) on a connected station or P2P client interface.
 *
 * Requires NL80211_FEATURE_SUPPORTS_WMM_ADMISSION.  TSIDs at or above
 * IEEE80211_FIRST_TSPEC_TSID are rejected, as is an explicit admitted time
 * of zero.
 */
static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	const u8 *peer;
	u8 tsid, up;
	u16 admitted_time = 0;

	if (!(rdev->wiphy.features & NL80211_FEATURE_SUPPORTS_WMM_ADMISSION))
		return -EOPNOTSUPP;

	if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC] ||
	    !info->attrs[NL80211_ATTR_USER_PRIO])
		return -EINVAL;

	tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]);
	up = nla_get_u8(info->attrs[NL80211_ATTR_USER_PRIO]);

	/* WMM uses TIDs 0-7 even for TSPEC */
	if (tsid >= IEEE80211_FIRST_TSPEC_TSID) {
		/* TODO: handle 802.11 TSPEC/admission control
		 * need more attributes for that (e.g. BA session requirement);
		 * change the WMM admission test above to allow both then
		 */
		return -EINVAL;
	}

	peer = nla_data(info->attrs[NL80211_ATTR_MAC]);

	if (info->attrs[NL80211_ATTR_ADMITTED_TIME]) {
		admitted_time =
			nla_get_u16(info->attrs[NL80211_ATTR_ADMITTED_TIME]);
		if (!admitted_time)
			return -EINVAL;
	}

	switch (wdev->iftype) {
	case NL80211_IFTYPE_STATION:
	case NL80211_IFTYPE_P2P_CLIENT:
		if (wdev->connected)
			break;
		return -ENOTCONN;
	default:
		return -EOPNOTSUPP;
	}

	return rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time);
}

/*
 * Delete the TX traffic stream identified by NL80211_ATTR_TSID for the
 * peer given in NL80211_ATTR_MAC.  Both attributes are mandatory.
 */
static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	const u8 *peer;
	u8 tsid;

	if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]);
	peer = nla_data(info->attrs[NL80211_ATTR_MAC]);

	return rdev_del_tx_ts(rdev, dev, tsid, peer);
}

/*
 * Request a TDLS channel switch to the channel definition carried in the
 * request, for the peer in NL80211_ATTR_MAC with the operating class in
 * NL80211_ATTR_OPER_CLASS.  Allowed only on station and P2P client
 * interfaces of devices advertising NL80211_FEATURE_TDLS_CHANNEL_SWITCH.
 */
static int nl80211_tdls_channel_switch(struct sk_buff *skb,
				       struct genl_info *info)
{
	struct cfg80211_registered_device *rdev =
info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_chan_def chandef = {};
	const u8 *addr;
	u8 oper_class;
	int err;

	if (!rdev->ops->tdls_channel_switch ||
	    !(rdev->wiphy.features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH))
		return -EOPNOTSUPP;

	switch (dev->ieee80211_ptr->iftype) {
	case NL80211_IFTYPE_STATION:
	case NL80211_IFTYPE_P2P_CLIENT:
		break;
	default:
		return -EOPNOTSUPP;
	}

	if (!info->attrs[NL80211_ATTR_MAC] ||
	    !info->attrs[NL80211_ATTR_OPER_CLASS])
		return -EINVAL;

	err = nl80211_parse_chandef(rdev, info, &chandef);
	if (err)
		return err;

	/*
	 * Don't allow wide channels on the 2.4Ghz band, as per IEEE802.11-2012
	 * section 10.22.6.2.1. Disallow 5/10Mhz channels as well for now, the
	 * specification is not defined for them.
	 */
	if (chandef.chan->band == NL80211_BAND_2GHZ &&
	    chandef.width != NL80211_CHAN_WIDTH_20_NOHT &&
	    chandef.width != NL80211_CHAN_WIDTH_20)
		return -EINVAL;

	/* we will be active on the TDLS link */
	if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef,
					   wdev->iftype))
		return -EINVAL;

	/* don't allow switching to DFS channels */
	if (cfg80211_chandef_dfs_required(wdev->wiphy, &chandef, wdev->iftype))
		return -EINVAL;

	addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
	oper_class = nla_get_u8(info->attrs[NL80211_ATTR_OPER_CLASS]);

	return rdev_tdls_channel_switch(rdev, dev, addr, oper_class, &chandef);
}

/*
 * Cancel a TDLS channel switch for the peer in NL80211_ATTR_MAC.
 *
 * Requires both the tdls_channel_switch and tdls_cancel_channel_switch
 * driver operations and NL80211_FEATURE_TDLS_CHANNEL_SWITCH.  The driver
 * call's result is not propagated; 0 is returned once the request has
 * been validated.
 */
static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb,
					      struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	const u8 *addr;

	if (!rdev->ops->tdls_channel_switch ||
	    !rdev->ops->tdls_cancel_channel_switch ||
	    !(rdev->wiphy.features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH))
		return -EOPNOTSUPP;

	switch (dev->ieee80211_ptr->iftype) {
	case NL80211_IFTYPE_STATION:
	case NL80211_IFTYPE_P2P_CLIENT:
		break;
	default:
		return -EOPNOTSUPP;
	}

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	addr = nla_data(info->attrs[NL80211_ATTR_MAC]);

	rdev_tdls_cancel_channel_switch(rdev, dev, addr);

	return 0;
}

/*
 * Enable or disable multicast-to-unicast conversion on an AP or P2P GO
 * interface.  The setting is the presence of the
 * NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED flag attribute.
 */
static int nl80211_set_multicast_to_unicast(struct sk_buff *skb,
					    struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	const struct nlattr *nla;
	bool enabled;

	if (!rdev->ops->set_multicast_to_unicast)
		return -EOPNOTSUPP;

	if (wdev->iftype != NL80211_IFTYPE_AP &&
	    wdev->iftype != NL80211_IFTYPE_P2P_GO)
		return -EOPNOTSUPP;

	nla = info->attrs[NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED];
	enabled = nla_get_flag(nla);

	return rdev_set_multicast_to_unicast(rdev, dev, enabled);
}

/*
 * Install a PMK for the currently connected peer.
 *
 * Allowed only on station / P2P client interfaces of devices with
 * NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X.  The MAC attribute must equal
 * the connected address and the PMK length must be WLAN_PMK_LEN or
 * WLAN_PMK_LEN_SUITE_B_192.  NL80211_ATTR_PMKR0_NAME is optional.
 */
static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_pmk_conf pmk_conf = {};

	if (wdev->iftype != NL80211_IFTYPE_STATION &&
	    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	if (!wiphy_ext_feature_isset(&rdev->wiphy,
				     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X))
		return -EOPNOTSUPP;

	if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_PMK])
		return -EINVAL;

	if (!wdev->connected)
		return -ENOTCONN;

	/* the authenticator address must be the peer we are connected to */
	pmk_conf.aa = nla_data(info->attrs[NL80211_ATTR_MAC]);
	if (memcmp(pmk_conf.aa, wdev->u.client.connected_addr, ETH_ALEN))
		return -EINVAL;

	pmk_conf.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]);
	pmk_conf.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]);
	if (pmk_conf.pmk_len != WLAN_PMK_LEN &&
	    pmk_conf.pmk_len != WLAN_PMK_LEN_SUITE_B_192)
		return -EINVAL;

	if (info->attrs[NL80211_ATTR_PMKR0_NAME])
		pmk_conf.pmk_r0_name =
			nla_data(info->attrs[NL80211_ATTR_PMKR0_NAME]);

	return rdev_set_pmk(rdev, dev, &pmk_conf);
}

/*
 * Remove the PMK for the authenticator address in NL80211_ATTR_MAC.
 * Subject to the same interface type and feature requirements as
 * nl80211_set_pmk().
 */
static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr; const u8 *aa; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X)) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; aa = nla_data(info->attrs[NL80211_ATTR_MAC]); return rdev_del_pmk(rdev, dev, aa); } static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct cfg80211_external_auth_params params; if (!rdev->ops->external_auth) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_SSID] && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EINVAL; if (!info->attrs[NL80211_ATTR_BSSID]) return -EINVAL; if (!info->attrs[NL80211_ATTR_STATUS_CODE]) return -EINVAL; memset(¶ms, 0, sizeof(params)); if (info->attrs[NL80211_ATTR_SSID]) { params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); if (params.ssid.ssid_len == 0) return -EINVAL; memcpy(params.ssid.ssid, nla_data(info->attrs[NL80211_ATTR_SSID]), params.ssid.ssid_len); } memcpy(params.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]), ETH_ALEN); params.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]); if (info->attrs[NL80211_ATTR_PMKID]) params.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]); return rdev_external_auth(rdev, dev, ¶ms); } static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) { bool dont_wait_for_ack = info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK]; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; const u8 *buf; size_t len; u8 *dest; u16 proto; bool noencrypt; u64 cookie = 0; int link_id; int err; if (!wiphy_ext_feature_isset(&rdev->wiphy, 
NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211)) return -EOPNOTSUPP; if (!rdev->ops->tx_control_port) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_FRAME] || !info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) { GENL_SET_ERR_MSG(info, "Frame, MAC or ethertype missing"); return -EINVAL; } switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_MESH_POINT: break; case NL80211_IFTYPE_ADHOC: if (wdev->u.ibss.current_bss) break; return -ENOTCONN; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->connected) break; return -ENOTCONN; default: return -EOPNOTSUPP; } buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); len = nla_len(info->attrs[NL80211_ATTR_FRAME]); dest = nla_data(info->attrs[NL80211_ATTR_MAC]); proto = nla_get_u16(info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]); noencrypt = nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]); link_id = nl80211_link_id_or_invalid(info->attrs); err = rdev_tx_control_port(rdev, dev, buf, len, dest, cpu_to_be16(proto), noencrypt, link_id, dont_wait_for_ack ? 
NULL : &cookie); if (!err && !dont_wait_for_ack) nl_set_extack_cookie_u64(info->extack, cookie); return err; } static int nl80211_get_ftm_responder_stats(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ftm_responder_stats ftm_stats = {}; unsigned int link_id = nl80211_link_id(info->attrs); struct sk_buff *msg; void *hdr; struct nlattr *ftm_stats_attr; int err; if (wdev->iftype != NL80211_IFTYPE_AP || !wdev->links[link_id].ap.beacon_interval) return -EOPNOTSUPP; err = rdev_get_ftm_responder_stats(rdev, dev, &ftm_stats); if (err) return err; if (!ftm_stats.filled) return -ENODATA; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_FTM_RESPONDER_STATS); if (!hdr) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; ftm_stats_attr = nla_nest_start_noflag(msg, NL80211_ATTR_FTM_RESPONDER_STATS); if (!ftm_stats_attr) goto nla_put_failure; #define SET_FTM(field, name, type) \ do { if ((ftm_stats.filled & BIT(NL80211_FTM_STATS_ ## name)) && \ nla_put_ ## type(msg, NL80211_FTM_STATS_ ## name, \ ftm_stats.field)) \ goto nla_put_failure; } while (0) #define SET_FTM_U64(field, name) \ do { if ((ftm_stats.filled & BIT(NL80211_FTM_STATS_ ## name)) && \ nla_put_u64_64bit(msg, NL80211_FTM_STATS_ ## name, \ ftm_stats.field, NL80211_FTM_STATS_PAD)) \ goto nla_put_failure; } while (0) SET_FTM(success_num, SUCCESS_NUM, u32); SET_FTM(partial_num, PARTIAL_NUM, u32); SET_FTM(failed_num, FAILED_NUM, u32); SET_FTM(asap_num, ASAP_NUM, u32); SET_FTM(non_asap_num, NON_ASAP_NUM, u32); SET_FTM_U64(total_duration_ms, TOTAL_DURATION_MSEC); SET_FTM(unknown_triggers_num, UNKNOWN_TRIGGERS_NUM, u32); SET_FTM(reschedule_requests_num, RESCHEDULE_REQUESTS_NUM, u32); 
SET_FTM(out_of_window_triggers_num, OUT_OF_WINDOW_TRIGGERS_NUM, u32); #undef SET_FTM nla_nest_end(msg, ftm_stats_attr); genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: nlmsg_free(msg); return -ENOBUFS; } static int nl80211_update_owe_info(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct cfg80211_update_owe_info owe_info; struct net_device *dev = info->user_ptr[1]; if (!rdev->ops->update_owe_info) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_STATUS_CODE] || !info->attrs[NL80211_ATTR_MAC]) return -EINVAL; memset(&owe_info, 0, sizeof(owe_info)); owe_info.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]); nla_memcpy(owe_info.peer, info->attrs[NL80211_ATTR_MAC], ETH_ALEN); if (info->attrs[NL80211_ATTR_IE]) { owe_info.ie = nla_data(info->attrs[NL80211_ATTR_IE]); owe_info.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } return rdev_update_owe_info(rdev, dev, &owe_info); } static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct station_info sinfo = {}; const u8 *buf; size_t len; u8 *dest; int err; if (!rdev->ops->probe_mesh_link || !rdev->ops->get_station) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_FRAME]) { GENL_SET_ERR_MSG(info, "Frame or MAC missing"); return -EINVAL; } if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; dest = nla_data(info->attrs[NL80211_ATTR_MAC]); buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); len = nla_len(info->attrs[NL80211_ATTR_FRAME]); if (len < sizeof(struct ethhdr)) return -EINVAL; if (!ether_addr_equal(buf, dest) || is_multicast_ether_addr(buf) || !ether_addr_equal(buf + ETH_ALEN, dev->dev_addr)) return -EINVAL; err = rdev_get_station(rdev, dev, dest, &sinfo); if (err) return err; 
cfg80211_sinfo_release_content(&sinfo);

	return rdev_probe_mesh_link(rdev, dev, dest, buf, len);
}

/*
 * Parse one nested TID configuration entry into @tid_conf.
 *
 * NL80211_TID_CONFIG_ATTR_TIDS is mandatory.  When the OVERRIDE flag is
 * present, the existing configuration for those TIDs is reset through the
 * driver first; a driver without reset_tid_config yields -EINVAL.  Every
 * optional attribute that is present sets its bit in tid_conf->mask.  At
 * the end the mask is checked against what the wiphy supports per peer
 * (when @peer is non-NULL) or per vif.
 *
 * Returns 0 on success or a negative errno.
 */
static int parse_tid_conf(struct cfg80211_registered_device *rdev,
			  struct nlattr *attrs[], struct net_device *dev,
			  struct cfg80211_tid_cfg *tid_conf,
			  struct genl_info *info, const u8 *peer,
			  unsigned int link_id)
{
	struct netlink_ext_ack *extack = info->extack;
	u64 mask;
	int err;

	if (!attrs[NL80211_TID_CONFIG_ATTR_TIDS])
		return -EINVAL;

	tid_conf->config_override =
			nla_get_flag(attrs[NL80211_TID_CONFIG_ATTR_OVERRIDE]);
	tid_conf->tids = nla_get_u16(attrs[NL80211_TID_CONFIG_ATTR_TIDS]);

	if (tid_conf->config_override) {
		if (rdev->ops->reset_tid_config) {
			err = rdev_reset_tid_config(rdev, dev, peer,
						    tid_conf->tids);
			if (err)
				return err;
		} else {
			return -EINVAL;
		}
	}

	if (attrs[NL80211_TID_CONFIG_ATTR_NOACK]) {
		tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_NOACK);
		tid_conf->noack =
			nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_NOACK]);
	}

	if (attrs[NL80211_TID_CONFIG_ATTR_RETRY_SHORT]) {
		tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_RETRY_SHORT);
		tid_conf->retry_short =
			nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RETRY_SHORT]);

		if (tid_conf->retry_short > rdev->wiphy.max_data_retry_count)
			return -EINVAL;
	}

	if (attrs[NL80211_TID_CONFIG_ATTR_RETRY_LONG]) {
		tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG);
		tid_conf->retry_long =
			nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RETRY_LONG]);

		if (tid_conf->retry_long > rdev->wiphy.max_data_retry_count)
			return -EINVAL;
	}

	if (attrs[NL80211_TID_CONFIG_ATTR_AMPDU_CTRL]) {
		tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL);
		tid_conf->ampdu =
			nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_AMPDU_CTRL]);
	}

	if (attrs[NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL]) {
		tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL);
		tid_conf->rtscts =
			nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL]);
	}

	if (attrs[NL80211_TID_CONFIG_ATTR_AMSDU_CTRL]) {
		tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_AMSDU_CTRL);
		tid_conf->amsdu =
			nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_AMSDU_CTRL]);
	}

	if (attrs[NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE]) {
		u32 idx = NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE, attr;

		tid_conf->txrate_type = nla_get_u8(attrs[idx]);

		/* a rate mask is only parsed for non-automatic rate types */
		if (tid_conf->txrate_type != NL80211_TX_RATE_AUTOMATIC) {
			attr = NL80211_TID_CONFIG_ATTR_TX_RATE;
			err = nl80211_parse_tx_bitrate_mask(info, attrs, attr,
						    &tid_conf->txrate_mask, dev,
						    true, link_id);
			if (err)
				return err;

			tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_TX_RATE);
		}
		tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE);
	}

	if (peer)
		mask = rdev->wiphy.tid_config_support.peer;
	else
		mask = rdev->wiphy.tid_config_support.vif;

	if (tid_conf->mask & ~mask) {
		NL_SET_ERR_MSG(extack, "unsupported TID configuration");
		return -ENOTSUPP;
	}

	return 0;
}

/*
 * Apply a list of TID configurations (NL80211_ATTR_TID_CONFIG).
 *
 * The nested list is walked twice: once to count the entries so that the
 * configuration can be allocated in one piece, and once to parse each
 * entry with parse_tid_conf().  NL80211_ATTR_MAC, when present, selects a
 * peer-specific configuration.  The allocation is freed on every path.
 */
static int nl80211_set_tid_config(struct sk_buff *skb,
				  struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct nlattr *attrs[NL80211_TID_CONFIG_ATTR_MAX + 1];
	unsigned int link_id = nl80211_link_id(info->attrs);
	struct net_device *dev = info->user_ptr[1];
	struct cfg80211_tid_config *tid_config;
	struct nlattr *tid;
	int conf_idx = 0, rem_conf;
	int ret = -EINVAL;
	u32 num_conf = 0;

	if (!info->attrs[NL80211_ATTR_TID_CONFIG])
		return -EINVAL;

	if (!rdev->ops->set_tid_config)
		return -EOPNOTSUPP;

	nla_for_each_nested(tid, info->attrs[NL80211_ATTR_TID_CONFIG],
			    rem_conf)
		num_conf++;

	tid_config = kzalloc(struct_size(tid_config, tid_conf, num_conf),
			     GFP_KERNEL);
	if (!tid_config)
		return -ENOMEM;

	tid_config->n_tid_conf = num_conf;

	if (info->attrs[NL80211_ATTR_MAC])
		tid_config->peer = nla_data(info->attrs[NL80211_ATTR_MAC]);

	nla_for_each_nested(tid, info->attrs[NL80211_ATTR_TID_CONFIG],
			    rem_conf) {
		ret = nla_parse_nested(attrs, NL80211_TID_CONFIG_ATTR_MAX,
				       tid, NULL, NULL);

		if (ret)
			goto bad_tid_conf;

		ret = parse_tid_conf(rdev, attrs, dev,
				     &tid_config->tid_conf[conf_idx],
				     info, tid_config->peer, link_id);
		if (ret)
			goto bad_tid_conf;

		conf_idx++;
	}

	ret = rdev_set_tid_config(rdev, dev,
tid_config); bad_tid_conf: kfree(tid_config); return ret; } static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct cfg80211_color_change_settings params = {}; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct nlattr **tb; u16 offset; int err; if (!rdev->ops->color_change) return -EOPNOTSUPP; if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BSS_COLOR)) return -EOPNOTSUPP; if (wdev->iftype != NL80211_IFTYPE_AP) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT] || !info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR] || !info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS]) return -EINVAL; params.count = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT]); params.color = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR]); err = nl80211_parse_beacon(rdev, info->attrs, ¶ms.beacon_next, info->extack); if (err) return err; tb = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*tb), GFP_KERNEL); if (!tb) return -ENOMEM; err = nla_parse_nested(tb, NL80211_ATTR_MAX, info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS], nl80211_policy, info->extack); if (err) goto out; err = nl80211_parse_beacon(rdev, tb, ¶ms.beacon_color_change, info->extack); if (err) goto out; if (!tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) { err = -EINVAL; goto out; } if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) != sizeof(u16)) { err = -EINVAL; goto out; } offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]); if (offset >= params.beacon_color_change.tail_len) { err = -EINVAL; goto out; } if (params.beacon_color_change.tail[offset] != params.count) { err = -EINVAL; goto out; } params.counter_offset_beacon = offset; if (tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) { if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) != sizeof(u16)) { err = -EINVAL; goto out; } offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]); if (offset >= 
params.beacon_color_change.probe_resp_len) { err = -EINVAL; goto out; } if (params.beacon_color_change.probe_resp[offset] != params.count) { err = -EINVAL; goto out; } params.counter_offset_presp = offset; } err = rdev_color_change(rdev, dev, ¶ms); out: kfree(params.beacon_next.mbssid_ies); kfree(params.beacon_color_change.mbssid_ies); kfree(params.beacon_next.rnr_ies); kfree(params.beacon_color_change.rnr_ies); kfree(tb); return err; } static int nl80211_set_fils_aad(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct cfg80211_fils_aad fils_aad = {}; u8 *nonces; if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_FILS_KEK] || !info->attrs[NL80211_ATTR_FILS_NONCES]) return -EINVAL; fils_aad.macaddr = nla_data(info->attrs[NL80211_ATTR_MAC]); fils_aad.kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]); fils_aad.kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]); nonces = nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]); fils_aad.snonce = nonces; fils_aad.anonce = nonces + FILS_NONCE_LEN; return rdev_set_fils_aad(rdev, dev, &fils_aad); } static int nl80211_add_link(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; int ret; if (!(wdev->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)) return -EINVAL; switch (wdev->iftype) { case NL80211_IFTYPE_AP: break; default: return -EINVAL; } if (!info->attrs[NL80211_ATTR_MAC] || !is_valid_ether_addr(nla_data(info->attrs[NL80211_ATTR_MAC]))) return -EINVAL; wdev->valid_links |= BIT(link_id); ether_addr_copy(wdev->links[link_id].addr, nla_data(info->attrs[NL80211_ATTR_MAC])); ret = rdev_add_intf_link(rdev, wdev, link_id); if (ret) { wdev->valid_links &= ~BIT(link_id); eth_zero_addr(wdev->links[link_id].addr); } return ret; 
} static int nl80211_remove_link(struct sk_buff *skb, struct genl_info *info) { unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; /* cannot remove if there's no link */ if (!info->attrs[NL80211_ATTR_MLO_LINK_ID]) return -EINVAL; switch (wdev->iftype) { case NL80211_IFTYPE_AP: break; default: return -EINVAL; } cfg80211_remove_link(wdev, link_id); return 0; } static int nl80211_add_mod_link_station(struct sk_buff *skb, struct genl_info *info, bool add) { struct link_station_parameters params = {}; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; int err; if ((add && !rdev->ops->add_link_station) || (!add && !rdev->ops->mod_link_station)) return -EOPNOTSUPP; if (add && !info->attrs[NL80211_ATTR_MAC]) return -EINVAL; if (!info->attrs[NL80211_ATTR_MLD_ADDR]) return -EINVAL; if (add && !info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) return -EINVAL; params.mld_mac = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); if (info->attrs[NL80211_ATTR_MAC]) { params.link_mac = nla_data(info->attrs[NL80211_ATTR_MAC]); if (!is_valid_ether_addr(params.link_mac)) return -EINVAL; } if (!info->attrs[NL80211_ATTR_MLO_LINK_ID]) return -EINVAL; params.link_id = nla_get_u8(info->attrs[NL80211_ATTR_MLO_LINK_ID]); if (info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) { params.supported_rates = nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); params.supported_rates_len = nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); } if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params.ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) params.vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) { params.he_capa = nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]); params.he_capa_len = nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]); if 
(info->attrs[NL80211_ATTR_EHT_CAPABILITY]) { params.eht_capa = nla_data(info->attrs[NL80211_ATTR_EHT_CAPABILITY]); params.eht_capa_len = nla_len(info->attrs[NL80211_ATTR_EHT_CAPABILITY]); if (!ieee80211_eht_capa_size_ok((const u8 *)params.he_capa, (const u8 *)params.eht_capa, params.eht_capa_len, false)) return -EINVAL; } } if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]) params.he_6ghz_capa = nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]); if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) { params.opmode_notif_used = true; params.opmode_notif = nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]); } err = nl80211_parse_sta_txpower_setting(info, ¶ms.txpwr, ¶ms.txpwr_set); if (err) return err; if (add) return rdev_add_link_station(rdev, dev, ¶ms); return rdev_mod_link_station(rdev, dev, ¶ms); } static int nl80211_add_link_station(struct sk_buff *skb, struct genl_info *info) { return nl80211_add_mod_link_station(skb, info, true); } static int nl80211_modify_link_station(struct sk_buff *skb, struct genl_info *info) { return nl80211_add_mod_link_station(skb, info, false); } static int nl80211_remove_link_station(struct sk_buff *skb, struct genl_info *info) { struct link_station_del_parameters params = {}; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; if (!rdev->ops->del_link_station) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MLD_ADDR] || !info->attrs[NL80211_ATTR_MLO_LINK_ID]) return -EINVAL; params.mld_mac = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); params.link_id = nla_get_u8(info->attrs[NL80211_ATTR_MLO_LINK_ID]); return rdev_del_link_station(rdev, dev, ¶ms); } static int nl80211_set_hw_timestamp(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct cfg80211_set_hw_timestamp hwts = {}; if (!rdev->wiphy.hw_timestamp_max_peers) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MAC] && 
rdev->wiphy.hw_timestamp_max_peers != CFG80211_HW_TIMESTAMP_ALL_PEERS)
		return -EOPNOTSUPP;

	/* the peer address is optional; hwts.macaddr stays NULL without it */
	if (info->attrs[NL80211_ATTR_MAC])
		hwts.macaddr = nla_data(info->attrs[NL80211_ATTR_MAC]);

	hwts.enable =
		nla_get_flag(info->attrs[NL80211_ATTR_HW_TIMESTAMP_ENABLED]);

	return rdev_set_hw_timestamp(rdev, dev, &hwts);
}

/*
 * Per-command requirement bits, consumed by nl80211_pre_doit() and
 * nl80211_post_doit() below.  The *_UP variants are the base bit combined
 * with NL80211_FLAG_CHECK_NETDEV_UP.
 */
#define NL80211_FLAG_NEED_WIPHY		0x01
#define NL80211_FLAG_NEED_NETDEV	0x02
#define NL80211_FLAG_NEED_RTNL		0x04
#define NL80211_FLAG_CHECK_NETDEV_UP	0x08
#define NL80211_FLAG_NEED_NETDEV_UP	(NL80211_FLAG_NEED_NETDEV |\
					 NL80211_FLAG_CHECK_NETDEV_UP)
#define NL80211_FLAG_NEED_WDEV		0x10
/* If a netdev is associated, it must be UP, P2P must be started */
#define NL80211_FLAG_NEED_WDEV_UP	(NL80211_FLAG_NEED_WDEV |\
					 NL80211_FLAG_CHECK_NETDEV_UP)
#define NL80211_FLAG_CLEAR_SKB		0x20
#define NL80211_FLAG_NO_WIPHY_MTX	0x40
#define NL80211_FLAG_MLO_VALID_LINK_ID	0x80
#define NL80211_FLAG_MLO_UNSUPPORTED	0x100

/*
 * X-macro listing every flag combination an ops entry may use.  Each
 * SELECTOR(sel, name, value) line is expanded three ways in this file:
 * into the enum constant NL80211_IFL_SEL_<name>, into the table slot
 * nl80211_internal_flags[NL80211_IFL_SEL_<name>] = value, and (by IFLAGS())
 * into a "(sel) == (value) ? NL80211_IFL_SEL_<name> :" ternary link.
 * Ops entries therefore store the small selector index, not the raw flags.
 */
#define INTERNAL_FLAG_SELECTORS(__sel)			\
	SELECTOR(__sel, NONE, 0) /* must be first */	\
	SELECTOR(__sel, WIPHY,				\
		 NL80211_FLAG_NEED_WIPHY)		\
	SELECTOR(__sel, WDEV,				\
		 NL80211_FLAG_NEED_WDEV)		\
	SELECTOR(__sel, NETDEV,				\
		 NL80211_FLAG_NEED_NETDEV)		\
	SELECTOR(__sel, NETDEV_LINK,			\
		 NL80211_FLAG_NEED_NETDEV |		\
		 NL80211_FLAG_MLO_VALID_LINK_ID)	\
	SELECTOR(__sel, NETDEV_NO_MLO,			\
		 NL80211_FLAG_NEED_NETDEV |		\
		 NL80211_FLAG_MLO_UNSUPPORTED)		\
	SELECTOR(__sel, WIPHY_RTNL,			\
		 NL80211_FLAG_NEED_WIPHY |		\
		 NL80211_FLAG_NEED_RTNL)		\
	SELECTOR(__sel, WIPHY_RTNL_NOMTX,		\
		 NL80211_FLAG_NEED_WIPHY |		\
		 NL80211_FLAG_NEED_RTNL |		\
		 NL80211_FLAG_NO_WIPHY_MTX)		\
	SELECTOR(__sel, WDEV_RTNL,			\
		 NL80211_FLAG_NEED_WDEV |		\
		 NL80211_FLAG_NEED_RTNL)		\
	SELECTOR(__sel, NETDEV_RTNL,			\
		 NL80211_FLAG_NEED_NETDEV |		\
		 NL80211_FLAG_NEED_RTNL)		\
	SELECTOR(__sel, NETDEV_UP,			\
		 NL80211_FLAG_NEED_NETDEV_UP)		\
	SELECTOR(__sel, NETDEV_UP_LINK,			\
		 NL80211_FLAG_NEED_NETDEV_UP |		\
		 NL80211_FLAG_MLO_VALID_LINK_ID)	\
	SELECTOR(__sel, NETDEV_UP_NO_MLO,		\
		 NL80211_FLAG_NEED_NETDEV_UP |		\
		 NL80211_FLAG_MLO_UNSUPPORTED)		\
	SELECTOR(__sel, NETDEV_UP_NO_MLO_CLEAR,		\
		 NL80211_FLAG_NEED_NETDEV_UP |		\
		 NL80211_FLAG_CLEAR_SKB |		\
		 NL80211_FLAG_MLO_UNSUPPORTED)		\
	SELECTOR(__sel, NETDEV_UP_NOTMX,		\
		 NL80211_FLAG_NEED_NETDEV_UP |		\
		 NL80211_FLAG_NO_WIPHY_MTX)		\
	SELECTOR(__sel, NETDEV_UP_NOTMX_NOMLO,		\
		 NL80211_FLAG_NEED_NETDEV_UP |		\
		 NL80211_FLAG_NO_WIPHY_MTX |		\
		 NL80211_FLAG_MLO_UNSUPPORTED)		\
	SELECTOR(__sel, NETDEV_UP_CLEAR,		\
		 NL80211_FLAG_NEED_NETDEV_UP |		\
		 NL80211_FLAG_CLEAR_SKB)		\
	SELECTOR(__sel, WDEV_UP,			\
		 NL80211_FLAG_NEED_WDEV_UP)		\
	SELECTOR(__sel, WDEV_UP_LINK,			\
		 NL80211_FLAG_NEED_WDEV_UP |		\
		 NL80211_FLAG_MLO_VALID_LINK_ID)	\
	SELECTOR(__sel, WDEV_UP_RTNL,			\
		 NL80211_FLAG_NEED_WDEV_UP |		\
		 NL80211_FLAG_NEED_RTNL)		\
	SELECTOR(__sel, WIPHY_CLEAR,			\
		 NL80211_FLAG_NEED_WIPHY |		\
		 NL80211_FLAG_CLEAR_SKB)

/* Selector indices, one NL80211_IFL_SEL_<name> per SELECTOR() line above. */
enum nl80211_internal_flags_selector {
#define SELECTOR(_, name, value)	NL80211_IFL_SEL_##name,
	INTERNAL_FLAG_SELECTORS(_)
#undef SELECTOR
};

/* Maps a selector index back to its NL80211_FLAG_* combination. */
static u32 nl80211_internal_flags[] = {
#define SELECTOR(_, name, value)	[NL80211_IFL_SEL_##name] = value,
	INTERNAL_FLAG_SELECTORS(_)
#undef SELECTOR
};

/*
 * nl80211_pre_doit() - resolve the target object and take locks before a
 * command handler runs.
 * @ops: the operation being dispatched; ops->internal_flags is a selector
 *	 index into nl80211_internal_flags[]
 * @skb: request buffer (unused here)
 * @info: request; on success user_ptr[0] holds the rdev (if one was looked
 *	  up) and user_ptr[1] the net_device (NEED_NETDEV) or wireless_dev
 *	  (NEED_WDEV)
 *
 * Runs under rtnl_lock() while looking things up.  On success RTNL is kept
 * only when NL80211_FLAG_NEED_RTNL is set, the wiphy lock is taken unless
 * NL80211_FLAG_NO_WIPHY_MTX is set, and a netdev reference obtained with
 * dev_hold() stays held; nl80211_post_doit() releases all three.  On
 * failure RTNL and the netdev reference are dropped here.
 *
 * Return: 0 on success; -EINVAL for an out-of-range selector, a missing
 * netdev or a link-ID rule violation; -ENETDOWN when the interface must be
 * running and is not; or the lookup's PTR_ERR().
 */
static int nl80211_pre_doit(const struct genl_split_ops *ops,
			    struct sk_buff *skb,
			    struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = NULL;
	struct wireless_dev *wdev = NULL;
	struct net_device *dev = NULL;
	u32 internal_flags;
	int err;

	/* the stored value is an index, validate it before the lookup */
	if (WARN_ON(ops->internal_flags >= ARRAY_SIZE(nl80211_internal_flags)))
		return -EINVAL;

	internal_flags = nl80211_internal_flags[ops->internal_flags];

	rtnl_lock();
	if (internal_flags & NL80211_FLAG_NEED_WIPHY) {
		rdev = cfg80211_get_dev_from_info(genl_info_net(info), info);
		if (IS_ERR(rdev)) {
			err = PTR_ERR(rdev);
			goto out_unlock;
		}
		info->user_ptr[0] = rdev;
	} else if (internal_flags & NL80211_FLAG_NEED_NETDEV ||
		   internal_flags & NL80211_FLAG_NEED_WDEV) {
		wdev = __cfg80211_wdev_from_attrs(NULL, genl_info_net(info),
						  info->attrs);
		if (IS_ERR(wdev)) {
			err = PTR_ERR(wdev);
			goto out_unlock;
		}

		/*
		 * dev may be NULL here (checked below for NEED_NETDEV); the
		 * reference is dropped at out_unlock or in post_doit.
		 */
		dev = wdev->netdev;
		dev_hold(dev);
		rdev = wiphy_to_rdev(wdev->wiphy);

		if (internal_flags & NL80211_FLAG_NEED_NETDEV) {
			if (!dev) {
				err = -EINVAL;
				goto out_unlock;
			}

			info->user_ptr[1] = dev;
		} else {
			info->user_ptr[1] = wdev;
		}

		if (internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
		    !wdev_running(wdev)) {
			err = -ENETDOWN;
			goto out_unlock;
		}

		info->user_ptr[0] = rdev;
	}

	if (internal_flags & NL80211_FLAG_MLO_VALID_LINK_ID) {
		struct nlattr *link_id = info->attrs[NL80211_ATTR_MLO_LINK_ID];

		/* only reachable with a wdev when NEED_NETDEV/NEED_WDEV is set */
		if (!wdev) {
			err = -EINVAL;
			goto out_unlock;
		}

		/* MLO -> require valid link ID */
		if (wdev->valid_links &&
		    (!link_id ||
		     !(wdev->valid_links & BIT(nla_get_u8(link_id))))) {
			err = -EINVAL;
			goto out_unlock;
		}

		/* non-MLO -> no link ID attribute accepted */
		if (!wdev->valid_links && link_id) {
			err = -EINVAL;
			goto out_unlock;
		}
	}

	if (internal_flags & NL80211_FLAG_MLO_UNSUPPORTED) {
		/* reject both an explicit link ID and an interface with links */
		if (info->attrs[NL80211_ATTR_MLO_LINK_ID] ||
		    (wdev && wdev->valid_links)) {
			err = -EINVAL;
			goto out_unlock;
		}
	}

	if (rdev && !(internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) {
		wiphy_lock(&rdev->wiphy);
		/* we keep the mutex locked until post_doit */
		__release(&rdev->wiphy.mtx);
	}
	if (!(internal_flags & NL80211_FLAG_NEED_RTNL))
		rtnl_unlock();

	return 0;
out_unlock:
	rtnl_unlock();
	dev_put(dev);
	return err;
}

/*
 * nl80211_post_doit() - undo what nl80211_pre_doit() set up, after the
 * command handler has run.
 *
 * Drops the netdev reference (through the wdev when NEED_WDEV was used),
 * releases the wiphy lock unless NL80211_FLAG_NO_WIPHY_MTX is set, releases
 * RTNL if NL80211_FLAG_NEED_RTNL kept it, and finally zeroes the request
 * payload for NL80211_FLAG_CLEAR_SKB commands.
 */
static void nl80211_post_doit(const struct genl_split_ops *ops,
			      struct sk_buff *skb,
			      struct genl_info *info)
{
	u32 internal_flags = nl80211_internal_flags[ops->internal_flags];

	if (info->user_ptr[1]) {
		if (internal_flags & NL80211_FLAG_NEED_WDEV) {
			struct wireless_dev *wdev = info->user_ptr[1];

			dev_put(wdev->netdev);
		} else {
			dev_put(info->user_ptr[1]);
		}
	}

	if (info->user_ptr[0] &&
	    !(internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) {
		struct cfg80211_registered_device *rdev = info->user_ptr[0];

		/* we kept the mutex locked since pre_doit */
		__acquire(&rdev->wiphy.mtx);
		wiphy_unlock(&rdev->wiphy);
	}

	if (internal_flags & NL80211_FLAG_NEED_RTNL)
		rtnl_unlock();

	/* If needed, clear the netlink message payload from the SKB
	 * as it might contain key data that shouldn't stick around on
	 * the heap after the SKB is freed.
The netlink message header * is still needed for further processing, so leave it intact. */ if (internal_flags & NL80211_FLAG_CLEAR_SKB) { struct nlmsghdr *nlh = nlmsg_hdr(skb); memset(nlmsg_data(nlh), 0, nlmsg_len(nlh)); } } static int nl80211_set_sar_sub_specs(struct cfg80211_registered_device *rdev, struct cfg80211_sar_specs *sar_specs, struct nlattr *spec[], int index) { u32 range_index, i; if (!sar_specs || !spec) return -EINVAL; if (!spec[NL80211_SAR_ATTR_SPECS_POWER] || !spec[NL80211_SAR_ATTR_SPECS_RANGE_INDEX]) return -EINVAL; range_index = nla_get_u32(spec[NL80211_SAR_ATTR_SPECS_RANGE_INDEX]); /* check if range_index exceeds num_freq_ranges */ if (range_index >= rdev->wiphy.sar_capa->num_freq_ranges) return -EINVAL; /* check if range_index duplicates */ for (i = 0; i < index; i++) { if (sar_specs->sub_specs[i].freq_range_index == range_index) return -EINVAL; } sar_specs->sub_specs[index].power = nla_get_s32(spec[NL80211_SAR_ATTR_SPECS_POWER]); sar_specs->sub_specs[index].freq_range_index = range_index; return 0; } static int nl80211_set_sar_specs(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *spec[NL80211_SAR_ATTR_SPECS_MAX + 1]; struct nlattr *tb[NL80211_SAR_ATTR_MAX + 1]; struct cfg80211_sar_specs *sar_spec; enum nl80211_sar_type type; struct nlattr *spec_list; u32 specs; int rem, err; if (!rdev->wiphy.sar_capa || !rdev->ops->set_sar_specs) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_SAR_SPEC]) return -EINVAL; nla_parse_nested(tb, NL80211_SAR_ATTR_MAX, info->attrs[NL80211_ATTR_SAR_SPEC], NULL, NULL); if (!tb[NL80211_SAR_ATTR_TYPE] || !tb[NL80211_SAR_ATTR_SPECS]) return -EINVAL; type = nla_get_u32(tb[NL80211_SAR_ATTR_TYPE]); if (type != rdev->wiphy.sar_capa->type) return -EINVAL; specs = 0; nla_for_each_nested(spec_list, tb[NL80211_SAR_ATTR_SPECS], rem) specs++; if (specs > rdev->wiphy.sar_capa->num_freq_ranges) return -EINVAL; sar_spec = kzalloc(struct_size(sar_spec, 
sub_specs, specs), GFP_KERNEL); if (!sar_spec) return -ENOMEM; sar_spec->type = type; specs = 0; nla_for_each_nested(spec_list, tb[NL80211_SAR_ATTR_SPECS], rem) { nla_parse_nested(spec, NL80211_SAR_ATTR_SPECS_MAX, spec_list, NULL, NULL); switch (type) { case NL80211_SAR_TYPE_POWER: if (nl80211_set_sar_sub_specs(rdev, sar_spec, spec, specs)) { err = -EINVAL; goto error; } break; default: err = -EINVAL; goto error; } specs++; } sar_spec->num_sub_specs = specs; rdev->cur_cmd_info = info; err = rdev_set_sar_specs(rdev, sar_spec); rdev->cur_cmd_info = NULL; error: kfree(sar_spec); return err; } #define SELECTOR(__sel, name, value) \ ((__sel) == (value)) ? NL80211_IFL_SEL_##name : int __missing_selector(void); #define IFLAGS(__val) INTERNAL_FLAG_SELECTORS(__val) __missing_selector() static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_wiphy, .dumpit = nl80211_dump_wiphy, .done = nl80211_dump_wiphy_done, /* can be retrieved by unprivileged users */ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, }; static const struct genl_small_ops nl80211_small_ops[] = { { .cmd = NL80211_CMD_SET_WIPHY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_wiphy, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = NL80211_CMD_GET_INTERFACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_interface, .dumpit = nl80211_dump_interface, /* can be retrieved by unprivileged users */ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV), }, { .cmd = NL80211_CMD_SET_INTERFACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_interface, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_NEW_INTERFACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_interface, .flags = 
GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY |
					 NL80211_FLAG_NEED_RTNL |
					 /* we take the wiphy mutex later ourselves */
					 NL80211_FLAG_NO_WIPHY_MTX),
	},
	{
		.cmd = NL80211_CMD_DEL_INTERFACE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_del_interface,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV |
					 NL80211_FLAG_NEED_RTNL),
	},
	{
		.cmd = NL80211_CMD_GET_KEY,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_get_key,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	/*
	 * Entries carrying NL80211_FLAG_CLEAR_SKB get their request payload
	 * zeroed by nl80211_post_doit() once the handler has run.
	 */
	{
		.cmd = NL80211_CMD_SET_KEY,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_key,
		.flags = GENL_UNS_ADMIN_PERM,
		/* cannot use NL80211_FLAG_MLO_VALID_LINK_ID, depends on key */
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_CLEAR_SKB),
	},
	{
		.cmd = NL80211_CMD_NEW_KEY,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_new_key,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_CLEAR_SKB),
	},
	{
		.cmd = NL80211_CMD_DEL_KEY,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_del_key,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	/*
	 * NL80211_FLAG_MLO_VALID_LINK_ID: nl80211_pre_doit() requires a valid
	 * link ID on interfaces with links and rejects one on those without.
	 */
	{
		.cmd = NL80211_CMD_SET_BEACON,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_UNS_ADMIN_PERM,
		.doit = nl80211_set_beacon,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_MLO_VALID_LINK_ID),
	},
	{
		.cmd = NL80211_CMD_START_AP,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_UNS_ADMIN_PERM,
		.doit = nl80211_start_ap,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_MLO_VALID_LINK_ID),
	},
	{
		.cmd = NL80211_CMD_STOP_AP,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_UNS_ADMIN_PERM,
		.doit = nl80211_stop_ap,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_MLO_VALID_LINK_ID),
	},
	{
		.cmd = NL80211_CMD_GET_STATION,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_get_station,
		.dumpit = nl80211_dump_station,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
	},
	{
		.cmd = NL80211_CMD_SET_STATION,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_station,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_NEW_STATION,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_new_station,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_DEL_STATION,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_del_station,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_GET_MPATH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_get_mpath,
		.dumpit = nl80211_dump_mpath,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_GET_MPP,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_get_mpp,
		.dumpit = nl80211_dump_mpp,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_SET_MPATH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_mpath,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_NEW_MPATH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_new_mpath,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_DEL_MPATH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_del_mpath,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_SET_BSS,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_bss,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_MLO_VALID_LINK_ID),
	},
	/*
	 * The regulatory entries below leave .internal_flags unset, i.e.
	 * selector 0 (NONE): nl80211_pre_doit() looks up no device for them.
	 */
	{
		.cmd = NL80211_CMD_GET_REG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_get_reg_do,
		.dumpit = nl80211_get_reg_dump,
		/* can be retrieved by unprivileged users */
	},
	/* SET_REG is only in the table when CRDA support is configured */
#ifdef CONFIG_CFG80211_CRDA_SUPPORT
	{
		.cmd = NL80211_CMD_SET_REG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_reg,
		.flags = GENL_ADMIN_PERM,
	},
#endif
	{
		.cmd = NL80211_CMD_REQ_SET_REG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_req_set_reg,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = NL80211_CMD_RELOAD_REGDB,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_reload_regdb,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = NL80211_CMD_GET_MESH_CONFIG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_get_mesh_config,
		/* can be retrieved by unprivileged users */
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_SET_MESH_CONFIG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_update_mesh_config,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_TRIGGER_SCAN,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_trigger_scan,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
	},
	{
		.cmd = NL80211_CMD_ABORT_SCAN,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_abort_scan,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
	},
	/* dump-only entry: no .doit handler is registered */
	{
		.cmd = NL80211_CMD_GET_SCAN,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.dumpit = nl80211_dump_scan,
	},
	{
		.cmd = NL80211_CMD_START_SCHED_SCAN,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_start_sched_scan,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_STOP_SCHED_SCAN,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_stop_sched_scan,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_AUTHENTICATE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_authenticate,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_CLEAR_SKB),
	},
	{
		.cmd = NL80211_CMD_ASSOCIATE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_associate,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_CLEAR_SKB),
	},
	{
		.cmd = NL80211_CMD_DEAUTHENTICATE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_deauthenticate,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_DISASSOCIATE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_disassociate,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_JOIN_IBSS,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_join_ibss,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_LEAVE_IBSS,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_leave_ibss,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
#ifdef CONFIG_NL80211_TESTMODE
	{
		.cmd = NL80211_CMD_TESTMODE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_testmode_do,
		.dumpit = nl80211_testmode_dump,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
	},
#endif
	{
		.cmd = NL80211_CMD_CONNECT,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_connect,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_CLEAR_SKB),
	},
	{
		.cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_update_connect_params,
		.flags = GENL_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_CLEAR_SKB),
	},
	{
		.cmd = NL80211_CMD_DISCONNECT,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_disconnect,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_SET_WIPHY_NETNS,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_wiphy_netns,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY |
					 NL80211_FLAG_NEED_RTNL |
					 NL80211_FLAG_NO_WIPHY_MTX),
	},
	/* dump-only entry: no .doit handler is registered */
	{
		.cmd = NL80211_CMD_GET_SURVEY,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.dumpit = nl80211_dump_survey,
	},
	/* SET_PMKSA and DEL_PMKSA share one handler; only SET clears the skb */
	{
		.cmd = NL80211_CMD_SET_PMKSA,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_setdel_pmksa,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_CLEAR_SKB),
	},
	{
		.cmd = NL80211_CMD_DEL_PMKSA,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_setdel_pmksa,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_FLUSH_PMKSA,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_flush_pmksa,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_REMAIN_ON_CHANNEL,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_remain_on_channel,
		.flags = GENL_UNS_ADMIN_PERM,
		/* FIXME: requiring a link ID here is probably not good */
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP |
					 NL80211_FLAG_MLO_VALID_LINK_ID),
	},
	{
		.cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_cancel_remain_on_channel,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
	},
	{
		.cmd = NL80211_CMD_SET_TX_BITRATE_MASK,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_tx_bitrate_mask,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV |
					 NL80211_FLAG_MLO_VALID_LINK_ID),
	},
	{
		.cmd = NL80211_CMD_REGISTER_FRAME,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_register_mgmt,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV),
	},
	{
		.cmd = NL80211_CMD_FRAME,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_tx_mgmt,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
	},
	{
		.cmd = NL80211_CMD_FRAME_WAIT_CANCEL,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_tx_mgmt_cancel_wait,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
	},
	{
		.cmd = NL80211_CMD_SET_POWER_SAVE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_power_save,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
	},
	{
		.cmd = NL80211_CMD_GET_POWER_SAVE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_get_power_save,
		/* can be retrieved by unprivileged users */
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
	},
	{
		.cmd = NL80211_CMD_SET_CQM,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_cqm,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
	},
	{
		.cmd = NL80211_CMD_SET_CHANNEL,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_channel,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV |
					 NL80211_FLAG_MLO_VALID_LINK_ID),
	},
	{
		.cmd = NL80211_CMD_JOIN_MESH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_join_mesh,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_LEAVE_MESH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_leave_mesh,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_JOIN_OCB,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_join_ocb,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_LEAVE_OCB,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_leave_ocb,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
#ifdef CONFIG_PM
	{
		.cmd = NL80211_CMD_GET_WOWLAN,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_get_wowlan,
		/* can be retrieved by unprivileged users */
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
	},
	{
		.cmd = NL80211_CMD_SET_WOWLAN,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_wowlan,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
	},
#endif
	{
		.cmd = NL80211_CMD_SET_REKEY_OFFLOAD,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_rekey_data,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_CLEAR_SKB),
	},
	{
		.cmd = NL80211_CMD_TDLS_MGMT,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_tdls_mgmt,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_MLO_VALID_LINK_ID),
	},
	{
		.cmd = NL80211_CMD_TDLS_OPER,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_tdls_oper,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_UNEXPECTED_FRAME,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_register_unexpected_frame,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
	},
	{
		.cmd = NL80211_CMD_PROBE_CLIENT,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_probe_client,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_REGISTER_BEACONS,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_register_beacons,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
	},
	{
		.cmd = NL80211_CMD_SET_NOACK_MAP,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_noack_map,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
	},
	/* NL80211_FLAG_NEED_RTNL: RTNL stays held until nl80211_post_doit() */
	{
		.cmd = NL80211_CMD_START_P2P_DEVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_start_p2p_device,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV |
					 NL80211_FLAG_NEED_RTNL),
	},
	{
		.cmd = NL80211_CMD_STOP_P2P_DEVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_stop_p2p_device,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP |
					 NL80211_FLAG_NEED_RTNL),
	},
	{
		.cmd = NL80211_CMD_START_NAN,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_start_nan,
		.flags = GENL_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV |
					 NL80211_FLAG_NEED_RTNL),
	},
	{
		.cmd = NL80211_CMD_STOP_NAN,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_stop_nan,
		.flags = GENL_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP |
					 NL80211_FLAG_NEED_RTNL),
	},
	{
		.cmd = NL80211_CMD_ADD_NAN_FUNCTION,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_nan_add_func,
		.flags = GENL_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
	},
	{
		.cmd = NL80211_CMD_DEL_NAN_FUNCTION,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_nan_del_func,
		.flags = GENL_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
	},
	{
		.cmd = NL80211_CMD_CHANGE_NAN_CONFIG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_nan_change_config,
		.flags = GENL_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
	},
	{
		.cmd = NL80211_CMD_SET_MCAST_RATE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_mcast_rate,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
	},
	/*
	 * NL80211_FLAG_MLO_UNSUPPORTED: nl80211_pre_doit() rejects the request
	 * if a link ID attribute is present or the interface has valid links.
	 */
	{
		.cmd = NL80211_CMD_SET_MAC_ACL,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_mac_acl,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV |
					 NL80211_FLAG_MLO_UNSUPPORTED),
	},
	{
		.cmd = NL80211_CMD_RADAR_DETECT,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_start_radar_detection,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_NO_WIPHY_MTX |
					 NL80211_FLAG_MLO_UNSUPPORTED),
	},
	{
		.cmd = NL80211_CMD_GET_PROTOCOL_FEATURES,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_get_protocol_features,
	},
	{
		.cmd = NL80211_CMD_UPDATE_FT_IES,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_update_ft_ies,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_CRIT_PROTOCOL_START,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_crit_protocol_start,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
	},
	{
		.cmd = NL80211_CMD_CRIT_PROTOCOL_STOP,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_crit_protocol_stop,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
	},
	{
		.cmd = NL80211_CMD_GET_COALESCE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_get_coalesce,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
	},
	{
		.cmd = NL80211_CMD_SET_COALESCE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_coalesce,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
	},
	{
		.cmd = NL80211_CMD_CHANNEL_SWITCH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_channel_switch,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_MLO_VALID_LINK_ID),
	},
	{
		.cmd = NL80211_CMD_VENDOR,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_vendor_cmd,
		.dumpit = nl80211_vendor_cmd_dump,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY |
					 NL80211_FLAG_CLEAR_SKB),
	},
	{
		.cmd = NL80211_CMD_SET_QOS_MAP,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_qos_map,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_ADD_TX_TS,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_add_tx_ts,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_MLO_UNSUPPORTED),
	},
	{
		.cmd = NL80211_CMD_DEL_TX_TS,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_del_tx_ts,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_tdls_channel_switch,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_tdls_cancel_channel_switch,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_multicast_to_unicast,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
	},
	/*
	 * NOTE(review): SET_PMK and DEL_PMK set no .flags, unlike the other
	 * state-changing entries around them - confirm this is intentional.
	 */
	{
		.cmd = NL80211_CMD_SET_PMK,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_pmk,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_CLEAR_SKB),
	},
	{
		.cmd = NL80211_CMD_DEL_PMK,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_del_pmk,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_EXTERNAL_AUTH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_external_auth,
		.flags = GENL_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_CONTROL_PORT_FRAME,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_tx_control_port,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_get_ftm_responder_stats,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV |
					 NL80211_FLAG_MLO_VALID_LINK_ID),
	},
	{
		.cmd = NL80211_CMD_PEER_MEASUREMENT_START,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_pmsr_start,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
	},
	{
		.cmd = NL80211_CMD_NOTIFY_RADAR,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_notify_radar_detection,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	/*
	 * Several entries from here on omit .validate, i.e. they set none of
	 * the GENL_DONT_VALIDATE_* bits.
	 */
	{
		.cmd = NL80211_CMD_UPDATE_OWE_INFO,
		.doit = nl80211_update_owe_info,
		.flags = GENL_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_PROBE_MESH_LINK,
		.doit = nl80211_probe_mesh_link,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_SET_TID_CONFIG,
		.doit = nl80211_set_tid_config,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV |
					 NL80211_FLAG_MLO_VALID_LINK_ID),
	},
	{
		.cmd = NL80211_CMD_SET_SAR_SPECS,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_sar_specs,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY |
					 NL80211_FLAG_NEED_RTNL),
	},
	{
		.cmd = NL80211_CMD_COLOR_CHANGE_REQUEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_color_change,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_SET_FILS_AAD,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nl80211_set_fils_aad,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_ADD_LINK,
		.doit = nl80211_add_link,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
	},
	{
		.cmd = NL80211_CMD_REMOVE_LINK,
		.doit = nl80211_remove_link,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_MLO_VALID_LINK_ID),
	},
	{
		.cmd = NL80211_CMD_ADD_LINK_STA,
		.doit = nl80211_add_link_station,
		.flags = GENL_UNS_ADMIN_PERM,
		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
					 NL80211_FLAG_MLO_VALID_LINK_ID),
	},
	{
		.cmd =
NL80211_CMD_MODIFY_LINK_STA, .doit = nl80211_modify_link_station, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_REMOVE_LINK_STA, .doit = nl80211_remove_link_station, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_SET_HW_TIMESTAMP, .doit = nl80211_set_hw_timestamp, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, }; static struct genl_family nl80211_fam __ro_after_init = { .name = NL80211_GENL_NAME, /* have users key off the name instead */ .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ .maxattr = NL80211_ATTR_MAX, .policy = nl80211_policy, .netnsok = true, .pre_doit = nl80211_pre_doit, .post_doit = nl80211_post_doit, .module = THIS_MODULE, .ops = nl80211_ops, .n_ops = ARRAY_SIZE(nl80211_ops), .small_ops = nl80211_small_ops, .n_small_ops = ARRAY_SIZE(nl80211_small_ops), .resv_start_op = NL80211_CMD_REMOVE_LINK_STA + 1, .mcgrps = nl80211_mcgrps, .n_mcgrps = ARRAY_SIZE(nl80211_mcgrps), .parallel_ops = true, }; /* notification functions */ void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, enum nl80211_commands cmd) { struct sk_buff *msg; struct nl80211_dump_wiphy_state state = {}; WARN_ON(cmd != NL80211_CMD_NEW_WIPHY && cmd != NL80211_CMD_DEL_WIPHY); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; if (nl80211_send_wiphy(rdev, cmd, msg, 0, 0, 0, &state) < 0) { nlmsg_free(msg); return; } genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_CONFIG, GFP_KERNEL); } void nl80211_notify_iface(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_commands cmd) { struct sk_buff *msg; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, cmd) < 0) { 
nlmsg_free(msg);
		return;
	}

	/* Deliver on the config group in the wiphy's network namespace. */
	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_CONFIG, GFP_KERNEL);
}

/*
 * nl80211_add_scan_req - append the parameters of rdev->scan_req to @msg
 * @msg: message being built
 * @rdev: registered device whose scan request is described
 *
 * Emits, in this order: the SSID list, the channel list (in kHz under
 * NL80211_ATTR_SCAN_FREQ_KHZ when NL80211_SCAN_FLAG_FREQ_KHZ is set in the
 * request flags, otherwise the channels' center_freq values under
 * NL80211_ATTR_SCAN_FREQUENCIES), the extra IEs, the scan flags and, when a
 * scan start TSF is recorded, the start time and its BSSID.
 *
 * Return: 0 on success, or when there is no scan request (after a warning);
 * -ENOBUFS if starting a nest or adding an attribute failed.
 */
static int nl80211_add_scan_req(struct sk_buff *msg,
				struct cfg80211_registered_device *rdev)
{
	struct cfg80211_scan_request *req = rdev->scan_req;
	struct nlattr *nest;
	int i;
	struct cfg80211_scan_info *info;

	if (WARN_ON(!req))
		return 0;

	/* Each SSID uses its array index as the nested attribute type. */
	nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_SSIDS);
	if (!nest)
		goto nla_put_failure;
	for (i = 0; i < req->n_ssids; i++) {
		if (nla_put(msg, i, req->ssids[i].ssid_len, req->ssids[i].ssid))
			goto nla_put_failure;
	}
	nla_nest_end(msg, nest);

	/* Exactly one of the two channel-list encodings is emitted. */
	if (req->flags & NL80211_SCAN_FLAG_FREQ_KHZ) {
		nest = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQ_KHZ);
		if (!nest)
			goto nla_put_failure;
		for (i = 0; i < req->n_channels; i++) {
			if (nla_put_u32(msg, i,
					ieee80211_channel_to_khz(req->channels[i])))
				goto nla_put_failure;
		}
		nla_nest_end(msg, nest);
	} else {
		nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_FREQUENCIES);
		if (!nest)
			goto nla_put_failure;
		for (i = 0; i < req->n_channels; i++) {
			if (nla_put_u32(msg, i, req->channels[i]->center_freq))
				goto nla_put_failure;
		}
		nla_nest_end(msg, nest);
	}

	/* IEs and flags are optional: omitted when NULL / zero. */
	if (req->ie &&
	    nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie))
		goto nla_put_failure;

	if (req->flags &&
	    nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags))
		goto nla_put_failure;

	/* Use the internal scan request's info when one is set. */
	info = rdev->int_scan_req ?
&rdev->int_scan_req->info : &rdev->scan_req->info; if (info->scan_start_tsf && (nla_put_u64_64bit(msg, NL80211_ATTR_SCAN_START_TIME_TSF, info->scan_start_tsf, NL80211_BSS_PAD) || nla_put(msg, NL80211_ATTR_SCAN_START_TIME_TSF_BSSID, ETH_ALEN, info->tsf_bssid))) goto nla_put_failure; return 0; nla_put_failure: return -ENOBUFS; } static int nl80211_prep_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u32 portid, u32 seq, int flags, u32 cmd) { void *hdr; hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD)) goto nla_put_failure; /* ignore errors and send incomplete event anyway */ nl80211_add_scan_req(msg, rdev); genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nl80211_prep_sched_scan_msg(struct sk_buff *msg, struct cfg80211_sched_scan_request *req, u32 cmd) { void *hdr; hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); if (!hdr) return -1; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, wiphy_to_rdev(req->wiphy)->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, req->dev->ifindex) || nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, req->reqid, NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct sk_buff *msg; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; if (nl80211_prep_scan_msg(msg, rdev, wdev, 0, 0, 0, NL80211_CMD_TRIGGER_SCAN) < 0) { nlmsg_free(msg); return; } genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_SCAN, GFP_KERNEL); } struct sk_buff *nl80211_build_scan_msg(struct 
cfg80211_registered_device *rdev, struct wireless_dev *wdev, bool aborted) { struct sk_buff *msg; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return NULL; if (nl80211_prep_scan_msg(msg, rdev, wdev, 0, 0, 0, aborted ? NL80211_CMD_SCAN_ABORTED : NL80211_CMD_NEW_SCAN_RESULTS) < 0) { nlmsg_free(msg); return NULL; } return msg; } /* send message created by nl80211_build_scan_msg() */ void nl80211_send_scan_msg(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { if (!msg) return; genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_SCAN, GFP_KERNEL); } void nl80211_send_sched_scan(struct cfg80211_sched_scan_request *req, u32 cmd) { struct sk_buff *msg; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; if (nl80211_prep_sched_scan_msg(msg, req, cmd) < 0) { nlmsg_free(msg); return; } genlmsg_multicast_netns(&nl80211_fam, wiphy_net(req->wiphy), msg, 0, NL80211_MCGRP_SCAN, GFP_KERNEL); } static bool nl80211_reg_change_event_fill(struct sk_buff *msg, struct regulatory_request *request) { /* Userspace can always count this one always being set */ if (nla_put_u8(msg, NL80211_ATTR_REG_INITIATOR, request->initiator)) goto nla_put_failure; if (request->alpha2[0] == '0' && request->alpha2[1] == '0') { if (nla_put_u8(msg, NL80211_ATTR_REG_TYPE, NL80211_REGDOM_TYPE_WORLD)) goto nla_put_failure; } else if (request->alpha2[0] == '9' && request->alpha2[1] == '9') { if (nla_put_u8(msg, NL80211_ATTR_REG_TYPE, NL80211_REGDOM_TYPE_CUSTOM_WORLD)) goto nla_put_failure; } else if ((request->alpha2[0] == '9' && request->alpha2[1] == '8') || request->intersect) { if (nla_put_u8(msg, NL80211_ATTR_REG_TYPE, NL80211_REGDOM_TYPE_INTERSECTION)) goto nla_put_failure; } else { if (nla_put_u8(msg, NL80211_ATTR_REG_TYPE, NL80211_REGDOM_TYPE_COUNTRY) || nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, request->alpha2)) goto nla_put_failure; } if (request->wiphy_idx != WIPHY_IDX_INVALID) { struct wiphy *wiphy = 
wiphy_idx_to_wiphy(request->wiphy_idx); if (wiphy && nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) goto nla_put_failure; if (wiphy && wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) goto nla_put_failure; } return true; nla_put_failure: return false; } /* * This can happen on global regulatory changes or device specific settings * based on custom regulatory domains. */ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, struct regulatory_request *request) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, cmd_id); if (!hdr) goto nla_put_failure; if (!nl80211_reg_change_event_fill(msg, request)) goto nla_put_failure; genlmsg_end(msg, hdr); rcu_read_lock(); genlmsg_multicast_allns(&nl80211_fam, msg, 0, NL80211_MCGRP_REGULATORY, GFP_ATOMIC); rcu_read_unlock(); return; nla_put_failure: nlmsg_free(msg); } static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, enum nl80211_commands cmd, gfp_t gfp, int uapsd_queues, const u8 *req_ies, size_t req_ies_len, bool reconnect) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(100 + len + req_ies_len, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put(msg, NL80211_ATTR_FRAME, len, buf) || (req_ies && nla_put(msg, NL80211_ATTR_REQ_IE, req_ies_len, req_ies))) goto nla_put_failure; if (reconnect && nla_put_flag(msg, NL80211_ATTR_RECONNECT_REQUESTED)) goto nla_put_failure; if (uapsd_queues >= 0) { struct nlattr *nla_wmm = nla_nest_start_noflag(msg, NL80211_ATTR_STA_WME); if (!nla_wmm) goto nla_put_failure; if (nla_put_u8(msg, NL80211_STA_WME_UAPSD_QUEUES, uapsd_queues)) goto nla_put_failure; 
nla_nest_end(msg, nla_wmm);
	}

	genlmsg_end(msg, hdr);

	/* Deliver on the MLME group in the wiphy's network namespace. */
	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

 nla_put_failure:
	nlmsg_free(msg);
}

/*
 * nl80211_send_rx_auth - report a frame as NL80211_CMD_AUTHENTICATE
 *
 * No U-APSD information (-1), no request IEs, no reconnect flag.
 */
void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
			  struct net_device *netdev, const u8 *buf,
			  size_t len, gfp_t gfp)
{
	nl80211_send_mlme_event(rdev, netdev, buf, len,
				NL80211_CMD_AUTHENTICATE, gfp, -1, NULL, 0,
				false);
}

/*
 * nl80211_send_rx_assoc - report a frame as NL80211_CMD_ASSOCIATE
 *
 * Frame, U-APSD queues and request IEs all come from @data; the message is
 * always allocated with GFP_KERNEL.
 */
void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
			   struct net_device *netdev,
			   struct cfg80211_rx_assoc_resp_data *data)
{
	nl80211_send_mlme_event(rdev, netdev, data->buf, data->len,
				NL80211_CMD_ASSOCIATE, GFP_KERNEL,
				data->uapsd_queues,
				data->req_ies, data->req_ies_len, false);
}

/*
 * nl80211_send_deauth - report a frame as NL80211_CMD_DEAUTHENTICATE
 *
 * @reconnect is forwarded so the event can carry the reconnect-requested
 * flag.
 */
void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
			 struct net_device *netdev, const u8 *buf,
			 size_t len, bool reconnect, gfp_t gfp)
{
	nl80211_send_mlme_event(rdev, netdev, buf, len,
				NL80211_CMD_DEAUTHENTICATE, gfp, -1, NULL, 0,
				reconnect);
}

/*
 * nl80211_send_disassoc - report a frame as NL80211_CMD_DISASSOCIATE
 *
 * @reconnect is forwarded so the event can carry the reconnect-requested
 * flag.
 */
void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
			   struct net_device *netdev, const u8 *buf,
			   size_t len, bool reconnect, gfp_t gfp)
{
	nl80211_send_mlme_event(rdev, netdev, buf, len,
				NL80211_CMD_DISASSOCIATE, gfp, -1, NULL, 0,
				reconnect);
}

/*
 * cfg80211_rx_unprot_mlme_mgmt - report an unprotected deauth/disassoc/beacon
 * @dev: interface the frame was received on
 * @buf: frame contents
 * @len: frame length; fewer than 2 bytes triggers a warning and is ignored
 *
 * Only deauthentication, disassociation and beacon frames are reported; any
 * other frame type is dropped silently. Beacon reports are rate limited: a
 * new one is suppressed while fewer than 10000 ms have elapsed since the
 * last reported one.
 */
void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
				  size_t len)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	const struct ieee80211_mgmt *mgmt = (void *)buf;
	u32 cmd;

	/* frame_control is read below, so at least 2 bytes are required. */
	if (WARN_ON(len < 2))
		return;

	if (ieee80211_is_deauth(mgmt->frame_control)) {
		cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE;
	} else if (ieee80211_is_disassoc(mgmt->frame_control)) {
		cmd = NL80211_CMD_UNPROT_DISASSOCIATE;
	} else if (ieee80211_is_beacon(mgmt->frame_control)) {
		if (wdev->unprot_beacon_reported &&
		    elapsed_jiffies_msecs(wdev->unprot_beacon_reported) < 10000)
			return;
		cmd = NL80211_CMD_UNPROT_BEACON;
		/* Remember when this beacon was reported for the rate limit. */
		wdev->unprot_beacon_reported = jiffies;
	} else {
		return;
} trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len); nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1, NULL, 0, false); } EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt); static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, int cmd, const u8 *addr, gfp_t gfp) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put_flag(msg, NL80211_ATTR_TIMED_OUT) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: nlmsg_free(msg); } void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, gfp_t gfp) { nl80211_send_mlme_timeout(rdev, netdev, NL80211_CMD_AUTHENTICATE, addr, gfp); } void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, gfp_t gfp) { nl80211_send_mlme_timeout(rdev, netdev, NL80211_CMD_ASSOCIATE, addr, gfp); } void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, struct net_device *netdev, struct cfg80211_connect_resp_params *cr, gfp_t gfp) { struct sk_buff *msg; void *hdr; unsigned int link; size_t link_info_size = 0; const u8 *connected_addr = cr->valid_links ? cr->ap_mld_addr : cr->links[0].bssid; if (cr->valid_links) { for_each_valid_link(cr, link) { /* Nested attribute header */ link_info_size += NLA_HDRLEN; /* Link ID */ link_info_size += nla_total_size(sizeof(u8)); link_info_size += cr->links[link].addr ? nla_total_size(ETH_ALEN) : 0; link_info_size += (cr->links[link].bssid || cr->links[link].bss) ? 
nla_total_size(ETH_ALEN) : 0;
			/* Per-link status code */
			link_info_size += nla_total_size(sizeof(u16));
		}
	}

	/*
	 * 100 bytes of fixed room plus every variable-length payload that may
	 * be added below.
	 */
	msg = nlmsg_new(100 + cr->req_ie_len + cr->resp_ie_len +
			cr->fils.kek_len + cr->fils.pmk_len +
			(cr->fils.pmkid ? WLAN_PMKID_LEN : 0) + link_info_size,
			gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONNECT);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
	    (connected_addr &&
	     nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, connected_addr)) ||
	    /* A negative status is reported as an unspecified failure ... */
	    nla_put_u16(msg, NL80211_ATTR_STATUS_CODE,
			cr->status < 0 ? WLAN_STATUS_UNSPECIFIED_FAILURE :
			cr->status) ||
	    /* ... and additionally flagged as a timeout with its reason. */
	    (cr->status < 0 &&
	     (nla_put_flag(msg, NL80211_ATTR_TIMED_OUT) ||
	      nla_put_u32(msg, NL80211_ATTR_TIMEOUT_REASON,
			  cr->timeout_reason))) ||
	    (cr->req_ie &&
	     nla_put(msg, NL80211_ATTR_REQ_IE, cr->req_ie_len, cr->req_ie)) ||
	    (cr->resp_ie &&
	     nla_put(msg, NL80211_ATTR_RESP_IE, cr->resp_ie_len,
		     cr->resp_ie)) ||
	    (cr->fils.update_erp_next_seq_num &&
	     nla_put_u16(msg, NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM,
			 cr->fils.erp_next_seq_num)) ||
	    /* FILS key material is only included for a successful connect. */
	    (cr->status == WLAN_STATUS_SUCCESS &&
	     ((cr->fils.kek &&
	       nla_put(msg, NL80211_ATTR_FILS_KEK, cr->fils.kek_len,
		       cr->fils.kek)) ||
	      (cr->fils.pmk &&
	       nla_put(msg, NL80211_ATTR_PMK, cr->fils.pmk_len, cr->fils.pmk)) ||
	      (cr->fils.pmkid &&
	       nla_put(msg, NL80211_ATTR_PMKID, WLAN_PMKID_LEN, cr->fils.pmkid)))))
		goto nla_put_failure;

	if (cr->valid_links) {
		/* Nested entries are numbered from 1, independent of link id. */
		int i = 1;
		struct nlattr *nested;

		nested = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS);
		if (!nested)
			goto nla_put_failure;

		for_each_valid_link(cr, link) {
			struct nlattr *nested_mlo_links;
			/* Prefer the BSSID of the link's bss entry when set. */
			const u8 *bssid = cr->links[link].bss ?
cr->links[link].bss->bssid :
					  cr->links[link].bssid;

			nested_mlo_links = nla_nest_start(msg, i);
			if (!nested_mlo_links)
				goto nla_put_failure;

			/* BSSID and link address are optional per link. */
			if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link) ||
			    (bssid &&
			     nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, bssid)) ||
			    (cr->links[link].addr &&
			     nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN,
				     cr->links[link].addr)) ||
			    nla_put_u16(msg, NL80211_ATTR_STATUS_CODE,
					cr->links[link].status))
				goto nla_put_failure;

			nla_nest_end(msg, nested_mlo_links);
			i++;
		}
		nla_nest_end(msg, nested);
	}

	genlmsg_end(msg, hdr);

	/* Deliver on the MLME group in the wiphy's network namespace. */
	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

 nla_put_failure:
	nlmsg_free(msg);
}

/*
 * nl80211_send_roamed - multicast NL80211_CMD_ROAM
 * @rdev: registered device
 * @netdev: interface that roamed
 * @info: roam information
 * @gfp: allocation flags
 *
 * The reported MAC is the AP MLD address when set; otherwise the BSSID of
 * link 0, taken from its bss entry when present. With valid links, one
 * nested entry per link is added under NL80211_ATTR_MLO_LINKS. If the
 * message cannot be allocated or filled, nothing is sent.
 */
void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
			 struct net_device *netdev,
			 struct cfg80211_roam_info *info, gfp_t gfp)
{
	struct sk_buff *msg;
	void *hdr;
	size_t link_info_size = 0;
	unsigned int link;
	const u8 *connected_addr = info->ap_mld_addr ?
				   info->ap_mld_addr :
				   (info->links[0].bss ?
				    info->links[0].bss->bssid :
				    info->links[0].bssid);

	/* Pre-compute the room needed for the per-link nested attributes. */
	if (info->valid_links) {
		for_each_valid_link(info, link) {
			/* Nested attribute header */
			link_info_size += NLA_HDRLEN;
			/* Link ID */
			link_info_size += nla_total_size(sizeof(u8));
			link_info_size += info->links[link].addr ?
					  nla_total_size(ETH_ALEN) : 0;
			link_info_size += (info->links[link].bssid ||
					   info->links[link].bss) ?
					  nla_total_size(ETH_ALEN) : 0;
		}
	}

	/*
	 * 100 bytes of fixed room plus every variable-length payload that may
	 * be added below.
	 */
	msg = nlmsg_new(100 + info->req_ie_len + info->resp_ie_len +
			info->fils.kek_len + info->fils.pmk_len +
			(info->fils.pmkid ?
WLAN_PMKID_LEN : 0) + link_info_size, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ROAM);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	/* The MAC is mandatory here; IEs and FILS data only when present. */
	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, connected_addr) ||
	    (info->req_ie &&
	     nla_put(msg, NL80211_ATTR_REQ_IE, info->req_ie_len,
		     info->req_ie)) ||
	    (info->resp_ie &&
	     nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len,
		     info->resp_ie)) ||
	    (info->fils.update_erp_next_seq_num &&
	     nla_put_u16(msg, NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM,
			 info->fils.erp_next_seq_num)) ||
	    (info->fils.kek &&
	     nla_put(msg, NL80211_ATTR_FILS_KEK, info->fils.kek_len,
		     info->fils.kek)) ||
	    (info->fils.pmk &&
	     nla_put(msg, NL80211_ATTR_PMK, info->fils.pmk_len, info->fils.pmk)) ||
	    (info->fils.pmkid &&
	     nla_put(msg, NL80211_ATTR_PMKID, WLAN_PMKID_LEN, info->fils.pmkid)))
		goto nla_put_failure;

	if (info->valid_links) {
		/* Nested entries are numbered from 1, independent of link id. */
		int i = 1;
		struct nlattr *nested;

		nested = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS);
		if (!nested)
			goto nla_put_failure;

		for_each_valid_link(info, link) {
			struct nlattr *nested_mlo_links;
			/* Prefer the BSSID of the link's bss entry when set. */
			const u8 *bssid = info->links[link].bss ?
info->links[link].bss->bssid : info->links[link].bssid; nested_mlo_links = nla_nest_start(msg, i); if (!nested_mlo_links) goto nla_put_failure; if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link) || (bssid && nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, bssid)) || (info->links[link].addr && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, info->links[link].addr))) goto nla_put_failure; nla_nest_end(msg, nested_mlo_links); i++; } nla_nest_end(msg, nested); } genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: nlmsg_free(msg); } void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer_addr)) goto nla_put_failure; if ((td_bitmap_len > 0) && td_bitmap) if (nla_put(msg, NL80211_ATTR_TD_BITMAP, td_bitmap_len, td_bitmap)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); return; nla_put_failure: nlmsg_free(msg); } void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(100 + ie_len, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DISCONNECT); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || (reason && nla_put_u16(msg, NL80211_ATTR_REASON_CODE, 
reason)) || (from_ap && nla_put_flag(msg, NL80211_ATTR_DISCONNECTED_BY_AP)) || (ie && nla_put(msg, NL80211_ATTR_IE, ie_len, ie))) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); return; nla_put_failure: nlmsg_free(msg); } void cfg80211_links_removed(struct net_device *dev, u16 link_mask) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct nlattr *links; void *hdr; lockdep_assert_wiphy(wdev->wiphy); trace_cfg80211_links_removed(dev, link_mask); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; if (WARN_ON(!wdev->valid_links || !link_mask || (wdev->valid_links & link_mask) != link_mask || wdev->valid_links == link_mask)) return; cfg80211_wdev_release_link_bsses(wdev, link_mask); wdev->valid_links &= ~link_mask; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_LINKS_REMOVED); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; links = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS); if (!links) goto nla_put_failure; while (link_mask) { struct nlattr *link; int link_id = __ffs(link_mask); link = nla_nest_start(msg, link_id + 1); if (!link) goto nla_put_failure; if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)) goto nla_put_failure; nla_nest_end(msg, link); link_mask &= ~(1 << link_id); } nla_nest_end(msg, links); genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); return; nla_put_failure: nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_links_removed); void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, struct net_device 
*netdev, const u8 *bssid, gfp_t gfp) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_JOIN_IBSS); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: nlmsg_free(msg); } void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr, const u8 *ie, u8 ie_len, int sig_dbm, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT)) return; trace_cfg80211_notify_new_peer_candidate(dev, addr); msg = nlmsg_new(100 + ie_len, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NEW_PEER_CANDIDATE); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || (ie_len && ie && nla_put(msg, NL80211_ATTR_IE, ie_len, ie)) || (sig_dbm && nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm))) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate); void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, enum nl80211_key_type key_type, int key_id, const u8 *tsc, gfp_t gfp) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, 
NL80211_CMD_MICHAEL_MIC_FAILURE); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || (addr && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr)) || nla_put_u32(msg, NL80211_ATTR_KEY_TYPE, key_type) || (key_id != -1 && nla_put_u8(msg, NL80211_ATTR_KEY_IDX, key_id)) || (tsc && nla_put(msg, NL80211_ATTR_KEY_SEQ, 6, tsc))) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: nlmsg_free(msg); } void nl80211_send_beacon_hint_event(struct wiphy *wiphy, struct ieee80211_channel *channel_before, struct ieee80211_channel *channel_after) { struct sk_buff *msg; void *hdr; struct nlattr *nl_freq; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_REG_BEACON_HINT); if (!hdr) { nlmsg_free(msg); return; } /* * Since we are applying the beacon hint to a wiphy we know its * wiphy_idx is valid */ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) goto nla_put_failure; /* Before */ nl_freq = nla_nest_start_noflag(msg, NL80211_ATTR_FREQ_BEFORE); if (!nl_freq) goto nla_put_failure; if (nl80211_msg_put_channel(msg, wiphy, channel_before, false)) goto nla_put_failure; nla_nest_end(msg, nl_freq); /* After */ nl_freq = nla_nest_start_noflag(msg, NL80211_ATTR_FREQ_AFTER); if (!nl_freq) goto nla_put_failure; if (nl80211_msg_put_channel(msg, wiphy, channel_after, false)) goto nla_put_failure; nla_nest_end(msg, nl_freq); genlmsg_end(msg, hdr); rcu_read_lock(); genlmsg_multicast_allns(&nl80211_fam, msg, 0, NL80211_MCGRP_REGULATORY, GFP_ATOMIC); rcu_read_unlock(); return; nla_put_failure: nlmsg_free(msg); } static void nl80211_send_remain_on_chan_event( int cmd, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, unsigned int duration, gfp_t gfp) { 
struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, chan->center_freq) || nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, NL80211_CHAN_NO_HT) || nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, NL80211_ATTR_PAD)) goto nla_put_failure; if (cmd == NL80211_CMD_REMAIN_ON_CHANNEL && nla_put_u32(msg, NL80211_ATTR_DURATION, duration)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: nlmsg_free(msg); } void cfg80211_assoc_comeback(struct net_device *netdev, const u8 *ap_addr, u32 timeout) { struct wireless_dev *wdev = netdev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; trace_cfg80211_assoc_comeback(wdev, ap_addr, timeout); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ASSOC_COMEBACK); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ap_addr) || nla_put_u32(msg, NL80211_ATTR_TIMEOUT, timeout)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); return; nla_put_failure: nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_assoc_comeback); void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, unsigned int duration, gfp_t gfp) { 
struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration); nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, rdev, wdev, cookie, chan, duration, gfp); } EXPORT_SYMBOL(cfg80211_ready_on_channel); void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan); nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, rdev, wdev, cookie, chan, 0, gfp); } EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); void cfg80211_tx_mgmt_expired(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_tx_mgmt_expired(wdev, cookie, chan); nl80211_send_remain_on_chan_event(NL80211_CMD_FRAME_WAIT_CANCEL, rdev, wdev, cookie, chan, 0, gfp); } EXPORT_SYMBOL(cfg80211_tx_mgmt_expired); void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; trace_cfg80211_new_sta(dev, mac_addr, sinfo); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, 0, 0, 0, rdev, dev, mac_addr, sinfo) < 0) { nlmsg_free(msg); return; } genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); } EXPORT_SYMBOL(cfg80211_new_sta); void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); 
struct sk_buff *msg;
	struct station_info empty_sinfo = {};

	/* Callers may pass no sinfo; use an all-zero one in that case. */
	if (!sinfo)
		sinfo = &empty_sinfo;

	trace_cfg80211_del_sta(dev, mac_addr);

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg) {
		/* sinfo content is released here because it is never sent. */
		cfg80211_sinfo_release_content(sinfo);
		return;
	}

	/*
	 * NOTE(review): sinfo is not released on this failure path here;
	 * presumably nl80211_send_station() does so itself - confirm.
	 */
	if (nl80211_send_station(msg, NL80211_CMD_DEL_STATION, 0, 0, 0,
				 rdev, dev, mac_addr, sinfo) < 0) {
		nlmsg_free(msg);
		return;
	}

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
}
EXPORT_SYMBOL(cfg80211_del_sta_sinfo);

/*
 * cfg80211_conn_failed - multicast an NL80211_CMD_CONN_FAILED event
 * @dev: interface whose ifindex is reported
 * @mac_addr: address placed in NL80211_ATTR_MAC
 * @reason: value placed in NL80211_ATTR_CONN_FAILED_REASON
 * @gfp: allocation flags
 *
 * Unlike most events in this file, no NL80211_ATTR_WIPHY is added and the
 * message is allocated with NLMSG_GOODSIZE.
 */
void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
			  enum nl80211_connect_failed_reason reason,
			  gfp_t gfp)
{
	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONN_FAILED);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr) ||
	    nla_put_u32(msg, NL80211_ATTR_CONN_FAILED_REASON, reason))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

 nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_conn_failed);

/*
 * __nl80211_unexpected_frame - unicast @cmd to the socket stored in
 * wdev->ap_unexpected_nlportid.
 *
 * Returns false only when no such port id is set.  Once a port id is
 * present the function returns true even if building the message fails.
 */
static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
				       const u8 *addr, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct sk_buff *msg;
	void *hdr;
	u32 nlportid = READ_ONCE(wdev->ap_unexpected_nlportid);

	if (!nlportid)
		return false;

	msg = nlmsg_new(100, gfp);
	if (!msg)
		return true;

	hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
	if (!hdr) {
		nlmsg_free(msg);
		return true;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr))
		goto nla_put_failure;

	genlmsg_end(msg,
hdr);
	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
	return true;

 nla_put_failure:
	nlmsg_free(msg);
	return true;
}

/*
 * cfg80211_rx_spurious_frame - report NL80211_CMD_UNEXPECTED_FRAME
 *
 * Warns and returns false unless the interface type is AP or P2P_GO;
 * otherwise returns the result of __nl80211_unexpected_frame().
 */
bool cfg80211_rx_spurious_frame(struct net_device *dev,
				const u8 *addr, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	bool ret;

	trace_cfg80211_rx_spurious_frame(dev, addr);

	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
		    wdev->iftype != NL80211_IFTYPE_P2P_GO)) {
		trace_cfg80211_return_bool(false);
		return false;
	}
	ret = __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME,
					 addr, gfp);
	trace_cfg80211_return_bool(ret);
	return ret;
}
EXPORT_SYMBOL(cfg80211_rx_spurious_frame);

/*
 * cfg80211_rx_unexpected_4addr_frame - report
 * NL80211_CMD_UNEXPECTED_4ADDR_FRAME
 *
 * Same as cfg80211_rx_spurious_frame() but AP_VLAN interfaces are also
 * accepted.
 */
bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
					const u8 *addr, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	bool ret;

	trace_cfg80211_rx_unexpected_4addr_frame(dev, addr);

	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
		    wdev->iftype != NL80211_IFTYPE_P2P_GO &&
		    wdev->iftype != NL80211_IFTYPE_AP_VLAN)) {
		trace_cfg80211_return_bool(false);
		return false;
	}
	ret = __nl80211_unexpected_frame(dev,
					 NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
					 addr, gfp);
	trace_cfg80211_return_bool(ret);
	return ret;
}
EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);

/*
 * nl80211_send_mgmt - unicast an NL80211_CMD_FRAME message to @nlportid
 *
 * Returns -ENOMEM when the message or its header cannot be allocated,
 * -ENOBUFS when an attribute does not fit, otherwise the result of
 * genlmsg_unicast().
 */
int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
		      struct wireless_dev *wdev, u32 nlportid,
		      struct cfg80211_rx_info *info, gfp_t gfp)
{
	struct net_device *netdev = wdev->netdev;
	struct sk_buff *msg;
	void *hdr;

	/* 100 bytes of headroom for the attributes besides the frame. */
	msg = nlmsg_new(100 + info->len, gfp);
	if (!msg)
		return -ENOMEM;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME);
	if (!hdr) {
		nlmsg_free(msg);
		return -ENOMEM;
	}

	/* Optional attributes are only emitted when their value is set. */
	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
				   netdev->ifindex)) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
			      NL80211_ATTR_PAD) ||
	    (info->have_link_id &&
	     nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, info->link_id)) ||
	    nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, KHZ_TO_MHZ(info->freq)) ||
	    nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET,
info->freq % 1000) ||
	    (info->sig_dbm &&
	     nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, info->sig_dbm)) ||
	    nla_put(msg, NL80211_ATTR_FRAME, info->len, info->buf) ||
	    (info->flags &&
	     nla_put_u32(msg, NL80211_ATTR_RXMGMT_FLAGS, info->flags)) ||
	    (info->rx_tstamp && nla_put_u64_64bit(msg,
						  NL80211_ATTR_RX_HW_TIMESTAMP,
						  info->rx_tstamp,
						  NL80211_ATTR_PAD)) ||
	    (info->ack_tstamp && nla_put_u64_64bit(msg,
						   NL80211_ATTR_TX_HW_TIMESTAMP,
						   info->ack_tstamp,
						   NL80211_ATTR_PAD)))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);

 nla_put_failure:
	nlmsg_free(msg);
	return -ENOBUFS;
}

/*
 * nl80211_frame_tx_status - multicast a TX status event for a frame
 * @wdev: wireless device the frame was sent on
 * @status: cookie, frame buffer, ack flag and optional timestamps
 * @gfp: allocation flags
 * @command: NL80211_CMD_FRAME_TX_STATUS selects the mgmt trace point, any
 *	other value the control-port one; it is also the message command
 */
static void nl80211_frame_tx_status(struct wireless_dev *wdev,
				    struct cfg80211_tx_status *status,
				    gfp_t gfp, enum nl80211_commands command)
{
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	struct net_device *netdev = wdev->netdev;
	struct sk_buff *msg;
	void *hdr;

	if (command == NL80211_CMD_FRAME_TX_STATUS)
		trace_cfg80211_mgmt_tx_status(wdev, status->cookie,
					      status->ack);
	else
		trace_cfg80211_control_port_tx_status(wdev, status->cookie,
						      status->ack);

	msg = nlmsg_new(100 + status->len, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, command);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	/*
	 * Note the attribute naming: tx_tstamp goes out as TX_HW_TIMESTAMP
	 * and ack_tstamp as RX_HW_TIMESTAMP.
	 */
	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
				   netdev->ifindex)) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
			      NL80211_ATTR_PAD) ||
	    nla_put(msg, NL80211_ATTR_FRAME, status->len, status->buf) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, status->cookie,
			      NL80211_ATTR_PAD) ||
	    (status->ack && nla_put_flag(msg, NL80211_ATTR_ACK)) ||
	    (status->tx_tstamp &&
	     nla_put_u64_64bit(msg, NL80211_ATTR_TX_HW_TIMESTAMP,
			       status->tx_tstamp, NL80211_ATTR_PAD)) ||
	    (status->ack_tstamp &&
	     nla_put_u64_64bit(msg, NL80211_ATTR_RX_HW_TIMESTAMP,
			       status->ack_tstamp, NL80211_ATTR_PAD)))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam,
wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

 nla_put_failure:
	nlmsg_free(msg);
}

/*
 * cfg80211_control_port_tx_status - report TX status of a control port frame
 *
 * Packs the arguments into a struct cfg80211_tx_status (timestamps left
 * zero) and sends NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS.
 */
void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie,
				     const u8 *buf, size_t len, bool ack,
				     gfp_t gfp)
{
	struct cfg80211_tx_status status = {
		.cookie = cookie,
		.buf = buf,
		.len = len,
		.ack = ack
	};

	nl80211_frame_tx_status(wdev, &status, gfp,
				NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS);
}
EXPORT_SYMBOL(cfg80211_control_port_tx_status);

/*
 * cfg80211_mgmt_tx_status_ext - report TX status of a management frame as
 * NL80211_CMD_FRAME_TX_STATUS, using the caller-provided @status.
 */
void cfg80211_mgmt_tx_status_ext(struct wireless_dev *wdev,
				 struct cfg80211_tx_status *status, gfp_t gfp)
{
	nl80211_frame_tx_status(wdev, status, gfp, NL80211_CMD_FRAME_TX_STATUS);
}
EXPORT_SYMBOL(cfg80211_mgmt_tx_status_ext);

/*
 * __nl80211_rx_control_port - unicast a received control port frame to the
 * socket stored in wdev->conn_owner_nlportid
 *
 * Returns -ENOENT when no owner port id is set, -ENOMEM when the message
 * cannot be allocated, -ENOBUFS when the header or an attribute does not
 * fit, otherwise the result of genlmsg_unicast().
 */
static int __nl80211_rx_control_port(struct net_device *dev,
				     struct sk_buff *skb,
				     bool unencrypted,
				     int link_id,
				     gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct ethhdr *ehdr = eth_hdr(skb);
	const u8 *addr = ehdr->h_source;
	u16 proto = be16_to_cpu(skb->protocol);
	struct sk_buff *msg;
	void *hdr;
	struct nlattr *frame;

	u32 nlportid = READ_ONCE(wdev->conn_owner_nlportid);

	if (!nlportid)
		return -ENOENT;

	msg = nlmsg_new(100 + skb->len, gfp);
	if (!msg)
		return -ENOMEM;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONTROL_PORT_FRAME);
	if (!hdr) {
		nlmsg_free(msg);
		return -ENOBUFS;
	}

	/* A negative link_id means no NL80211_ATTR_MLO_LINK_ID is added. */
	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
			      NL80211_ATTR_PAD) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
	    nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) ||
	    (link_id >= 0 &&
	     nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)) ||
	    (unencrypted && nla_put_flag(msg,
					 NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT)))
		goto nla_put_failure;

	/* Reserve the frame attribute, then copy the skb data into it. */
	frame = nla_reserve(msg, NL80211_ATTR_FRAME, skb->len);
	if (!frame)
		goto nla_put_failure;

	skb_copy_bits(skb, 0, nla_data(frame), skb->len);
	genlmsg_end(msg, hdr);
return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);

 nla_put_failure:
	nlmsg_free(msg);
	return -ENOBUFS;
}

/*
 * cfg80211_rx_control_port - forward a control port frame to user space
 *
 * Always allocates with GFP_ATOMIC.  Returns true when
 * __nl80211_rx_control_port() returned 0, false otherwise.
 */
bool cfg80211_rx_control_port(struct net_device *dev, struct sk_buff *skb,
			      bool unencrypted, int link_id)
{
	int ret;

	trace_cfg80211_rx_control_port(dev, skb, unencrypted, link_id);
	ret = __nl80211_rx_control_port(dev, skb, unencrypted, link_id,
					GFP_ATOMIC);
	trace_cfg80211_return_bool(ret == 0);
	return ret == 0;
}
EXPORT_SYMBOL(cfg80211_rx_control_port);

/*
 * cfg80211_prepare_cqm - start an NL80211_CMD_NOTIFY_CQM message
 * @dev: interface the event is reported for
 * @mac: optional address for NL80211_ATTR_MAC; skipped when NULL
 * @gfp: allocation flags
 *
 * Returns the message with an open NL80211_ATTR_CQM nest, or NULL on
 * failure.  State needed by cfg80211_send_cqm() is stashed in msg->cb:
 * cb[0] is the genl header, cb[1] the nest start and cb[2] the rdev.
 */
static struct sk_buff *cfg80211_prepare_cqm(struct net_device *dev,
					    const char *mac, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct sk_buff *msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	void **cb;

	if (!msg)
		return NULL;

	cb = (void **)msg->cb;

	cb[0] = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM);
	if (!cb[0]) {
		nlmsg_free(msg);
		return NULL;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
		goto nla_put_failure;

	if (mac && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac))
		goto nla_put_failure;

	cb[1] = nla_nest_start_noflag(msg, NL80211_ATTR_CQM);
	if (!cb[1])
		goto nla_put_failure;

	cb[2] = rdev;

	return msg;
 nla_put_failure:
	nlmsg_free(msg);
	return NULL;
}

/*
 * cfg80211_send_cqm - finish and multicast a message started by
 * cfg80211_prepare_cqm()
 *
 * Closes the nest and the message using the pointers saved in msg->cb,
 * clears msg->cb, then multicasts to the MLME group.
 */
static void cfg80211_send_cqm(struct sk_buff *msg, gfp_t gfp)
{
	void **cb = (void **)msg->cb;
	struct cfg80211_registered_device *rdev = cb[2];

	nla_nest_end(msg, cb[1]);
	genlmsg_end(msg, cb[0]);

	/* Wipe the stashed pointers before handing the skb on. */
	memset(msg->cb, 0, sizeof(msg->cb));

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
}

/*
 * cfg80211_cqm_rssi_notify - record an RSSI threshold event and queue the
 * work item that reports it
 *
 * Only the LOW and HIGH threshold events are accepted; anything else warns
 * and returns.  @gfp is not used in this function.
 */
void cfg80211_cqm_rssi_notify(struct net_device *dev,
			      enum nl80211_cqm_rssi_threshold_event rssi_event,
			      s32 rssi_level, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_cqm_config *cqm_config;

	trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level);

	if (WARN_ON(rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW &&
		    rssi_event !=
NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH))
		return;

	/* Store the latest event under RCU; the work item sends it later. */
	rcu_read_lock();
	cqm_config = rcu_dereference(wdev->cqm_config);
	if (cqm_config) {
		cqm_config->last_rssi_event_value = rssi_level;
		cqm_config->last_rssi_event_type = rssi_event;
		wiphy_work_queue(wdev->wiphy, &wdev->cqm_rssi_work);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);

/*
 * cfg80211_cqm_rssi_notify_work - wiphy work handler that sends the last
 * recorded RSSI threshold event as an NL80211_CMD_NOTIFY_CQM message
 *
 * Does nothing when the wdev has no cqm_config.  When the config uses the
 * range API, cfg80211_cqm_rssi_update() is called before the event is read.
 */
void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work)
{
	struct wireless_dev *wdev = container_of(work, struct wireless_dev,
						 cqm_rssi_work);
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	enum nl80211_cqm_rssi_threshold_event rssi_event;
	struct cfg80211_cqm_config *cqm_config;
	struct sk_buff *msg;
	s32 rssi_level;

	cqm_config = wiphy_dereference(wdev->wiphy, wdev->cqm_config);
	if (!cqm_config)
		return;

	if (cqm_config->use_range_api)
		cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config);

	rssi_level = cqm_config->last_rssi_event_value;
	rssi_event = cqm_config->last_rssi_event_type;

	msg = cfg80211_prepare_cqm(wdev->netdev, NULL, GFP_KERNEL);
	if (!msg)
		return;

	if (nla_put_u32(msg, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
			rssi_event))
		goto nla_put_failure;

	/* A level of 0 is treated as "not provided" and is not sent. */
	if (rssi_level && nla_put_s32(msg, NL80211_ATTR_CQM_RSSI_LEVEL,
				      rssi_level))
		goto nla_put_failure;

	cfg80211_send_cqm(msg, GFP_KERNEL);

	return;

 nla_put_failure:
	nlmsg_free(msg);
}

/*
 * cfg80211_cqm_txe_notify - send a CQM event carrying the TXE packet count,
 * rate and interval attributes for @peer.
 */
void cfg80211_cqm_txe_notify(struct net_device *dev,
			     const u8 *peer, u32 num_packets,
			     u32 rate, u32 intvl, gfp_t gfp)
{
	struct sk_buff *msg;

	msg = cfg80211_prepare_cqm(dev, peer, gfp);
	if (!msg)
		return;

	if (nla_put_u32(msg, NL80211_ATTR_CQM_TXE_PKTS, num_packets))
		goto nla_put_failure;

	if (nla_put_u32(msg, NL80211_ATTR_CQM_TXE_RATE, rate))
		goto nla_put_failure;

	if (nla_put_u32(msg, NL80211_ATTR_CQM_TXE_INTVL, intvl))
		goto nla_put_failure;

	cfg80211_send_cqm(msg, gfp);
	return;

 nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_cqm_txe_notify);

/*
 * cfg80211_cqm_pktloss_notify - send a CQM event carrying
 * NL80211_ATTR_CQM_PKT_LOSS_EVENT = @num_packets for @peer.
 */
void cfg80211_cqm_pktloss_notify(struct net_device *dev,
				 const u8 *peer, u32 num_packets, gfp_t gfp)
{
	struct sk_buff
*msg;

	trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets);

	msg = cfg80211_prepare_cqm(dev, peer, gfp);
	if (!msg)
		return;

	if (nla_put_u32(msg, NL80211_ATTR_CQM_PKT_LOSS_EVENT, num_packets))
		goto nla_put_failure;

	cfg80211_send_cqm(msg, gfp);
	return;

 nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify);

/*
 * cfg80211_cqm_beacon_loss_notify - send a CQM event that carries only the
 * NL80211_ATTR_CQM_BEACON_LOSS_EVENT flag (no peer address).
 */
void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp)
{
	struct sk_buff *msg;

	msg = cfg80211_prepare_cqm(dev, NULL, gfp);
	if (!msg)
		return;

	if (nla_put_flag(msg, NL80211_ATTR_CQM_BEACON_LOSS_EVENT))
		goto nla_put_failure;

	cfg80211_send_cqm(msg, gfp);
	return;

 nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_cqm_beacon_loss_notify);

/*
 * nl80211_gtk_rekey_notify - multicast an NL80211_CMD_SET_REKEY_OFFLOAD
 * event
 * @bssid: address placed in NL80211_ATTR_MAC
 * @replay_ctr: NL80211_REPLAY_CTR_LEN bytes nested inside
 *	NL80211_ATTR_REKEY_DATA as NL80211_REKEY_DATA_REPLAY_CTR
 */
static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
				     struct net_device *netdev,
				     const u8 *bssid, const u8 *replay_ctr,
				     gfp_t gfp)
{
	struct sk_buff *msg;
	struct nlattr *rekey_attr;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_SET_REKEY_OFFLOAD);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
		goto nla_put_failure;

	rekey_attr = nla_nest_start_noflag(msg, NL80211_ATTR_REKEY_DATA);
	if (!rekey_attr)
		goto nla_put_failure;

	if (nla_put(msg, NL80211_REKEY_DATA_REPLAY_CTR,
		    NL80211_REPLAY_CTR_LEN, replay_ctr))
		goto nla_put_failure;

	nla_nest_end(msg, rekey_attr);

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

 nla_put_failure:
	nlmsg_free(msg);
}

/*
 * cfg80211_gtk_rekey_notify - exported wrapper: trace, then send the GTK
 * rekey event for @dev.
 */
void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid,
			       const u8 *replay_ctr, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);

	trace_cfg80211_gtk_rekey_notify(dev, bssid);
nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp);
}
EXPORT_SYMBOL(cfg80211_gtk_rekey_notify);

/*
 * nl80211_pmksa_candidate_notify - multicast an NL80211_CMD_PMKSA_CANDIDATE
 * event
 *
 * @index, @bssid and (when @preauth is true) the PREAUTH flag are nested
 * inside NL80211_ATTR_PMKSA_CANDIDATE.
 */
static void
nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
			       struct net_device *netdev, int index,
			       const u8 *bssid, bool preauth, gfp_t gfp)
{
	struct sk_buff *msg;
	struct nlattr *attr;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PMKSA_CANDIDATE);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex))
		goto nla_put_failure;

	attr = nla_nest_start_noflag(msg, NL80211_ATTR_PMKSA_CANDIDATE);
	if (!attr)
		goto nla_put_failure;

	if (nla_put_u32(msg, NL80211_PMKSA_CANDIDATE_INDEX, index) ||
	    nla_put(msg, NL80211_PMKSA_CANDIDATE_BSSID, ETH_ALEN, bssid) ||
	    (preauth &&
	     nla_put_flag(msg, NL80211_PMKSA_CANDIDATE_PREAUTH)))
		goto nla_put_failure;

	nla_nest_end(msg, attr);

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

 nla_put_failure:
	nlmsg_free(msg);
}

/*
 * cfg80211_pmksa_candidate_notify - exported wrapper: trace, then send the
 * PMKSA candidate event for @dev.
 */
void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
				     const u8 *bssid, bool preauth, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);

	trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth);
	nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp);
}
EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify);

/*
 * nl80211_ch_switch_notify - multicast a channel switch event
 * @link_id: only emitted when the wdev has valid_links set
 * @notif: message command; NL80211_CMD_CH_SWITCH_STARTED_NOTIFY
 *	additionally carries @count and, if @quiet, the BLOCK_TX flag
 * @punct_bitmap: always emitted as NL80211_ATTR_PUNCT_BITMAP
 */
static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
				     struct net_device *netdev,
				     unsigned int link_id,
				     struct cfg80211_chan_def *chandef,
				     gfp_t gfp,
				     enum nl80211_commands notif,
				     u8 count, bool quiet, u16 punct_bitmap)
{
	struct wireless_dev *wdev = netdev->ieee80211_ptr;
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0,
0, notif);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex))
		goto nla_put_failure;

	if (wdev->valid_links &&
	    nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id))
		goto nla_put_failure;

	if (nl80211_send_chandef(msg, chandef))
		goto nla_put_failure;

	if (notif == NL80211_CMD_CH_SWITCH_STARTED_NOTIFY) {
		if (nla_put_u32(msg, NL80211_ATTR_CH_SWITCH_COUNT, count))
			goto nla_put_failure;
		if (quiet &&
		    nla_put_flag(msg, NL80211_ATTR_CH_SWITCH_BLOCK_TX))
			goto nla_put_failure;
	}

	if (nla_put_u32(msg, NL80211_ATTR_PUNCT_BITMAP, punct_bitmap))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

 nla_put_failure:
	nlmsg_free(msg);
}

/*
 * cfg80211_ch_switch_notify - record a completed channel switch and send
 * NL80211_CMD_CH_SWITCH_NOTIFY
 *
 * Asserts the wiphy lock is held.  The stored channel definition is
 * updated according to the interface type (unknown types warn), a DFS
 * channel update is scheduled, then the event is sent with GFP_KERNEL.
 */
void cfg80211_ch_switch_notify(struct net_device *dev,
			       struct cfg80211_chan_def *chandef,
			       unsigned int link_id, u16 punct_bitmap)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);

	lockdep_assert_wiphy(wdev->wiphy);
	WARN_INVALID_LINK_ID(wdev, link_id);

	trace_cfg80211_ch_switch_notify(dev, chandef, link_id, punct_bitmap);

	switch (wdev->iftype) {
	case NL80211_IFTYPE_STATION:
	case NL80211_IFTYPE_P2P_CLIENT:
		/* Only update the BSS entry when the link has a current BSS. */
		if (!WARN_ON(!wdev->links[link_id].client.current_bss))
			cfg80211_update_assoc_bss_entry(wdev, link_id,
							chandef->chan);
		break;
	case NL80211_IFTYPE_MESH_POINT:
		wdev->u.mesh.chandef = *chandef;
		wdev->u.mesh.preset_chandef = *chandef;
		break;
	case NL80211_IFTYPE_AP:
	case NL80211_IFTYPE_P2P_GO:
		wdev->links[link_id].ap.chandef = *chandef;
		break;
	case NL80211_IFTYPE_ADHOC:
		wdev->u.ibss.chandef = *chandef;
		break;
	default:
		WARN_ON(1);
		break;
	}

	cfg80211_sched_dfs_chan_update(rdev);

	nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
				 NL80211_CMD_CH_SWITCH_NOTIFY, 0, false,
				 punct_bitmap);
}
EXPORT_SYMBOL(cfg80211_ch_switch_notify);

/*
 * cfg80211_ch_switch_started_notify - send
 * NL80211_CMD_CH_SWITCH_STARTED_NOTIFY with @count and @quiet; no stored
 * state is changed here.
 */
void cfg80211_ch_switch_started_notify(struct net_device *dev,
				       struct
cfg80211_chan_def *chandef,
				       unsigned int link_id, u8 count,
				       bool quiet, u16 punct_bitmap)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);

	lockdep_assert_wiphy(wdev->wiphy);
	WARN_INVALID_LINK_ID(wdev, link_id);

	trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id,
						punct_bitmap);

	nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
				 NL80211_CMD_CH_SWITCH_STARTED_NOTIFY,
				 count, quiet, punct_bitmap);
}
EXPORT_SYMBOL(cfg80211_ch_switch_started_notify);

/*
 * cfg80211_bss_color_notify - multicast a BSS color related event
 * @dev: interface whose ifindex is reported
 * @cmd: message command; also selects which optional attribute is added
 * @count: sent only for NL80211_CMD_COLOR_CHANGE_STARTED
 * @color_bitmap: sent only for NL80211_CMD_OBSS_COLOR_COLLISION
 *
 * Asserts the wiphy lock is held.  Returns -ENOMEM when the message cannot
 * be allocated, -EINVAL when the header or an attribute cannot be added,
 * otherwise the result of genlmsg_multicast_netns().
 */
int cfg80211_bss_color_notify(struct net_device *dev,
			      enum nl80211_commands cmd, u8 count,
			      u64 color_bitmap)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	struct sk_buff *msg;
	void *hdr;

	lockdep_assert_wiphy(wdev->wiphy);

	trace_cfg80211_bss_color_notify(dev, cmd, count, color_bitmap);

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
	if (!hdr)
		goto nla_put_failure;

	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
		goto nla_put_failure;

	if (cmd == NL80211_CMD_COLOR_CHANGE_STARTED &&
	    nla_put_u32(msg, NL80211_ATTR_COLOR_CHANGE_COUNT, count))
		goto nla_put_failure;

	if (cmd == NL80211_CMD_OBSS_COLOR_COLLISION &&
	    nla_put_u64_64bit(msg, NL80211_ATTR_OBSS_COLOR_BITMAP,
			      color_bitmap, NL80211_ATTR_PAD))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	return genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
				       msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL);

 nla_put_failure:
	nlmsg_free(msg);
	return -EINVAL;
}
EXPORT_SYMBOL(cfg80211_bss_color_notify);

/*
 * nl80211_radar_notify - multicast an NL80211_CMD_RADAR_DETECT event
 * @netdev: optional; when NULL no IFINDEX/WDEV attributes are added
 */
void
nl80211_radar_notify(struct cfg80211_registered_device *rdev,
		     const struct cfg80211_chan_def *chandef,
		     enum nl80211_radar_event event,
		     struct net_device *netdev, gfp_t gfp)
{
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr =
nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_RADAR_DETECT);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
		goto nla_put_failure;

	/* NOP and radar events don't need a netdev parameter */
	if (netdev) {
		struct wireless_dev *wdev = netdev->ieee80211_ptr;

		if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
		    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
				      NL80211_ATTR_PAD))
			goto nla_put_failure;
	}

	if (nla_put_u32(msg, NL80211_ATTR_RADAR_EVENT, event))
		goto nla_put_failure;

	if (nl80211_send_chandef(msg, chandef))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

 nla_put_failure:
	nlmsg_free(msg);
}

/*
 * cfg80211_sta_opmode_change_notify - multicast an
 * NL80211_CMD_STA_OPMODE_CHANGED event
 * @dev: interface whose ifindex is reported
 * @mac: station address; must not be NULL (warns and returns otherwise)
 * @sta_opmode: the bits in ->changed select which of SMPS mode, channel
 *	width and NSS are included
 * @gfp: allocation flags
 */
void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
				       struct sta_opmode_info *sta_opmode,
				       gfp_t gfp)
{
	struct sk_buff *msg;
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	void *hdr;

	if (WARN_ON(!mac))
		return;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_STA_OPMODE_CHANGED);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
		goto nla_put_failure;

	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
		goto nla_put_failure;

	if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac))
		goto nla_put_failure;

	if ((sta_opmode->changed & STA_OPMODE_SMPS_MODE_CHANGED) &&
	    nla_put_u8(msg, NL80211_ATTR_SMPS_MODE, sta_opmode->smps_mode))
		goto nla_put_failure;

	if ((sta_opmode->changed & STA_OPMODE_MAX_BW_CHANGED) &&
	    nla_put_u32(msg, NL80211_ATTR_CHANNEL_WIDTH, sta_opmode->bw))
		goto nla_put_failure;

	if ((sta_opmode->changed & STA_OPMODE_N_SS_CHANGED) &&
	    nla_put_u8(msg, NL80211_ATTR_NSS, sta_opmode->rx_nss))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
NL80211_MCGRP_MLME, gfp);

	return;

 nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_sta_opmode_change_notify);

/*
 * cfg80211_probe_status - multicast an NL80211_CMD_PROBE_CLIENT result
 * @dev: interface whose ifindex is reported
 * @addr: address placed in NL80211_ATTR_MAC
 * @cookie: value placed in NL80211_ATTR_COOKIE
 * @acked: adds the NL80211_ATTR_ACK flag when true
 * @ack_signal: sent as NL80211_ATTR_ACK_SIGNAL only if
 *	@is_valid_ack_signal is true
 * @is_valid_ack_signal: whether @ack_signal is included
 * @gfp: allocation flags
 */
void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
			   u64 cookie, bool acked, s32 ack_signal,
			   bool is_valid_ack_signal, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct sk_buff *msg;
	void *hdr;

	trace_cfg80211_probe_status(dev, addr, cookie, acked);

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);

	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PROBE_CLIENT);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
			      NL80211_ATTR_PAD) ||
	    (acked && nla_put_flag(msg, NL80211_ATTR_ACK)) ||
	    (is_valid_ack_signal && nla_put_s32(msg, NL80211_ATTR_ACK_SIGNAL,
						ack_signal)))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

 nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_probe_status);

/*
 * cfg80211_report_obss_beacon_khz - unicast a received beacon frame, as an
 * NL80211_CMD_FRAME message, to every registered beacon listener
 *
 * Walks rdev->beacon_registrations under beacon_registrations_lock and
 * builds one message per entry with GFP_ATOMIC.  The first allocation or
 * attribute failure stops the walk.  Frequency and signal attributes are
 * only added when @freq / @sig_dbm are non-zero.
 */
void cfg80211_report_obss_beacon_khz(struct wiphy *wiphy, const u8 *frame,
				     size_t len, int freq, int sig_dbm)
{
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	struct sk_buff *msg;
	void *hdr;
	struct cfg80211_beacon_registration *reg;

	trace_cfg80211_report_obss_beacon(wiphy, frame, len, freq, sig_dbm);

	spin_lock_bh(&rdev->beacon_registrations_lock);
	list_for_each_entry(reg, &rdev->beacon_registrations, list) {
		msg = nlmsg_new(len + 100, GFP_ATOMIC);
		if (!msg) {
			spin_unlock_bh(&rdev->beacon_registrations_lock);
			return;
		}

		hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME);
		if (!hdr)
			goto nla_put_failure;

		if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
		    (freq &&
		     (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ,
				  KHZ_TO_MHZ(freq)) ||
nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET,
				  freq % 1000))) ||
		    (sig_dbm &&
		     nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) ||
		    nla_put(msg, NL80211_ATTR_FRAME, len, frame))
			goto nla_put_failure;

		genlmsg_end(msg, hdr);

		genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, reg->nlportid);
	}
	spin_unlock_bh(&rdev->beacon_registrations_lock);
	return;

 nla_put_failure:
	/* Reached from inside the loop, so the lock is still held here. */
	spin_unlock_bh(&rdev->beacon_registrations_lock);
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_report_obss_beacon_khz);

#ifdef CONFIG_PM
/*
 * cfg80211_net_detect_results - add the net-detect matches of @wakeup to
 * @msg as a NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS nest
 *
 * Returns -EMSGSIZE only when the outer nest cannot be started.  If a
 * later match does not fit, that match is cancelled, the matches added so
 * far are kept and 0 is returned.
 */
static int cfg80211_net_detect_results(struct sk_buff *msg,
				       struct cfg80211_wowlan_wakeup *wakeup)
{
	struct cfg80211_wowlan_nd_info *nd = wakeup->net_detect;
	struct nlattr *nl_results, *nl_match, *nl_freqs;
	int i, j;

	nl_results = nla_nest_start_noflag(msg,
					   NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS);
	if (!nl_results)
		return -EMSGSIZE;

	for (i = 0; i < nd->n_matches; i++) {
		struct cfg80211_wowlan_nd_match *match = nd->matches[i];

		/* Each match is nested under its index as attribute type. */
		nl_match = nla_nest_start_noflag(msg, i);
		if (!nl_match)
			break;

		/* The SSID attribute is optional in nl80211, but for
		 * simplicity reasons it's always present in the
		 * cfg80211 structure.  If a driver can't pass the
		 * SSID, that needs to be changed.  A zero length SSID
		 * is still a valid SSID (wildcard), so it cannot be
		 * used for this purpose.
*/
		if (nla_put(msg, NL80211_ATTR_SSID, match->ssid.ssid_len,
			    match->ssid.ssid)) {
			nla_nest_cancel(msg, nl_match);
			goto out;
		}

		if (match->n_channels) {
			nl_freqs = nla_nest_start_noflag(msg,
							 NL80211_ATTR_SCAN_FREQUENCIES);
			if (!nl_freqs) {
				nla_nest_cancel(msg, nl_match);
				goto out;
			}

			for (j = 0; j < match->n_channels; j++) {
				if (nla_put_u32(msg, j, match->channels[j])) {
					/* Drop the partial match, keep earlier ones. */
					nla_nest_cancel(msg, nl_freqs);
					nla_nest_cancel(msg, nl_match);
					goto out;
				}
			}

			nla_nest_end(msg, nl_freqs);
		}

		nla_nest_end(msg, nl_match);
	}

out:
	nla_nest_end(msg, nl_results);
	return 0;
}

/*
 * cfg80211_report_wowlan_wakeup - multicast an NL80211_CMD_SET_WOWLAN
 * wakeup report
 * @wdev: wireless device the wakeup is reported for
 * @wakeup: wakeup reasons; when NULL the message carries no
 *	NL80211_ATTR_WOWLAN_TRIGGERS nest
 * @gfp: allocation flags
 *
 * The message is sized as 200 bytes plus wakeup->packet_present_len.
 * Any failure frees the message and nothing is sent.
 */
void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev,
				   struct cfg80211_wowlan_wakeup *wakeup,
				   gfp_t gfp)
{
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct sk_buff *msg;
	void *hdr;
	int size = 200;

	trace_cfg80211_report_wowlan_wakeup(wdev->wiphy, wdev, wakeup);

	if (wakeup)
		size += wakeup->packet_present_len;

	msg = nlmsg_new(size, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_SET_WOWLAN);
	if (!hdr)
		goto free_msg;

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
			      NL80211_ATTR_PAD))
		goto free_msg;

	if (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
					wdev->netdev->ifindex))
		goto free_msg;

	if (wakeup) {
		struct nlattr *reasons;

		reasons = nla_nest_start_noflag(msg,
						NL80211_ATTR_WOWLAN_TRIGGERS);
		if (!reasons)
			goto free_msg;

		/* One flag attribute per wakeup reason that is set. */
		if (wakeup->disconnect &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT))
			goto free_msg;
		if (wakeup->magic_pkt &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT))
			goto free_msg;
		if (wakeup->gtk_rekey_failure &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE))
			goto free_msg;
		if (wakeup->eap_identity_req &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST))
			goto free_msg;
		if (wakeup->four_way_handshake &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE))
			goto free_msg;
		if (wakeup->rfkill_release &&
		    nla_put_flag(msg,
NL80211_WOWLAN_TRIG_RFKILL_RELEASE))
			goto free_msg;

		/* A negative pattern_idx means no pattern attribute is sent. */
		if (wakeup->pattern_idx >= 0 &&
		    nla_put_u32(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
				wakeup->pattern_idx))
			goto free_msg;

		if (wakeup->tcp_match &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH))
			goto free_msg;

		if (wakeup->tcp_connlost &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST))
			goto free_msg;

		if (wakeup->tcp_nomoretokens &&
		    nla_put_flag(msg,
				 NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS))
			goto free_msg;

		if (wakeup->packet) {
			/* Default to the 802.11 attributes; switch to 802.3 below. */
			u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211;
			u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN;

			if (!wakeup->packet_80211) {
				pkt_attr =
					NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023;
				len_attr =
					NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN;
			}

			if (wakeup->packet_len &&
			    nla_put_u32(msg, len_attr, wakeup->packet_len))
				goto free_msg;

			if (nla_put(msg, pkt_attr, wakeup->packet_present_len,
				    wakeup->packet))
				goto free_msg;
		}

		if (wakeup->net_detect &&
		    cfg80211_net_detect_results(msg, wakeup))
				goto free_msg;

		nla_nest_end(msg, reasons);
	}

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

 free_msg:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_report_wowlan_wakeup);
#endif

/*
 * cfg80211_tdls_oper_request - multicast an NL80211_CMD_TDLS_OPER event
 * @dev: interface whose ifindex is reported
 * @peer: address placed in NL80211_ATTR_MAC
 * @oper: value placed in NL80211_ATTR_TDLS_OPERATION
 * @reason_code: sent as NL80211_ATTR_REASON_CODE only when greater than 0
 * @gfp: allocation flags
 */
void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer,
				enum nl80211_tdls_operation oper,
				u16 reason_code, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct sk_buff *msg;
	void *hdr;

	trace_cfg80211_tdls_oper_request(wdev->wiphy, dev, peer, oper,
					 reason_code);

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_TDLS_OPER);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
	    nla_put_u8(msg, NL80211_ATTR_TDLS_OPERATION, oper) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer) ||
(reason_code > 0 && nla_put_u16(msg, NL80211_ATTR_REASON_CODE, reason_code))) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_tdls_oper_request); static int nl80211_netlink_notify(struct notifier_block * nb, unsigned long state, void *_notify) { struct netlink_notify *notify = _notify; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; struct cfg80211_beacon_registration *reg, *tmp; if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC) return NOTIFY_DONE; rcu_read_lock(); list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { struct cfg80211_sched_scan_request *sched_scan_req; list_for_each_entry_rcu(sched_scan_req, &rdev->sched_scan_req_list, list) { if (sched_scan_req->owner_nlportid == notify->portid) { sched_scan_req->nl_owner_dead = true; wiphy_work_queue(&rdev->wiphy, &rdev->sched_scan_stop_wk); } } list_for_each_entry_rcu(wdev, &rdev->wiphy.wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); if (wdev->owner_nlportid == notify->portid) { wdev->nl_owner_dead = true; schedule_work(&rdev->destroy_work); } else if (wdev->conn_owner_nlportid == notify->portid) { schedule_work(&wdev->disconnect_wk); } cfg80211_release_pmsr(wdev, notify->portid); } spin_lock_bh(&rdev->beacon_registrations_lock); list_for_each_entry_safe(reg, tmp, &rdev->beacon_registrations, list) { if (reg->nlportid == notify->portid) { list_del(®->list); kfree(reg); break; } } spin_unlock_bh(&rdev->beacon_registrations_lock); } rcu_read_unlock(); /* * It is possible that the user space process that is controlling the * indoor setting disappeared, so notify the regulatory core. 
*/ regulatory_netlink_notify(notify->portid); return NOTIFY_OK; } static struct notifier_block nl80211_netlink_notifier = { .notifier_call = nl80211_netlink_notify, }; void cfg80211_ft_event(struct net_device *netdev, struct cfg80211_ft_event_params *ft_event) { struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; trace_cfg80211_ft_event(wiphy, netdev, ft_event); if (!ft_event->target_ap) return; msg = nlmsg_new(100 + ft_event->ies_len + ft_event->ric_ies_len, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT); if (!hdr) goto out; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap)) goto out; if (ft_event->ies && nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies)) goto out; if (ft_event->ric_ies && nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, ft_event->ric_ies)) goto out; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); return; out: nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_ft_event); void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp) { struct cfg80211_registered_device *rdev; struct sk_buff *msg; void *hdr; u32 nlportid; rdev = wiphy_to_rdev(wdev->wiphy); if (!rdev->crit_proto_nlportid) return; nlportid = rdev->crit_proto_nlportid; rdev->crit_proto_nlportid = 0; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CRIT_PROTOCOL_STOP); if (!hdr) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return; nla_put_failure: 
nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_crit_proto_stopped); void nl80211_send_ap_stopped(struct wireless_dev *wdev, unsigned int link_id) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_STOP_AP); if (!hdr) goto out; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD) || (wdev->valid_links && nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id))) goto out; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); return; out: nlmsg_free(msg); } int cfg80211_external_auth_request(struct net_device *dev, struct cfg80211_external_auth_params *params, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; if (!wdev->conn_owner_nlportid) return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_EXTERNAL_AUTH); if (!hdr) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put_u32(msg, NL80211_ATTR_AKM_SUITES, params->key_mgmt_suite) || nla_put_u32(msg, NL80211_ATTR_EXTERNAL_AUTH_ACTION, params->action) || nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, params->bssid) || nla_put(msg, NL80211_ATTR_SSID, params->ssid.ssid_len, params->ssid.ssid) || (!is_zero_ether_addr(params->mld_addr) && nla_put(msg, NL80211_ATTR_MLD_ADDR, ETH_ALEN, params->mld_addr))) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, wdev->conn_owner_nlportid); return 0; nla_put_failure: 
nlmsg_free(msg); return -ENOBUFS; } EXPORT_SYMBOL(cfg80211_external_auth_request); void cfg80211_update_owe_info_event(struct net_device *netdev, struct cfg80211_update_owe_info *owe_info, gfp_t gfp) { struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; trace_cfg80211_update_owe_info_event(wiphy, netdev, owe_info); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_UPDATE_OWE_INFO); if (!hdr) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, owe_info->peer)) goto nla_put_failure; if (!owe_info->ie_len || nla_put(msg, NL80211_ATTR_IE, owe_info->ie_len, owe_info->ie)) goto nla_put_failure; if (owe_info->assoc_link_id != -1) { if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, owe_info->assoc_link_id)) goto nla_put_failure; if (!is_zero_ether_addr(owe_info->peer_mld_addr) && nla_put(msg, NL80211_ATTR_MLD_ADDR, ETH_ALEN, owe_info->peer_mld_addr)) goto nla_put_failure; } genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: genlmsg_cancel(msg, hdr); nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_update_owe_info_event); /* initialisation/exit functions */ int __init nl80211_init(void) { int err; err = genl_register_family(&nl80211_fam); if (err) return err; err = netlink_register_notifier(&nl80211_netlink_notifier); if (err) goto err_out; return 0; err_out: genl_unregister_family(&nl80211_fam); return err; } void nl80211_exit(void) { netlink_unregister_notifier(&nl80211_netlink_notifier); genl_unregister_family(&nl80211_fam); } |
402 402 4 4 4 3 1 3 3 3 3 1 19 13 15 8 8 8 8 8 2 2 2 1 1 1 1 1 19 4 13 2 13 6 5 8 5 5 4 402 402 || // SPDX-License-Identifier: GPL-2.0-or-later /* * IPVS An implementation of the IP virtual server support for the * LINUX operating system. IPVS is now implemented as a module * over the Netfilter framework. IPVS can be used to build a * high-performance and highly available server based on a * cluster of servers. * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> * * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese, * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms * and others. * * Changes: * Paul `Rusty' Russell properly handle non-linear skbs * Harald Welte don't use nfcache */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/ip.h> #include <linux/tcp.h> #include <linux/sctp.h> #include <linux/icmp.h> #include <linux/slab.h> #include <net/ip.h> #include <net/tcp.h> #include <net/udp.h> #include <net/icmp.h> /* for icmp_send */ #include <net/gue.h> #include <net/gre.h> #include <net/route.h> #include <net/ip6_checksum.h> #include <net/netns/generic.h> /* net_generic() */ #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #ifdef CONFIG_IP_VS_IPV6 #include <net/ipv6.h> #include <linux/netfilter_ipv6.h> #include <net/ip6_route.h> #endif #include <net/ip_vs.h> #include <linux/indirect_call_wrapper.h> EXPORT_SYMBOL(register_ip_vs_scheduler); EXPORT_SYMBOL(unregister_ip_vs_scheduler); EXPORT_SYMBOL(ip_vs_proto_name); EXPORT_SYMBOL(ip_vs_conn_new); EXPORT_SYMBOL(ip_vs_conn_in_get); EXPORT_SYMBOL(ip_vs_conn_out_get); #ifdef CONFIG_IP_VS_PROTO_TCP EXPORT_SYMBOL(ip_vs_tcp_conn_listen); #endif EXPORT_SYMBOL(ip_vs_conn_put); #ifdef CONFIG_IP_VS_DEBUG EXPORT_SYMBOL(ip_vs_get_debug_level); #endif EXPORT_SYMBOL(ip_vs_new_conn_out); #if 
defined(CONFIG_IP_VS_PROTO_TCP) && defined(CONFIG_IP_VS_PROTO_UDP) #define SNAT_CALL(f, ...) \ INDIRECT_CALL_2(f, tcp_snat_handler, udp_snat_handler, __VA_ARGS__) #elif defined(CONFIG_IP_VS_PROTO_TCP) #define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, tcp_snat_handler, __VA_ARGS__) #elif defined(CONFIG_IP_VS_PROTO_UDP) #define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, udp_snat_handler, __VA_ARGS__) #else #define SNAT_CALL(f, ...) f(__VA_ARGS__) #endif static unsigned int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); /* ID used in ICMP lookups */ #define icmp_id(icmph) (((icmph)->un).echo.id) #define icmpv6_id(icmph) (icmph->icmp6_dataun.u_echo.identifier) const char *ip_vs_proto_name(unsigned int proto) { static char buf[20]; switch (proto) { case IPPROTO_IP: return "IP"; case IPPROTO_UDP: return "UDP"; case IPPROTO_TCP: return "TCP"; case IPPROTO_SCTP: return "SCTP"; case IPPROTO_ICMP: return "ICMP"; #ifdef CONFIG_IP_VS_IPV6 case IPPROTO_ICMPV6: return "ICMPv6"; #endif default: sprintf(buf, "IP_%u", proto); return buf; } } void ip_vs_init_hash_table(struct list_head *table, int rows) { while (--rows >= 0) INIT_LIST_HEAD(&table[rows]); } static inline void ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; struct netns_ipvs *ipvs = cp->ipvs; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; struct ip_vs_service *svc; local_bh_disable(); s = this_cpu_ptr(dest->stats.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.inpkts); u64_stats_add(&s->cnt.inbytes, skb->len); u64_stats_update_end(&s->syncp); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.inpkts); u64_stats_add(&s->cnt.inbytes, skb->len); u64_stats_update_end(&s->syncp); s = this_cpu_ptr(ipvs->tot_stats->s.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.inpkts); 
u64_stats_add(&s->cnt.inbytes, skb->len); u64_stats_update_end(&s->syncp); local_bh_enable(); } } static inline void ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; struct netns_ipvs *ipvs = cp->ipvs; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; struct ip_vs_service *svc; local_bh_disable(); s = this_cpu_ptr(dest->stats.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.outpkts); u64_stats_add(&s->cnt.outbytes, skb->len); u64_stats_update_end(&s->syncp); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.outpkts); u64_stats_add(&s->cnt.outbytes, skb->len); u64_stats_update_end(&s->syncp); s = this_cpu_ptr(ipvs->tot_stats->s.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.outpkts); u64_stats_add(&s->cnt.outbytes, skb->len); u64_stats_update_end(&s->syncp); local_bh_enable(); } } static inline void ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) { struct netns_ipvs *ipvs = svc->ipvs; struct ip_vs_cpu_stats *s; local_bh_disable(); s = this_cpu_ptr(cp->dest->stats.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.conns); u64_stats_update_end(&s->syncp); s = this_cpu_ptr(svc->stats.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.conns); u64_stats_update_end(&s->syncp); s = this_cpu_ptr(ipvs->tot_stats->s.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.conns); u64_stats_update_end(&s->syncp); local_bh_enable(); } static inline void ip_vs_set_state(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_proto_data *pd) { if (likely(pd->pp->state_transition)) pd->pp->state_transition(cp, direction, skb, pd); } static inline int ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, struct sk_buff *skb, int protocol, const union nf_inet_addr *caddr, __be16 cport, const 
union nf_inet_addr *vaddr, __be16 vport, struct ip_vs_conn_param *p) { ip_vs_conn_fill_param(svc->ipvs, svc->af, protocol, caddr, cport, vaddr, vport, p); p->pe = rcu_dereference(svc->pe); if (p->pe && p->pe->fill_param) return p->pe->fill_param(p, skb); return 0; } /* * IPVS persistent scheduling function * It creates a connection entry according to its template if exists, * or selects a server and creates a connection entry plus a template. * Locking: we are svc user (svc->refcnt), so we hold all dests too * Protocols supported: TCP, UDP */ static struct ip_vs_conn * ip_vs_sched_persist(struct ip_vs_service *svc, struct sk_buff *skb, __be16 src_port, __be16 dst_port, int *ignored, struct ip_vs_iphdr *iph) { struct ip_vs_conn *cp = NULL; struct ip_vs_dest *dest; struct ip_vs_conn *ct; __be16 dport = 0; /* destination port to forward */ unsigned int flags; struct ip_vs_conn_param param; const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; union nf_inet_addr snet; /* source network of the client, after masking */ const union nf_inet_addr *src_addr, *dst_addr; if (likely(!ip_vs_iph_inverse(iph))) { src_addr = &iph->saddr; dst_addr = &iph->daddr; } else { src_addr = &iph->daddr; dst_addr = &iph->saddr; } /* Mask saddr with the netmask to adjust template granularity */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) ipv6_addr_prefix(&snet.in6, &src_addr->in6, (__force __u32) svc->netmask); else #endif snet.ip = src_addr->ip & svc->netmask; IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " "mnet %s\n", IP_VS_DBG_ADDR(svc->af, src_addr), ntohs(src_port), IP_VS_DBG_ADDR(svc->af, dst_addr), ntohs(dst_port), IP_VS_DBG_ADDR(svc->af, &snet)); /* * As far as we know, FTP is a very complicated network protocol, and * it uses control connection and data connections. For active FTP, * FTP server initialize data connection to the client, its source port * is often 20. 
For passive FTP, FTP server tells the clients the port * that it passively listens to, and the client issues the data * connection. In the tunneling or direct routing mode, the load * balancer is on the client-to-server half of connection, the port * number is unknown to the load balancer. So, a conn template like * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP * service, and a template like <caddr, 0, vaddr, vport, daddr, dport> * is created for other persistent services. */ { int protocol = iph->protocol; const union nf_inet_addr *vaddr = dst_addr; __be16 vport = 0; if (dst_port == svc->port) { /* non-FTP template: * <protocol, caddr, 0, vaddr, vport, daddr, dport> * FTP template: * <protocol, caddr, 0, vaddr, 0, daddr, 0> */ if (svc->port != FTPPORT) vport = dst_port; } else { /* Note: persistent fwmark-based services and * persistent port zero service are handled here. * fwmark template: * <IPPROTO_IP,caddr,0,fwmark,0,daddr,0> * port zero template: * <protocol,caddr,0,vaddr,0,daddr,0> */ if (svc->fwmark) { protocol = IPPROTO_IP; vaddr = &fwmark; } } /* return *ignored = -1 so NF_DROP can be used */ if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, vaddr, vport, ¶m) < 0) { *ignored = -1; return NULL; } } /* Check if a template already exists */ ct = ip_vs_ct_in_get(¶m); if (!ct || !ip_vs_check_template(ct, NULL)) { struct ip_vs_scheduler *sched; /* * No template found or the dest of the connection * template is not available. * return *ignored=0 i.e. 
ICMP and NF_DROP */ sched = rcu_dereference(svc->scheduler); if (sched) { /* read svc->sched_data after svc->scheduler */ smp_rmb(); dest = sched->schedule(svc, skb, iph); } else { dest = NULL; } if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); *ignored = 0; return NULL; } if (dst_port == svc->port && svc->port != FTPPORT) dport = dest->port; /* Create a template * This adds param.pe_data to the template, * and thus param.pe_data will be destroyed * when the template expires */ ct = ip_vs_conn_new(¶m, dest->af, &dest->addr, dport, IP_VS_CONN_F_TEMPLATE, dest, skb->mark); if (ct == NULL) { kfree(param.pe_data); *ignored = -1; return NULL; } ct->timeout = svc->timeout; } else { /* set destination with the found template */ dest = ct->dest; kfree(param.pe_data); } dport = dst_port; if (dport == svc->port && dest->port) dport = dest->port; flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; /* * Create a new connection according to the template */ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, src_addr, src_port, dst_addr, dst_port, ¶m); cp = ip_vs_conn_new(¶m, dest->af, &dest->addr, dport, flags, dest, skb->mark); if (cp == NULL) { ip_vs_conn_put(ct); *ignored = -1; return NULL; } /* * Add its control */ ip_vs_control_add(cp, ct); ip_vs_conn_put(ct); ip_vs_conn_stats(cp, svc); return cp; } /* * IPVS main scheduling function * It selects a server according to the virtual service, and * creates a connection entry. * Protocols supported: TCP, UDP * * Usage of *ignored * * 1 : protocol tried to schedule (eg. on SYN), found svc but the * svc/scheduler decides that this packet should be accepted with * NF_ACCEPT because it must not be scheduled. * * 0 : scheduler can not find destination, so try bypass or * return ICMP and then NF_DROP (ip_vs_leave). * * -1 : scheduler tried to schedule but fatal error occurred, eg. 
* ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param * failure such as missing Call-ID, ENOMEM on skb_linearize * or pe_data. In this case we should return NF_DROP without * any attempts to send ICMP with ip_vs_leave. */ struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *ignored, struct ip_vs_iphdr *iph) { struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; struct ip_vs_scheduler *sched; struct ip_vs_dest *dest; __be16 _ports[2], *pptr, cport, vport; const void *caddr, *vaddr; unsigned int flags; *ignored = 1; /* * IPv6 frags, only the first hit here. */ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports); if (pptr == NULL) return NULL; if (likely(!ip_vs_iph_inverse(iph))) { cport = pptr[0]; caddr = &iph->saddr; vport = pptr[1]; vaddr = &iph->daddr; } else { cport = pptr[1]; caddr = &iph->daddr; vport = pptr[0]; vaddr = &iph->saddr; } /* * FTPDATA needs this check when using local real server. * Never schedule Active FTPDATA connections from real server. * For LVS-NAT they must be already created. For other methods * with persistence the connection is created on SYN+ACK. */ if (cport == FTPDATA) { IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, "Not scheduling FTPDATA"); return NULL; } /* * Do not schedule replies from local real server. 
*/ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) { iph->hdr_flags ^= IP_VS_HDR_INVERSE; cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, svc->ipvs, svc->af, skb, iph); iph->hdr_flags ^= IP_VS_HDR_INVERSE; if (cp) { IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, "Not scheduling reply for existing" " connection"); __ip_vs_conn_put(cp); return NULL; } } /* * Persistent service */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) return ip_vs_sched_persist(svc, skb, cport, vport, ignored, iph); *ignored = 0; /* * Non-persistent service */ if (!svc->fwmark && vport != svc->port) { if (!svc->port) pr_err("Schedule: port zero only supported " "in persistent services, " "check your ipvs configuration\n"); return NULL; } sched = rcu_dereference(svc->scheduler); if (sched) { /* read svc->sched_data after svc->scheduler */ smp_rmb(); dest = sched->schedule(svc, skb, iph); } else { dest = NULL; } if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; } flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; /* * Create a connection entry. */ { struct ip_vs_conn_param p; ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, caddr, cport, vaddr, vport, &p); cp = ip_vs_conn_new(&p, dest->af, &dest->addr, dest->port ? 
dest->port : vport, flags, dest, skb->mark); if (!cp) { *ignored = -1; return NULL; } } IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " "d:%s:%u conn->flags:%X conn->refcnt:%d\n", ip_vs_fwd_tag(cp), IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport), cp->flags, refcount_read(&cp->refcnt)); ip_vs_conn_stats(cp, svc); return cp; } static inline int ip_vs_addr_is_unicast(struct net *net, int af, union nf_inet_addr *addr) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) return ipv6_addr_type(&addr->in6) & IPV6_ADDR_UNICAST; #endif return (inet_addr_type(net, addr->ip) == RTN_UNICAST); } /* * Pass or drop the packet. * Called by ip_vs_in, when the virtual service is available but * no destination is available for a new connection. */ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph) { __be16 _ports[2], *pptr, dport; struct netns_ipvs *ipvs = svc->ipvs; struct net *net = ipvs->net; pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports); if (!pptr) return NF_DROP; dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0]; /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is a non-local unicast, then create a cache_bypass connection entry */ if (sysctl_cache_bypass(ipvs) && svc->fwmark && !(iph->hdr_flags & (IP_VS_HDR_INVERSE | IP_VS_HDR_ICMP)) && ip_vs_addr_is_unicast(net, svc->af, &iph->daddr)) { int ret; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && iph->protocol == IPPROTO_UDP) ? 
IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; /* create a new connection entry */ IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { struct ip_vs_conn_param p; ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, &iph->saddr, pptr[0], &iph->daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, svc->af, &daddr, 0, IP_VS_CONN_F_BYPASS | flags, NULL, skb->mark); if (!cp) return NF_DROP; } /* statistics */ ip_vs_in_stats(cp, skb); /* set state */ ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); /* transmit the first SYN packet */ ret = cp->packet_xmit(skb, cp, pd->pp, iph); /* do not touch skb anymore */ if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control) atomic_inc(&cp->control->in_pkts); else atomic_inc(&cp->in_pkts); ip_vs_conn_put(cp); return ret; } /* * When the virtual ftp service is presented, packets destined * for other services on the VIP may get here (except services * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. */ if (svc->port == FTPPORT && dport != FTPPORT) return NF_ACCEPT; if (unlikely(ip_vs_iph_icmp(iph))) return NF_DROP; /* * Notify the client that the destination is unreachable, and * release the socket buffer. * Since it is in IP layer, the TCP socket is not actually * created, the TCP RST packet cannot be sent, instead that * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. 
--WZ */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) { if (!skb->dev) skb->dev = net->loopback_dev; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); } else #endif icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); return NF_DROP; } #ifdef CONFIG_SYSCTL static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return ipvs->sysctl_snat_reroute; } static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return ipvs->sysctl_nat_icmp_send; } #else static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; } static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; } #endif __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) { return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); } static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum) { if (NF_INET_LOCAL_IN == hooknum) return IP_DEFRAG_VS_IN; if (NF_INET_FORWARD == hooknum) return IP_DEFRAG_VS_FWD; return IP_DEFRAG_VS_OUT; } static inline int ip_vs_gather_frags(struct netns_ipvs *ipvs, struct sk_buff *skb, u_int32_t user) { int err; local_bh_disable(); err = ip_defrag(ipvs->net, skb, user); local_bh_enable(); if (!err) ip_send_check(ip_hdr(skb)); return err; } static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, unsigned int hooknum) { if (!sysctl_snat_reroute(ipvs)) return 0; /* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */ if (NF_INET_LOCAL_IN == hooknum) return 0; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { struct dst_entry *dst = skb_dst(skb); if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && ip6_route_me_harder(ipvs->net, skb->sk, skb) != 0) return 1; } else #endif if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(ipvs->net, skb->sk, skb, RTN_LOCAL) != 0) return 1; return 0; } /* * Packet has been made sufficiently writable in caller * - inout: 1=in->out, 0=out->in */ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, struct 
ip_vs_conn *cp, int inout) { struct iphdr *iph = ip_hdr(skb); unsigned int icmp_offset = iph->ihl*4; struct icmphdr *icmph = (struct icmphdr *)(skb_network_header(skb) + icmp_offset); struct iphdr *ciph = (struct iphdr *)(icmph + 1); if (inout) { iph->saddr = cp->vaddr.ip; ip_send_check(iph); ciph->daddr = cp->vaddr.ip; ip_send_check(ciph); } else { iph->daddr = cp->daddr.ip; ip_send_check(iph); ciph->saddr = cp->daddr.ip; ip_send_check(ciph); } /* the TCP/UDP/SCTP port */ if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol || IPPROTO_SCTP == ciph->protocol) { __be16 *ports = (void *)ciph + ciph->ihl*4; if (inout) ports[1] = cp->vport; else ports[0] = cp->dport; } /* And finally the ICMP checksum */ icmph->checksum = 0; icmph->checksum = ip_vs_checksum_complete(skb, icmp_offset); skb->ip_summed = CHECKSUM_UNNECESSARY; if (inout) IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph, "Forwarding altered outgoing ICMP"); else IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph, "Forwarding altered incoming ICMP"); } #ifdef CONFIG_IP_VS_IPV6 void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int inout) { struct ipv6hdr *iph = ipv6_hdr(skb); unsigned int icmp_offset = 0; unsigned int offs = 0; /* header offset*/ int protocol; struct icmp6hdr *icmph; struct ipv6hdr *ciph; unsigned short fragoffs; ipv6_find_hdr(skb, &icmp_offset, IPPROTO_ICMPV6, &fragoffs, NULL); icmph = (struct icmp6hdr *)(skb_network_header(skb) + icmp_offset); offs = icmp_offset + sizeof(struct icmp6hdr); ciph = (struct ipv6hdr *)(skb_network_header(skb) + offs); protocol = ipv6_find_hdr(skb, &offs, -1, &fragoffs, NULL); if (inout) { iph->saddr = cp->vaddr.in6; ciph->daddr = cp->vaddr.in6; } else { iph->daddr = cp->daddr.in6; ciph->saddr = cp->daddr.in6; } /* the TCP/UDP/SCTP port */ if (!fragoffs && (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol || IPPROTO_SCTP == protocol)) { __be16 *ports = (void 
*)(skb_network_header(skb) + offs); IP_VS_DBG(11, "%s() changed port %d to %d\n", __func__, ntohs(inout ? ports[1] : ports[0]), ntohs(inout ? cp->vport : cp->dport)); if (inout) ports[1] = cp->vport; else ports[0] = cp->dport; } /* And finally the ICMP checksum */ icmph->icmp6_cksum = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, skb->len - icmp_offset, IPPROTO_ICMPV6, 0); skb->csum_start = skb_network_header(skb) - skb->head + icmp_offset; skb->csum_offset = offsetof(struct icmp6hdr, icmp6_cksum); skb->ip_summed = CHECKSUM_PARTIAL; if (inout) IP_VS_DBG_PKT(11, AF_INET6, pp, skb, (void *)ciph - (void *)iph, "Forwarding altered outgoing ICMPv6"); else IP_VS_DBG_PKT(11, AF_INET6, pp, skb, (void *)ciph - (void *)iph, "Forwarding altered incoming ICMPv6"); } #endif /* Handle relevant response ICMP messages - forward to the right * destination host. */ static int handle_response_icmp(int af, struct sk_buff *skb, union nf_inet_addr *snet, __u8 protocol, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, unsigned int offset, unsigned int ihl, unsigned int hooknum) { unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) goto after_nat; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { /* Failed checksum! 
*/ IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n", IP_VS_DBG_ADDR(af, snet)); goto out; } if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol || IPPROTO_SCTP == protocol) offset += 2 * sizeof(__u16); if (skb_ensure_writable(skb, offset)) goto out; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) ip_vs_nat_icmp_v6(skb, pp, cp, 1); else #endif ip_vs_nat_icmp(skb, pp, cp, 1); if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto out; after_nat: /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); skb->ipvs_property = 1; if (!(cp->flags & IP_VS_CONN_F_NFCT)) ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); verdict = NF_ACCEPT; out: __ip_vs_conn_put(cp); return verdict; } /* * Handle ICMP messages in the inside-to-outside direction (outgoing). * Find any that might be relevant, check against existing connections. * Currently handles error types - unreachable, quench, ttl exceeded. */ static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, unsigned int hooknum) { struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; unsigned int offset, ihl; union nf_inet_addr snet; *related = 1; /* reassemble IP fragments */ if (ip_is_fragment(ip_hdr(skb))) { if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; } iph = ip_hdr(skb); offset = ihl = iph->ihl * 4; ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); if (ic == NULL) return NF_DROP; IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %pI4->%pI4\n", ic->type, ntohs(icmp_id(ic)), &iph->saddr, &iph->daddr); /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy * things are checked first to speed up processing.... however * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. 
*/ if ((ic->type != ICMP_DEST_UNREACH) && (ic->type != ICMP_SOURCE_QUENCH) && (ic->type != ICMP_TIME_EXCEEDED)) { *related = 0; return NF_ACCEPT; } /* Now find the contained IP header */ offset += sizeof(_icmph); cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ pp = ip_vs_proto_get(cih->protocol); if (!pp) return NF_ACCEPT; /* Is the embedded protocol header present? */ if (unlikely(cih->frag_off & htons(IP_OFFSET) && pp->dont_defrag)) return NF_ACCEPT; IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking outgoing ICMP for"); ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph); /* The embedded headers contain source and dest in reverse order */ cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto, ipvs, AF_INET, skb, &ciph); if (!cp) return NF_ACCEPT; snet.ip = iph->saddr; return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, pp, ciph.len, ihl, hooknum); } #ifdef CONFIG_IP_VS_IPV6 static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, unsigned int hooknum, struct ip_vs_iphdr *ipvsh) { struct icmp6hdr _icmph, *ic; struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; union nf_inet_addr snet; unsigned int offset; *related = 1; ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph); if (ic == NULL) return NF_DROP; /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy * things are checked first to speed up processing.... however * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) { *related = 0; return NF_ACCEPT; } /* Fragment header that is before ICMP header tells us that: * it's not an error message since they can't be fragmented. 
*/ if (ipvsh->flags & IP6_FH_F_FRAG) return NF_DROP; IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n", ic->icmp6_type, ntohs(icmpv6_id(ic)), &ipvsh->saddr, &ipvsh->daddr); if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, ipvsh->len + sizeof(_icmph), true, &ciph)) return NF_ACCEPT; /* The packet looks wrong, ignore */ pp = ip_vs_proto_get(ciph.protocol); if (!pp) return NF_ACCEPT; /* The embedded headers contain source and dest in reverse order */ cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto, ipvs, AF_INET6, skb, &ciph); if (!cp) return NF_ACCEPT; snet.in6 = ciph.saddr.in6; offset = ciph.len; return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp, pp, offset, sizeof(struct ipv6hdr), hooknum); } #endif /* * Check if sctp chunc is ABORT chunk */ static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len) { struct sctp_chunkhdr *sch, schunk; sch = skb_header_pointer(skb, nh_len + sizeof(struct sctphdr), sizeof(schunk), &schunk); if (sch == NULL) return 0; if (sch->type == SCTP_CID_ABORT) return 1; return 0; } static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) { struct tcphdr _tcph, *th; th = skb_header_pointer(skb, nh_len, sizeof(_tcph), &_tcph); if (th == NULL) return 0; return th->rst; } static inline bool is_new_conn(const struct sk_buff *skb, struct ip_vs_iphdr *iph) { switch (iph->protocol) { case IPPROTO_TCP: { struct tcphdr _tcph, *th; th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); if (th == NULL) return false; return th->syn; } case IPPROTO_SCTP: { struct sctp_chunkhdr *sch, schunk; sch = skb_header_pointer(skb, iph->len + sizeof(struct sctphdr), sizeof(schunk), &schunk); if (sch == NULL) return false; return sch->type == SCTP_CID_INIT; } default: return false; } } static inline bool is_new_conn_expected(const struct ip_vs_conn *cp, int conn_reuse_mode) { /* Controlled (FTP DATA or persistence)? 
*/ if (cp->control) return false; switch (cp->protocol) { case IPPROTO_TCP: return (cp->state == IP_VS_TCP_S_TIME_WAIT) || (cp->state == IP_VS_TCP_S_CLOSE) || ((conn_reuse_mode & 2) && (cp->state == IP_VS_TCP_S_FIN_WAIT) && (cp->flags & IP_VS_CONN_F_NOOUTPUT)); case IPPROTO_SCTP: return cp->state == IP_VS_SCTP_S_CLOSED; default: return false; } } /* Generic function to create new connections for outgoing RS packets * * Pre-requisites for successful connection creation: * 1) Virtual Service is NOT fwmark based: * In fwmark-VS actual vaddr and vport are unknown to IPVS * 2) Real Server and Virtual Service were NOT configured without port: * This is to allow match of different VS to the same RS ip-addr */ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct sk_buff *skb, const struct ip_vs_iphdr *iph, __be16 dport, __be16 cport) { struct ip_vs_conn_param param; struct ip_vs_conn *ct = NULL, *cp = NULL; const union nf_inet_addr *vaddr, *daddr, *caddr; union nf_inet_addr snet; __be16 vport; unsigned int flags; vaddr = &svc->addr; vport = svc->port; daddr = &iph->saddr; caddr = &iph->daddr; /* check pre-requisites are satisfied */ if (svc->fwmark) return NULL; if (!vport || !dport) return NULL; /* for persistent service first create connection template */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) { /* apply netmask the same way ingress-side does */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) ipv6_addr_prefix(&snet.in6, &caddr->in6, (__force __u32)svc->netmask); else #endif snet.ip = caddr->ip & svc->netmask; /* fill params and create template if not existent */ if (ip_vs_conn_fill_param_persist(svc, skb, iph->protocol, &snet, 0, vaddr, vport, ¶m) < 0) return NULL; ct = ip_vs_ct_in_get(¶m); /* check if template exists and points to the same dest */ if (!ct || !ip_vs_check_template(ct, dest)) { ct = ip_vs_conn_new(¶m, dest->af, daddr, dport, IP_VS_CONN_F_TEMPLATE, dest, 0); if (!ct) { kfree(param.pe_data); return NULL; } 
ct->timeout = svc->timeout; } else { kfree(param.pe_data); } } /* connection flags */ flags = ((svc->flags & IP_VS_SVC_F_ONEPACKET) && iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; /* create connection */ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, caddr, cport, vaddr, vport, ¶m); cp = ip_vs_conn_new(¶m, dest->af, daddr, dport, flags, dest, 0); if (!cp) { if (ct) ip_vs_conn_put(ct); return NULL; } if (ct) { ip_vs_control_add(cp, ct); ip_vs_conn_put(ct); } ip_vs_conn_stats(cp, svc); /* return connection (will be used to handle outgoing packet) */ IP_VS_DBG_BUF(6, "New connection RS-initiated:%c c:%s:%u v:%s:%u " "d:%s:%u conn->flags:%X conn->refcnt:%d\n", ip_vs_fwd_tag(cp), IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), cp->flags, refcount_read(&cp->refcnt)); return cp; } /* Handle outgoing packets which are considered requests initiated by * real servers, so that subsequent responses from external client can be * routed to the right real server. * Used also for outgoing responses in OPS mode. * * Connection management is handled by persistent-engine specific callback. */ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum, struct netns_ipvs *ipvs, int af, struct sk_buff *skb, const struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_conn *cp = NULL; __be16 _ports[2], *pptr; if (hooknum == NF_INET_LOCAL_IN) return NULL; pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports); if (!pptr) return NULL; dest = ip_vs_find_real_service(ipvs, af, iph->protocol, &iph->saddr, pptr[0]); if (dest) { struct ip_vs_service *svc; struct ip_vs_pe *pe; svc = rcu_dereference(dest->svc); if (svc) { pe = rcu_dereference(svc->pe); if (pe && pe->conn_out) cp = pe->conn_out(svc, dest, skb, iph, pptr[0], pptr[1]); } } return cp; } /* Handle response packets: rewrite addresses and send away... 
*/
/*
 * @af selects which header is rewritten (IPv6 vs IPv4), @pd supplies the
 * protocol via pd->pp, @cp is the matched connection, @iph the parsed header
 * info and @hooknum is handed to ip_vs_route_me_harder().
 *
 * Connections whose forwarding method is not IP_VS_CONN_F_MASQ skip the
 * rewrite and go straight to accounting.
 *
 * Returns NF_ACCEPT once the packet is accounted; returns NF_STOLEN after
 * freeing the skb when skb_ensure_writable(), the protocol SNAT handler or
 * ip_vs_route_me_harder() fails.  @cp is released with ip_vs_conn_put() on
 * both exits.
 */
static unsigned int
handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
		struct ip_vs_conn *cp, struct ip_vs_iphdr *iph,
		unsigned int hooknum)
{
	struct ip_vs_protocol *pp = pd->pp;

	/* Only masqueraded connections get their source rewritten */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
		goto after_nat;

	IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");

	if (skb_ensure_writable(skb, iph->len))
		goto drop;

	/* mangle the packet */
	if (pp->snat_handler &&
	    !SNAT_CALL(pp->snat_handler, skb, pp, cp, iph))
		goto drop;

	/* Replace the source address with the virtual address of @cp */
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		ipv6_hdr(skb)->saddr = cp->vaddr.in6;
	else
#endif
	{
		ip_hdr(skb)->saddr = cp->vaddr.ip;
		ip_send_check(ip_hdr(skb));
	}

	/*
	 * nf_iterate does not expect change in the skb->dst->dev.
	 * It looks like it is not fatal to enable this code for hooks
	 * where our handlers are at the end of the chain list and
	 * when all next handlers use skb->dst->dev and not outdev.
	 * It will definitely route properly the inout NAT traffic
	 * when multiple paths are used.
	 */

	/* For policy routing, packets originating from this
	 * machine itself may be routed differently to packets
	 * passing through.  We want this packet to be routed as
	 * if it came from this machine itself.  So re-compute
	 * the routing information.
	 */
	if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum))
		goto drop;

	IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT");

after_nat:
	ip_vs_out_stats(cp, skb);
	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
	/* Flag the skb so the IPVS hooks accept it without reprocessing */
	skb->ipvs_property = 1;
	if (!(cp->flags & IP_VS_CONN_F_NFCT))
		ip_vs_notrack(skb);
	else
		ip_vs_update_conntrack(skb, cp, 0);
	ip_vs_conn_put(cp);

	return NF_ACCEPT;

drop:
	/* The skb is freed here, hence NF_STOLEN rather than NF_DROP */
	ip_vs_conn_put(cp);
	kfree_skb(skb);
	return NF_STOLEN;
}

/*
 *	Check if outgoing packet belongs to the established ip_vs_conn.
*/
/*
 * Netfilter hook body for the outgoing direction.
 *
 * Returns NF_ACCEPT without further work when the skb is already flagged
 * (skb->ipvs_property), has no dst, ipvs->enable is clear, or no protocol
 * data exists for iph.protocol.  ICMP/ICMPv6 is first offered to
 * ip_vs_out_icmp{,_v6}(); that verdict is used only when the packet is
 * reported as related.  A packet matching a connection (or, for UDP, one
 * created by __ip_vs_rs_conn_out()) is completed by handle_response().
 * Otherwise, when sysctl_nat_icmp_send() is set and the source matches a
 * real service, a port-unreachable ICMP is sent and NF_DROP returned,
 * except for TCP RST and SCTP ABORT packets.
 */
static unsigned int
ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
	struct netns_ipvs *ipvs = net_ipvs(state->net);
	unsigned int hooknum = state->hook;
	struct ip_vs_iphdr iph;
	struct ip_vs_protocol *pp;
	struct ip_vs_proto_data *pd;
	struct ip_vs_conn *cp;
	int af = state->pf;
	struct sock *sk;

	/* Already marked as IPVS request or reply? */
	if (skb->ipvs_property)
		return NF_ACCEPT;

	sk = skb_to_full_sk(skb);
	/* Bad... Do not break raw sockets */
	if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
		     af == AF_INET)) {

		if (sk->sk_family == PF_INET && inet_test_bit(NODEFRAG, sk))
			return NF_ACCEPT;
	}

	if (unlikely(!skb_dst(skb)))
		return NF_ACCEPT;

	if (!ipvs->enable)
		return NF_ACCEPT;

	ip_vs_fill_iph_skb(af, skb, false, &iph);
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6) {
		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
			int related;
			int verdict = ip_vs_out_icmp_v6(ipvs, skb, &related,
							hooknum, &iph);

			if (related)
				return verdict;
		}
	} else
#endif
		if (unlikely(iph.protocol == IPPROTO_ICMP)) {
			int related;
			int verdict = ip_vs_out_icmp(ipvs, skb, &related, hooknum);

			if (related)
				return verdict;
		}

	pd = ip_vs_proto_data_get(ipvs, iph.protocol);
	if (unlikely(!pd))
		return NF_ACCEPT;
	pp = pd->pp;

	/* reassemble IP fragments */
	/* With IPv6 support compiled in, this is guarded to AF_INET only */
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET)
#endif
		if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) {
			if (ip_vs_gather_frags(ipvs, skb,
					       ip_vs_defrag_user(hooknum)))
				return NF_STOLEN;

			/* Re-parse the header after ip_vs_gather_frags() */
			ip_vs_fill_iph_skb(AF_INET, skb, false, &iph);
		}

	/*
	 * Check if the packet belongs to an existing entry
	 */
	cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto,
			     ipvs, af, skb, &iph);

	if (likely(cp))
		return handle_response(af, skb, pd, cp, &iph, hooknum);

	/* Check for real-server-started requests */
	if (atomic_read(&ipvs->conn_out_counter)) {
		/* Currently only for UDP:
		 * connection oriented protocols typically use
		 * ephemeral ports for outgoing connections, so
		 * related incoming responses would not match any VS
		 */
		if (pp->protocol == IPPROTO_UDP) {
			cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph);
			if (likely(cp))
				return handle_response(af, skb, pd, cp, &iph,
						       hooknum);
		}
	}

	if (sysctl_nat_icmp_send(ipvs) &&
	    (pp->protocol == IPPROTO_TCP ||
	     pp->protocol == IPPROTO_UDP ||
	     pp->protocol == IPPROTO_SCTP)) {
		__be16 _ports[2], *pptr;

		pptr = frag_safe_skb_hp(skb, iph.len,
					 sizeof(_ports), _ports);
		if (pptr == NULL)
			return NF_ACCEPT;	/* Not for me */
		if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr,
					   pptr[0])) {
			/*
			 * Notify the real server: there is no
			 * existing entry if it is not RST
			 * packet or not TCP packet.
			 */
			if ((iph.protocol != IPPROTO_TCP &&
			     iph.protocol != IPPROTO_SCTP)
			     || ((iph.protocol == IPPROTO_TCP
				  && !is_tcp_reset(skb, iph.len))
				 || (iph.protocol == IPPROTO_SCTP
					&& !is_sctp_abort(skb,
						iph.len)))) {
#ifdef CONFIG_IP_VS_IPV6
				if (af == AF_INET6) {
					/* Fall back to loopback when the skb has no device */
					if (!skb->dev)
						skb->dev = ipvs->net->loopback_dev;
					icmpv6_send(skb,
						    ICMPV6_DEST_UNREACH,
						    ICMPV6_PORT_UNREACH,
						    0);
				} else
#endif
					icmp_send(skb,
						  ICMP_DEST_UNREACH,
						  ICMP_PORT_UNREACH, 0);
				return NF_DROP;
			}
		}
	}

	IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
		      "ip_vs_out: packet continues traversal as normal");
	return NF_ACCEPT;
}

/*
 * Try to obtain a connection for a packet that matched none.
 *
 * Returns 1 when *cpp holds a connection.  Returns 0 when the caller should
 * return *verdict instead: it is set to NF_ACCEPT here on a no-hit;
 * presumably pp->conn_schedule() sets it when that callback returns 0
 * (NOTE(review): confirm against the conn_schedule implementations).
 * Packets with a non-zero iph->fragoffs never reach the scheduler.
 */
static unsigned int
ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
		      struct ip_vs_proto_data *pd,
		      int *verdict, struct ip_vs_conn **cpp,
		      struct ip_vs_iphdr *iph)
{
	struct ip_vs_protocol *pp = pd->pp;

	if (!iph->fragoffs) {
		/* No (second) fragments need to enter here, as nf_defrag_ipv6
		 * replayed fragment zero will already have created the cp
		 */

		/* Schedule and create new connection entry into cpp */
		if (!pp->conn_schedule(ipvs, af, skb, pd, verdict, cpp, iph))
			return 0;
	}

	if (unlikely(!*cpp)) {
		/* sorry, all this trouble for a no-hit :) */
		IP_VS_DBG_PKT(12, af, pp, skb, iph->off,
			      "ip_vs_in: packet continues traversal as normal");

		/* Fragment couldn't be mapped to a conn entry */
		if (iph->fragoffs)
			IP_VS_DBG_PKT(7, af, pp, skb, iph->off,
				      "unhandled fragment");

		*verdict = NF_ACCEPT;
		return 0;
	}

	return 1;
}

/* Check the UDP tunnel and return its header length */
/*
 * Returns 0 when the UDP header cannot be read, no tunnel destination
 * matches (daddr, udph->dest), or the tunnel is not an acceptable GUE
 * encapsulation of IPPROTO_IPIP.  *proto is written only on success.
 */
static int ipvs_udp_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
			  unsigned int offset, __u16 af,
			  const union nf_inet_addr *daddr, __u8 *proto)
{
	struct udphdr _udph, *udph;
	struct ip_vs_dest *dest;

	udph = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
	if (!udph)
		goto unk;
	offset += sizeof(struct udphdr);
	dest = ip_vs_find_tunnel(ipvs, af, daddr, udph->dest);
	if (!dest)
		goto unk;
	if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
		struct guehdr _gueh, *gueh;

		gueh = skb_header_pointer(skb, offset, sizeof(_gueh), &_gueh);
		if (!gueh)
			goto unk;
		if (gueh->control != 0 || gueh->version != 0)
			goto unk;
		/* Later we can support also IPPROTO_IPV6 */
		if (gueh->proto_ctype != IPPROTO_IPIP)
			goto unk;
		*proto = gueh->proto_ctype;
		/* hlen is shifted left by 2 and added to the fixed header sizes */
		return sizeof(struct udphdr) + sizeof(struct guehdr) +
		       (gueh->hlen << 2);
	}

unk:
	return 0;
}

/* Check the GRE tunnel and return its header length */
/*
 * Returns 0 when the GRE base header cannot be read, no tunnel destination
 * matches daddr (port 0), flags other than GRE_CSUM are set, or the payload
 * is not ETH_P_IP.  On success *proto is set to IPPROTO_IPIP.
 */
static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
			  unsigned int offset, __u16 af,
			  const union nf_inet_addr *daddr, __u8 *proto)
{
	struct gre_base_hdr _greh, *greh;
	struct ip_vs_dest *dest;

	greh = skb_header_pointer(skb, offset, sizeof(_greh), &_greh);
	if (!greh)
		goto unk;
	dest = ip_vs_find_tunnel(ipvs, af, daddr, 0);
	if (!dest)
		goto unk;
	if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
		__be16 type;

		/* Only support version 0 and C (csum) */
		if ((greh->flags & ~GRE_CSUM) != 0)
			goto unk;
		type = greh->protocol;
		/* Later we can support also IPPROTO_IPV6 */
		if (type != htons(ETH_P_IP))
			goto unk;
		*proto = IPPROTO_IPIP;
		return gre_calc_hlen(gre_flags_to_tnl_flags(greh->flags));
	}

unk:
	return 0;
}

/*
 *	Handle ICMP messages in the outside-to-inside direction (incoming).
 *	Find any that might be relevant, check against existing connections,
 *	forward to the right destination host if relevant.
 *	Currently handles error types - unreachable, quench, ttl exceeded.
*/
/*
 * *related is set to 1 up front and reset to 0 only for ICMP types other
 * than DEST_UNREACH, SOURCE_QUENCH and TIME_EXCEEDED.  The return value is a
 * netfilter verdict: NF_ACCEPT when the message is not handled here,
 * NF_DROP on an unreadable ICMP header or failed checksum, NF_STOLEN when
 * the skb was consumed (fragment gathering or the tunnel path), otherwise
 * whatever ip_vs_icmp_xmit() returns.
 */
static int
ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
	      unsigned int hooknum)
{
	struct iphdr *iph;
	struct icmphdr	_icmph, *ic;
	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
	struct ip_vs_iphdr ciph;
	struct ip_vs_conn *cp;
	struct ip_vs_protocol *pp;
	struct ip_vs_proto_data *pd;
	unsigned int offset, offset2, ihl, verdict;
	bool tunnel, new_cp = false;
	union nf_inet_addr *raddr;
	char *outer_proto = "IPIP";

	*related = 1;

	/* reassemble IP fragments */
	if (ip_is_fragment(ip_hdr(skb))) {
		if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum)))
			return NF_STOLEN;
	}

	iph = ip_hdr(skb);
	offset = ihl = iph->ihl * 4;
	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
	if (ic == NULL)
		return NF_DROP;

	IP_VS_DBG(12, "Incoming ICMP (%d,%d) %pI4->%pI4\n",
		  ic->type, ntohs(icmp_id(ic)),
		  &iph->saddr, &iph->daddr);

	/*
	 * Work through seeing if this is for us.
	 * These checks are supposed to be in an order that means easy
	 * things are checked first to speed up processing.... however
	 * this means that some packets will manage to get a long way
	 * down this stack and then be rejected, but that's life.
	 */
	if ((ic->type != ICMP_DEST_UNREACH) &&
	    (ic->type != ICMP_SOURCE_QUENCH) &&
	    (ic->type != ICMP_TIME_EXCEEDED)) {
		*related = 0;
		return NF_ACCEPT;
	}

	/* Now find the contained IP header */
	offset += sizeof(_icmph);
	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
	if (cih == NULL)
		return NF_ACCEPT; /* The packet looks wrong, ignore */
	raddr = (union nf_inet_addr *)&cih->daddr;

	/* Special case for errors for IPIP/UDP/GRE tunnel packets */
	tunnel = false;
	if (cih->protocol == IPPROTO_IPIP) {
		struct ip_vs_dest *dest;

		if (unlikely(cih->frag_off & htons(IP_OFFSET)))
			return NF_ACCEPT;
		/* Error for our IPIP must arrive at LOCAL_IN */
		if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
			return NF_ACCEPT;
		dest = ip_vs_find_tunnel(ipvs, AF_INET, raddr, 0);
		/* Only for known tunnel */
		if (!dest || dest->tun_type != IP_VS_CONN_F_TUNNEL_TYPE_IPIP)
			return NF_ACCEPT;
		offset += cih->ihl * 4;
		cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
		if (cih == NULL)
			return NF_ACCEPT; /* The packet looks wrong, ignore */
		tunnel = true;
	} else if ((cih->protocol == IPPROTO_UDP ||	/* Can be UDP encap */
		    cih->protocol == IPPROTO_GRE) &&	/* Can be GRE encap */
		   /* Error for our tunnel must arrive at LOCAL_IN */
		   (skb_rtable(skb)->rt_flags & RTCF_LOCAL)) {
		__u8 iproto;
		int ulen;

		/* Non-first fragment has no UDP/GRE header */
		if (unlikely(cih->frag_off & htons(IP_OFFSET)))
			return NF_ACCEPT;
		offset2 = offset + cih->ihl * 4;
		if (cih->protocol == IPPROTO_UDP) {
			ulen = ipvs_udp_decap(ipvs, skb, offset2, AF_INET,
					      raddr, &iproto);
			outer_proto = "UDP";
		} else {
			ulen = ipvs_gre_decap(ipvs, skb, offset2, AF_INET,
					      raddr, &iproto);
			outer_proto = "GRE";
		}
		/* ulen == 0: not one of our tunnels, treat as plain UDP/GRE */
		if (ulen > 0) {
			/* Skip IP and UDP/GRE tunnel headers */
			offset = offset2 + ulen;
			/* Now we should be at the original IP header */
			cih = skb_header_pointer(skb, offset, sizeof(_ciph),
						 &_ciph);
			if (cih && cih->version == 4 && cih->ihl >= 5 &&
			    iproto == IPPROTO_IPIP)
				tunnel = true;
			else
				return NF_ACCEPT;
		}
	}

	pd = ip_vs_proto_data_get(ipvs, cih->protocol);
	if (!pd)
		return NF_ACCEPT;
	pp = pd->pp;

	/* Is the embedded protocol header present? */
	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
		     pp->dont_defrag))
		return NF_ACCEPT;

	IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
		      "Checking incoming ICMP for");

	/* Remember where the embedded IP header starts for the pull below */
	offset2 = offset;
	ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !tunnel, &ciph);
	offset = ciph.len;

	/* The embedded headers contain source and dest in reverse order.
	 * For IPIP/UDP/GRE tunnel this is error for request, not for reply.
	 */
	cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
			     ipvs, AF_INET, skb, &ciph);

	if (!cp) {
		int v;

		if (tunnel || !sysctl_schedule_icmp(ipvs))
			return NF_ACCEPT;

		if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
			return v;
		new_cp = true;
	}

	verdict = NF_DROP;

	/* Ensure the checksum is correct */
	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
		/* Failed checksum! */
		IP_VS_DBG(1, "Incoming ICMP: failed checksum from %pI4!\n",
			  &iph->saddr);
		goto out;
	}

	if (tunnel) {
		/* Snapshot the ICMP fields before the skb is pulled below */
		__be32 info = ic->un.gateway;
		__u8 type = ic->type;
		__u8 code = ic->code;

		/* Update the MTU */
		if (ic->type == ICMP_DEST_UNREACH &&
		    ic->code == ICMP_FRAG_NEEDED) {
			struct ip_vs_dest *dest = cp->dest;
			u32 mtu = ntohs(ic->un.frag.mtu);
			__be16 frag_off = cih->frag_off;

			/* Strip outer IP and ICMP, go to IPIP/UDP/GRE header */
			if (pskb_pull(skb, ihl + sizeof(_icmph)) == NULL)
				goto ignore_tunnel;
			offset2 -= ihl + sizeof(_icmph);
			skb_reset_network_header(skb);
			IP_VS_DBG(12, "ICMP for %s %pI4->%pI4: mtu=%u\n",
				  outer_proto, &ip_hdr(skb)->saddr,
				  &ip_hdr(skb)->daddr, mtu);
			ipv4_update_pmtu(skb, ipvs->net, mtu, 0, 0);
			/* Client uses PMTUD? */
			if (!(frag_off & htons(IP_DF)))
				goto ignore_tunnel;
			/* Prefer the resulting PMTU */
			if (dest) {
				struct ip_vs_dest_dst *dest_dst;

				dest_dst = rcu_dereference(dest->dest_dst);
				if (dest_dst)
					mtu = dst_mtu(dest_dst->dst_cache);
			}
			/* Subtract one IP header from the reported MTU, above a 68-byte floor */
			if (mtu > 68 + sizeof(struct iphdr))
				mtu -= sizeof(struct iphdr);
			info = htonl(mtu);
		}
		/* Strip outer IP, ICMP and IPIP/UDP/GRE, go to IP header of
		 * original request.
		 */
		if (pskb_pull(skb, offset2) == NULL)
			goto ignore_tunnel;
		skb_reset_network_header(skb);
		IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n",
			&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
			type, code, ntohl(info));
		icmp_send(skb, type, code, info);
		/* ICMP can be shorter but anyways, account it */
		ip_vs_out_stats(cp, skb);

ignore_tunnel:
		/* The tunnel path always consumes the skb itself */
		consume_skb(skb);
		verdict = NF_STOLEN;
		goto out;
	}

	/* do the statistics and put it back */
	ip_vs_in_stats(cp, skb);
	/* For TCP/UDP/SCTP also cover the two 16-bit port fields */
	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol ||
	    IPPROTO_SCTP == cih->protocol)
		offset += 2 * sizeof(__u16);
	verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);

out:
	/* Connections scheduled above use ip_vs_conn_put(), looked-up ones __ip_vs_conn_put() */
	if (likely(!new_cp))
		__ip_vs_conn_put(cp);
	else
		ip_vs_conn_put(cp);

	return verdict;
}

#ifdef CONFIG_IP_VS_IPV6
/*
 * IPv6 counterpart of ip_vs_in_icmp().  *related is reset to 0 only when
 * the ICMPv6 type has ICMPV6_INFOMSG_MASK set.  There is no tunnel handling
 * here; the verdict comes from ip_vs_icmp_xmit_v6() unless an earlier check
 * returns.
 */
static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
			    int *related, unsigned int hooknum,
			    struct ip_vs_iphdr *iph)
{
	struct icmp6hdr	_icmph, *ic;
	struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
	struct ip_vs_conn *cp;
	struct ip_vs_protocol *pp;
	struct ip_vs_proto_data *pd;
	unsigned int offset, verdict;
	bool new_cp = false;

	*related = 1;

	ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph);
	if (ic == NULL)
		return NF_DROP;

	/*
	 * Work through seeing if this is for us.
	 * These checks are supposed to be in an order that means easy
	 * things are checked first to speed up processing.... however
	 * this means that some packets will manage to get a long way
	 * down this stack and then be rejected, but that's life.
	 */
	if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) {
		*related = 0;
		return NF_ACCEPT;
	}
	/* Fragment header that is before ICMP header tells us that:
	 * it's not an error message since they can't be fragmented.
	 */
	if (iph->flags & IP6_FH_F_FRAG)
		return NF_DROP;

	IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n",
		  ic->icmp6_type, ntohs(icmpv6_id(ic)),
		  &iph->saddr, &iph->daddr);

	offset = iph->len + sizeof(_icmph);
	if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, offset, true, &ciph))
		return NF_ACCEPT;

	pd = ip_vs_proto_data_get(ipvs, ciph.protocol);
	if (!pd)
		return NF_ACCEPT;
	pp = pd->pp;

	/* Cannot handle fragmented embedded protocol */
	if (ciph.fragoffs)
		return NF_ACCEPT;

	IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
		      "Checking incoming ICMPv6 for");

	/* The embedded headers contain source and dest in reverse order
	 * if not from localhost
	 */
	cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
			     ipvs, AF_INET6, skb, &ciph);

	if (!cp) {
		int v;

		if (!sysctl_schedule_icmp(ipvs))
			return NF_ACCEPT;

		if (!ip_vs_try_to_schedule(ipvs, AF_INET6, skb, pd, &v, &cp, &ciph))
			return v;

		new_cp = true;
	}

	/* VS/TUN, VS/DR and LOCALNODE just let it go */
	if ((hooknum == NF_INET_LOCAL_OUT) &&
	    (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
		verdict = NF_ACCEPT;
		goto out;
	}

	/* do the statistics and put it back */
	ip_vs_in_stats(cp, skb);

	/* Need to mangle contained IPv6 header in ICMPv6 packet */
	offset = ciph.len;
	if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
	    IPPROTO_SCTP == ciph.protocol)
		offset += 2 * sizeof(__u16); /* Also mangle ports */

	verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph);

out:
	/* Connections scheduled above use ip_vs_conn_put(), looked-up ones __ip_vs_conn_put() */
	if (likely(!new_cp))
		__ip_vs_conn_put(cp);
	else
		ip_vs_conn_put(cp);

	return verdict;
}
#endif


/*
 *	Check if it's for virtual services, look it up,
 *	and send it on its way...
*/
/*
 * Netfilter hook body for the incoming direction.
 *
 * Accepted untouched: skbs already flagged with skb->ipvs_property, skbs
 * that are neither PACKET_HOST nor seen at NF_INET_LOCAL_OUT, skbs without
 * a dst, and everything while sysctl_backup_only() is set or ipvs->enable
 * is clear.  ICMP/ICMPv6 is offered to ip_vs_in_icmp{,_v6}() first.  For
 * other protocols the connection is looked up, possibly expired so that the
 * packet is rescheduled, and the packet is handed to cp->packet_xmit(),
 * whose return value becomes the verdict.
 */
static unsigned int
ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
	struct netns_ipvs *ipvs = net_ipvs(state->net);
	unsigned int hooknum = state->hook;
	struct ip_vs_iphdr iph;
	struct ip_vs_protocol *pp;
	struct ip_vs_proto_data *pd;
	struct ip_vs_conn *cp;
	int ret, pkts;
	struct sock *sk;
	int af = state->pf;

	/* Already marked as IPVS request or reply? */
	if (skb->ipvs_property)
		return NF_ACCEPT;

	/*
	 *	Big tappo:
	 *	- remote client: only PACKET_HOST
	 *	- route: used for struct net when skb->dev is unset
	 */
	if (unlikely((skb->pkt_type != PACKET_HOST &&
		      hooknum != NF_INET_LOCAL_OUT) ||
		     !skb_dst(skb))) {
		/* Header is parsed here only to feed the debug message */
		ip_vs_fill_iph_skb(af, skb, false, &iph);
		IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
			      " ignored in hook %u\n",
			      skb->pkt_type, iph.protocol,
			      IP_VS_DBG_ADDR(af, &iph.daddr), hooknum);
		return NF_ACCEPT;
	}
	/* ipvs enabled in this netns ? */
	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
		return NF_ACCEPT;

	ip_vs_fill_iph_skb(af, skb, false, &iph);

	/* Bad... Do not break raw sockets */
	sk = skb_to_full_sk(skb);
	if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
		     af == AF_INET)) {

		if (sk->sk_family == PF_INET && inet_test_bit(NODEFRAG, sk))
			return NF_ACCEPT;
	}

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6) {
		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
			int related;
			int verdict = ip_vs_in_icmp_v6(ipvs, skb, &related,
						       hooknum, &iph);

			if (related)
				return verdict;
		}
	} else
#endif
		if (unlikely(iph.protocol == IPPROTO_ICMP)) {
			int related;
			int verdict = ip_vs_in_icmp(ipvs, skb, &related,
						    hooknum);

			if (related)
				return verdict;
		}

	/* Protocol supported? */
	pd = ip_vs_proto_data_get(ipvs, iph.protocol);
	if (unlikely(!pd)) {
		/* The only way we'll see this packet again is if it's
		 * encapsulated, so mark it with ipvs_property=1 so we
		 * skip it if we're ignoring tunneled packets
		 */
		if (sysctl_ignore_tunneled(ipvs))
			skb->ipvs_property = 1;

		return NF_ACCEPT;
	}
	pp = pd->pp;
	/*
	 * Check if the packet belongs to an existing connection entry
	 */
	cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
			     ipvs, af, skb, &iph);

	/* A new-connection packet (is_new_conn()) hit an existing entry */
	if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) {
		int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
		bool old_ct = false, resched = false;

		if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
		    unlikely(!atomic_read(&cp->dest->weight))) {
			resched = true;
			old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
		} else if (conn_reuse_mode &&
			   is_new_conn_expected(cp, conn_reuse_mode)) {
			old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
			if (!atomic_read(&cp->n_control)) {
				resched = true;
			} else {
				/* Do not reschedule controlling connection
				 * that uses conntrack while it is still
				 * referenced by controlled connection(s).
				 */
				resched = !old_ct;
			}
		}

		if (resched) {
			if (!old_ct)
				cp->flags &= ~IP_VS_CONN_F_NFCT;
			if (!atomic_read(&cp->n_control))
				ip_vs_conn_expire_now(cp);
			__ip_vs_conn_put(cp);
			/* Old conntrack still in use: drop instead of rescheduling */
			if (old_ct)
				return NF_DROP;
			cp = NULL;
		}
	}

	/* Check the server status */
	if (cp && cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
		/* the destination server is not available */
		if (sysctl_expire_nodest_conn(ipvs)) {
			bool old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);

			if (!old_ct)
				cp->flags &= ~IP_VS_CONN_F_NFCT;

			ip_vs_conn_expire_now(cp);
			__ip_vs_conn_put(cp);
			if (old_ct)
				return NF_DROP;
			cp = NULL;
		} else {
			__ip_vs_conn_put(cp);
			return NF_DROP;
		}
	}

	/* No usable connection: schedule one or return the scheduler's verdict */
	if (unlikely(!cp)) {
		int v;

		if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph))
			return v;
	}

	IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet");

	ip_vs_in_stats(cp, skb);
	ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
	if (cp->packet_xmit)
		ret = cp->packet_xmit(skb, cp, pp, &iph);
		/* do not touch skb anymore */
	else {
		IP_VS_DBG_RL("warning: packet_xmit is null");
		ret = NF_ACCEPT;
	}

	/* Increase its packet counter and check if it is needed
	 * to be synchronized
	 *
	 * Sync connection if it is about to close to
	 * encourage the standby servers to update the connections timeout
	 *
	 * For ONE_PKT let ip_vs_sync_conn() do the filter work.
	 */

	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
		pkts = sysctl_sync_threshold(ipvs);
	else
		pkts = atomic_inc_return(&cp->in_pkts);

	if (ipvs->sync_state & IP_VS_STATE_MASTER)
		ip_vs_sync_conn(ipvs, cp, pkts);
	else if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control)
		/* increment is done inside ip_vs_sync_conn too */
		atomic_inc(&cp->control->in_pkts);

	ip_vs_conn_put(cp);
	return ret;
}

/*
 *	It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
 *      related packets destined for 0.0.0.0/0.
*      When fwmark-based virtual service is used, such as transparent
*      cache cluster, TCP packets can be marked and routed to ip_vs_in,
*      but ICMP destined for 0.0.0.0/0 cannot be easily marked and
*      sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain
*      and send them to ip_vs_in_icmp.
*/
/*
 * Non-ICMP (IPv4) and non-ICMPv6 (IPv6) packets are accepted untouched, as is
 * everything while sysctl_backup_only() is set or ipvs->enable is clear.
 * The "related" output of the ICMP handlers (r) is ignored here; their
 * verdict is returned directly.
 */
static unsigned int
ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
		   const struct nf_hook_state *state)
{
	struct netns_ipvs *ipvs = net_ipvs(state->net);
	int r;

	/* ipvs enabled in this netns ? */
	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
		return NF_ACCEPT;

	if (state->pf == NFPROTO_IPV4) {
		if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
			return NF_ACCEPT;
#ifdef CONFIG_IP_VS_IPV6
	} else {
		struct ip_vs_iphdr iphdr;

		ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);

		if (iphdr.protocol != IPPROTO_ICMPV6)
			return NF_ACCEPT;

		return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr);
#endif
	}

	return ip_vs_in_icmp(ipvs, skb, &r, state->hook);
}

/* IPv4 hook table handed to nf_(un)register_net_hooks() below */
static const struct nf_hook_ops ip_vs_ops4[] = {
	/* After packet filtering, change source only for VS/NAT */
	{
		.hook		= ip_vs_out_hook,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_NAT_SRC - 2,
	},
	/* After packet filtering, forward packet through VS/DR, VS/TUN,
	 * or VS/NAT(change destination), so that filtering rules can be
	 * applied to IPVS.
	 */
	{
		.hook		= ip_vs_in_hook,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_NAT_SRC - 1,
	},
	/* Before ip_vs_in, change source only for VS/NAT */
	{
		.hook		= ip_vs_out_hook,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_NAT_DST + 1,
	},
	/* After mangle, schedule and forward local requests */
	{
		.hook		= ip_vs_in_hook,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_NAT_DST + 2,
	},
	/* After packet filtering (but before ip_vs_out_icmp), catch icmp
	 * destined for 0.0.0.0/0, which is for incoming IPVS connections
	 */
	{
		.hook		= ip_vs_forward_icmp,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_FORWARD,
		.priority	= 99,
	},
	/* After packet filtering, change source only for VS/NAT */
	{
		.hook		= ip_vs_out_hook,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_FORWARD,
		.priority	= 100,
	},
};

#ifdef CONFIG_IP_VS_IPV6
/* IPv6 hook table; same layout as ip_vs_ops4 with NFPROTO_IPV6 priorities */
static const struct nf_hook_ops ip_vs_ops6[] = {
	/* After packet filtering, change source only for VS/NAT */
	{
		.hook		= ip_vs_out_hook,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP6_PRI_NAT_SRC - 2,
	},
	/* After packet filtering, forward packet through VS/DR, VS/TUN,
	 * or VS/NAT(change destination), so that filtering rules can be
	 * applied to IPVS.
	 */
	{
		.hook		= ip_vs_in_hook,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP6_PRI_NAT_SRC - 1,
	},
	/* Before ip_vs_in, change source only for VS/NAT */
	{
		.hook		= ip_vs_out_hook,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP6_PRI_NAT_DST + 1,
	},
	/* After mangle, schedule and forward local requests */
	{
		.hook		= ip_vs_in_hook,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP6_PRI_NAT_DST + 2,
	},
	/* After packet filtering (but before ip_vs_out_icmp), catch icmp
	 * destined for 0.0.0.0/0, which is for incoming IPVS connections
	 */
	{
		.hook		= ip_vs_forward_icmp,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_FORWARD,
		.priority	= 99,
	},
	/* After packet filtering, change source only for VS/NAT */
	{
		.hook		= ip_vs_out_hook,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_FORWARD,
		.priority	= 100,
	},
};
#endif

/*
 * Register the hook table for address family @af in the netns of @ipvs.
 *
 * ipvs->hooks_afmask records which families are registered (bit value 1 for
 * IPv4, 2 for IPv6), so a repeated call for the same family is a no-op that
 * returns 0.  Returns the nf_register_net_hooks() result otherwise, or
 * -EINVAL for AF_INET6 when CONFIG_IP_VS_IPV6 is not set.  Any @af other
 * than AF_INET6 selects the IPv4 table.
 */
int ip_vs_register_hooks(struct netns_ipvs *ipvs, unsigned int af)
{
	const struct nf_hook_ops *ops;
	unsigned int count;
	unsigned int afmask;
	int ret = 0;

	if (af == AF_INET6) {
#ifdef CONFIG_IP_VS_IPV6
		ops = ip_vs_ops6;
		count = ARRAY_SIZE(ip_vs_ops6);
		afmask = 2;
#else
		return -EINVAL;
#endif
	} else {
		ops = ip_vs_ops4;
		count = ARRAY_SIZE(ip_vs_ops4);
		afmask = 1;
	}

	if (!(ipvs->hooks_afmask & afmask)) {
		ret = nf_register_net_hooks(ipvs->net, ops, count);
		/* Mark the family as registered only on success */
		if (ret >= 0)
			ipvs->hooks_afmask |= afmask;
	}
	return ret;
}

/*
 * Unregister the hook table for address family @af, if ipvs->hooks_afmask
 * says it is registered; otherwise do nothing.  AF_INET6 without
 * CONFIG_IP_VS_IPV6 is silently ignored.
 */
void ip_vs_unregister_hooks(struct netns_ipvs *ipvs, unsigned int af)
{
	const struct nf_hook_ops *ops;
	unsigned int count;
	unsigned int afmask;

	if (af == AF_INET6) {
#ifdef CONFIG_IP_VS_IPV6
		ops = ip_vs_ops6;
		count = ARRAY_SIZE(ip_vs_ops6);
		afmask = 2;
#else
		return;
#endif
	} else {
		ops = ip_vs_ops4;
		count = ARRAY_SIZE(ip_vs_ops4);
		afmask = 1;
	}

	if (ipvs->hooks_afmask & afmask) {
		nf_unregister_net_hooks(ipvs->net, ops, count);
		ipvs->hooks_afmask &= ~afmask;
	}
}

/*
 *	Initialize IP Virtual Server netns mem.
*/
/*
 * Per-netns init callback (wired up as ipvs_core_ops.init below).
 *
 * Initialises the estimator, control, protocol, app, conn and sync parts in
 * that order; on failure the already-initialised parts are cleaned up in
 * reverse order via the label chain.  Returns 0 on success and -ENOMEM on
 * any failure, including a NULL net_generic() result.
 */
static int __net_init __ip_vs_init(struct net *net)
{
	struct netns_ipvs *ipvs;

	ipvs = net_generic(net, ip_vs_net_id);
	if (ipvs == NULL)
		return -ENOMEM;

	/* Hold the beast until a service is registered */
	ipvs->enable = 0;
	ipvs->net = net;
	/* Counters used for creating unique names */
	ipvs->gen = atomic_read(&ipvs_netns_cnt);
	atomic_inc(&ipvs_netns_cnt);
	net->ipvs = ipvs;

	if (ip_vs_estimator_net_init(ipvs) < 0)
		goto estimator_fail;

	if (ip_vs_control_net_init(ipvs) < 0)
		goto control_fail;

	if (ip_vs_protocol_net_init(ipvs) < 0)
		goto protocol_fail;

	if (ip_vs_app_net_init(ipvs) < 0)
		goto app_fail;

	if (ip_vs_conn_net_init(ipvs) < 0)
		goto conn_fail;

	if (ip_vs_sync_net_init(ipvs) < 0)
		goto sync_fail;

	return 0;
/*
 * Error handling
 */

sync_fail:
	ip_vs_conn_net_cleanup(ipvs);
conn_fail:
	ip_vs_app_net_cleanup(ipvs);
app_fail:
	ip_vs_protocol_net_cleanup(ipvs);
protocol_fail:
	ip_vs_control_net_cleanup(ipvs);
control_fail:
	ip_vs_estimator_net_cleanup(ipvs);
estimator_fail:
	net->ipvs = NULL;
	return -ENOMEM;
}

/*
 * exit_batch callback of ipvs_core_ops: runs ip_vs_service_nets_cleanup()
 * once for the whole list, then performs the per-netns cleanups and clears
 * net->ipvs for each entry.
 */
static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list)
{
	struct netns_ipvs *ipvs;
	struct net *net;

	ip_vs_service_nets_cleanup(net_list);	/* ip_vs_flush() with locks */
	list_for_each_entry(net, net_list, exit_list) {
		ipvs = net_ipvs(net);
		ip_vs_conn_net_cleanup(ipvs);
		ip_vs_app_net_cleanup(ipvs);
		ip_vs_protocol_net_cleanup(ipvs);
		ip_vs_control_net_cleanup(ipvs);
		ip_vs_estimator_net_cleanup(ipvs);
		IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
		net->ipvs = NULL;
	}
}

/*
 * exit_batch callback of ipvs_core_dev_ops: for each netns, unregister both
 * hook tables, clear ipvs->enable and clean up the sync part.
 */
static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
{
	struct netns_ipvs *ipvs;
	struct net *net;

	list_for_each_entry(net, net_list, exit_list) {
		ipvs = net_ipvs(net);
		ip_vs_unregister_hooks(ipvs, AF_INET);
		ip_vs_unregister_hooks(ipvs, AF_INET6);
		ipvs->enable = 0;	/* Disable packet reception */
		/* Write barrier between clearing enable and the sync cleanup */
		smp_wmb();
		ip_vs_sync_net_cleanup(ipvs);
	}
}

/* Registered with register_pernet_subsys() in ip_vs_init() */
static struct pernet_operations ipvs_core_ops = {
	.init = __ip_vs_init,
	.exit_batch = __ip_vs_cleanup_batch,
	.id   = &ip_vs_net_id,
	.size = sizeof(struct netns_ipvs),
};

/* Registered with register_pernet_device() in ip_vs_init() */
static struct pernet_operations ipvs_core_dev_ops = {
	.exit_batch = __ip_vs_dev_cleanup_batch,
};

/*
 *	Initialize IP Virtual Server
 */
/*
 * Module init.  Sets up control, protocol and connection support, registers
 * the two pernet operations and finally the netlink/ioctl interface.  On
 * failure everything set up so far is undone through the label chain and
 * the failing call's negative return value is propagated.
 */
static int __init ip_vs_init(void)
{
	int ret;

	ret = ip_vs_control_init();
	if (ret < 0) {
		pr_err("can't setup control.\n");
		goto exit;
	}

	ip_vs_protocol_init();

	ret = ip_vs_conn_init();
	if (ret < 0) {
		pr_err("can't setup connection table.\n");
		goto cleanup_protocol;
	}

	ret = register_pernet_subsys(&ipvs_core_ops);	/* Alloc ip_vs struct */
	if (ret < 0)
		goto cleanup_conn;

	ret = register_pernet_device(&ipvs_core_dev_ops);
	if (ret < 0)
		goto cleanup_sub;

	ret = ip_vs_register_nl_ioctl();
	if (ret < 0) {
		pr_err("can't register netlink/ioctl.\n");
		goto cleanup_dev;
	}

	pr_info("ipvs loaded.\n");

	return ret;

cleanup_dev:
	unregister_pernet_device(&ipvs_core_dev_ops);
cleanup_sub:
	unregister_pernet_subsys(&ipvs_core_ops);
cleanup_conn:
	ip_vs_conn_cleanup();
cleanup_protocol:
	/* Undo both ip_vs_protocol_init() and ip_vs_control_init() */
	ip_vs_protocol_cleanup();
	ip_vs_control_cleanup();
exit:
	return ret;
}

/* Module exit: tear down in the reverse order of ip_vs_init() */
static void __exit ip_vs_cleanup(void)
{
	ip_vs_unregister_nl_ioctl();
	unregister_pernet_device(&ipvs_core_dev_ops);
	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
	ip_vs_conn_cleanup();
	ip_vs_protocol_cleanup();
	ip_vs_control_cleanup();
	/* common rcu_barrier() used by:
	 * - ip_vs_control_cleanup()
	 */
	rcu_barrier();
	pr_info("ipvs unloaded.\n");
}

module_init(ip_vs_init);
module_exit(ip_vs_cleanup);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("IP Virtual Server"); |
1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_log_format.h" #include "xfs_bit.h" /* * XFS bit manipulation routines, used in non-realtime code. */ /* * Return whether bitmap is empty. * Size is number of words in the bitmap, which is padded to word boundary * Returns 1 for empty, 0 for non-empty. */ int xfs_bitmap_empty(uint *map, uint size) { uint i; for (i = 0; i < size; i++) { if (map[i] != 0) return 0; } return 1; } /* * Count the number of contiguous bits set in the bitmap starting with bit * start_bit. Size is the size of the bitmap in words. */ int xfs_contig_bits(uint *map, uint size, uint start_bit) { uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT); uint result = 0; uint tmp; size <<= BIT_TO_WORD_SHIFT; ASSERT(start_bit < size); size -= start_bit & ~(NBWORD - 1); start_bit &= (NBWORD - 1); if (start_bit) { tmp = *p++; /* set to one first offset bits prior to start */ tmp |= (~0U >> (NBWORD-start_bit)); if (tmp != ~0U) goto found; result += NBWORD; size -= NBWORD; } while (size) { if ((tmp = *p++) != ~0U) goto found; result += NBWORD; size -= NBWORD; } return result - start_bit; found: return result + ffz(tmp) - start_bit; } /* * This takes the bit number to start looking from and * returns the next set bit from there. It returns -1 * if there are no more bits set or the start bit is * beyond the end of the bitmap. * * Size is the number of words, not bytes, in the bitmap. 
*/ int xfs_next_bit(uint *map, uint size, uint start_bit) { uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT); uint result = start_bit & ~(NBWORD - 1); uint tmp; size <<= BIT_TO_WORD_SHIFT; if (start_bit >= size) return -1; size -= result; start_bit &= (NBWORD - 1); if (start_bit) { tmp = *p++; /* set to zero first offset bits prior to start */ tmp &= (~0U << start_bit); if (tmp != 0U) goto found; result += NBWORD; size -= NBWORD; } while (size) { if ((tmp = *p++) != 0U) goto found; result += NBWORD; size -= NBWORD; } return -1; found: return result + ffs(tmp) - 1; } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_PREEMPT_H #define __ASM_PREEMPT_H #include <asm/rmwcc.h> #include <asm/percpu.h> #include <asm/current.h> #include <linux/thread_info.h> #include <linux/static_call_types.h> /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 /* * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such * that a decrement hitting 0 means we can and should reschedule. */ #define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) /* * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users * that think a non-zero value indicates we cannot preempt. */ static __always_inline int preempt_count(void) { return raw_cpu_read_4(pcpu_hot.preempt_count) & ~PREEMPT_NEED_RESCHED; } static __always_inline void preempt_count_set(int pc) { int old, new; old = raw_cpu_read_4(pcpu_hot.preempt_count); do { new = (old & PREEMPT_NEED_RESCHED) | (pc & ~PREEMPT_NEED_RESCHED); } while (!raw_cpu_try_cmpxchg_4(pcpu_hot.preempt_count, &old, new)); } /* * must be macros to avoid header recursion hell */ #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ per_cpu(pcpu_hot.preempt_count, (cpu)) = PREEMPT_DISABLED; \ } while (0) /* * We fold the NEED_RESCHED bit into the preempt count such that * preempt_enable() can decrement and test for needing to reschedule with a * single instruction. 
* * We invert the actual bit, so that when the decrement hits 0 we know we both * need to resched (the bit is cleared) and can resched (no preempt count). */ static __always_inline void set_preempt_need_resched(void) { raw_cpu_and_4(pcpu_hot.preempt_count, ~PREEMPT_NEED_RESCHED); } static __always_inline void clear_preempt_need_resched(void) { raw_cpu_or_4(pcpu_hot.preempt_count, PREEMPT_NEED_RESCHED); } static __always_inline bool test_preempt_need_resched(void) { return !(raw_cpu_read_4(pcpu_hot.preempt_count) & PREEMPT_NEED_RESCHED); } /* * The various preempt_count add/sub methods */ static __always_inline void __preempt_count_add(int val) { raw_cpu_add_4(pcpu_hot.preempt_count, val); } static __always_inline void __preempt_count_sub(int val) { raw_cpu_add_4(pcpu_hot.preempt_count, -val); } /* * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule * a decrement which hits zero means we have no preempt_count and should * reschedule. */ static __always_inline bool __preempt_count_dec_and_test(void) { return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e, __percpu_arg([var])); } /* * Returns true when we need to resched and can (barring IRQ state). 
*/ static __always_inline bool should_resched(int preempt_offset) { return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset); } #ifdef CONFIG_PREEMPTION extern asmlinkage void preempt_schedule(void); extern asmlinkage void preempt_schedule_thunk(void); #define preempt_schedule_dynamic_enabled preempt_schedule_thunk #define preempt_schedule_dynamic_disabled NULL extern asmlinkage void preempt_schedule_notrace(void); extern asmlinkage void preempt_schedule_notrace_thunk(void); #define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace_thunk #define preempt_schedule_notrace_dynamic_disabled NULL #ifdef CONFIG_PREEMPT_DYNAMIC DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled); #define __preempt_schedule() \ do { \ __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule); \ asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \ } while (0) DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled); #define __preempt_schedule_notrace() \ do { \ __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule_notrace); \ asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule_notrace) : ASM_CALL_CONSTRAINT); \ } while (0) #else /* PREEMPT_DYNAMIC */ #define __preempt_schedule() \ asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT); #define __preempt_schedule_notrace() \ asm volatile ("call preempt_schedule_notrace_thunk" : ASM_CALL_CONSTRAINT); #endif /* PREEMPT_DYNAMIC */ #endif /* PREEMPTION */ #endif /* __ASM_PREEMPT_H */ |
27 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | #ifndef IOU_ALLOC_CACHE_H #define IOU_ALLOC_CACHE_H /* * Don't allow the cache to grow beyond this size. */ #define IO_ALLOC_CACHE_MAX 512 struct io_cache_entry { struct io_wq_work_node node; }; static inline bool io_alloc_cache_put(struct io_alloc_cache *cache, struct io_cache_entry *entry) { if (cache->nr_cached < cache->max_cached) { cache->nr_cached++; wq_stack_add_head(&entry->node, &cache->list); /* KASAN poisons object */ kasan_slab_free_mempool(entry); return true; } return false; } static inline bool io_alloc_cache_empty(struct io_alloc_cache *cache) { return !cache->list.next; } static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *cache) { if (cache->list.next) { struct io_cache_entry *entry; entry = container_of(cache->list.next, struct io_cache_entry, node); kasan_unpoison_range(entry, cache->elem_size); cache->list.next = cache->list.next->next; cache->nr_cached--; return entry; } return NULL; } static inline void io_alloc_cache_init(struct io_alloc_cache *cache, unsigned max_nr, size_t size) { cache->list.next = NULL; cache->nr_cached = 0; cache->max_cached = max_nr; cache->elem_size = size; } static inline void io_alloc_cache_free(struct io_alloc_cache *cache, void (*free)(struct io_cache_entry *)) { while (1) { struct io_cache_entry *entry = io_alloc_cache_get(cache); if (!entry) break; free(entry); } cache->nr_cached = 0; } #endif |
2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 | // SPDX-License-Identifier: GPL-2.0-only 
/* * net/psample/psample.c - Netlink channel for packet sampling * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/module.h> #include <linux/timekeeping.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/netlink.h> #include <net/genetlink.h> #include <net/psample.h> #include <linux/spinlock.h> #include <net/ip_tunnels.h> #include <net/dst_metadata.h> #define PSAMPLE_MAX_PACKET_SIZE 0xffff static LIST_HEAD(psample_groups_list); static DEFINE_SPINLOCK(psample_groups_lock); /* multicast groups */ enum psample_nl_multicast_groups { PSAMPLE_NL_MCGRP_CONFIG, PSAMPLE_NL_MCGRP_SAMPLE, }; static const struct genl_multicast_group psample_nl_mcgrps[] = { [PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME }, [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME, .flags = GENL_UNS_ADMIN_PERM }, }; static struct genl_family psample_nl_family __ro_after_init; static int psample_group_nl_fill(struct sk_buff *msg, struct psample_group *group, enum psample_command cmd, u32 portid, u32 seq, int flags) { void *hdr; int ret; hdr = genlmsg_put(msg, portid, seq, &psample_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; ret = nla_put_u32(msg, PSAMPLE_ATTR_SAMPLE_GROUP, group->group_num); if (ret < 0) goto error; ret = nla_put_u32(msg, PSAMPLE_ATTR_GROUP_REFCOUNT, group->refcount); if (ret < 0) goto error; ret = nla_put_u32(msg, PSAMPLE_ATTR_GROUP_SEQ, group->seq); if (ret < 0) goto error; genlmsg_end(msg, hdr); return 0; error: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { struct psample_group *group; int start = cb->args[0]; int idx = 0; int err; spin_lock_bh(&psample_groups_lock); list_for_each_entry(group, &psample_groups_list, list) { if (!net_eq(group->net, sock_net(msg->sk))) continue; if (idx < start) { idx++; continue; } err = 
psample_group_nl_fill(msg, group, PSAMPLE_CMD_NEW_GROUP, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) break; idx++; } spin_unlock_bh(&psample_groups_lock); cb->args[0] = idx; return msg->len; } static const struct genl_small_ops psample_nl_ops[] = { { .cmd = PSAMPLE_CMD_GET_GROUP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = psample_nl_cmd_get_group_dumpit, /* can be retrieved by unprivileged users */ } }; static struct genl_family psample_nl_family __ro_after_init = { .name = PSAMPLE_GENL_NAME, .version = PSAMPLE_GENL_VERSION, .maxattr = PSAMPLE_ATTR_MAX, .netnsok = true, .module = THIS_MODULE, .mcgrps = psample_nl_mcgrps, .small_ops = psample_nl_ops, .n_small_ops = ARRAY_SIZE(psample_nl_ops), .resv_start_op = PSAMPLE_CMD_GET_GROUP + 1, .n_mcgrps = ARRAY_SIZE(psample_nl_mcgrps), }; static void psample_group_notify(struct psample_group *group, enum psample_command cmd) { struct sk_buff *msg; int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return; err = psample_group_nl_fill(msg, group, cmd, 0, 0, NLM_F_MULTI); if (!err) genlmsg_multicast_netns(&psample_nl_family, group->net, msg, 0, PSAMPLE_NL_MCGRP_CONFIG, GFP_ATOMIC); else nlmsg_free(msg); } static struct psample_group *psample_group_create(struct net *net, u32 group_num) { struct psample_group *group; group = kzalloc(sizeof(*group), GFP_ATOMIC); if (!group) return NULL; group->net = net; group->group_num = group_num; list_add_tail(&group->list, &psample_groups_list); psample_group_notify(group, PSAMPLE_CMD_NEW_GROUP); return group; } static void psample_group_destroy(struct psample_group *group) { psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP); list_del(&group->list); kfree_rcu(group, rcu); } static struct psample_group * psample_group_lookup(struct net *net, u32 group_num) { struct psample_group *group; list_for_each_entry(group, &psample_groups_list, list) if ((group->group_num == group_num) && (group->net == net)) return group; 
return NULL; } struct psample_group *psample_group_get(struct net *net, u32 group_num) { struct psample_group *group; spin_lock_bh(&psample_groups_lock); group = psample_group_lookup(net, group_num); if (!group) { group = psample_group_create(net, group_num); if (!group) goto out; } group->refcount++; out: spin_unlock_bh(&psample_groups_lock); return group; } EXPORT_SYMBOL_GPL(psample_group_get); void psample_group_take(struct psample_group *group) { spin_lock_bh(&psample_groups_lock); group->refcount++; spin_unlock_bh(&psample_groups_lock); } EXPORT_SYMBOL_GPL(psample_group_take); void psample_group_put(struct psample_group *group) { spin_lock_bh(&psample_groups_lock); if (--group->refcount == 0) psample_group_destroy(group); spin_unlock_bh(&psample_groups_lock); } EXPORT_SYMBOL_GPL(psample_group_put); #ifdef CONFIG_INET static int __psample_ip_tun_to_nlattr(struct sk_buff *skb, struct ip_tunnel_info *tun_info) { unsigned short tun_proto = ip_tunnel_info_af(tun_info); const void *tun_opts = ip_tunnel_info_opts(tun_info); const struct ip_tunnel_key *tun_key = &tun_info->key; int tun_opts_len = tun_info->options_len; if (tun_key->tun_flags & TUNNEL_KEY && nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id, PSAMPLE_TUNNEL_KEY_ATTR_PAD)) return -EMSGSIZE; if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE && nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE)) return -EMSGSIZE; switch (tun_proto) { case AF_INET: if (tun_key->u.ipv4.src && nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->u.ipv4.src)) return -EMSGSIZE; if (tun_key->u.ipv4.dst && nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->u.ipv4.dst)) return -EMSGSIZE; break; case AF_INET6: if (!ipv6_addr_any(&tun_key->u.ipv6.src) && nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC, &tun_key->u.ipv6.src)) return -EMSGSIZE; if (!ipv6_addr_any(&tun_key->u.ipv6.dst) && nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST, &tun_key->u.ipv6.dst)) return -EMSGSIZE; break; 
} if (tun_key->tos && nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TOS, tun_key->tos)) return -EMSGSIZE; if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl)) return -EMSGSIZE; if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) return -EMSGSIZE; if ((tun_key->tun_flags & TUNNEL_CSUM) && nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM)) return -EMSGSIZE; if (tun_key->tp_src && nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC, tun_key->tp_src)) return -EMSGSIZE; if (tun_key->tp_dst && nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst)) return -EMSGSIZE; if ((tun_key->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; if (tun_opts_len) { if (tun_key->tun_flags & TUNNEL_GENEVE_OPT && nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS, tun_opts_len, tun_opts)) return -EMSGSIZE; else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT && nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS, tun_opts_len, tun_opts)) return -EMSGSIZE; } return 0; } static int psample_ip_tun_to_nlattr(struct sk_buff *skb, struct ip_tunnel_info *tun_info) { struct nlattr *nla; int err; nla = nla_nest_start_noflag(skb, PSAMPLE_ATTR_TUNNEL); if (!nla) return -EMSGSIZE; err = __psample_ip_tun_to_nlattr(skb, tun_info); if (err) { nla_nest_cancel(skb, nla); return err; } nla_nest_end(skb, nla); return 0; } static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) { unsigned short tun_proto = ip_tunnel_info_af(tun_info); const struct ip_tunnel_key *tun_key = &tun_info->key; int tun_opts_len = tun_info->options_len; int sum = nla_total_size(0); /* PSAMPLE_ATTR_TUNNEL */ if (tun_key->tun_flags & TUNNEL_KEY) sum += nla_total_size_64bit(sizeof(u64)); if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE) sum += nla_total_size(0); switch (tun_proto) { case AF_INET: if (tun_key->u.ipv4.src) sum += nla_total_size(sizeof(u32)); if (tun_key->u.ipv4.dst) sum += nla_total_size(sizeof(u32)); break; case 
AF_INET6: if (!ipv6_addr_any(&tun_key->u.ipv6.src)) sum += nla_total_size(sizeof(struct in6_addr)); if (!ipv6_addr_any(&tun_key->u.ipv6.dst)) sum += nla_total_size(sizeof(struct in6_addr)); break; } if (tun_key->tos) sum += nla_total_size(sizeof(u8)); sum += nla_total_size(sizeof(u8)); /* TTL */ if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) sum += nla_total_size(0); if (tun_key->tun_flags & TUNNEL_CSUM) sum += nla_total_size(0); if (tun_key->tp_src) sum += nla_total_size(sizeof(u16)); if (tun_key->tp_dst) sum += nla_total_size(sizeof(u16)); if (tun_key->tun_flags & TUNNEL_OAM) sum += nla_total_size(0); if (tun_opts_len) { if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) sum += nla_total_size(tun_opts_len); else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT) sum += nla_total_size(tun_opts_len); } return sum; } #endif void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, u32 sample_rate, const struct psample_metadata *md) { ktime_t tstamp = ktime_get_real(); int out_ifindex = md->out_ifindex; int in_ifindex = md->in_ifindex; u32 trunc_size = md->trunc_size; #ifdef CONFIG_INET struct ip_tunnel_info *tun_info; #endif struct sk_buff *nl_skb; int data_len; int meta_len; void *data; int ret; meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) + (out_ifindex ? nla_total_size(sizeof(u16)) : 0) + (md->out_tc_valid ? nla_total_size(sizeof(u16)) : 0) + (md->out_tc_occ_valid ? nla_total_size_64bit(sizeof(u64)) : 0) + (md->latency_valid ? 
nla_total_size_64bit(sizeof(u64)) : 0) + nla_total_size(sizeof(u32)) + /* sample_rate */ nla_total_size(sizeof(u32)) + /* orig_size */ nla_total_size(sizeof(u32)) + /* group_num */ nla_total_size(sizeof(u32)) + /* seq */ nla_total_size_64bit(sizeof(u64)) + /* timestamp */ nla_total_size(sizeof(u16)); /* protocol */ #ifdef CONFIG_INET tun_info = skb_tunnel_info(skb); if (tun_info) meta_len += psample_tunnel_meta_len(tun_info); #endif data_len = min(skb->len, trunc_size); if (meta_len + nla_total_size(data_len) > PSAMPLE_MAX_PACKET_SIZE) data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN - NLA_ALIGNTO; nl_skb = genlmsg_new(meta_len + nla_total_size(data_len), GFP_ATOMIC); if (unlikely(!nl_skb)) return; data = genlmsg_put(nl_skb, 0, 0, &psample_nl_family, 0, PSAMPLE_CMD_SAMPLE); if (unlikely(!data)) goto error; if (in_ifindex) { ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_IIFINDEX, in_ifindex); if (unlikely(ret < 0)) goto error; } if (out_ifindex) { ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_OIFINDEX, out_ifindex); if (unlikely(ret < 0)) goto error; } ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_SAMPLE_RATE, sample_rate); if (unlikely(ret < 0)) goto error; ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_ORIGSIZE, skb->len); if (unlikely(ret < 0)) goto error; ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_SAMPLE_GROUP, group->group_num); if (unlikely(ret < 0)) goto error; ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_GROUP_SEQ, group->seq++); if (unlikely(ret < 0)) goto error; if (md->out_tc_valid) { ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_OUT_TC, md->out_tc); if (unlikely(ret < 0)) goto error; } if (md->out_tc_occ_valid) { ret = nla_put_u64_64bit(nl_skb, PSAMPLE_ATTR_OUT_TC_OCC, md->out_tc_occ, PSAMPLE_ATTR_PAD); if (unlikely(ret < 0)) goto error; } if (md->latency_valid) { ret = nla_put_u64_64bit(nl_skb, PSAMPLE_ATTR_LATENCY, md->latency, PSAMPLE_ATTR_PAD); if (unlikely(ret < 0)) goto error; } ret = nla_put_u64_64bit(nl_skb, PSAMPLE_ATTR_TIMESTAMP, ktime_to_ns(tstamp), PSAMPLE_ATTR_PAD); if 
(unlikely(ret < 0)) goto error; ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_PROTO, be16_to_cpu(skb->protocol)); if (unlikely(ret < 0)) goto error; if (data_len) { int nla_len = nla_total_size(data_len); struct nlattr *nla; nla = skb_put(nl_skb, nla_len); nla->nla_type = PSAMPLE_ATTR_DATA; nla->nla_len = nla_attr_size(data_len); if (skb_copy_bits(skb, 0, nla_data(nla), data_len)) goto error; } #ifdef CONFIG_INET if (tun_info) { ret = psample_ip_tun_to_nlattr(nl_skb, tun_info); if (unlikely(ret < 0)) goto error; } #endif genlmsg_end(nl_skb, data); genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); return; error: pr_err_ratelimited("Could not create psample log message\n"); nlmsg_free(nl_skb); } EXPORT_SYMBOL_GPL(psample_sample_packet); static int __init psample_module_init(void) { return genl_register_family(&psample_nl_family); } static void __exit psample_module_exit(void) { genl_unregister_family(&psample_nl_family); } module_init(psample_module_init); module_exit(psample_module_exit); MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>"); MODULE_DESCRIPTION("netlink channel for packet sampling"); MODULE_LICENSE("GPL v2"); |
388 22 12305 19 2 18 7 2 38 4 22 27 10 14 8 25 41 45 49 7 7 1 4 25 23 21 4 7 4 17 4 4 20 3 6 4 2 1 4 4 56 56 41 26 8 23 30 1 44 26 39 1 1 14 9 7 5 4 13 4 1 1 6 2 8 7 3 7 8 18 19 2 4 6 9 1 3 8 1 8 3 8 14 4 3 58 29 34 17 16 43 31 1 14 31 14 46 2 34 55 5 21 26 98 44 4 50 4 2 1 1 2 80 1 4 8 1 1 4 4 4 2 67 4 19 14 3 2 6 1 12 2 9 19 1 15 10 11 3 27 6 8 16 54 58 8 3413 11 21 10 21 5 1 1 10 3551 3543 2 1 6 12331 194 7 4 4 4 26 26 7 10636 20 20 10612 10607 11 3369 3373 7555 187 3006 2 1 3161 1192 14 36 4 32 36 34 38 5 2 18 2 20 1 1 2 358 357 1 1 1 1 1 336 335 1 1 1 486 485 16 4 12 2 4 10 1 3 1 1 3 2 2 1 3 1 5 5 4 1 13 4 12 10 6 8 8 1 8 8 1 || // SPDX-License-Identifier: GPL-2.0-only /* * Simple NUMA memory policy for the Linux kernel. * * Copyright 2003,2004 Andi Kleen, SuSE Labs. * (C) Copyright 2005 Christoph Lameter, Silicon Graphics, Inc. * * NUMA policy allows the user to give hints in which node(s) memory should * be allocated. * * Support four policies per VMA and per process: * * The VMA policy has priority over the process policy for a page fault. * * interleave Allocate memory interleaved over a set of nodes, * with normal fallback if it fails. * For VMA based allocations this interleaves based on the * offset into the backing object or offset into the mapping * for anonymous memory. For process policy an process counter * is used. * * bind Only allocate memory on a specific set of nodes, * no fallback. * FIXME: memory is allocated starting with the first node * to the last. It would be better if bind would truly restrict * the allocation to memory nodes instead * * preferred Try a specific node first before normal fallback. * As a special case NUMA_NO_NODE here means do the allocation * on the local CPU. This is normally identical to default, * but useful to set in a VMA when you have a non default * process policy. * * preferred many Try a set of nodes first before normal fallback. This is * similar to preferred without the special case. 
* * default Allocate on the local node first, or when on a VMA * use the process policy. This is what Linux always did * in a NUMA aware kernel and still does by, ahem, default. * * The process policy is applied for most non interrupt memory allocations * in that process' context. Interrupts ignore the policies and always * try to allocate on the local CPU. The VMA policy is only applied for memory * allocations for a VMA in the VM. * * Currently there are a few corner cases in swapping where the policy * is not applied, but the majority should be handled. When process policy * is used it is not remembered over swap outs/swap ins. * * Only the highest zone in the zone hierarchy gets policied. Allocations * requesting a lower zone just use default policy. This implies that * on systems with highmem kernel lowmem allocation don't get policied. * Same with GFP_DMA allocations. * * For shmem/tmpfs shared memory the policy is shared between * all users and remembered even when nobody has memory mapped. */ /* Notebook: fix mmap readahead to honour policy and enable policy for any page cache object statistics for bigpages global policy for page cache? currently it uses process policy. Requires first item above. handle mremap for shared memory (currently ignored for the policy) grows down? make bind policy root only? It can trigger oom much faster and the kernel is not always grateful with that. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/mempolicy.h> #include <linux/pagewalk.h> #include <linux/highmem.h> #include <linux/hugetlb.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/numa_balancing.h> #include <linux/sched/task.h> #include <linux/nodemask.h> #include <linux/cpuset.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/export.h> #include <linux/nsproxy.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/compat.h> #include <linux/ptrace.h> #include <linux/swap.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> #include <linux/migrate.h> #include <linux/ksm.h> #include <linux/rmap.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/ctype.h> #include <linux/mm_inline.h> #include <linux/mmu_notifier.h> #include <linux/printk.h> #include <linux/swapops.h> #include <asm/tlbflush.h> #include <asm/tlb.h> #include <linux/uaccess.h> #include "internal.h" /* Internal flags */ #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */ #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */ #define MPOL_MF_WRLOCK (MPOL_MF_INTERNAL << 2) /* Write-lock walked vmas */ static struct kmem_cache *policy_cache; static struct kmem_cache *sn_cache; /* Highest zone. A specific allocation for a zone below that is not policied. */ enum zone_type policy_zone = 0; /* * run-time system-wide default policy => local allocation */ static struct mempolicy default_policy = { .refcnt = ATOMIC_INIT(1), /* never free it */ .mode = MPOL_LOCAL, }; static struct mempolicy preferred_node_policy[MAX_NUMNODES]; /** * numa_nearest_node - Find nearest node by state * @node: Node id to start the search * @state: State to filter the search * * Lookup the closest node by distance if @node is not in state. 
* * Return: this @node if it is in state, otherwise the closest node by distance */ int numa_nearest_node(int node, unsigned int state) { int min_dist = INT_MAX, dist, n, min_node; if (state >= NR_NODE_STATES) return -EINVAL; if (node == NUMA_NO_NODE || node_state(node, state)) return node; min_node = node; for_each_node_state(n, state) { dist = node_distance(node, n); if (dist < min_dist) { min_dist = dist; min_node = n; } } return min_node; } EXPORT_SYMBOL_GPL(numa_nearest_node); struct mempolicy *get_task_policy(struct task_struct *p) { struct mempolicy *pol = p->mempolicy; int node; if (pol) return pol; node = numa_node_id(); if (node != NUMA_NO_NODE) { pol = &preferred_node_policy[node]; /* preferred_node_policy is not initialised early in boot */ if (pol->mode) return pol; } return &default_policy; } static const struct mempolicy_operations { int (*create)(struct mempolicy *pol, const nodemask_t *nodes); void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes); } mpol_ops[MPOL_MAX]; static inline int mpol_store_user_nodemask(const struct mempolicy *pol) { return pol->flags & MPOL_MODE_FLAGS; } static void mpol_relative_nodemask(nodemask_t *ret, const nodemask_t *orig, const nodemask_t *rel) { nodemask_t tmp; nodes_fold(tmp, *orig, nodes_weight(*rel)); nodes_onto(*ret, tmp, *rel); } static int mpol_new_nodemask(struct mempolicy *pol, const nodemask_t *nodes) { if (nodes_empty(*nodes)) return -EINVAL; pol->nodes = *nodes; return 0; } static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes) { if (nodes_empty(*nodes)) return -EINVAL; nodes_clear(pol->nodes); node_set(first_node(*nodes), pol->nodes); return 0; } /* * mpol_set_nodemask is called after mpol_new() to set up the nodemask, if * any, for the new policy. mpol_new() has already validated the nodes * parameter with respect to the policy mode and flags. * * Must be called holding task's alloc_lock to protect task's mems_allowed * and mempolicy. 
May also be called holding the mmap_lock for write. */ static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes, struct nodemask_scratch *nsc) { int ret; /* * Default (pol==NULL) resp. local memory policies are not a * subject of any remapping. They also do not need any special * constructor. */ if (!pol || pol->mode == MPOL_LOCAL) return 0; /* Check N_MEMORY */ nodes_and(nsc->mask1, cpuset_current_mems_allowed, node_states[N_MEMORY]); VM_BUG_ON(!nodes); if (pol->flags & MPOL_F_RELATIVE_NODES) mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1); else nodes_and(nsc->mask2, *nodes, nsc->mask1); if (mpol_store_user_nodemask(pol)) pol->w.user_nodemask = *nodes; else pol->w.cpuset_mems_allowed = cpuset_current_mems_allowed; ret = mpol_ops[pol->mode].create(pol, &nsc->mask2); return ret; } /* * This function just creates a new policy, does some check and simple * initialization. You must invoke mpol_set_nodemask() to set nodes. */ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, nodemask_t *nodes) { struct mempolicy *policy; if (mode == MPOL_DEFAULT) { if (nodes && !nodes_empty(*nodes)) return ERR_PTR(-EINVAL); return NULL; } VM_BUG_ON(!nodes); /* * MPOL_PREFERRED cannot be used with MPOL_F_STATIC_NODES or * MPOL_F_RELATIVE_NODES if the nodemask is empty (local allocation). * All other modes require a valid pointer to a non-empty nodemask. 
*/ if (mode == MPOL_PREFERRED) { if (nodes_empty(*nodes)) { if (((flags & MPOL_F_STATIC_NODES) || (flags & MPOL_F_RELATIVE_NODES))) return ERR_PTR(-EINVAL); mode = MPOL_LOCAL; } } else if (mode == MPOL_LOCAL) { if (!nodes_empty(*nodes) || (flags & MPOL_F_STATIC_NODES) || (flags & MPOL_F_RELATIVE_NODES)) return ERR_PTR(-EINVAL); } else if (nodes_empty(*nodes)) return ERR_PTR(-EINVAL); policy = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!policy) return ERR_PTR(-ENOMEM); atomic_set(&policy->refcnt, 1); policy->mode = mode; policy->flags = flags; policy->home_node = NUMA_NO_NODE; return policy; } /* Slow path of a mpol destructor. */ void __mpol_put(struct mempolicy *pol) { if (!atomic_dec_and_test(&pol->refcnt)) return; kmem_cache_free(policy_cache, pol); } static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes) { } static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes) { nodemask_t tmp; if (pol->flags & MPOL_F_STATIC_NODES) nodes_and(tmp, pol->w.user_nodemask, *nodes); else if (pol->flags & MPOL_F_RELATIVE_NODES) mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); else { nodes_remap(tmp, pol->nodes, pol->w.cpuset_mems_allowed, *nodes); pol->w.cpuset_mems_allowed = *nodes; } if (nodes_empty(tmp)) tmp = *nodes; pol->nodes = tmp; } static void mpol_rebind_preferred(struct mempolicy *pol, const nodemask_t *nodes) { pol->w.cpuset_mems_allowed = *nodes; } /* * mpol_rebind_policy - Migrate a policy to a different set of nodes * * Per-vma policies are protected by mmap_lock. Allocations using per-task * policies are protected by task->mems_allowed_seq to prevent a premature * OOM/allocation failure due to parallel nodemask modification. 
*/ static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) { if (!pol || pol->mode == MPOL_LOCAL) return; if (!mpol_store_user_nodemask(pol) && nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) return; mpol_ops[pol->mode].rebind(pol, newmask); } /* * Wrapper for mpol_rebind_policy() that just requires task * pointer, and updates task mempolicy. * * Called with task's alloc_lock held. */ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) { mpol_rebind_policy(tsk->mempolicy, new); } /* * Rebind each vma in mm to new nodemask. * * Call holding a reference to mm. Takes mm->mmap_lock during call. */ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) { struct vm_area_struct *vma; VMA_ITERATOR(vmi, mm, 0); mmap_write_lock(mm); for_each_vma(vmi, vma) { vma_start_write(vma); mpol_rebind_policy(vma->vm_policy, new); } mmap_write_unlock(mm); } static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { [MPOL_DEFAULT] = { .rebind = mpol_rebind_default, }, [MPOL_INTERLEAVE] = { .create = mpol_new_nodemask, .rebind = mpol_rebind_nodemask, }, [MPOL_PREFERRED] = { .create = mpol_new_preferred, .rebind = mpol_rebind_preferred, }, [MPOL_BIND] = { .create = mpol_new_nodemask, .rebind = mpol_rebind_nodemask, }, [MPOL_LOCAL] = { .rebind = mpol_rebind_default, }, [MPOL_PREFERRED_MANY] = { .create = mpol_new_nodemask, .rebind = mpol_rebind_preferred, }, }; static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist, unsigned long flags); static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol, pgoff_t ilx, int *nid); static bool strictly_unmovable(unsigned long flags) { /* * STRICT without MOVE flags lets do_mbind() fail immediately with -EIO * if any misplaced page is found. 
*/
	return (flags & (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ==
			 MPOL_MF_STRICT;
}

/* Context passed to alloc_migration_target_by_mpol() via its private arg. */
struct migration_mpol {		/* for alloc_migration_target_by_mpol() */
	struct mempolicy *pol;
	pgoff_t ilx;
};

/* Page-walk state shared by the queue_folios_*() callbacks. */
struct queue_pages {
	struct list_head *pagelist;	/* isolated folios are added here */
	unsigned long flags;		/* MPOL_MF_* flags of the request */
	nodemask_t *nmask;		/* nodes tested by queue_folio_required() */
	unsigned long start;		/* start of the requested range */
	unsigned long end;		/* end of the requested range */
	struct vm_area_struct *first;	/* first VMA seen by the test walk */
	struct folio *large;	/* note last large folio encountered */
	long nr_failed;		/* could not be isolated at this time */
};

/*
 * Check if the folio's nid is in qp->nmask.
 *
 * If MPOL_MF_INVERT is set in qp->flags, check if the nid is
 * in the invert of qp->nmask.
 */
static inline bool queue_folio_required(struct folio *folio,
					struct queue_pages *qp)
{
	int nid = folio_nid(folio);
	unsigned long flags = qp->flags;

	return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
}

/*
 * Handle one huge PMD entry for queue_folios_pte_range(), which calls this
 * with the PMD lock held.  Counts a failure in qp->nr_failed when the entry
 * is under migration, or when a required folio cannot be queued for moving.
 */
static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk)
{
	struct folio *folio;
	struct queue_pages *qp = walk->private;

	if (unlikely(is_pmd_migration_entry(*pmd))) {
		qp->nr_failed++;
		return;
	}
	folio = pfn_folio(pmd_pfn(*pmd));
	if (is_huge_zero_page(&folio->page)) {
		walk->action = ACTION_CONTINUE;
		return;
	}
	if (!queue_folio_required(folio, qp))
		return;
	/* Failure: moving not requested, VMA not migratable, or not isolated. */
	if (!(qp->flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ||
	    !vma_migratable(walk->vma) ||
	    !migrate_folio_add(folio, qp->pagelist, qp->flags))
		qp->nr_failed++;
}

/*
 * Scan through folios, checking if they satisfy the required conditions,
 * moving them from LRU to local pagelist for migration if they do (or not).
 *
 * queue_folios_pte_range() has two possible return values:
 * 0 - continue walking to scan for more, even if an existing folio on the
 *     wrong node could not be isolated and queued for migration.
 * -EIO - only MPOL_MF_STRICT was specified, without MPOL_MF_MOVE or ..._ALL,
 *        and an existing folio was on a node that does not follow the policy.
*/
static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
			unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct folio *folio;
	struct queue_pages *qp = walk->private;
	unsigned long flags = qp->flags;
	pte_t *pte, *mapped_pte;
	pte_t ptent;
	spinlock_t *ptl;

	/* A huge PMD is handled as a single unit by queue_folios_pmd(). */
	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		queue_folios_pmd(pmd, walk);
		spin_unlock(ptl);
		goto out;
	}

	mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!pte) {
		/* Could not map the page table: ask the walker to retry. */
		walk->action = ACTION_AGAIN;
		return 0;
	}
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		ptent = ptep_get(pte);
		if (pte_none(ptent))
			continue;
		if (!pte_present(ptent)) {
			/* A migration entry counts as a failure to isolate. */
			if (is_migration_entry(pte_to_swp_entry(ptent)))
				qp->nr_failed++;
			continue;
		}
		folio = vm_normal_folio(vma, addr, ptent);
		if (!folio || folio_is_zone_device(folio))
			continue;
		/*
		 * vm_normal_folio() filters out zero pages, but there might
		 * still be reserved folios to skip, perhaps in a VDSO.
		 */
		if (folio_test_reserved(folio))
			continue;
		if (!queue_folio_required(folio, qp))
			continue;
		if (folio_test_large(folio)) {
			/*
			 * A large folio can only be isolated from LRU once,
			 * but may be mapped by many PTEs (and Copy-On-Write may
			 * intersperse PTEs of other, order 0, folios).  This is
			 * a common case, so don't mistake it for failure (but
			 * there can be other cases of multi-mapped pages which
			 * this quick check does not help to filter out - and a
			 * search of the pagelist might grow to be prohibitive).
			 *
			 * migrate_pages(&pagelist) returns nr_failed folios, so
			 * check "large" now so that queue_pages_range() returns
			 * a comparable nr_failed folios.  This does imply that
			 * if folio could not be isolated for some racy reason
			 * at its first PTE, later PTEs will not give it another
			 * chance of isolation; but keeps the accounting simple.
*/
			if (folio == qp->large)
				continue;
			qp->large = folio;
		}
		if (!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ||
		    !vma_migratable(vma) ||
		    !migrate_folio_add(folio, qp->pagelist, flags)) {
			qp->nr_failed++;
			/* One misplaced folio is enough to fail a strict check. */
			if (strictly_unmovable(flags))
				break;
		}
	}
	pte_unmap_unlock(mapped_pte, ptl);
	cond_resched();
out:
	if (qp->nr_failed && strictly_unmovable(flags))
		return -EIO;
	return 0;
}

/*
 * hugetlb_entry callback of the queue_pages walk: examine one hugetlb entry
 * under its page-table lock and, if the folio is required and may be moved,
 * try to isolate it onto qp->pagelist.
 *
 * Returns -EIO when a failure has been recorded and only MPOL_MF_STRICT
 * (no MOVE flag) was requested, otherwise 0.  Without CONFIG_HUGETLB_PAGE
 * the function always returns 0.
 */
static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask,
			       unsigned long addr, unsigned long end,
			       struct mm_walk *walk)
{
#ifdef CONFIG_HUGETLB_PAGE
	struct queue_pages *qp = walk->private;
	unsigned long flags = qp->flags;
	struct folio *folio;
	spinlock_t *ptl;
	pte_t entry;

	ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
	entry = huge_ptep_get(pte);
	if (!pte_present(entry)) {
		if (unlikely(is_hugetlb_entry_migration(entry)))
			qp->nr_failed++;
		goto unlock;
	}
	folio = pfn_folio(pte_pfn(entry));
	if (!queue_folio_required(folio, qp))
		goto unlock;
	if (!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ||
	    !vma_migratable(walk->vma)) {
		qp->nr_failed++;
		goto unlock;
	}
	/*
	 * Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio.
	 * Choosing not to migrate a shared folio is not counted as a failure.
	 *
	 * To check if the folio is shared, ideally we want to make sure
	 * every page is mapped to the same process. Doing that is very
	 * expensive, so check the estimated sharers of the folio instead.
	 */
	if ((flags & MPOL_MF_MOVE_ALL) ||
	    (folio_estimated_sharers(folio) == 1 && !hugetlb_pmd_shared(pte)))
		if (!isolate_hugetlb(folio, qp->pagelist))
			qp->nr_failed++;
unlock:
	spin_unlock(ptl);
	if (qp->nr_failed && strictly_unmovable(flags))
		return -EIO;
#endif
	return 0;
}

#ifdef CONFIG_NUMA_BALANCING
/*
 * This is used to mark a range of virtual addresses to be inaccessible.
 * These are later cleared by a NUMA hinting fault. Depending on these
 * faults, pages may be migrated for better NUMA placement.
 *
 * This is assuming that NUMA faults are handled using PROT_NONE.
If
 * an architecture makes a different choice, it will need further
 * changes to the core.
 *
 * Returns the value of change_protection(); a positive value is also
 * accounted as NUMA_PTE_UPDATES events.
 */
unsigned long change_prot_numa(struct vm_area_struct *vma,
			unsigned long addr, unsigned long end)
{
	struct mmu_gather tlb;
	long nr_updated;

	tlb_gather_mmu(&tlb, vma->vm_mm);

	nr_updated = change_protection(&tlb, vma, addr, end, MM_CP_PROT_NUMA);
	if (nr_updated > 0)
		count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);

	tlb_finish_mmu(&tlb);

	return nr_updated;
}
#endif /* CONFIG_NUMA_BALANCING */

/*
 * test_walk callback of the queue_pages walk, called per VMA.
 *
 * Returns -EFAULT when the range has a hole and MPOL_MF_DISCONTIG_OK is not
 * set, 1 when this VMA need not be scanned, and 0 when its page tables
 * should be walked.
 */
static int queue_pages_test_walk(unsigned long start, unsigned long end,
				struct mm_walk *walk)
{
	struct vm_area_struct *next, *vma = walk->vma;
	struct queue_pages *qp = walk->private;
	unsigned long endvma = vma->vm_end;
	unsigned long flags = qp->flags;

	/* range check first */
	VM_BUG_ON_VMA(!range_in_vma(vma, start, end), vma);

	if (!qp->first) {
		/* Remember the first VMA; queue_pages_range() tests it later. */
		qp->first = vma;
		if (!(flags & MPOL_MF_DISCONTIG_OK) &&
			(qp->start < vma->vm_start))
			/* hole at head side of range */
			return -EFAULT;
	}
	next = find_vma(vma->vm_mm, vma->vm_end);
	if (!(flags & MPOL_MF_DISCONTIG_OK) &&
		((vma->vm_end < qp->end) &&
		(!next || vma->vm_end < next->vm_start)))
		/* hole at middle or tail of range */
		return -EFAULT;

	/*
	 * MPOL_MF_STRICT must still be checked, so that -EIO can be returned
	 * where possible, regardless of vma_migratable().
	 */
	if (!vma_migratable(vma) &&
	    !(flags & MPOL_MF_STRICT))
		return 1;

	if (endvma > end)
		endvma = end;

	/*
	 * Check page nodes, and queue pages to move, in the current vma.
	 * But if no moving, and no strict checking, the scan can be skipped.
*/
	if (flags & (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
		return 0;
	return 1;
}

/* Walk operations used when MPOL_MF_WRLOCK is not requested. */
static const struct mm_walk_ops queue_pages_walk_ops = {
	.hugetlb_entry		= queue_folios_hugetlb,
	.pmd_entry		= queue_folios_pte_range,
	.test_walk		= queue_pages_test_walk,
	.walk_lock		= PGWALK_RDLOCK,
};

/* Same callbacks, but with PGWALK_WRLOCK; selected by MPOL_MF_WRLOCK. */
static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = {
	.hugetlb_entry		= queue_folios_hugetlb,
	.pmd_entry		= queue_folios_pte_range,
	.test_walk		= queue_pages_test_walk,
	.walk_lock		= PGWALK_WRLOCK,
};

/*
 * Walk through page tables and collect pages to be migrated.
 *
 * If pages found in a given range are not on the required set of @nodes,
 * and migration is allowed, they are isolated and queued to @pagelist.
 *
 * queue_pages_range() may return:
 * 0 - all pages already on the right node, or successfully queued for moving
 *     (or neither strict checking nor moving requested: only range checking).
 * >0 - this number of misplaced folios could not be queued for moving
 *      (a hugetlbfs page or a transparent huge page being counted as 1).
 * -EIO - a misplaced page found, when MPOL_MF_STRICT specified without MOVEs.
 * -EFAULT - a hole in the memory range, when MPOL_MF_DISCONTIG_OK unspecified.
 */
static long
queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
		nodemask_t *nodes, unsigned long flags,
		struct list_head *pagelist)
{
	int err;
	struct queue_pages qp = {
		.pagelist = pagelist,
		.flags = flags,
		.nmask = nodes,
		.start = start,
		.end = end,
		.first = NULL,
	};
	const struct mm_walk_ops *ops = (flags & MPOL_MF_WRLOCK) ?
			&queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;

	err = walk_page_range(mm, start, end, ops, &qp);

	if (!qp.first)
		/* whole range in hole */
		err = -EFAULT;

	/* An error from the walk wins; otherwise report the failure count. */
	return err ? : qp.nr_failed;
}

/*
 * Apply policy to a single VMA
 * This must be called with the mmap_lock held for writing.
*/
static int vma_replace_policy(struct vm_area_struct *vma,
				struct mempolicy *pol)
{
	int err;
	struct mempolicy *old;
	struct mempolicy *new;

	vma_assert_write_locked(vma);

	/* The VMA gets its own copy of @pol. */
	new = mpol_dup(pol);
	if (IS_ERR(new))
		return PTR_ERR(new);

	if (vma->vm_ops && vma->vm_ops->set_policy) {
		err = vma->vm_ops->set_policy(vma, new);
		if (err)
			goto err_out;
	}

	old = vma->vm_policy;
	vma->vm_policy = new; /* protected by mmap_lock */
	mpol_put(old);

	return 0;
 err_out:
	mpol_put(new);
	return err;
}

/*
 * Split or merge the VMA (if required) and apply the new policy.
 *
 * @prev is updated to the VMA that was handled.  Returns 0 on success
 * (including when the VMA already has an equal policy), or the error from
 * vma_modify_policy() / vma_replace_policy().
 */
static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma,
		struct vm_area_struct **prev, unsigned long start,
		unsigned long end, struct mempolicy *new_pol)
{
	unsigned long vmstart, vmend;

	/* Clamp the range to this VMA. */
	vmend = min(end, vma->vm_end);
	if (start > vma->vm_start) {
		*prev = vma;
		vmstart = start;
	} else {
		vmstart = vma->vm_start;
	}

	if (mpol_equal(vma->vm_policy, new_pol)) {
		*prev = vma;
		return 0;
	}

	vma =  vma_modify_policy(vmi, *prev, vma, vmstart, vmend, new_pol);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	*prev = vma;
	return vma_replace_policy(vma, new_pol);
}

/*
 * Set the process memory policy.
 *
 * Builds a new policy from @mode/@flags/@nodes and installs it as
 * current->mempolicy under task_lock(); the reference on the previous
 * policy is dropped afterwards.  Returns 0 or a negative errno.
 */
static long do_set_mempolicy(unsigned short mode, unsigned short flags,
			     nodemask_t *nodes)
{
	struct mempolicy *new, *old;
	NODEMASK_SCRATCH(scratch);
	int ret;

	if (!scratch)
		return -ENOMEM;

	new = mpol_new(mode, flags, nodes);
	if (IS_ERR(new)) {
		ret = PTR_ERR(new);
		goto out;
	}

	task_lock(current);
	ret = mpol_set_nodemask(new, nodes, scratch);
	if (ret) {
		task_unlock(current);
		mpol_put(new);
		goto out;
	}

	old = current->mempolicy;
	current->mempolicy = new;
	/* Reset the interleave cursor when switching to MPOL_INTERLEAVE. */
	if (new && new->mode == MPOL_INTERLEAVE)
		current->il_prev = MAX_NUMNODES-1;
	task_unlock(current);
	mpol_put(old);
	ret = 0;
out:
	NODEMASK_SCRATCH_FREE(scratch);
	return ret;
}

/*
 * Return nodemask for policy for get_mempolicy() query
 *
 * Called with task's alloc_lock held
 */
static void get_policy_nodemask(struct mempolicy *pol, nodemask_t *nodes)
{
	nodes_clear(*nodes);
	/* The default policy reports an empty mask. */
	if (pol == &default_policy)
		return;

	switch (pol->mode) {
	case MPOL_BIND:
	case MPOL_INTERLEAVE:
	case MPOL_PREFERRED:
	case MPOL_PREFERRED_MANY:
		*nodes = pol->nodes;
		break;
	case MPOL_LOCAL:
		/* return empty node mask for local allocation */
		break;
	default:
		BUG();
	}
}

/*
 * Return the node id of the page at @addr, or the non-positive result of
 * get_user_pages_fast() when no page was obtained.  @mm is not used by the
 * body.
 */
static int lookup_node(struct mm_struct *mm, unsigned long addr)
{
	struct page *p = NULL;
	int ret;

	ret = get_user_pages_fast(addr & PAGE_MASK, 1, 0, &p);
	if (ret > 0) {
		ret = page_to_nid(p);
		put_page(p);
	}
	return ret;
}

/*
 * Retrieve NUMA policy.
 *
 * Fills *@policy (and *@nmask when non-NULL) according to @flags, which may
 * contain MPOL_F_NODE, MPOL_F_ADDR and MPOL_F_MEMS_ALLOWED.  Returns 0 or a
 * negative errno.
 */
static long do_get_mempolicy(int *policy, nodemask_t *nmask,
			     unsigned long addr, unsigned long flags)
{
	int err;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = NULL;
	struct mempolicy *pol = current->mempolicy, *pol_refcount = NULL;

	if (flags &
		~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED))
		return -EINVAL;

	if (flags & MPOL_F_MEMS_ALLOWED) {
		/* MPOL_F_MEMS_ALLOWED cannot be combined with the others. */
		if (flags & (MPOL_F_NODE|MPOL_F_ADDR))
			return -EINVAL;
		*policy = 0;	/* just so it's initialized */
		task_lock(current);
		*nmask  = cpuset_current_mems_allowed;
		task_unlock(current);
		return 0;
	}

	if (flags & MPOL_F_ADDR) {
		pgoff_t ilx;		/* ignored here */
		/*
		 * Do NOT fall back to task policy if the
		 * vma/shared policy at addr is NULL.  We
		 * want to return MPOL_DEFAULT in this case.
		 */
		mmap_read_lock(mm);
		vma = vma_lookup(mm, addr);
		if (!vma) {
			mmap_read_unlock(mm);
			return -EFAULT;
		}
		pol = __get_vma_policy(vma, addr, &ilx);
	} else if (addr)
		return -EINVAL;

	if (!pol)
		pol = &default_policy;	/* indicates default behavior */

	if (flags & MPOL_F_NODE) {
		if (flags & MPOL_F_ADDR) {
			/*
			 * Take a refcount on the mpol, because we are about to
			 * drop the mmap_lock, after which only "pol" remains
			 * valid, "vma" is stale.
*/
			pol_refcount = pol;
			vma = NULL;
			mpol_get(pol);
			mmap_read_unlock(mm);
			/* Report the node of the page at addr in *policy. */
			err = lookup_node(mm, addr);
			if (err < 0)
				goto out;
			*policy = err;
		} else if (pol == current->mempolicy &&
				pol->mode == MPOL_INTERLEAVE) {
			/* Report the next node the task would interleave to. */
			*policy = next_node_in(current->il_prev, pol->nodes);
		} else {
			err = -EINVAL;
			goto out;
		}
	} else {
		*policy = pol == &default_policy ? MPOL_DEFAULT :
						pol->mode;
		/*
		 * Internal mempolicy flags must be masked off before exposing
		 * the policy to userspace.
		 */
		*policy |= (pol->flags & MPOL_MODE_FLAGS);
	}

	err = 0;
	if (nmask) {
		if (mpol_store_user_nodemask(pol)) {
			*nmask = pol->w.user_nodemask;
		} else {
			task_lock(current);
			get_policy_nodemask(pol, nmask);
			task_unlock(current);
		}
	}

 out:
	mpol_cond_put(pol);
	/* vma is still set only if mmap_lock was not dropped above. */
	if (vma)
		mmap_read_unlock(mm);
	if (pol_refcount)
		mpol_put(pol_refcount);
	return err;
}

#ifdef CONFIG_MIGRATION
/*
 * Try to isolate @folio from the LRU and add it to @foliolist.
 *
 * Returns false only when isolation was attempted and failed; returns true
 * when the folio was queued, and also when it was deliberately skipped as
 * shared.
 */
static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
				unsigned long flags)
{
	/*
	 * Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio.
	 * Choosing not to migrate a shared folio is not counted as a failure.
	 *
	 * To check if the folio is shared, ideally we want to make sure
	 * every page is mapped to the same process. Doing that is very
	 * expensive, so check the estimated sharers of the folio instead.
	 */
	if ((flags & MPOL_MF_MOVE_ALL) || folio_estimated_sharers(folio) == 1) {
		if (folio_isolate_lru(folio)) {
			list_add_tail(&folio->lru, foliolist);
			node_stat_mod_folio(folio,
				NR_ISOLATED_ANON + folio_is_file_lru(folio),
				folio_nr_pages(folio));
		} else {
			/*
			 * Non-movable folio may reach here.  And, there may be
			 * temporary off LRU folios or non-LRU movable folios.
			 * Treat them as unmovable folios since they can't be
			 * isolated, so they can't be moved at the moment.
			 */
			return false;
		}
	}
	return true;
}

/*
 * Migrate pages from one node to a target node.
 * Returns error or the number of pages not migrated.
*/ static long migrate_to_node(struct mm_struct *mm, int source, int dest, int flags) { nodemask_t nmask; struct vm_area_struct *vma; LIST_HEAD(pagelist); long nr_failed; long err = 0; struct migration_target_control mtc = { .nid = dest, .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, }; nodes_clear(nmask); node_set(source, nmask); VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))); mmap_read_lock(mm); vma = find_vma(mm, 0); /* * This does not migrate the range, but isolates all pages that * need migration. Between passing in the full user address * space range and MPOL_MF_DISCONTIG_OK, this call cannot fail, * but passes back the count of pages which could not be isolated. */ nr_failed = queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask, flags | MPOL_MF_DISCONTIG_OK, &pagelist); mmap_read_unlock(mm); if (!list_empty(&pagelist)) { err = migrate_pages(&pagelist, alloc_migration_target, NULL, (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL); if (err) putback_movable_pages(&pagelist); } if (err >= 0) err += nr_failed; return err; } /* * Move pages between the two nodesets so as to preserve the physical * layout as much as possible. * * Returns the number of page that could not be moved. */ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags) { long nr_failed = 0; long err = 0; nodemask_t tmp; lru_cache_disable(); /* * Find a 'source' bit set in 'tmp' whose corresponding 'dest' * bit in 'to' is not also set in 'tmp'. Clear the found 'source' * bit in 'tmp', and return that <source, dest> pair for migration. * The pair of nodemasks 'to' and 'from' define the map. * * If no pair of bits is found that way, fallback to picking some * pair of 'source' and 'dest' bits that are not the same. If the * 'source' and 'dest' bits are the same, this represents a node * that will be migrating to itself, so no pages need move. 
*
	 * If no bits are left in 'tmp', or if all remaining bits left
	 * in 'tmp' correspond to the same bit in 'to', return false
	 * (nothing left to migrate).
	 *
	 * This lets us pick a pair of nodes to migrate between, such that
	 * if possible the dest node is not already occupied by some other
	 * source node, minimizing the risk of overloading the memory on a
	 * node that would happen if we migrated incoming memory to a node
	 * before migrating outgoing memory from that same node.
	 *
	 * A single scan of tmp is sufficient.  As we go, we remember the
	 * most recent <s, d> pair that moved (s != d).  If we find a pair
	 * that not only moved, but what's better, moved to an empty slot
	 * (d is not set in tmp), then we break out then, with that pair.
	 * Otherwise when we finish scanning tmp, we at least have the
	 * most recent <s, d> pair that moved.  If we get all the way through
	 * the scan of tmp without finding any node that moved, much less
	 * moved to an empty node, then there is nothing left worth migrating.
	 */

	tmp = *from;
	while (!nodes_empty(tmp)) {
		int s, d;
		int source = NUMA_NO_NODE;
		int dest = 0;

		for_each_node_mask(s, tmp) {

			/*
			 * do_migrate_pages() tries to maintain the relative
			 * node relationship of the pages established between
			 * threads and memory areas.
			 *
			 * However if the number of source nodes is not equal to
			 * the number of destination nodes we can not preserve
			 * this node relative relationship.  In that case, skip
			 * copying memory from a node that is in the destination
			 * mask.
			 *
			 * Example: [2,3,4] -> [3,4,5] moves everything.
			 *          [0-7] -> [3,4,5] moves only 0,1,2,6,7.
			 */

			if ((nodes_weight(*from) != nodes_weight(*to)) &&
						(node_isset(s, *to)))
				continue;

			d = node_remap(s, *from, *to);
			if (s == d)
				continue;

			source = s;	/* Node moved. Memorize */
			dest = d;

			/* dest not in remaining from nodes?
*/
			if (!node_isset(dest, tmp))
				break;
		}
		/* No node left that would actually move: done. */
		if (source == NUMA_NO_NODE)
			break;

		node_clear(source, tmp);
		err = migrate_to_node(mm, source, dest, flags);
		if (err > 0)
			nr_failed += err;
		if (err < 0)
			break;
	}

	lru_cache_enable();
	if (err < 0)
		return err;
	/* The count is a long, the return type an int: saturate. */
	return (nr_failed < INT_MAX) ? nr_failed : INT_MAX;
}

/*
 * Allocate a new folio for page migration, according to NUMA mempolicy.
 *
 * @private carries a struct migration_mpol (policy plus base interleave
 * index); the index actually used is offset by @src's index.
 */
static struct folio *alloc_migration_target_by_mpol(struct folio *src,
						    unsigned long private)
{
	struct migration_mpol *mmpol = (struct migration_mpol *)private;
	struct mempolicy *pol = mmpol->pol;
	pgoff_t ilx = mmpol->ilx;
	struct page *page;
	unsigned int order;
	int nid = numa_node_id();
	gfp_t gfp;

	order = folio_order(src);
	ilx += src->index >> order;

	if (folio_test_hugetlb(src)) {
		nodemask_t *nodemask;
		struct hstate *h;

		h = folio_hstate(src);
		gfp = htlb_alloc_mask(h);
		nodemask = policy_nodemask(gfp, pol, ilx, &nid);
		return alloc_hugetlb_folio_nodemask(h, nid, nodemask, gfp);
	}

	if (folio_test_large(src))
		gfp = GFP_TRANSHUGE;
	else
		gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL | __GFP_COMP;

	page = alloc_pages_mpol(gfp, order, pol, ilx, nid);
	return page_rmappable_folio(page);
}
#else

/* !CONFIG_MIGRATION stub: nothing can be queued for migration. */
static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
				unsigned long flags)
{
	return false;
}

/* !CONFIG_MIGRATION stub: always fails with -ENOSYS. */
int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
		     const nodemask_t *to, int flags)
{
	return -ENOSYS;
}

/* !CONFIG_MIGRATION stub: never allocates. */
static struct folio *alloc_migration_target_by_mpol(struct folio *src,
						    unsigned long private)
{
	return NULL;
}
#endif

/*
 * Apply a memory policy to the range [start, start + len) of current->mm
 * and, when MPOL_MF_MOVE or MPOL_MF_MOVE_ALL is set in @flags, migrate
 * misplaced folios.  Returns 0 or a negative errno.
 */
static long do_mbind(unsigned long start, unsigned long len,
		     unsigned short mode, unsigned short mode_flags,
		     nodemask_t *nmask, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	struct vma_iterator vmi;
	struct migration_mpol mmpol;
	struct mempolicy *new;
	unsigned long end;
	long err;
	long nr_failed;
	LIST_HEAD(pagelist);

	if (flags & ~(unsigned long)MPOL_MF_VALID)
		return -EINVAL;
	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
		return -EPERM;

	if (start & ~PAGE_MASK)
		return -EINVAL;

	if (mode == MPOL_DEFAULT)
		flags &= ~MPOL_MF_STRICT;

	len = PAGE_ALIGN(len);
	end = start + len;

	/* Reject a range that wraps around; an empty range is a no-op. */
	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;

	new = mpol_new(mode, mode_flags, nmask);
	if (IS_ERR(new))
		return PTR_ERR(new);

	/*
	 * If we are using the default policy then operation
	 * on discontinuous address spaces is okay after all
	 */
	if (!new)
		flags |= MPOL_MF_DISCONTIG_OK;

	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
		lru_cache_disable();
	{
		NODEMASK_SCRATCH(scratch);
		if (scratch) {
			/* On success mmap_lock stays held past this block. */
			mmap_write_lock(mm);
			err = mpol_set_nodemask(new, nmask, scratch);
			if (err)
				mmap_write_unlock(mm);
		} else
			err = -ENOMEM;
		NODEMASK_SCRATCH_FREE(scratch);
	}
	if (err)
		goto mpol_out;

	/*
	 * Lock the VMAs before scanning for pages to migrate,
	 * to ensure we don't miss a concurrently inserted page.
	 */
	nr_failed = queue_pages_range(mm, start, end, nmask,
			flags | MPOL_MF_INVERT | MPOL_MF_WRLOCK, &pagelist);

	if (nr_failed < 0) {
		err = nr_failed;
		nr_failed = 0;
	} else {
		vma_iter_init(&vmi, mm, start);
		prev = vma_prev(&vmi);
		for_each_vma_range(vmi, vma, end) {
			err = mbind_range(&vmi, vma, &prev, start, end, new);
			if (err)
				break;
		}
	}

	if (!err && !list_empty(&pagelist)) {
		/* Convert MPOL_DEFAULT's NULL to task or default policy */
		if (!new) {
			new = get_task_policy(current);
			mpol_get(new);
		}
		mmpol.pol = new;
		mmpol.ilx = 0;

		/*
		 * In the interleaved case, attempt to allocate on exactly the
		 * targeted nodes, for the first VMA to be migrated; for later
		 * VMAs, the nodes will still be interleaved from the targeted
		 * nodemask, but one by one may be selected differently.
*/
		if (new->mode == MPOL_INTERLEAVE) {
			struct page *page;
			unsigned int order;
			unsigned long addr = -EFAULT;

			/* Find the first queued page that is not a KSM page. */
			list_for_each_entry(page, &pagelist, lru) {
				if (!PageKsm(page))
					break;
			}
			if (!list_entry_is_head(page, &pagelist, lru)) {
				/* Locate that page's address in one of the VMAs. */
				vma_iter_init(&vmi, mm, start);
				for_each_vma_range(vmi, vma, end) {
					addr = page_address_in_vma(page, vma);
					if (addr != -EFAULT)
						break;
				}
			}
			if (addr != -EFAULT) {
				order = compound_order(page);
				/* We already know the pol, but not the ilx */
				mpol_cond_put(get_vma_policy(vma, addr, order,
							     &mmpol.ilx));
				/* Set base from which to increment by index */
				mmpol.ilx -= page->index >> order;
			}
		}
	}

	mmap_write_unlock(mm);

	if (!err && !list_empty(&pagelist)) {
		/* Only zero vs. non-zero of nr_failed is used below. */
		nr_failed |= migrate_pages(&pagelist,
				alloc_migration_target_by_mpol, NULL,
				(unsigned long)&mmpol, MIGRATE_SYNC,
				MR_MEMPOLICY_MBIND, NULL);
	}

	if (nr_failed && (flags & MPOL_MF_STRICT))
		err = -EIO;
	if (!list_empty(&pagelist))
		putback_movable_pages(&pagelist);
mpol_out:
	mpol_put(new);
	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
		lru_cache_enable();
	return err;
}

/*
 * User space interface with variable sized bitmaps for nodelists.
 */

/*
 * Copy @maxnode bits of a user bitmap into @mask, using the compat layout
 * in a compat syscall, and clear the bits above @maxnode in the last word.
 * Returns 0, or -EFAULT when the copy fails.
 */
static int get_bitmap(unsigned long *mask, const unsigned long __user *nmask,
		      unsigned long maxnode)
{
	unsigned long nlongs = BITS_TO_LONGS(maxnode);
	int ret;

	if (in_compat_syscall())
		ret = compat_get_bitmap(mask,
					(const compat_ulong_t __user *)nmask,
					maxnode);
	else
		ret = copy_from_user(mask, nmask,
				     nlongs * sizeof(unsigned long));

	if (ret)
		return -EFAULT;

	if (maxnode % BITS_PER_LONG)
		mask[nlongs - 1] &= (1UL << (maxnode % BITS_PER_LONG)) - 1;

	return 0;
}

/*
 * Copy a node mask from user space.
 *
 * @maxnode is decremented before use.  A resulting value of 0, or a NULL
 * @nmask, yields an empty mask and success.
 */
static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
		     unsigned long maxnode)
{
	--maxnode;
	nodes_clear(*nodes);
	if (maxnode == 0 || !nmask)
		return 0;
	if (maxnode > PAGE_SIZE*BITS_PER_BYTE)
		return -EINVAL;

	/*
	 * When the user specified more nodes than supported just check
	 * if the non supported part is all zero, one word at a time,
	 * starting at the end.
*/
	while (maxnode > MAX_NUMNODES) {
		unsigned long bits = min_t(unsigned long, maxnode, BITS_PER_LONG);
		unsigned long t;

		if (get_bitmap(&t, &nmask[(maxnode - 1) / BITS_PER_LONG], bits))
			return -EFAULT;

		if (maxnode - bits >= MAX_NUMNODES) {
			maxnode -= bits;
		} else {
			/* Last partial word: ignore the supported low bits. */
			maxnode = MAX_NUMNODES;
			t &= ~((1UL << (MAX_NUMNODES % BITS_PER_LONG)) - 1);
		}
		if (t)
			return -EINVAL;
	}

	return get_bitmap(nodes_addr(*nodes), nmask, maxnode);
}

/*
 * Copy a kernel node mask to user space.
 *
 * When the user buffer is larger than the kernel mask, the excess part of
 * the user buffer is cleared; a buffer larger than PAGE_SIZE is rejected.
 */
static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
			      nodemask_t *nodes)
{
	unsigned long copy = ALIGN(maxnode-1, 64) / 8;
	unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long);
	bool compat = in_compat_syscall();

	if (compat)
		nbytes = BITS_TO_COMPAT_LONGS(nr_node_ids) * sizeof(compat_long_t);

	if (copy > nbytes) {
		if (copy > PAGE_SIZE)
			return -EINVAL;
		if (clear_user((char __user *)mask + nbytes, copy - nbytes))
			return -EFAULT;
		copy = nbytes;
		maxnode = nr_node_ids;
	}

	if (compat)
		return compat_put_bitmap((compat_ulong_t __user *)mask,
					 nodes_addr(*nodes), maxnode);

	return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0;
}

/*
 * Basic parameter sanity check used by both mbind() and set_mempolicy().
 *
 * Splits the user-supplied *@mode into the bare mode (written back to
 * *@mode) and the MPOL_MODE_FLAGS bits (written to *@flags).
 */
static inline int sanitize_mpol_flags(int *mode, unsigned short *flags)
{
	*flags = *mode & MPOL_MODE_FLAGS;
	*mode &= ~MPOL_MODE_FLAGS;

	if ((unsigned int)(*mode) >=  MPOL_MAX)
		return -EINVAL;
	/* The two nodemask-interpretation flags are mutually exclusive. */
	if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES))
		return -EINVAL;
	if (*flags & MPOL_F_NUMA_BALANCING) {
		/* MPOL_F_NUMA_BALANCING is accepted with MPOL_BIND only. */
		if (*mode != MPOL_BIND)
			return -EINVAL;
		*flags |= (MPOL_F_MOF | MPOL_F_MORON);
	}
	return 0;
}

/*
 * Common implementation of the mbind() syscall: validate the mode, fetch
 * the user nodemask and hand over to do_mbind().
 */
static long kernel_mbind(unsigned long start, unsigned long len,
			 unsigned long mode, const unsigned long __user *nmask,
			 unsigned long maxnode, unsigned int flags)
{
	unsigned short mode_flags;
	nodemask_t nodes;
	int lmode = mode;
	int err;

	start = untagged_addr(start);
	err = sanitize_mpol_flags(&lmode, &mode_flags);
	if (err)
		return err;

	err = get_nodes(&nodes, nmask, maxnode);
	if (err)
		return err;

	return do_mbind(start, len, lmode, mode_flags, &nodes, flags);
}

/*
 * set_mempolicy_home_node() syscall: set @home_node on the policies of the
 * VMAs in [start, start + len).  Returns -ENOENT when no policy in the range
 * was updated.
 */
SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, len,
		unsigned long, home_node, unsigned long, flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	struct mempolicy *new, *old;
	unsigned long end;
	int err = -ENOENT;
	VMA_ITERATOR(vmi, mm, start);

	start = untagged_addr(start);
	if (start & ~PAGE_MASK)
		return -EINVAL;
	/*
	 * flags is used for future extension if any.
	 */
	if (flags != 0)
		return -EINVAL;

	/*
	 * Check home_node is online to avoid accessing uninitialized
	 * NODE_DATA.
	 */
	if (home_node >= MAX_NUMNODES || !node_online(home_node))
		return -EINVAL;

	len = PAGE_ALIGN(len);
	end = start + len;

	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;
	mmap_write_lock(mm);
	prev = vma_prev(&vmi);
	for_each_vma_range(vmi, vma, end) {
		/*
		 * If any vma in the range got policy other than MPOL_BIND
		 * or MPOL_PREFERRED_MANY we return error. We don't reset
		 * the home node for vmas we already updated before.
*/
		old = vma_policy(vma);
		if (!old) {
			/* VMAs without a policy are skipped. */
			prev = vma;
			continue;
		}
		if (old->mode != MPOL_BIND && old->mode != MPOL_PREFERRED_MANY) {
			err = -EOPNOTSUPP;
			break;
		}
		new = mpol_dup(old);
		if (IS_ERR(new)) {
			err = PTR_ERR(new);
			break;
		}

		vma_start_write(vma);
		new->home_node = home_node;
		err = mbind_range(&vmi, vma, &prev, start, end, new);
		/* Drop the local reference on the temporary copy. */
		mpol_put(new);
		if (err)
			break;
	}
	mmap_write_unlock(mm);
	return err;
}

/* mbind() syscall entry point; see kernel_mbind(). */
SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
		unsigned long, mode, const unsigned long __user *, nmask,
		unsigned long, maxnode, unsigned int, flags)
{
	return kernel_mbind(start, len, mode, nmask, maxnode, flags);
}

/*
 * Set the process memory policy: validate the mode, fetch the user nodemask
 * and hand over to do_set_mempolicy().
 */
static long kernel_set_mempolicy(int mode, const unsigned long __user *nmask,
				 unsigned long maxnode)
{
	unsigned short mode_flags;
	nodemask_t nodes;
	int lmode = mode;
	int err;

	err = sanitize_mpol_flags(&lmode, &mode_flags);
	if (err)
		return err;

	err = get_nodes(&nodes, nmask, maxnode);
	if (err)
		return err;

	return do_set_mempolicy(lmode, mode_flags, &nodes);
}

/* set_mempolicy() syscall entry point; see kernel_set_mempolicy(). */
SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask,
		unsigned long, maxnode)
{
	return kernel_set_mempolicy(mode, nmask, maxnode);
}

/*
 * Common implementation of the migrate_pages() syscall: move the pages of
 * the task identified by @pid (current when @pid is 0) from @old_nodes to
 * @new_nodes.
 */
static int kernel_migrate_pages(pid_t pid, unsigned long maxnode,
				const unsigned long __user *old_nodes,
				const unsigned long __user *new_nodes)
{
	struct mm_struct *mm = NULL;
	struct task_struct *task;
	nodemask_t task_nodes;
	int err;
	nodemask_t *old;
	nodemask_t *new;
	NODEMASK_SCRATCH(scratch);

	if (!scratch)
		return -ENOMEM;

	old = &scratch->mask1;
	new = &scratch->mask2;

	err = get_nodes(old, old_nodes, maxnode);
	if (err)
		goto out;

	err = get_nodes(new, new_nodes, maxnode);
	if (err)
		goto out;

	/* Find the mm_struct */
	rcu_read_lock();
	task = pid ? find_task_by_vpid(pid) : current;
	if (!task) {
		rcu_read_unlock();
		err = -ESRCH;
		goto out;
	}
	get_task_struct(task);

	err = -EINVAL;

	/*
	 * Check if this process has the right to modify the specified process.
	 * Use the regular "ptrace_may_access()" checks.
*/
	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
		rcu_read_unlock();
		err = -EPERM;
		goto out_put;
	}
	rcu_read_unlock();

	task_nodes = cpuset_mems_allowed(task);
	/* Is the user allowed to access the target nodes? */
	if (!nodes_subset(*new, task_nodes) && !capable(CAP_SYS_NICE)) {
		err = -EPERM;
		goto out_put;
	}

	/* Restrict the targets to what the caller itself may use. */
	task_nodes = cpuset_mems_allowed(current);
	nodes_and(*new, *new, task_nodes);
	/* err is still -EINVAL here. */
	if (nodes_empty(*new))
		goto out_put;

	err = security_task_movememory(task);
	if (err)
		goto out_put;

	mm = get_task_mm(task);
	put_task_struct(task);

	if (!mm) {
		err = -EINVAL;
		goto out;
	}

	err = do_migrate_pages(mm, old, new,
		capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);

	mmput(mm);
out:
	NODEMASK_SCRATCH_FREE(scratch);

	return err;

out_put:
	put_task_struct(task);
	goto out;
}

/* migrate_pages() syscall entry point; see kernel_migrate_pages(). */
SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
		const unsigned long __user *, old_nodes,
		const unsigned long __user *, new_nodes)
{
	return kernel_migrate_pages(pid, maxnode, old_nodes, new_nodes);
}

/*
 * Retrieve NUMA policy: run do_get_mempolicy() and copy the results out to
 * the user pointers @policy and @nmask, either of which may be NULL.
 */
static int kernel_get_mempolicy(int __user *policy,
				unsigned long __user *nmask,
				unsigned long maxnode,
				unsigned long addr,
				unsigned long flags)
{
	int err;
	int pval;
	nodemask_t nodes;

	/* The user buffer must be able to hold nr_node_ids bits. */
	if (nmask != NULL && maxnode < nr_node_ids)
		return -EINVAL;

	addr = untagged_addr(addr);

	err = do_get_mempolicy(&pval, &nodes, addr, flags);

	if (err)
		return err;

	if (policy && put_user(pval, policy))
		return -EFAULT;

	if (nmask)
		err = copy_nodes_to_user(nmask, maxnode, &nodes);

	return err;
}

/* get_mempolicy() syscall entry point; see kernel_get_mempolicy(). */
SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
		unsigned long __user *, nmask, unsigned long, maxnode,
		unsigned long, addr, unsigned long, flags)
{
	return kernel_get_mempolicy(policy, nmask, maxnode, addr, flags);
}

/* Report whether the pages mapped by @vma may be migrated between nodes. */
bool vma_migratable(struct vm_area_struct *vma)
{
	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
		return false;

	/*
	 * DAX device mappings require predictable access latency, so avoid
	 * incurring periodic faults.
*/
	if (vma_is_dax(vma))
		return false;

	if (is_vm_hugetlb_page(vma) &&
		!hugepage_migration_supported(hstate_vma(vma)))
		return false;

	/*
	 * Migration allocates pages in the highest zone. If we cannot
	 * do so then migration (at least from node to node) is not
	 * possible.
	 */
	if (vma->vm_file &&
		gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping))
			< policy_zone)
		return false;
	return true;
}

/*
 * Return the policy for @addr in @vma: the result of the vm_ops->get_policy()
 * hook when the VMA has one, otherwise vma->vm_policy.  The result may be
 * NULL.  *@ilx is set to 0 before the hook is called.
 */
struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
				   unsigned long addr, pgoff_t *ilx)
{
	*ilx = 0;
	return (vma->vm_ops && vma->vm_ops->get_policy) ?
		vma->vm_ops->get_policy(vma, addr, ilx) : vma->vm_policy;
}

/*
 * get_vma_policy(@vma, @addr, @order, @ilx)
 * @vma: virtual memory area whose policy is sought
 * @addr: address in @vma for shared policy lookup
 * @order: 0, or appropriate huge_page_order for interleaving
 * @ilx: interleave index (output), for use only when MPOL_INTERLEAVE
 *
 * Returns effective policy for a VMA at specified address.
 * Falls back to current->mempolicy or system default policy, as necessary.
 * Shared policies [those marked as MPOL_F_SHARED] require an extra reference
 * count--added by the get_policy() vm_op, as appropriate--to protect against
 * freeing by another task.  It is the caller's responsibility to free the
 * extra reference for shared policies.
*/ struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr, int order, pgoff_t *ilx) { struct mempolicy *pol; pol = __get_vma_policy(vma, addr, ilx); if (!pol) pol = get_task_policy(current); if (pol->mode == MPOL_INTERLEAVE) { *ilx += vma->vm_pgoff >> order; *ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order); } return pol; } bool vma_policy_mof(struct vm_area_struct *vma) { struct mempolicy *pol; if (vma->vm_ops && vma->vm_ops->get_policy) { bool ret = false; pgoff_t ilx; /* ignored here */ pol = vma->vm_ops->get_policy(vma, vma->vm_start, &ilx); if (pol && (pol->flags & MPOL_F_MOF)) ret = true; mpol_cond_put(pol); return ret; } pol = vma->vm_policy; if (!pol) pol = get_task_policy(current); return pol->flags & MPOL_F_MOF; } bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone) { enum zone_type dynamic_policy_zone = policy_zone; BUG_ON(dynamic_policy_zone == ZONE_MOVABLE); /* * if policy->nodes has movable memory only, * we apply policy when gfp_zone(gfp) = ZONE_MOVABLE only. * * policy->nodes is intersect with node_states[N_MEMORY]. * so if the following test fails, it implies * policy->nodes has movable memory only. */ if (!nodes_intersects(policy->nodes, node_states[N_HIGH_MEMORY])) dynamic_policy_zone = ZONE_MOVABLE; return zone >= dynamic_policy_zone; } /* Do dynamic interleaving for a process */ static unsigned int interleave_nodes(struct mempolicy *policy) { unsigned int nid; nid = next_node_in(current->il_prev, policy->nodes); if (nid < MAX_NUMNODES) current->il_prev = nid; return nid; } /* * Depending on the memory policy provide a node from which to allocate the * next slab entry. 
*/
unsigned int mempolicy_slab_node(void)
{
	int local_nid = numa_mem_id();
	struct mempolicy *pol;
	struct zonelist *fallback;
	struct zoneref *first;

	/* Outside task context, or without a task policy, stay local. */
	if (!in_task())
		return local_nid;

	pol = current->mempolicy;
	if (!pol)
		return local_nid;

	switch (pol->mode) {
	case MPOL_LOCAL:
		return local_nid;

	case MPOL_PREFERRED:
		return first_node(pol->nodes);

	case MPOL_INTERLEAVE:
		return interleave_nodes(pol);

	case MPOL_BIND:
	case MPOL_PREFERRED_MANY:
		/*
		 * Follow bind policy behavior and start allocation at the
		 * first node.
		 */
		fallback = &NODE_DATA(local_nid)->node_zonelists[ZONELIST_FALLBACK];
		first = first_zones_zonelist(fallback, gfp_zone(GFP_KERNEL),
					     &pol->nodes);
		if (first->zone)
			return zone_to_nid(first->zone);
		return local_nid;

	default:
		BUG();
	}
}

/*
 * Do static interleaving for interleave index @ilx.  Returns the ilx'th
 * node in pol->nodes (starting from ilx=0), wrapping around if ilx
 * exceeds the number of present nodes.
 */
static unsigned int interleave_nid(struct mempolicy *pol, pgoff_t ilx)
{
	nodemask_t snapshot = pol->nodes;
	unsigned int weight, steps;
	int nid;

	/*
	 * pol->nodes may be changed by other threads between first_node()
	 * and next_node(), so all work is done on the local copy taken
	 * above.  The barrier stabilizes that copy in a register or on the
	 * stack so that it stops changing under the code.
	 */
	barrier();

	weight = nodes_weight(snapshot);
	if (!weight)
		return numa_node_id();

	/* Walk (ilx mod weight) nodes forward from the first one. */
	nid = first_node(snapshot);
	for (steps = ilx % weight; steps; steps--)
		nid = next_node(nid, snapshot);

	return nid;
}

/*
 * Return a nodemask representing a mempolicy for filtering nodes for
 * page allocation, together with preferred node id (or the input node id).
*/ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol, pgoff_t ilx, int *nid) { nodemask_t *nodemask = NULL; switch (pol->mode) { case MPOL_PREFERRED: /* Override input node id */ *nid = first_node(pol->nodes); break; case MPOL_PREFERRED_MANY: nodemask = &pol->nodes; if (pol->home_node != NUMA_NO_NODE) *nid = pol->home_node; break; case MPOL_BIND: /* Restrict to nodemask (but not on lower zones) */ if (apply_policy_zone(pol, gfp_zone(gfp)) && cpuset_nodemask_valid_mems_allowed(&pol->nodes)) nodemask = &pol->nodes; if (pol->home_node != NUMA_NO_NODE) *nid = pol->home_node; /* * __GFP_THISNODE shouldn't even be used with the bind policy * because we might easily break the expectation to stay on the * requested node and not break the policy. */ WARN_ON_ONCE(gfp & __GFP_THISNODE); break; case MPOL_INTERLEAVE: /* Override input node id */ *nid = (ilx == NO_INTERLEAVE_INDEX) ? interleave_nodes(pol) : interleave_nid(pol, ilx); break; } return nodemask; } #ifdef CONFIG_HUGETLBFS /* * huge_node(@vma, @addr, @gfp_flags, @mpol) * @vma: virtual memory area whose policy is sought * @addr: address in @vma for shared policy lookup and interleave policy * @gfp_flags: for requested zone * @mpol: pointer to mempolicy pointer for reference counted mempolicy * @nodemask: pointer to nodemask pointer for 'bind' and 'prefer-many' policy * * Returns a nid suitable for a huge page allocation and a pointer * to the struct mempolicy for conditional unref after allocation. * If the effective policy is 'bind' or 'prefer-many', returns a pointer * to the mempolicy's @nodemask for filtering the zonelist. 
*/ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask) { pgoff_t ilx; int nid; nid = numa_node_id(); *mpol = get_vma_policy(vma, addr, hstate_vma(vma)->order, &ilx); *nodemask = policy_nodemask(gfp_flags, *mpol, ilx, &nid); return nid; } /* * init_nodemask_of_mempolicy * * If the current task's mempolicy is "default" [NULL], return 'false' * to indicate default policy. Otherwise, extract the policy nodemask * for 'bind' or 'interleave' policy into the argument nodemask, or * initialize the argument nodemask to contain the single node for * 'preferred' or 'local' policy and return 'true' to indicate presence * of non-default mempolicy. * * We don't bother with reference counting the mempolicy [mpol_get/put] * because the current task is examining it's own mempolicy and a task's * mempolicy is only ever changed by the task itself. * * N.B., it is the caller's responsibility to free a returned nodemask. */ bool init_nodemask_of_mempolicy(nodemask_t *mask) { struct mempolicy *mempolicy; if (!(mask && current->mempolicy)) return false; task_lock(current); mempolicy = current->mempolicy; switch (mempolicy->mode) { case MPOL_PREFERRED: case MPOL_PREFERRED_MANY: case MPOL_BIND: case MPOL_INTERLEAVE: *mask = mempolicy->nodes; break; case MPOL_LOCAL: init_nodemask_of_node(mask, numa_node_id()); break; default: BUG(); } task_unlock(current); return true; } #endif /* * mempolicy_in_oom_domain * * If tsk's mempolicy is "bind", check for intersection between mask and * the policy nodemask. Otherwise, return true for all other policies * including "interleave", as a tsk with "interleave" policy may have * memory allocated from all nodes in system. * * Takes task_lock(tsk) to prevent freeing of its mempolicy. 
*/
bool mempolicy_in_oom_domain(struct task_struct *tsk,
					const nodemask_t *mask)
{
	struct mempolicy *task_pol;
	bool in_domain = true;

	/* Without a mask there is nothing to intersect with. */
	if (!mask)
		return true;

	task_lock(tsk);
	task_pol = tsk->mempolicy;
	if (task_pol && task_pol->mode == MPOL_BIND)
		in_domain = nodes_intersects(task_pol->nodes, *mask);
	task_unlock(tsk);

	return in_domain;
}

/*
 * Allocate for MPOL_PREFERRED_MANY in two passes: the first is limited to
 * @nodemask, skips direct reclaim and is allowed to fail quietly; the
 * second uses the caller's @gfp unchanged with no nodemask.
 */
static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
						int nid, nodemask_t *nodemask)
{
	gfp_t first_pass_gfp = (gfp | __GFP_NOWARN) &
			       ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
	struct page *page;

	page = __alloc_pages(first_pass_gfp, order, nid, nodemask);
	if (page)
		return page;

	/* Preferred nodes could not satisfy the request: try all nodes. */
	return __alloc_pages(gfp, order, nid, NULL);
}

/**
 * alloc_pages_mpol - Allocate pages according to NUMA mempolicy.
 * @gfp: GFP flags.
 * @order: Order of the page allocation.
 * @pol: Pointer to the NUMA mempolicy.
 * @ilx: Index for interleave mempolicy (also distinguishes alloc_pages()).
 * @nid: Preferred node (usually numa_node_id() but @mpol may override it).
 *
 * Return: The page on success or NULL if allocation fails.
*/
struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
		struct mempolicy *pol, pgoff_t ilx, int nid)
{
	nodemask_t *nodemask;
	struct page *page;

	/* May override nid with the node chosen by the policy. */
	nodemask = policy_nodemask(gfp, pol, ilx, &nid);

	if (pol->mode == MPOL_PREFERRED_MANY)
		return alloc_pages_preferred_many(gfp, order, nid, nodemask);

	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
	    /* filter "hugepage" allocation, unless from alloc_pages() */
	    order == HPAGE_PMD_ORDER && ilx != NO_INTERLEAVE_INDEX) {
		/*
		 * For hugepage allocation and non-interleave policy which
		 * allows the current node (or other explicitly preferred
		 * node) we only try to allocate from the current/preferred
		 * node and don't fall back to other nodes, as the cost of
		 * remote accesses would likely offset THP benefits.
		 *
		 * If the policy is interleave or does not allow the current
		 * node in its nodemask, we allocate the standard way.
		 */
		if (pol->mode != MPOL_INTERLEAVE &&
		    (!nodemask || node_isset(nid, *nodemask))) {
			/*
			 * First, try to allocate THP only on local node, but
			 * don't reclaim unnecessarily, just compact.
			 */
			page = __alloc_pages_node(nid,
				gfp | __GFP_THISNODE | __GFP_NORETRY, order);
			/*
			 * Done on success; also done on failure when the
			 * caller did not allow direct reclaim.
			 */
			if (page || !(gfp & __GFP_DIRECT_RECLAIM))
				return page;
			/*
			 * If hugepage allocations are configured to always
			 * synchronous compact or the vma has been madvised
			 * to prefer hugepage backing, retry allowing remote
			 * memory with both reclaim and compact as well.
			 */
		}
	}

	page = __alloc_pages(gfp, order, nid, nodemask);

	if (unlikely(pol->mode == MPOL_INTERLEAVE) && page) {
		/* skip NUMA_INTERLEAVE_HIT update if numa stats is disabled */
		if (static_branch_likely(&vm_numa_stat_key) &&
		    page_to_nid(page) == nid) {
			/* The counter update runs with preemption disabled. */
			preempt_disable();
			__count_numa_event(page_zone(page), NUMA_INTERLEAVE_HIT);
			preempt_enable();
		}
	}

	return page;
}

/**
 * vma_alloc_folio - Allocate a folio for a VMA.
 * @gfp: GFP flags.
 * @order: Order of the folio.
 * @vma: Pointer to VMA.
 * @addr: Virtual address of the allocation.  Must be inside @vma.
* @hugepage: Unused (was: For hugepages try only preferred node if possible). * * Allocate a folio for a specific address in @vma, using the appropriate * NUMA policy. The caller must hold the mmap_lock of the mm_struct of the * VMA to prevent it from going away. Should be used for all allocations * for folios that will be mapped into user space, excepting hugetlbfs, and * excepting where direct use of alloc_pages_mpol() is more appropriate. * * Return: The folio on success or NULL if allocation fails. */ struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr, bool hugepage) { struct mempolicy *pol; pgoff_t ilx; struct page *page; pol = get_vma_policy(vma, addr, order, &ilx); page = alloc_pages_mpol(gfp | __GFP_COMP, order, pol, ilx, numa_node_id()); mpol_cond_put(pol); return page_rmappable_folio(page); } EXPORT_SYMBOL(vma_alloc_folio); /** * alloc_pages - Allocate pages. * @gfp: GFP flags. * @order: Power of two of number of pages to allocate. * * Allocate 1 << @order contiguous pages. The physical address of the * first page is naturally aligned (eg an order-3 allocation will be aligned * to a multiple of 8 * PAGE_SIZE bytes). The NUMA policy of the current * process is honoured when in process context. * * Context: Can be called from any context, providing the appropriate GFP * flags are used. * Return: The page on success or NULL if allocation fails. 
*/ struct page *alloc_pages(gfp_t gfp, unsigned int order) { struct mempolicy *pol = &default_policy; /* * No reference counting needed for current->mempolicy * nor system default_policy */ if (!in_interrupt() && !(gfp & __GFP_THISNODE)) pol = get_task_policy(current); return alloc_pages_mpol(gfp, order, pol, NO_INTERLEAVE_INDEX, numa_node_id()); } EXPORT_SYMBOL(alloc_pages); struct folio *folio_alloc(gfp_t gfp, unsigned int order) { return page_rmappable_folio(alloc_pages(gfp | __GFP_COMP, order)); } EXPORT_SYMBOL(folio_alloc); static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { int nodes; unsigned long nr_pages_per_node; int delta; int i; unsigned long nr_allocated; unsigned long total_allocated = 0; nodes = nodes_weight(pol->nodes); nr_pages_per_node = nr_pages / nodes; delta = nr_pages - nodes * nr_pages_per_node; for (i = 0; i < nodes; i++) { if (delta) { nr_allocated = __alloc_pages_bulk(gfp, interleave_nodes(pol), NULL, nr_pages_per_node + 1, NULL, page_array); delta--; } else { nr_allocated = __alloc_pages_bulk(gfp, interleave_nodes(pol), NULL, nr_pages_per_node, NULL, page_array); } page_array += nr_allocated; total_allocated += nr_allocated; } return total_allocated; } static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { gfp_t preferred_gfp; unsigned long nr_allocated = 0; preferred_gfp = gfp | __GFP_NOWARN; preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); nr_allocated = __alloc_pages_bulk(preferred_gfp, nid, &pol->nodes, nr_pages, NULL, page_array); if (nr_allocated < nr_pages) nr_allocated += __alloc_pages_bulk(gfp, numa_node_id(), NULL, nr_pages - nr_allocated, NULL, page_array + nr_allocated); return nr_allocated; } /* alloc pages bulk and mempolicy should be considered at the * same time in some situation such as vmalloc. 
* * It can accelerate memory allocation especially interleaving * allocate memory. */ unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp, unsigned long nr_pages, struct page **page_array) { struct mempolicy *pol = &default_policy; nodemask_t *nodemask; int nid; if (!in_interrupt() && !(gfp & __GFP_THISNODE)) pol = get_task_policy(current); if (pol->mode == MPOL_INTERLEAVE) return alloc_pages_bulk_array_interleave(gfp, pol, nr_pages, page_array); if (pol->mode == MPOL_PREFERRED_MANY) return alloc_pages_bulk_array_preferred_many(gfp, numa_node_id(), pol, nr_pages, page_array); nid = numa_node_id(); nodemask = policy_nodemask(gfp, pol, NO_INTERLEAVE_INDEX, &nid); return __alloc_pages_bulk(gfp, nid, nodemask, nr_pages, NULL, page_array); } int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { struct mempolicy *pol = mpol_dup(src->vm_policy); if (IS_ERR(pol)) return PTR_ERR(pol); dst->vm_policy = pol; return 0; } /* * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it * rebinds the mempolicy its copying by calling mpol_rebind_policy() * with the mems_allowed returned by cpuset_mems_allowed(). This * keeps mempolicies cpuset relative after its cpuset moves. See * further kernel/cpuset.c update_nodemask(). * * current's mempolicy may be rebinded by the other task(the task that changes * cpuset's mems), so we needn't do rebind work for current task. 
*/ /* Slow path of a mempolicy duplicate */ struct mempolicy *__mpol_dup(struct mempolicy *old) { struct mempolicy *new = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!new) return ERR_PTR(-ENOMEM); /* task's mempolicy is protected by alloc_lock */ if (old == current->mempolicy) { task_lock(current); *new = *old; task_unlock(current); } else *new = *old; if (current_cpuset_is_being_rebound()) { nodemask_t mems = cpuset_mems_allowed(current); mpol_rebind_policy(new, &mems); } atomic_set(&new->refcnt, 1); return new; } /* Slow path of a mempolicy comparison */ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b) { if (!a || !b) return false; if (a->mode != b->mode) return false; if (a->flags != b->flags) return false; if (a->home_node != b->home_node) return false; if (mpol_store_user_nodemask(a)) if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask)) return false; switch (a->mode) { case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_PREFERRED: case MPOL_PREFERRED_MANY: return !!nodes_equal(a->nodes, b->nodes); case MPOL_LOCAL: return true; default: BUG(); return false; } } /* * Shared memory backing store policy support. * * Remember policies even when nobody has shared memory mapped. * The policies are kept in Red-Black tree linked from the inode. * They are protected by the sp->lock rwlock, which should be held * for any accesses to the tree. */ /* * lookup first element intersecting start-end. 
Caller holds sp->lock for
 * reading or for writing
 */
static struct sp_node *sp_lookup(struct shared_policy *sp,
					pgoff_t start, pgoff_t end)
{
	struct rb_node *n = sp->root.rb_node;

	/* Binary search for any node overlapping [start, end). */
	while (n) {
		struct sp_node *p = rb_entry(n, struct sp_node, nd);

		if (start >= p->end)
			n = n->rb_right;
		else if (end <= p->start)
			n = n->rb_left;
		else
			break;
	}
	if (!n)
		return NULL;
	/* Step back through predecessors to the first overlapping node. */
	for (;;) {
		struct sp_node *w = NULL;
		struct rb_node *prev = rb_prev(n);
		if (!prev)
			break;
		w = rb_entry(prev, struct sp_node, nd);
		if (w->end <= start)
			break;
		n = prev;
	}
	return rb_entry(n, struct sp_node, nd);
}

/*
 * Insert a new shared policy into the list.  Caller holds sp->lock for
 * writing.
 *
 * Reaching a node that @new neither starts before nor ends after hits BUG().
 */
static void sp_insert(struct shared_policy *sp, struct sp_node *new)
{
	struct rb_node **p = &sp->root.rb_node;
	struct rb_node *parent = NULL;
	struct sp_node *nd;

	while (*p) {
		parent = *p;
		nd = rb_entry(parent, struct sp_node, nd);
		if (new->start < nd->start)
			p = &(*p)->rb_left;
		else if (new->end > nd->end)
			p = &(*p)->rb_right;
		else
			BUG();
	}
	rb_link_node(&new->nd, parent, p);
	rb_insert_color(&new->nd, &sp->root);
}

/*
 * Find shared policy intersecting idx
 *
 * Returns NULL when the tree is empty or no node covers @idx.  Otherwise
 * returns the node's policy after mpol_get() has been called on it under
 * the read lock.
 */
struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
						pgoff_t idx)
{
	struct mempolicy *pol = NULL;
	struct sp_node *sn;

	/* Empty-tree check is done before taking the lock. */
	if (!sp->root.rb_node)
		return NULL;
	read_lock(&sp->lock);
	sn = sp_lookup(sp, idx, idx+1);
	if (sn) {
		mpol_get(sn->policy);
		pol = sn->policy;
	}
	read_unlock(&sp->lock);
	return pol;
}

/* Drop the node's policy reference and return the node to sn_cache. */
static void sp_free(struct sp_node *n)
{
	mpol_put(n->policy);
	kmem_cache_free(sn_cache, n);
}

/**
 * mpol_misplaced - check whether current folio node is valid in policy
 *
 * @folio: folio to be checked
 * @vma: vm area where folio mapped
 * @addr: virtual address in @vma for shared policy lookup and interleave policy
 *
 * Lookup current policy node id for vma,addr and "compare to" folio's
 * node id.  Policy determination "mimics" alloc_page_vma().
 * Called from fault path where we know the vma and faulting address.
*
 * Return: NUMA_NO_NODE if the page is in a node that is valid for this
 * policy, or a suitable node ID to allocate a replacement folio from.
 */
int mpol_misplaced(struct folio *folio, struct vm_area_struct *vma,
		   unsigned long addr)
{
	struct mempolicy *pol;
	pgoff_t ilx;
	struct zoneref *z;
	int curnid = folio_nid(folio);
	int thiscpu = raw_smp_processor_id();
	int thisnid = cpu_to_node(thiscpu);
	int polnid = NUMA_NO_NODE;
	int ret = NUMA_NO_NODE;

	pol = get_vma_policy(vma, addr, folio_order(folio), &ilx);
	/* Policies without MPOL_F_MOF never report a folio as misplaced. */
	if (!(pol->flags & MPOL_F_MOF))
		goto out;

	switch (pol->mode) {
	case MPOL_INTERLEAVE:
		polnid = interleave_nid(pol, ilx);
		break;

	case MPOL_PREFERRED:
		if (node_isset(curnid, pol->nodes))
			goto out;
		polnid = first_node(pol->nodes);
		break;

	case MPOL_LOCAL:
		polnid = numa_node_id();
		break;

	case MPOL_BIND:
		/* Optimize placement among multiple nodes via NUMA balancing */
		if (pol->flags & MPOL_F_MORON) {
			if (node_isset(thisnid, pol->nodes))
				break;
			goto out;
		}
		fallthrough;

	case MPOL_PREFERRED_MANY:
		/*
		 * use current page if in policy nodemask,
		 * else select nearest allowed node, if any.
		 * If no allowed nodes, use current [!misplaced].
		 */
		if (node_isset(curnid, pol->nodes))
			goto out;
		z = first_zones_zonelist(
				node_zonelist(numa_node_id(), GFP_HIGHUSER),
				gfp_zone(GFP_HIGHUSER),
				&pol->nodes);
		polnid = zone_to_nid(z->zone);
		break;

	default:
		BUG();
	}

	/* Migrate the folio towards the node whose CPU is referencing it */
	if (pol->flags & MPOL_F_MORON) {
		polnid = thisnid;

		if (!should_numa_migrate_memory(current, folio, curnid,
						thiscpu))
			goto out;
	}

	if (curnid != polnid)
		ret = polnid;
out:
	/* Every exit path releases the policy obtained above. */
	mpol_cond_put(pol);

	return ret;
}

/*
 * Drop the (possibly final) reference to task->mempolicy.  It needs to be
 * dropped after task->mempolicy is set to NULL so that any allocation done as
 * part of its kmem_cache_free(), such as by KASAN, doesn't reference a freed
 * policy.
*/
void mpol_put_task_policy(struct task_struct *task)
{
	struct mempolicy *pol;

	task_lock(task);
	pol = task->mempolicy;
	task->mempolicy = NULL;
	task_unlock(task);
	/* The put happens after the pointer is cleared and the lock dropped. */
	mpol_put(pol);
}

/* Unlink @n from the tree of @sp and free it via sp_free(). */
static void sp_delete(struct shared_policy *sp, struct sp_node *n)
{
	rb_erase(&n->nd, &sp->root);
	sp_free(n);
}

/* Fill in the range and policy of @node; takes no reference on @pol. */
static void sp_node_init(struct sp_node *node, unsigned long start,
			unsigned long end, struct mempolicy *pol)
{
	node->start = start;
	node->end = end;
	node->policy = pol;
}

/*
 * Allocate a node for the range [start, end) holding a private duplicate
 * of @pol with MPOL_F_SHARED set.  Returns NULL if either the node
 * allocation or mpol_dup() fails.
 */
static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
				struct mempolicy *pol)
{
	struct sp_node *n;
	struct mempolicy *newpol;

	n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
	if (!n)
		return NULL;

	newpol = mpol_dup(pol);
	if (IS_ERR(newpol)) {
		kmem_cache_free(sn_cache, n);
		return NULL;
	}
	newpol->flags |= MPOL_F_SHARED;
	sp_node_init(n, start, end, newpol);

	return n;
}

/*
 * Replace a policy range.
 *
 * Existing nodes overlapping [start, end) are deleted, trimmed or split,
 * then @new (if non-NULL) is inserted.  Splitting an old node needs a spare
 * node and policy; these are allocated at alloc_new with the lock dropped,
 * after which the whole operation restarts.
 *
 * Returns 0 on success or -ENOMEM if those allocations fail.
 */
static int shared_policy_replace(struct shared_policy *sp, pgoff_t start,
				 pgoff_t end, struct sp_node *new)
{
	struct sp_node *n;
	struct sp_node *n_new = NULL;
	struct mempolicy *mpol_new = NULL;
	int ret = 0;

restart:
	write_lock(&sp->lock);
	n = sp_lookup(sp, start, end);
	/* Take care of old policies in the same range. */
	while (n && n->start < end) {
		/* Fetched before n may be deleted below. */
		struct rb_node *next = rb_next(&n->nd);
		if (n->start >= start) {
			if (n->end <= end)
				sp_delete(sp, n);
			else
				n->start = end;
		} else {
			/* Old policy spanning whole new range. */
			if (n->end > end) {
				if (!n_new)
					goto alloc_new;

				/* Tail piece [end, n->end) gets a copy of the old policy. */
				*mpol_new = *n->policy;
				atomic_set(&mpol_new->refcnt, 1);
				sp_node_init(n_new, end, n->end, mpol_new);
				n->end = start;
				sp_insert(sp, n_new);
				/* Now owned by the tree; keep err_out from freeing them. */
				n_new = NULL;
				mpol_new = NULL;
				break;
			} else
				n->end = start;
		}
		if (!next)
			break;
		n = rb_entry(next, struct sp_node, nd);
	}
	if (new)
		sp_insert(sp, new);
	write_unlock(&sp->lock);
	ret = 0;

err_out:
	/* Release spares that were allocated but not consumed. */
	if (mpol_new)
		mpol_put(mpol_new);
	if (n_new)
		kmem_cache_free(sn_cache, n_new);

	return ret;

alloc_new:
	/* The GFP_KERNEL allocations below are made with the lock released. */
	write_unlock(&sp->lock);
	ret = -ENOMEM;
	n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL);
	if (!n_new)
		goto err_out;
	mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL);
	if (!mpol_new)
		goto err_out;
	atomic_set(&mpol_new->refcnt, 1);
	goto restart;
}

/**
 * mpol_shared_policy_init - initialize shared policy for inode
 * @sp: pointer to inode shared policy
 * @mpol:  struct mempolicy to install
 *
 * Install non-NULL @mpol in inode's shared policy rb-tree.
 * On entry, the current task has a reference on a non-NULL @mpol.
 * This must be released on exit.
 * This is called at get_inode() calls and we can use GFP_KERNEL.
*/
void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
{
	int ret;

	sp->root = RB_ROOT;		/* empty tree == default mempolicy */
	rwlock_init(&sp->lock);

	if (mpol) {
		struct sp_node *sn;
		struct mempolicy *npol;
		NODEMASK_SCRATCH(scratch);

		if (!scratch)
			goto put_mpol;

		/* contextualize the tmpfs mount point mempolicy to this file */
		npol = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
		if (IS_ERR(npol))
			goto free_scratch; /* no valid nodemask intersection */

		task_lock(current);
		ret = mpol_set_nodemask(npol, &mpol->w.user_nodemask, scratch);
		task_unlock(current);
		if (ret)
			goto put_npol;

		/* alloc node covering entire file; adds ref to file's npol */
		sn = sp_alloc(0, MAX_LFS_FILESIZE >> PAGE_SHIFT, npol);
		/* On allocation failure the tree is simply left empty. */
		if (sn)
			sp_insert(sp, sn);
put_npol:
		mpol_put(npol);	/* drop initial ref on file's npol */
free_scratch:
		NODEMASK_SCRATCH_FREE(scratch);
put_mpol:
		mpol_put(mpol);	/* drop our incoming ref on sb mpol */
	}
}

/*
 * Set the shared policy for the file range backing @vma, i.e. page offsets
 * [vma->vm_pgoff, vma->vm_pgoff + vma_pages(vma)).  With a non-NULL @pol a
 * new node is allocated and inserted; with NULL @pol the range is only
 * cleared of existing nodes.
 *
 * Returns 0 on success or -ENOMEM.
 */
int mpol_set_shared_policy(struct shared_policy *sp,
			struct vm_area_struct *vma, struct mempolicy *pol)
{
	int err;
	struct sp_node *new = NULL;
	unsigned long sz = vma_pages(vma);

	if (pol) {
		new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, pol);
		if (!new)
			return -ENOMEM;
	}
	err = shared_policy_replace(sp, vma->vm_pgoff, vma->vm_pgoff + sz, new);
	/* On failure the new node was not inserted, so it is freed here. */
	if (err && new)
		sp_free(new);
	return err;
}

/* Free a backing policy store on inode delete. */
void mpol_free_shared_policy(struct shared_policy *sp)
{
	struct sp_node *n;
	struct rb_node *next;

	if (!sp->root.rb_node)
		return;
	write_lock(&sp->lock);
	next = rb_first(&sp->root);
	while (next) {
		n = rb_entry(next, struct sp_node, nd);
		/* Advance before sp_delete() frees n. */
		next = rb_next(&n->nd);
		sp_delete(sp, n);
	}
	write_unlock(&sp->lock);
}

#ifdef CONFIG_NUMA_BALANCING
/* Set by setup_numabalancing(): 1 = "enable", -1 = "disable", 0 = not given. */
static int __initdata numabalancing_override;

/*
 * Apply the boot-time NUMA balancing state: an explicit numa_balancing=
 * override wins; otherwise, on systems with more than one online node, the
 * CONFIG_NUMA_BALANCING_DEFAULT_ENABLED default is applied and logged.
 */
static void __init check_numabalancing_enable(void)
{
	bool numabalancing_default = false;

	if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED))
		numabalancing_default = true;

	/* Parsed by setup_numabalancing. override == 1 enables, -1 disables */
	if (numabalancing_override)
		set_numabalancing_state(numabalancing_override == 1);

	if (num_online_nodes() > 1 && !numabalancing_override) {
		pr_info("%s automatic NUMA balancing. Configure with numa_balancing= or the kernel.numa_balancing sysctl\n",
			numabalancing_default ? "Enabling" : "Disabling");
		set_numabalancing_state(numabalancing_default);
	}
}

/*
 * Handler for the "numa_balancing=" parameter registered with __setup()
 * below.  Accepts exactly "enable" or "disable"; returns 1 when the value
 * was recognized, otherwise warns and returns 0.
 */
static int __init setup_numabalancing(char *str)
{
	int ret = 0;
	if (!str)
		goto out;

	if (!strcmp(str, "enable")) {
		numabalancing_override = 1;
		ret = 1;
	} else if (!strcmp(str, "disable")) {
		numabalancing_override = -1;
		ret = 1;
	}
out:
	if (!ret)
		pr_warn("Unable to parse numa_balancing=\n");

	return ret;
}
__setup("numa_balancing=", setup_numabalancing);
#else
static inline void __init check_numabalancing_enable(void)
{
}
#endif /* CONFIG_NUMA_BALANCING */

/*
 * Boot-time setup: create the policy and shared-policy-node slab caches,
 * fill in preferred_node_policy[] for every node, install an interleave
 * policy for system init, and settle the NUMA balancing state.
 */
void __init numa_policy_init(void)
{
	nodemask_t interleave_nodes;
	unsigned long largest = 0;
	int nid, prefer = 0;

	policy_cache = kmem_cache_create("numa_policy",
					 sizeof(struct mempolicy),
					 0, SLAB_PANIC, NULL);

	sn_cache = kmem_cache_create("shared_policy_node",
				     sizeof(struct sp_node),
				     0, SLAB_PANIC, NULL);

	for_each_node(nid) {
		preferred_node_policy[nid] = (struct mempolicy) {
			.refcnt = ATOMIC_INIT(1),
			.mode = MPOL_PREFERRED,
			.flags = MPOL_F_MOF | MPOL_F_MORON,
			.nodes = nodemask_of_node(nid),
		};
	}

	/*
	 * Set interleaving policy for system init. Interleaving is only
	 * enabled across suitably sized nodes (default is >= 16MB), or
	 * fall back to the largest node if they're all smaller.
	 */
	nodes_clear(interleave_nodes);
	for_each_node_state(nid, N_MEMORY) {
		unsigned long total_pages = node_present_pages(nid);

		/* Preserve the largest node */
		if (largest < total_pages) {
			largest = total_pages;
			prefer = nid;
		}

		/* Interleave this node? */
		if ((total_pages << PAGE_SHIFT) >= (16 << 20))
			node_set(nid, interleave_nodes);
	}

	/* All too small, use the largest */
	if (unlikely(nodes_empty(interleave_nodes)))
		node_set(prefer, interleave_nodes);

	if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
		pr_err("%s: interleaving failed\n", __func__);

	check_numabalancing_enable();
}

/* Reset policy of current process to default */
void numa_default_policy(void)
{
	do_set_mempolicy(MPOL_DEFAULT, 0, NULL);
}

/*
 * Parse and format mempolicy from/to strings
 */
/* Mode names, indexed by MPOL_* value; used by both parsing and formatting. */
static const char * const policy_modes[] =
{
	[MPOL_DEFAULT]    = "default",
	[MPOL_PREFERRED]  = "prefer",
	[MPOL_BIND]       = "bind",
	[MPOL_INTERLEAVE] = "interleave",
	[MPOL_LOCAL]      = "local",
	[MPOL_PREFERRED_MANY]  = "prefer (many)",
};

#ifdef CONFIG_TMPFS
/**
 * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
 * @str:  string containing mempolicy to parse
 * @mpol:  pointer to struct mempolicy pointer, returned on success.
 *
 * Format of input:
 *	<mode>[=<flags>][:<nodelist>]
 *
 * @str is split in place while parsing and restored before returning.
 * Note that a successful parse of "default" stores NULL in *@mpol.
 *
 * Return: %0 on success, else %1
 */
int mpol_parse_str(char *str, struct mempolicy **mpol)
{
	struct mempolicy *new = NULL;
	unsigned short mode_flags;
	nodemask_t nodes;
	char *nodelist = strchr(str, ':');
	char *flags = strchr(str, '=');
	int err = 1, mode;

	if (flags)
		*flags++ = '\0';	/* terminate mode string */

	if (nodelist) {
		/* NUL-terminate mode or flags string */
		*nodelist++ = '\0';
		if (nodelist_parse(nodelist, nodes))
			goto out;
		if (!nodes_subset(nodes, node_states[N_MEMORY]))
			goto out;
	} else
		nodes_clear(nodes);

	mode = match_string(policy_modes, MPOL_MAX, str);
	if (mode < 0)
		goto out;

	switch (mode) {
	case MPOL_PREFERRED:
		/*
		 * Insist on a nodelist of one node only, although later
		 * we use first_node(nodes) to grab a single node, so here
		 * nodelist (or nodes) cannot be empty.
		 */
		if (nodelist) {
			char *rest = nodelist;
			while (isdigit(*rest))
				rest++;
			if (*rest)
				goto out;
			if (nodes_empty(nodes))
				goto out;
		}
		break;
	case MPOL_INTERLEAVE:
		/*
		 * Default to online nodes with memory if no nodelist
		 */
		if (!nodelist)
			nodes = node_states[N_MEMORY];
		break;
	case MPOL_LOCAL:
		/*
		 * Don't allow a nodelist;  mpol_new() checks flags
		 */
		if (nodelist)
			goto out;
		break;
	case MPOL_DEFAULT:
		/*
		 * Insist on an empty nodelist
		 */
		if (!nodelist)
			err = 0;
		goto out;
	case MPOL_PREFERRED_MANY:
	case MPOL_BIND:
		/*
		 * Insist on a nodelist
		 */
		if (!nodelist)
			goto out;
	}

	mode_flags = 0;
	if (flags) {
		/*
		 * Currently, we only support two mutually exclusive
		 * mode flags.
		 */
		if (!strcmp(flags, "static"))
			mode_flags |= MPOL_F_STATIC_NODES;
		else if (!strcmp(flags, "relative"))
			mode_flags |= MPOL_F_RELATIVE_NODES;
		else
			goto out;
	}

	new = mpol_new(mode, mode_flags, &nodes);
	if (IS_ERR(new))
		goto out;

	/*
	 * Save nodes for mpol_to_str() to show the tmpfs mount options
	 * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo.
	 */
	if (mode != MPOL_PREFERRED) {
		new->nodes = nodes;
	} else if (nodelist) {
		nodes_clear(new->nodes);
		node_set(first_node(nodes), new->nodes);
	} else {
		/* "prefer" without a nodelist is stored as MPOL_LOCAL. */
		new->mode = MPOL_LOCAL;
	}

	/*
	 * Save nodes for contextualization: this will be used to "clone"
	 * the mempolicy in a specific context [cpuset] at a later time.
	 */
	new->w.user_nodemask = nodes;

	err = 0;

out:
	/* Restore string for error message */
	if (nodelist)
		*--nodelist = ':';
	if (flags)
		*--flags = '=';
	if (!err)
		*mpol = new;
	return err;
}
#endif /* CONFIG_TMPFS */

/**
 * mpol_to_str - format a mempolicy structure for printing
 * @buffer:  to contain formatted mempolicy string
 * @maxlen:  length of @buffer
 * @pol:  pointer to mempolicy to be formatted
 *
 * Convert @pol into a string.  If @buffer is too short, truncate the string.
 * Recommend a @maxlen of at least 32 for the longest mode, "interleave", the
 * longest flag, "relative", and to display at least a few node ids.
*/
void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
{
	char *p = buffer;
	nodemask_t nodes = NODE_MASK_NONE;
	unsigned short mode = MPOL_DEFAULT;
	unsigned short flags = 0;

	/*
	 * A NULL policy, the system default policy and policies carrying
	 * MPOL_F_MORON are all printed as "default".
	 */
	if (pol && pol != &default_policy && !(pol->flags & MPOL_F_MORON)) {
		mode = pol->mode;
		flags = pol->flags;
	}

	switch (mode) {
	case MPOL_DEFAULT:
	case MPOL_LOCAL:
		/* No nodelist is printed for these modes. */
		break;
	case MPOL_PREFERRED:
	case MPOL_PREFERRED_MANY:
	case MPOL_BIND:
	case MPOL_INTERLEAVE:
		nodes = pol->nodes;
		break;
	default:
		WARN_ON_ONCE(1);
		snprintf(p, maxlen, "unknown");
		return;
	}

	/*
	 * Every append uses scnprintf(), which returns the number of
	 * characters actually stored.  snprintf() returns the length the
	 * output would have had, so on truncation "p" would move past the
	 * end of @buffer and the remaining-size expression
	 * "buffer + maxlen - p" below would go negative.
	 */
	p += scnprintf(p, maxlen, "%s", policy_modes[mode]);

	if (flags & MPOL_MODE_FLAGS) {
		p += scnprintf(p, buffer + maxlen - p, "=");

		/*
		 * Currently, the only defined flags are mutually exclusive
		 */
		if (flags & MPOL_F_STATIC_NODES)
			p += scnprintf(p, buffer + maxlen - p, "static");
		else if (flags & MPOL_F_RELATIVE_NODES)
			p += scnprintf(p, buffer + maxlen - p, "relative");
	}

	if (!nodes_empty(nodes))
		p += scnprintf(p, buffer + maxlen - p, ":%*pbl",
			       nodemask_pr_args(&nodes));
}
|
4 7 6 3 1 1 3 1 1 3 || // SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Squashfs - a compressed read only filesystem for Linux
 *
 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
 * Phillip Lougher <phillip@squashfs.org.uk>
 *
 * dir.c
 */

/*
 * This file implements code to read directories from disk.
 *
 * See namei.c for a description of directory organisation on disk.
 */

#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/slab.h>

#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "squashfs_fs_i.h"
#include "squashfs.h"

/* Maps the on-disk directory entry type (used as index) to a DT_* value. */
static const unsigned char squashfs_filetype_table[] = {
	DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_FIFO, DT_SOCK
};

/*
 * Lookup offset (f_pos) in the directory index, returning the
 * metadata block containing it.
 *
 * If we get an error reading the index then return the part of the index
 * (if any) we have managed to read - the index isn't essential, just
 * quicker.
 *
 * On return *next_block / *next_offset have been advanced to the last index
 * entry that starts at or before f_pos, when one was found.  The return
 * value is the external f_pos corresponding to that position; an f_pos of 3
 * or less is returned unchanged without reading the index.
 */
static int get_dir_index_using_offset(struct super_block *sb,
	u64 *next_block, int *next_offset, u64 index_start, int index_offset,
	int i_count, u64 f_pos)
{
	struct squashfs_sb_info *msblk = sb->s_fs_info;
	int err, i, index, length = 0;
	unsigned int size;
	struct squashfs_dir_index dir_index;

	TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %lld\n",
					i_count, f_pos);

	/*
	 * Translate from external f_pos to the internal f_pos.  This
	 * is offset by 3 because we invent "." and ".." entries which are
	 * not actually stored in the directory.
	 */
	if (f_pos <= 3)
		return f_pos;
	f_pos -= 3;

	for (i = 0; i < i_count; i++) {
		err = squashfs_read_metadata(sb, &dir_index, &index_start,
				&index_offset, sizeof(dir_index));
		if (err < 0)
			break;

		index = le32_to_cpu(dir_index.index);
		if (index > f_pos)
			/*
			 * Found the index we're looking for.
			 */
			break;

		size = le32_to_cpu(dir_index.size) + 1;

		/* size should never be larger than SQUASHFS_NAME_LEN */
		if (size > SQUASHFS_NAME_LEN)
			break;

		/* NULL buffer: the name bytes are read past without being stored. */
		err = squashfs_read_metadata(sb, NULL, &index_start,
				&index_offset, size);
		if (err < 0)
			break;

		length = index;
		*next_block = le32_to_cpu(dir_index.start_block) +
					msblk->directory_table;
	}

	*next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE;

	/*
	 * Translate back from internal f_pos to external f_pos.
	 */
	return length + 3;
}

/*
 * Directory iteration callback (installed as .iterate_shared below).
 *
 * Emits the invented "." and ".." entries, then walks the on-disk directory
 * headers and entries starting from the position found through the
 * directory index, emitting every entry located past ctx->pos.
 *
 * Always returns 0: allocation failure, a full dir_emit() buffer and
 * read/validation errors all end the walk early without an error code
 * (read errors are reported through ERROR()).
 */
static int squashfs_readdir(struct file *file, struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	u64 block = squashfs_i(inode)->start + msblk->directory_table;
	int offset = squashfs_i(inode)->offset, length, err;
	unsigned int inode_number, dir_count, size, type;
	struct squashfs_dir_header dirh;
	struct squashfs_dir_entry *dire;

	TRACE("Entered squashfs_readdir [%llx:%x]\n", block, offset);

	/* Room for the entry header, the longest name and a trailing NUL. */
	dire = kmalloc(sizeof(*dire) + SQUASHFS_NAME_LEN + 1, GFP_KERNEL);
	if (dire == NULL) {
		ERROR("Failed to allocate squashfs_dir_entry\n");
		goto finish;
	}

	/*
	 * Return "." and  ".." entries as the first two filenames in the
	 * directory.  To maximise compression these two entries are not
	 * stored in the directory, and so we invent them here.
	 *
	 * It also means that the external f_pos is offset by 3 from the
	 * on-disk directory f_pos.
	 */
	while (ctx->pos < 3) {
		char *name;
		int i_ino;

		if (ctx->pos == 0) {
			name = ".";
			size = 1;
			i_ino = inode->i_ino;
		} else {
			name = "..";
			size = 2;
			i_ino = squashfs_i(inode)->parent;
		}

		/* Index 1 of the type table is DT_DIR. */
		if (!dir_emit(ctx, name, size, i_ino,
				squashfs_filetype_table[1]))
			goto finish;

		/* pos advances by the name length: 0 -> 1 -> 3. */
		ctx->pos += size;
	}

	length = get_dir_index_using_offset(inode->i_sb, &block, &offset,
				squashfs_i(inode)->dir_idx_start,
				squashfs_i(inode)->dir_idx_offset,
				squashfs_i(inode)->dir_idx_cnt,
				ctx->pos);

	while (length < i_size_read(inode)) {
		/*
		 * Read directory header
		 */
		err = squashfs_read_metadata(inode->i_sb, &dirh, &block,
					&offset, sizeof(dirh));
		if (err < 0)
			goto failed_read;

		length += sizeof(dirh);

		dir_count = le32_to_cpu(dirh.count) + 1;

		if (dir_count > SQUASHFS_DIR_COUNT)
			goto failed_read;

		while (dir_count--) {
			/*
			 * Read directory entry.
			 */
			err = squashfs_read_metadata(inode->i_sb, dire, &block,
					&offset, sizeof(*dire));
			if (err < 0)
				goto failed_read;

			size = le16_to_cpu(dire->size) + 1;

			/* size should never be larger than SQUASHFS_NAME_LEN */
			if (size > SQUASHFS_NAME_LEN)
				goto failed_read;

			err = squashfs_read_metadata(inode->i_sb, dire->name,
					&block, &offset, size);
			if (err < 0)
				goto failed_read;

			length += sizeof(*dire) + size;

			/* Entry lies before the requested position: skip it. */
			if (ctx->pos >= length)
				continue;

			dire->name[size] = '\0';
			/* The entry stores a signed 16-bit delta from the header's inode number. */
			inode_number = le32_to_cpu(dirh.inode_number) +
				((short) le16_to_cpu(dire->inode_number));
			type = le16_to_cpu(dire->type);

			if (type > SQUASHFS_MAX_DIR_TYPE)
				goto failed_read;

			if (!dir_emit(ctx, dire->name, size,
					inode_number,
					squashfs_filetype_table[type]))
				goto finish;

			ctx->pos = length;
		}
	}

finish:
	kfree(dire);
	return 0;

failed_read:
	ERROR("Unable to read directory block [%llx:%x]\n", block, offset);
	kfree(dire);
	return 0;
}

/* File operations for squashfs directories. */
const struct file_operations squashfs_dir_ops = {
	.read = generic_read_dir,
	.iterate_shared = squashfs_readdir,
	.llseek = generic_file_llseek,
};
|
147 71 77 139 4 11 188 117 103 230 76 19 2 2 1 1 1 1 1 123 61 118 123 169 169 35 16 11 131 26 105 31 86 1 10 22 2 18 5 5 2 3 2 2 19 2 1 1 22 215 1 3 1 1 1 3 1 11 9 4 8 22 5 5 2 21 8 5 2 1 4 2 3 1 2 5 1 1 1 3 1 1 1 1 3 2 1 1 2 3 2 3 1 1 2 14 7 1 4 23 2 5 2 4 2 2 11 2 1 11 || // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com */ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/bpf-cgroup.h> #include <linux/cgroup.h> #include <linux/rcupdate.h> #include <linux/random.h> #include <linux/smp.h> #include <linux/topology.h> #include <linux/ktime.h> #include <linux/sched.h> #include <linux/uidgid.h> #include <linux/filter.h> #include <linux/ctype.h> #include <linux/jiffies.h> #include <linux/pid_namespace.h> #include <linux/poison.h> #include <linux/proc_ns.h> #include <linux/sched/task.h> #include <linux/security.h> #include <linux/btf_ids.h> #include <linux/bpf_mem_alloc.h> #include <linux/kasan.h> #include "../../lib/kstrtox.h" /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments * * Different map implementations will rely on rcu in map methods * lookup/update/delete, therefore eBPF programs must run under rcu lock * if program is allowed to access maps, so check rcu_read_lock_held in * all three functions. 
*/
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
	return (unsigned long) map->ops->map_lookup_elem(map, key);
}

const struct bpf_func_proto bpf_map_lookup_elem_proto = {
	.func		= bpf_map_lookup_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
	   void *, value, u64, flags)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
	return map->ops->map_update_elem(map, key, value, flags);
}

const struct bpf_func_proto bpf_map_update_elem_proto = {
	.func		= bpf_map_update_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
	return map->ops->map_delete_elem(map, key);
}

const struct bpf_func_proto bpf_map_delete_elem_proto = {
	.func		= bpf_map_delete_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

/* push/pop/peek simply dispatch to the corresponding map operation. */
BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
{
	return map->ops->map_push_elem(map, value, flags);
}

const struct bpf_func_proto bpf_map_push_elem_proto = {
	.func		= bpf_map_push_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_pop_elem(map, value);
}

const struct bpf_func_proto bpf_map_pop_elem_proto = {
	.func		= bpf_map_pop_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
};

BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_peek_elem(map, value);
}

const struct bpf_func_proto bpf_map_peek_elem_proto = {
	.func		= bpf_map_peek_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
};

BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
	return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
}

const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = {
	.func		= bpf_map_lookup_percpu_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
	.arg3_type	= ARG_ANYTHING,
};

const struct bpf_func_proto bpf_get_prandom_u32_proto = {
	.func		= bpf_user_rnd_u32,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_smp_processor_id)
{
	return smp_processor_id();
}

const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
	.func		= bpf_get_smp_processor_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_numa_node_id)
{
	return numa_node_id();
}

const struct bpf_func_proto bpf_get_numa_node_id_proto = {
	.func		= bpf_get_numa_node_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_ns)
{
	/* NMI safe access to clock monotonic */
	return ktime_get_mono_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_ns_proto = {
	.func		= bpf_ktime_get_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_boot_ns)
{
	/* NMI safe access to clock boottime */
	return ktime_get_boot_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
	.func		= bpf_ktime_get_boot_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_coarse_ns)
{
	return ktime_get_coarse_ns();
}

const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
	.func		= bpf_ktime_get_coarse_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_tai_ns)
{
	/* NMI safe access to clock tai */
	return ktime_get_tai_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_tai_ns_proto = {
	.func		= bpf_ktime_get_tai_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Returns the current task's tgid in the upper 32 bits and pid in the lower. */
BPF_CALL_0(bpf_get_current_pid_tgid)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		return -EINVAL;

	return (u64) task->tgid << 32 | task->pid;
}

const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
	.func		= bpf_get_current_pid_tgid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/*
 * Returns the current gid in the upper 32 bits and uid in the lower 32 bits,
 * both translated through init_user_ns.
 */
BPF_CALL_0(bpf_get_current_uid_gid)
{
	struct task_struct *task = current;
	kuid_t uid;
	kgid_t gid;

	if (unlikely(!task))
		return -EINVAL;

	current_uid_gid(&uid, &gid);
	return (u64) from_kgid(&init_user_ns, gid) << 32 |
		     from_kuid(&init_user_ns, uid);
}

const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
	.func		= bpf_get_current_uid_gid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/*
 * Copies the current task's comm into buf, padding the remainder of the
 * size bytes.  On failure the whole buffer is zeroed, since buf is declared
 * as uninitialized memory in the proto below.
 */
BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		goto err_clear;

	/* Verifier guarantees that size > 0 */
	strscpy_pad(buf, task->comm, size);
	return 0;
err_clear:
	memset(buf, 0, size);
	return -EINVAL;
}

const struct bpf_func_proto bpf_get_current_comm_proto = {
	.func		= bpf_get_current_comm,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE,
};

#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)

/*
 * Variant that reuses the arch spinlock: struct bpf_spin_lock is treated as
 * an arch_spinlock_t, which the build-time checks below require to be
 * exactly 32 bits wide with an all-zero unlocked state.
 */
static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;
	union {
		__u32 val;
		arch_spinlock_t lock;
	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };

	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
	preempt_disable();
	arch_spin_lock(l);
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;

	arch_spin_unlock(l);
	preempt_enable();
}

#else

/*
 * Fallback variant: struct bpf_spin_lock is treated as an atomic_t and the
 * lock is taken by spinning until an xchg to 1 observes the value 0.
 */
static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
	do {
		atomic_cond_read_relaxed(l, !VAL);
	} while (atomic_xchg(l, 1));
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	atomic_set_release(l, 0);
}

#endif

/* IRQ flags saved by __bpf_spin_lock_irqsave() on this CPU. */
static DEFINE_PER_CPU(unsigned long, irqsave_flags);

/*
 * Disable local interrupts, take the lock, then stash the saved IRQ flags in
 * the per-CPU irqsave_flags for the matching unlock.
 */
static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
{
	unsigned long flags;

	local_irq_save(flags);
	__bpf_spin_lock(lock);
	__this_cpu_write(irqsave_flags, flags);
}

notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
{
	__bpf_spin_lock_irqsave(lock);
	return 0;
}

const struct bpf_func_proto bpf_spin_lock_proto = {
	.func		= bpf_spin_lock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
	.arg1_btf_id    = BPF_PTR_POISON,
};

/*
 * Counterpart of __bpf_spin_lock_irqsave(): the saved flags are read back
 * from the per-CPU variable before the lock is dropped, and interrupts are
 * restored afterwards.
 */
static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
{
	unsigned long flags;

	flags = __this_cpu_read(irqsave_flags);
	__bpf_spin_unlock(lock);
	local_irq_restore(flags);
}

notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
{
	__bpf_spin_unlock_irqrestore(lock);
	return 0;
}

const struct bpf_func_proto bpf_spin_unlock_proto = {
	.func		= bpf_spin_unlock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
	.arg1_btf_id    = BPF_PTR_POISON,
};

/*
 * Copy a map value from src to dst while holding the bpf_spin_lock embedded
 * in one of them: the lock inside src when lock_src is true, otherwise the
 * lock inside dst.  The lock is found at map->record->spin_lock_off.
 */
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
			   bool lock_src)
{
	struct bpf_spin_lock *lock;

	if (lock_src)
		lock = src + map->record->spin_lock_off;
	else
		lock = dst + map->record->spin_lock_off;
	preempt_disable();
	__bpf_spin_lock_irqsave(lock);
	copy_map_value(map, dst, src);
	__bpf_spin_unlock_irqrestore(lock);
	preempt_enable();
}

BPF_CALL_0(bpf_jiffies64)
{
	return get_jiffies_64();
}

const struct bpf_func_proto bpf_jiffies64_proto = {
	.func		= bpf_jiffies64,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

#ifdef CONFIG_CGROUPS
/* Returns the id of the current task's cgroup as given by task_dfl_cgroup(). */
BPF_CALL_0(bpf_get_current_cgroup_id)
{
	struct cgroup *cgrp;
	u64 cgrp_id;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	cgrp_id = cgroup_id(cgrp);
	rcu_read_unlock();

	return cgrp_id;
}

const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
	.func		= bpf_get_current_cgroup_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/*
 * Returns the id of the ancestor of the current task's cgroup at
 * ancestor_level, or 0 when cgroup_ancestor() finds none.
 */
BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
{
	struct cgroup *cgrp;
	struct cgroup *ancestor;
	u64 cgrp_id;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	ancestor = cgroup_ancestor(cgrp, ancestor_level);
	cgrp_id = ancestor ? cgroup_id(ancestor) : 0;
	rcu_read_unlock();

	return cgrp_id;
}

const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
	.func		= bpf_get_current_ancestor_cgroup_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};
#endif /* CONFIG_CGROUPS */

/* The low bits of 'flags' select the numeric base; no other bits are allowed. */
#define BPF_STRTOX_BASE_MASK 0x1F

/*
 * Parse an unsigned integer from the first buf_len bytes of buf.
 *
 * Leading whitespace and one optional '-' are skipped; the sign is reported
 * through *is_negative and the magnitude through *res.  Only bases 0, 8, 10
 * and 16 are accepted.
 *
 * Returns the number of characters consumed (whitespace and sign included),
 * -EINVAL on bad arguments or when no digits are found, or -ERANGE on
 * overflow.
 */
static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
			  unsigned long long *res, bool *is_negative)
{
	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
	const char *cur_buf = buf;
	size_t cur_len = buf_len;
	unsigned int consumed;
	size_t val_len;
	char str[64];

	if (!buf || !buf_len || !res || !is_negative)
		return -EINVAL;

	if (base != 0 && base != 8 && base != 10 && base != 16)
		return -EINVAL;

	if (flags & ~BPF_STRTOX_BASE_MASK)
		return -EINVAL;

	while (cur_buf < buf + buf_len && isspace(*cur_buf))
		++cur_buf;

	*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
	if (*is_negative)
		++cur_buf;

	consumed = cur_buf - buf;
	cur_len -= consumed;
	if (!cur_len)
		return -EINVAL;

	/*
	 * The input is not required to be NUL-terminated, so parse a bounded,
	 * terminated copy of the remaining bytes instead.
	 */
	cur_len = min(cur_len, sizeof(str) - 1);
	memcpy(str, cur_buf, cur_len);
	str[cur_len] = '\0';
	cur_buf = str;

	cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
	val_len = _parse_integer(cur_buf, base, res);

	if (val_len & KSTRTOX_OVERFLOW)
		return -ERANGE;

	if (val_len == 0)
		return -EINVAL;

	cur_buf += val_len;
	consumed += cur_buf - str;

	return consumed;
}

/*
 * Signed wrapper around __bpf_strtoull(): applies the sign and returns
 * -ERANGE when the result does not fit in a long long.
 */
static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
			 long long *res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative) {
		if ((long long)-_res > 0)
			return -ERANGE;
		*res = -_res;
	} else {
		if ((long long)_res < 0)
			return -ERANGE;
		*res = _res;
	}
	return err;
}

BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
	   long *, res)
{
	long long _res;
	int err;

	err = __bpf_strtoll(buf, buf_len, flags, &_res);
	if (err < 0)
		return err;
	/* Reject values that do not survive narrowing to long. */
	if (_res != (long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtol_proto = {
	.func		= bpf_strtol,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};

BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
	   unsigned long *, res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative)
		return -EINVAL;
	/* Reject values that do not survive narrowing to unsigned long. */
	if (_res != (unsigned long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtoul_proto = {
	.func		= bpf_strtoul,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};

BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
{
	return strncmp(s1, s2, s1_sz);
}

static const struct bpf_func_proto bpf_strncmp_proto = {
	.func		= bpf_strncmp,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_CONST_STR,
};

/*
 * Fill nsdata with the current task's pid and tgid as seen from its active
 * pid namespace, provided that namespace matches the given dev/ino pair.
 * On any failure nsdata is zeroed and a negative error is returned.
 */
BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
	   struct bpf_pidns_info *, nsdata, u32, size)
{
	struct task_struct *task = current;
	struct pid_namespace *pidns;
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_pidns_info)))
		goto clear;

	/* dev must be representable as a dev_t without losing bits. */
	if (unlikely((u64)(dev_t)dev != dev))
		goto clear;

	if (unlikely(!task))
		goto clear;

	pidns = task_active_pid_ns(task);
	if (unlikely(!pidns)) {
		err = -ENOENT;
		goto clear;
	}

	if (!ns_match(&pidns->ns, (dev_t)dev, ino))
		goto clear;

	nsdata->pid = task_pid_nr_ns(task, pidns);
	nsdata->tgid = task_tgid_nr_ns(task, pidns);
	return 0;
clear:
	memset((void *)nsdata, 0, (size_t) size);
	return err;
}

const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
	.func		= bpf_get_ns_current_pid_tgid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type      = ARG_PTR_TO_UNINIT_MEM,
	.arg4_type      = ARG_CONST_SIZE,
};

static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= bpf_get_raw_cpu_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Only the BPF_F_INDEX_MASK bits of flags are accepted; ctx is not used. */
BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;

	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
}

const struct bpf_func_proto bpf_event_output_data_proto =  {
	.func		= bpf_event_output_data,
	.gpl_only       = true,
	.ret_type       = RET_INTEGER,
	.arg1_type      = ARG_PTR_TO_CTX,
	.arg2_type      = ARG_CONST_MAP_PTR,
	.arg3_type      = ARG_ANYTHING,
	.arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
};

/*
 * Copy size bytes from user_ptr into dst.  If any byte could not be copied,
 * dst is zeroed in full and -EFAULT is returned.
 */
BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
	   const void __user *, user_ptr)
{
	int ret = copy_from_user(dst, user_ptr, size);

	if (unlikely(ret)) {
		memset(dst, 0, size);
		ret = -EFAULT;
	}

	return ret;
}

const struct bpf_func_proto bpf_copy_from_user_proto = {
	.func		= bpf_copy_from_user,
	.gpl_only	= false,
	.might_sleep	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

/*
 * Copy size bytes from user_ptr in the address space of tsk into dst.
 * Returns 0 only when all size bytes were read; otherwise dst is zeroed and
 * a negative error is returned.
 */
BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
	   const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
{
	int ret;

	/* flags is not used yet */
	if (unlikely(flags))
		return -EINVAL;

	if (unlikely(!size))
		return 0;

	ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
	if (ret == size)
		return 0;

	memset(dst, 0, size);
	/* Return -EFAULT for partial read */
	return ret < 0 ? ret : -EFAULT;
}

const struct bpf_func_proto bpf_copy_from_user_task_proto = {
	.func		= bpf_copy_from_user_task,
	.gpl_only	= true,
	.might_sleep	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_BTF_ID,
	.arg4_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg5_type	= ARG_ANYTHING
};

/* Returns NULL when cpu is not below nr_cpu_ids. */
BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
{
	if (cpu >= nr_cpu_ids)
		return (unsigned long)NULL;

	return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
}

const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
	.func		= bpf_per_cpu_ptr,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
{
	return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
}

const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
	.func		= bpf_this_cpu_ptr,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
};

/*
 * Copy the string at unsafe_ptr into buf (at most bufsz bytes) using the
 * non-faulting accessor selected by fmt_ptype: 'k' reads kernel memory, 'u'
 * reads user memory, and 's' reads kernel memory unless the architecture has
 * non-overlapping address spaces and the pointer is below TASK_SIZE, in
 * which case it reads user memory.  buf is set to the empty string up front.
 *
 * Returns the accessor's result, or -EINVAL for any other fmt_ptype.
 */
static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
		size_t bufsz)
{
	void __user *user_ptr = (__force void __user *)unsafe_ptr;

	buf[0] = 0;

	switch (fmt_ptype) {
	case 's':
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
		if ((unsigned long)unsafe_ptr < TASK_SIZE)
			return strncpy_from_user_nofault(buf, user_ptr, bufsz);
		fallthrough;
#endif
	case 'k':
		return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
	case 'u':
		return strncpy_from_user_nofault(buf, user_ptr, bufsz);
	}

	return -EINVAL;
}

/* Per-cpu temp buffers used by printf-like helpers to store the
bprintf binary * arguments representation. */ #define MAX_BPRINTF_BIN_ARGS 512 /* Support executing three nested bprintf helper calls on a given CPU */ #define MAX_BPRINTF_NEST_LEVEL 3 struct bpf_bprintf_buffers { char bin_args[MAX_BPRINTF_BIN_ARGS]; char buf[MAX_BPRINTF_BUF]; }; static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs); static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); static int try_get_buffers(struct bpf_bprintf_buffers **bufs) { int nest_level; preempt_disable(); nest_level = this_cpu_inc_return(bpf_bprintf_nest_level); if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) { this_cpu_dec(bpf_bprintf_nest_level); preempt_enable(); return -EBUSY; } *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]); return 0; } void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) { if (!data->bin_args && !data->buf) return; if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0)) return; this_cpu_dec(bpf_bprintf_nest_level); preempt_enable(); } /* * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers * * Returns a negative value if fmt is an invalid format string or 0 otherwise. * * This can be used in two ways: * - Format string verification only: when data->get_bin_args is false * - Arguments preparation: in addition to the above verification, it writes in * data->bin_args a binary representation of arguments usable by bstr_printf * where pointers from BPF have been sanitized. * * In argument preparation mode, if 0 is returned, safe temporary buffers are * allocated and bpf_bprintf_cleanup should be called to free them after use. 
*/ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 num_args, struct bpf_bprintf_data *data) { bool get_buffers = (data->get_bin_args && num_args) || data->get_buf; char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; struct bpf_bprintf_buffers *buffers = NULL; size_t sizeof_cur_arg, sizeof_cur_ip; int err, i, num_spec = 0; u64 cur_arg; char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX"; fmt_end = strnchr(fmt, fmt_size, 0); if (!fmt_end) return -EINVAL; fmt_size = fmt_end - fmt; if (get_buffers && try_get_buffers(&buffers)) return -EBUSY; if (data->get_bin_args) { if (num_args) tmp_buf = buffers->bin_args; tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS; data->bin_args = (u32 *)tmp_buf; } if (data->get_buf) data->buf = buffers->buf; for (i = 0; i < fmt_size; i++) { if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { err = -EINVAL; goto out; } if (fmt[i] != '%') continue; if (fmt[i + 1] == '%') { i++; continue; } if (num_spec >= num_args) { err = -EINVAL; goto out; } /* The string is zero-terminated so if fmt[i] != 0, we can * always access fmt[i + 1], in the worst case it will be a 0 */ i++; /* skip optional "[0 +-][num]" width formatting field */ while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' || fmt[i] == ' ') i++; if (fmt[i] >= '1' && fmt[i] <= '9') { i++; while (fmt[i] >= '0' && fmt[i] <= '9') i++; } if (fmt[i] == 'p') { sizeof_cur_arg = sizeof(long); if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') && fmt[i + 2] == 's') { fmt_ptype = fmt[i + 1]; i += 2; goto fmt_str; } if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) || ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' || fmt[i + 1] == 'x' || fmt[i + 1] == 's' || fmt[i + 1] == 'S') { /* just kernel pointers */ if (tmp_buf) cur_arg = raw_args[num_spec]; i++; goto nocopy_fmt; } if (fmt[i + 1] == 'B') { if (tmp_buf) { err = snprintf(tmp_buf, (tmp_buf_end - tmp_buf), "%pB", (void *)(long)raw_args[num_spec]); tmp_buf += (err + 1); } i++; num_spec++; continue; } /* only support 
"%pI4", "%pi4", "%pI6" and "%pi6". */ if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') || (fmt[i + 2] != '4' && fmt[i + 2] != '6')) { err = -EINVAL; goto out; } i += 2; if (!tmp_buf) goto nocopy_fmt; sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16; if (tmp_buf_end - tmp_buf < sizeof_cur_ip) { err = -ENOSPC; goto out; } unsafe_ptr = (char *)(long)raw_args[num_spec]; err = copy_from_kernel_nofault(cur_ip, unsafe_ptr, sizeof_cur_ip); if (err < 0) memset(cur_ip, 0, sizeof_cur_ip); /* hack: bstr_printf expects IP addresses to be * pre-formatted as strings, ironically, the easiest way * to do that is to call snprintf. */ ip_spec[2] = fmt[i - 1]; ip_spec[3] = fmt[i]; err = snprintf(tmp_buf, tmp_buf_end - tmp_buf, ip_spec, &cur_ip); tmp_buf += err + 1; num_spec++; continue; } else if (fmt[i] == 's') { fmt_ptype = fmt[i]; fmt_str: if (fmt[i + 1] != 0 && !isspace(fmt[i + 1]) && !ispunct(fmt[i + 1])) { err = -EINVAL; goto out; } if (!tmp_buf) goto nocopy_fmt; if (tmp_buf_end == tmp_buf) { err = -ENOSPC; goto out; } unsafe_ptr = (char *)(long)raw_args[num_spec]; err = bpf_trace_copy_string(tmp_buf, unsafe_ptr, fmt_ptype, tmp_buf_end - tmp_buf); if (err < 0) { tmp_buf[0] = '\0'; err = 1; } tmp_buf += err; num_spec++; continue; } else if (fmt[i] == 'c') { if (!tmp_buf) goto nocopy_fmt; if (tmp_buf_end == tmp_buf) { err = -ENOSPC; goto out; } *tmp_buf = raw_args[num_spec]; tmp_buf++; num_spec++; continue; } sizeof_cur_arg = sizeof(int); if (fmt[i] == 'l') { sizeof_cur_arg = sizeof(long); i++; } if (fmt[i] == 'l') { sizeof_cur_arg = sizeof(long long); i++; } if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x' && fmt[i] != 'X') { err = -EINVAL; goto out; } if (tmp_buf) cur_arg = raw_args[num_spec]; nocopy_fmt: if (tmp_buf) { tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32)); if (tmp_buf_end - tmp_buf < sizeof_cur_arg) { err = -ENOSPC; goto out; } if (sizeof_cur_arg == 8) { *(u32 *)tmp_buf = *(u32 *)&cur_arg; *(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1); } else { *(u32 *)tmp_buf = 
(u32)(long)cur_arg; } tmp_buf += sizeof_cur_arg; } num_spec++; } err = 0; out: if (err) bpf_bprintf_cleanup(data); return err; } BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, const void *, args, u32, data_len) { struct bpf_bprintf_data data = { .get_bin_args = true, }; int err, num_args; if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 || (data_len && !args)) return -EINVAL; num_args = data_len / 8; /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we * can safely give an unbounded size. */ err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data); if (err < 0) return err; err = bstr_printf(str, str_size, fmt, data.bin_args); bpf_bprintf_cleanup(&data); return err + 1; } const struct bpf_func_proto bpf_snprintf_proto = { .func = bpf_snprintf, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_MEM_OR_NULL, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_PTR_TO_CONST_STR, .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; /* BPF map elements can contain 'struct bpf_timer'. * Such map owns all of its BPF timers. * 'struct bpf_timer' is allocated as part of map element allocation * and it's zero initialized. * That space is used to keep 'struct bpf_timer_kern'. * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and * remembers 'struct bpf_map *' pointer it's part of. * bpf_timer_set_callback() increments prog refcnt and assign bpf callback_fn. * bpf_timer_start() arms the timer. * If user space reference to a map goes to zero at this point * ops->map_release_uref callback is responsible for cancelling the timers, * freeing their memory, and decrementing prog's refcnts. * bpf_timer_cancel() cancels the timer and decrements prog's refcnt. * Inner maps can contain bpf timers as well. ops->map_release_uref is * freeing the timers when inner map is replaced or deleted by user space. 
*/ struct bpf_hrtimer { struct hrtimer timer; struct bpf_map *map; struct bpf_prog *prog; void __rcu *callback_fn; void *value; }; /* the actual struct hidden inside uapi struct bpf_timer */ struct bpf_timer_kern { struct bpf_hrtimer *timer; /* bpf_spin_lock is used here instead of spinlock_t to make * sure that it always fits into space reserved by struct bpf_timer * regardless of LOCKDEP and spinlock debug flags. */ struct bpf_spin_lock lock; } __attribute__((aligned(8))); static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running); static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer) { struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer); struct bpf_map *map = t->map; void *value = t->value; bpf_callback_t callback_fn; void *key; u32 idx; BTF_TYPE_EMIT(struct bpf_timer); callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held()); if (!callback_fn) goto out; /* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and * cannot be preempted by another bpf_timer_cb() on the same cpu. * Remember the timer this callback is servicing to prevent * deadlock if callback_fn() calls bpf_timer_cancel() or * bpf_map_delete_elem() on the same timer. */ this_cpu_write(hrtimer_running, t); if (map->map_type == BPF_MAP_TYPE_ARRAY) { struct bpf_array *array = container_of(map, struct bpf_array, map); /* compute the key */ idx = ((char *)value - array->value) / array->elem_size; key = &idx; } else { /* hash or lru */ key = value - round_up(map->key_size, 8); } callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0); /* The verifier checked that return value is zero. 
*/ this_cpu_write(hrtimer_running, NULL); out: return HRTIMER_NORESTART; } BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map, u64, flags) { clockid_t clockid = flags & (MAX_CLOCKS - 1); struct bpf_hrtimer *t; int ret = 0; BUILD_BUG_ON(MAX_CLOCKS != 16); BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer)); BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer)); if (in_nmi()) return -EOPNOTSUPP; if (flags >= MAX_CLOCKS || /* similar to timerfd except _ALARM variants are not supported */ (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME && clockid != CLOCK_BOOTTIME)) return -EINVAL; __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; if (t) { ret = -EBUSY; goto out; } /* allocate hrtimer via map_kmalloc to use memcg accounting */ t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node); if (!t) { ret = -ENOMEM; goto out; } t->value = (void *)timer - map->record->timer_off; t->map = map; t->prog = NULL; rcu_assign_pointer(t->callback_fn, NULL); hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); t->timer.function = bpf_timer_cb; WRITE_ONCE(timer->timer, t); /* Guarantee the order between timer->timer and map->usercnt. So * when there are concurrent uref release and bpf timer init, either * bpf_timer_cancel_and_free() called by uref release reads a no-NULL * timer or atomic64_read() below returns a zero usercnt. */ smp_mb(); if (!atomic64_read(&map->usercnt)) { /* maps with timers must be either held by user space * or pinned in bpffs. 
*/ WRITE_ONCE(timer->timer, NULL); kfree(t); ret = -EPERM; } out: __bpf_spin_unlock_irqrestore(&timer->lock); return ret; } static const struct bpf_func_proto bpf_timer_init_proto = { .func = bpf_timer_init, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_TIMER, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, }; BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn, struct bpf_prog_aux *, aux) { struct bpf_prog *prev, *prog = aux->prog; struct bpf_hrtimer *t; int ret = 0; if (in_nmi()) return -EOPNOTSUPP; __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; if (!t) { ret = -EINVAL; goto out; } if (!atomic64_read(&t->map->usercnt)) { /* maps with timers must be either held by user space * or pinned in bpffs. Otherwise timer might still be * running even when bpf prog is detached and user space * is gone, since map_release_uref won't ever be called. */ ret = -EPERM; goto out; } prev = t->prog; if (prev != prog) { /* Bump prog refcnt once. Every bpf_timer_set_callback() * can pick different callback_fn-s within the same prog. 
*/ prog = bpf_prog_inc_not_zero(prog); if (IS_ERR(prog)) { ret = PTR_ERR(prog); goto out; } if (prev) /* Drop prev prog refcnt when swapping with new prog */ bpf_prog_put(prev); t->prog = prog; } rcu_assign_pointer(t->callback_fn, callback_fn); out: __bpf_spin_unlock_irqrestore(&timer->lock); return ret; } static const struct bpf_func_proto bpf_timer_set_callback_proto = { .func = bpf_timer_set_callback, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_TIMER, .arg2_type = ARG_PTR_TO_FUNC, }; BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags) { struct bpf_hrtimer *t; int ret = 0; enum hrtimer_mode mode; if (in_nmi()) return -EOPNOTSUPP; if (flags & ~(BPF_F_TIMER_ABS | BPF_F_TIMER_CPU_PIN)) return -EINVAL; __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; if (!t || !t->prog) { ret = -EINVAL; goto out; } if (flags & BPF_F_TIMER_ABS) mode = HRTIMER_MODE_ABS_SOFT; else mode = HRTIMER_MODE_REL_SOFT; if (flags & BPF_F_TIMER_CPU_PIN) mode |= HRTIMER_MODE_PINNED; hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode); out: __bpf_spin_unlock_irqrestore(&timer->lock); return ret; } static const struct bpf_func_proto bpf_timer_start_proto = { .func = bpf_timer_start, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_TIMER, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; static void drop_prog_refcnt(struct bpf_hrtimer *t) { struct bpf_prog *prog = t->prog; if (prog) { bpf_prog_put(prog); t->prog = NULL; rcu_assign_pointer(t->callback_fn, NULL); } } BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) { struct bpf_hrtimer *t; int ret = 0; if (in_nmi()) return -EOPNOTSUPP; __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; if (!t) { ret = -EINVAL; goto out; } if (this_cpu_read(hrtimer_running) == t) { /* If bpf callback_fn is trying to bpf_timer_cancel() * its own timer the hrtimer_cancel() will deadlock * since it waits for callback_fn to finish */ ret = -EDEADLK; goto out; } 
drop_prog_refcnt(t);
out:
	__bpf_spin_unlock_irqrestore(&timer->lock);
	/* Cancel the timer and wait for associated callback to finish
	 * if it was running.
	 * On the error paths ret is already non-zero, so the ?: below
	 * short-circuits and hrtimer_cancel() is not called.
	 */
	ret = ret ?: hrtimer_cancel(&t->timer);
	return ret;
}

/* Prototype of bpf_timer_cancel(): a single timer pointer argument */
static const struct bpf_func_proto bpf_timer_cancel_proto = {
	.func		= bpf_timer_cancel,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_TIMER,
};

/* This function is called by map_delete/update_elem for individual element and
 * by ops->map_release_uref when the user space reference to a map reaches zero.
 *
 * @val points at the struct bpf_timer_kern embedded in the map value. The
 * bpf_hrtimer is detached under the timer lock and cancelled/freed after the
 * lock has been dropped.
 */
void bpf_timer_cancel_and_free(void *val)
{
	struct bpf_timer_kern *timer = val;
	struct bpf_hrtimer *t;

	/* Performance optimization: read timer->timer without lock first. */
	if (!READ_ONCE(timer->timer))
		return;

	__bpf_spin_lock_irqsave(&timer->lock);
	/* re-read it under lock */
	t = timer->timer;
	if (!t)
		goto out;
	drop_prog_refcnt(t);
	/* The subsequent bpf_timer_start/cancel() helpers won't be able to use
	 * this timer, since it won't be initialized.
	 */
	WRITE_ONCE(timer->timer, NULL);
out:
	__bpf_spin_unlock_irqrestore(&timer->lock);
	if (!t)
		return;
	/* Cancel the timer and wait for callback to complete if it was running.
	 * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
	 * right after for both preallocated and non-preallocated maps.
	 * The timer->timer = NULL was already done and no code path can
	 * see address 't' anymore.
	 *
	 * Check that bpf_map_delete/update_elem() wasn't called from timer
	 * callback_fn. In such case don't call hrtimer_cancel() (since it will
	 * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
	 * return -1). Though callback_fn is still running on this cpu it's
	 * safe to do kfree(t) because bpf_timer_cb() read everything it needed
	 * from 't'. The bpf subprog callback_fn won't be able to access 't',
	 * since timer->timer = NULL was already done. The timer will be
	 * effectively cancelled because bpf_timer_cb() will return
	 * HRTIMER_NORESTART.
*/ if (this_cpu_read(hrtimer_running) != t) hrtimer_cancel(&t->timer); kfree(t); } BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) { unsigned long *kptr = map_value; return xchg(kptr, (unsigned long)ptr); } /* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg() * helper is determined dynamically by the verifier. Use BPF_PTR_POISON to * denote type that verifier will determine. */ static const struct bpf_func_proto bpf_kptr_xchg_proto = { .func = bpf_kptr_xchg, .gpl_only = false, .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, .ret_btf_id = BPF_PTR_POISON, .arg1_type = ARG_PTR_TO_KPTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE, .arg2_btf_id = BPF_PTR_POISON, }; /* Since the upper 8 bits of dynptr->size is reserved, the * maximum supported size is 2^24 - 1. */ #define DYNPTR_MAX_SIZE ((1UL << 24) - 1) #define DYNPTR_TYPE_SHIFT 28 #define DYNPTR_SIZE_MASK 0xFFFFFF #define DYNPTR_RDONLY_BIT BIT(31) static bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr) { return ptr->size & DYNPTR_RDONLY_BIT; } void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr) { ptr->size |= DYNPTR_RDONLY_BIT; } static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type) { ptr->size |= type << DYNPTR_TYPE_SHIFT; } static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr) { return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT; } u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) { return ptr->size & DYNPTR_SIZE_MASK; } static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size) { u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK; ptr->size = new_size | metadata; } int bpf_dynptr_check_size(u32 size) { return size > DYNPTR_MAX_SIZE ? 
-E2BIG : 0; } void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size) { ptr->data = data; ptr->offset = offset; ptr->size = size; bpf_dynptr_set_type(ptr, type); } void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) { memset(ptr, 0, sizeof(*ptr)); } static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) { u32 size = __bpf_dynptr_size(ptr); if (len > size || offset > size - len) return -E2BIG; return 0; } BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr) { int err; BTF_TYPE_EMIT(struct bpf_dynptr); err = bpf_dynptr_check_size(size); if (err) goto error; /* flags is currently unsupported */ if (flags) { err = -EINVAL; goto error; } bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size); return 0; error: bpf_dynptr_set_null(ptr); return err; } static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { .func = bpf_dynptr_from_mem, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, }; BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, u32, offset, u64, flags) { enum bpf_dynptr_type type; int err; if (!src->data || flags) return -EINVAL; err = bpf_dynptr_check_off_len(src, offset, len); if (err) return err; type = bpf_dynptr_get_type(src); switch (type) { case BPF_DYNPTR_TYPE_LOCAL: case BPF_DYNPTR_TYPE_RINGBUF: /* Source and destination may possibly overlap, hence use memmove to * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr * pointing to overlapping PTR_TO_MAP_VALUE regions. 
*/
		memmove(dst, src->data + src->offset + offset, len);
		return 0;
	case BPF_DYNPTR_TYPE_SKB:
		/* skb-backed dynptr: delegate to the skb byte loader */
		return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
	case BPF_DYNPTR_TYPE_XDP:
		/* xdp-backed dynptr: delegate to the xdp byte loader */
		return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len);
	default:
		WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
		return -EFAULT;
	}
}

/* Prototype of bpf_dynptr_read() */
static const struct bpf_func_proto bpf_dynptr_read_proto = {
	.func		= bpf_dynptr_read,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_PTR_TO_DYNPTR | MEM_RDONLY,
	.arg4_type	= ARG_ANYTHING,
	.arg5_type	= ARG_ANYTHING,
};

/* bpf_dynptr_write() helper: copy @len bytes from @src into dynptr @dst at
 * @offset.
 *
 * Returns -EINVAL for a null or read-only dynptr, the
 * bpf_dynptr_check_off_len() error for an out-of-range request, and -EINVAL
 * for non-zero @flags on every type except skb, where @flags is passed on to
 * __bpf_skb_store_bytes().
 */
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
	   u32, len, u64, flags)
{
	enum bpf_dynptr_type type;
	int err;

	if (!dst->data || __bpf_dynptr_is_rdonly(dst))
		return -EINVAL;

	err = bpf_dynptr_check_off_len(dst, offset, len);
	if (err)
		return err;

	type = bpf_dynptr_get_type(dst);

	switch (type) {
	case BPF_DYNPTR_TYPE_LOCAL:
	case BPF_DYNPTR_TYPE_RINGBUF:
		if (flags)
			return -EINVAL;
		/* Source and destination may possibly overlap, hence use memmove to
		 * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
		 * pointing to overlapping PTR_TO_MAP_VALUE regions.
*/ memmove(dst->data + dst->offset + offset, src, len); return 0; case BPF_DYNPTR_TYPE_SKB: return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len, flags); case BPF_DYNPTR_TYPE_XDP: if (flags) return -EINVAL; return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len); default: WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type); return -EFAULT; } } static const struct bpf_func_proto bpf_dynptr_write_proto = { .func = bpf_dynptr_write, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg4_type = ARG_CONST_SIZE_OR_ZERO, .arg5_type = ARG_ANYTHING, }; BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) { enum bpf_dynptr_type type; int err; if (!ptr->data) return 0; err = bpf_dynptr_check_off_len(ptr, offset, len); if (err) return 0; if (__bpf_dynptr_is_rdonly(ptr)) return 0; type = bpf_dynptr_get_type(ptr); switch (type) { case BPF_DYNPTR_TYPE_LOCAL: case BPF_DYNPTR_TYPE_RINGBUF: return (unsigned long)(ptr->data + ptr->offset + offset); case BPF_DYNPTR_TYPE_SKB: case BPF_DYNPTR_TYPE_XDP: /* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */ return 0; default: WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type); return 0; } } static const struct bpf_func_proto bpf_dynptr_data_proto = { .func = bpf_dynptr_data, .gpl_only = false, .ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL, .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO, }; const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; const struct bpf_func_proto bpf_probe_read_kernel_proto __weak; const struct bpf_func_proto 
bpf_probe_read_kernel_str_proto __weak;
const struct bpf_func_proto bpf_task_pt_regs_proto __weak;

/* Map a helper id to its prototype, in three tiers:
 *   1) helpers returned to every caller;
 *   2) helpers returned only when bpf_capable() is true;
 *   3) helpers returned only when perfmon_capable() is also true.
 * Returns NULL for ids that are unknown or not permitted.
 */
const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
	/* Tier 1: no capability check */
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_map_push_elem:
		return &bpf_map_push_elem_proto;
	case BPF_FUNC_map_pop_elem:
		return &bpf_map_pop_elem_proto;
	case BPF_FUNC_map_peek_elem:
		return &bpf_map_peek_elem_proto;
	case BPF_FUNC_map_lookup_percpu_elem:
		return &bpf_map_lookup_percpu_elem_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_raw_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_ktime_get_boot_ns:
		return &bpf_ktime_get_boot_ns_proto;
	case BPF_FUNC_ktime_get_tai_ns:
		return &bpf_ktime_get_tai_ns_proto;
	case BPF_FUNC_ringbuf_output:
		return &bpf_ringbuf_output_proto;
	case BPF_FUNC_ringbuf_reserve:
		return &bpf_ringbuf_reserve_proto;
	case BPF_FUNC_ringbuf_submit:
		return &bpf_ringbuf_submit_proto;
	case BPF_FUNC_ringbuf_discard:
		return &bpf_ringbuf_discard_proto;
	case BPF_FUNC_ringbuf_query:
		return &bpf_ringbuf_query_proto;
	case BPF_FUNC_strncmp:
		return &bpf_strncmp_proto;
	case BPF_FUNC_strtol:
		return &bpf_strtol_proto;
	case BPF_FUNC_strtoul:
		return &bpf_strtoul_proto;
	default:
		break;
	}

	if (!bpf_capable())
		return NULL;

	/* Tier 2: requires bpf_capable() */
	switch (func_id) {
	case BPF_FUNC_spin_lock:
		return &bpf_spin_lock_proto;
	case BPF_FUNC_spin_unlock:
		return &bpf_spin_unlock_proto;
	case BPF_FUNC_jiffies64:
		return &bpf_jiffies64_proto;
	case BPF_FUNC_per_cpu_ptr:
		return &bpf_per_cpu_ptr_proto;
	case BPF_FUNC_this_cpu_ptr:
		return &bpf_this_cpu_ptr_proto;
	case BPF_FUNC_timer_init:
		return
&bpf_timer_init_proto;
	case BPF_FUNC_timer_set_callback:
		return &bpf_timer_set_callback_proto;
	case BPF_FUNC_timer_start:
		return &bpf_timer_start_proto;
	case BPF_FUNC_timer_cancel:
		return &bpf_timer_cancel_proto;
	case BPF_FUNC_kptr_xchg:
		return &bpf_kptr_xchg_proto;
	case BPF_FUNC_for_each_map_elem:
		return &bpf_for_each_map_elem_proto;
	case BPF_FUNC_loop:
		return &bpf_loop_proto;
	case BPF_FUNC_user_ringbuf_drain:
		return &bpf_user_ringbuf_drain_proto;
	case BPF_FUNC_ringbuf_reserve_dynptr:
		return &bpf_ringbuf_reserve_dynptr_proto;
	case BPF_FUNC_ringbuf_submit_dynptr:
		return &bpf_ringbuf_submit_dynptr_proto;
	case BPF_FUNC_ringbuf_discard_dynptr:
		return &bpf_ringbuf_discard_dynptr_proto;
	case BPF_FUNC_dynptr_from_mem:
		return &bpf_dynptr_from_mem_proto;
	case BPF_FUNC_dynptr_read:
		return &bpf_dynptr_read_proto;
	case BPF_FUNC_dynptr_write:
		return &bpf_dynptr_write_proto;
	case BPF_FUNC_dynptr_data:
		return &bpf_dynptr_data_proto;
	/* cgroup helpers are only compiled in with CONFIG_CGROUPS */
#ifdef CONFIG_CGROUPS
	case BPF_FUNC_cgrp_storage_get:
		return &bpf_cgrp_storage_get_proto;
	case BPF_FUNC_cgrp_storage_delete:
		return &bpf_cgrp_storage_delete_proto;
	case BPF_FUNC_get_current_cgroup_id:
		return &bpf_get_current_cgroup_id_proto;
	case BPF_FUNC_get_current_ancestor_cgroup_id:
		return &bpf_get_current_ancestor_cgroup_id_proto;
#endif
	default:
		break;
	}

	if (!perfmon_capable())
		return NULL;

	/* Tier 3: additionally requires perfmon_capable(). The two
	 * probe_read_kernel variants are also withheld when
	 * security_locked_down(LOCKDOWN_BPF_READ_KERNEL) reports an error.
	 */
	switch (func_id) {
	case BPF_FUNC_trace_printk:
		return bpf_get_trace_printk_proto();
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
	case BPF_FUNC_get_current_task_btf:
		return &bpf_get_current_task_btf_proto;
	case BPF_FUNC_probe_read_user:
		return &bpf_probe_read_user_proto;
	case BPF_FUNC_probe_read_kernel:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_kernel_proto;
	case BPF_FUNC_probe_read_user_str:
		return &bpf_probe_read_user_str_proto;
	case BPF_FUNC_probe_read_kernel_str:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_kernel_str_proto;
	case BPF_FUNC_snprintf_btf:
		return &bpf_snprintf_btf_proto;
	case BPF_FUNC_snprintf:
		return &bpf_snprintf_proto;
	case BPF_FUNC_task_pt_regs:
		return &bpf_task_pt_regs_proto;
	case BPF_FUNC_trace_vprintk:
		return bpf_get_trace_vprintk_proto();
	default:
		return NULL;
	}
}

/* Free every object linked on the bpf_list_head at @list_head.
 *
 * @field:     btf_field describing the list; supplies the node offset inside
 *             the containing object and the record of the contained type.
 * @list_head: the list head stored in the map value.
 * @spin_lock: lock protecting @list_head.
 *
 * The list is detached and the head re-initialised under @spin_lock; the
 * detached nodes are then dropped one by one without the lock held.
 */
void bpf_list_head_free(const struct btf_field *field, void *list_head,
			struct bpf_spin_lock *spin_lock)
{
	struct list_head *head = list_head, *orig_head = list_head;

	BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head));
	BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head));

	/* Do the actual list draining outside the lock to not hold the lock for
	 * too long, and also prevent deadlocks if tracing programs end up
	 * executing on entry/exit of functions called inside the critical
	 * section, and end up doing map ops that call bpf_list_head_free for
	 * the same map value again.
	 */
	__bpf_spin_lock_irqsave(spin_lock);
	/* A never-initialised (next == NULL) or empty list leaves head equal
	 * to orig_head, so the loop below does not run.
	 */
	if (!head->next || list_empty(head))
		goto unlock;
	head = head->next;
unlock:
	INIT_LIST_HEAD(orig_head);
	__bpf_spin_unlock_irqrestore(spin_lock);

	while (head != orig_head) {
		void *obj = head;

		/* Step back from the embedded node to the start of the object */
		obj -= field->graph_root.node_offset;
		/* Advance before the object is dropped */
		head = head->next;
		/* The contained type can also have resources, including a
		 * bpf_list_head which needs to be freed.
		 */
		migrate_disable();
		__bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
		migrate_enable();
	}
}

/* Like rbtree_postorder_for_each_entry_safe, but 'pos' and 'n' are
 * 'rb_node *', so field name of rb_node within containing struct is not
 * needed.
* * Since bpf_rb_tree's node type has a corresponding struct btf_field with * graph_root.node_offset, it's not necessary to know field name * or type of node struct */ #define bpf_rbtree_postorder_for_each_entry_safe(pos, n, root) \ for (pos = rb_first_postorder(root); \ pos && ({ n = rb_next_postorder(pos); 1; }); \ pos = n) void bpf_rb_root_free(const struct btf_field *field, void *rb_root, struct bpf_spin_lock *spin_lock) { struct rb_root_cached orig_root, *root = rb_root; struct rb_node *pos, *n; void *obj; BUILD_BUG_ON(sizeof(struct rb_root_cached) > sizeof(struct bpf_rb_root)); BUILD_BUG_ON(__alignof__(struct rb_root_cached) > __alignof__(struct bpf_rb_root)); __bpf_spin_lock_irqsave(spin_lock); orig_root = *root; *root = RB_ROOT_CACHED; __bpf_spin_unlock_irqrestore(spin_lock); bpf_rbtree_postorder_for_each_entry_safe(pos, n, &orig_root.rb_root) { obj = pos; obj -= field->graph_root.node_offset; migrate_disable(); __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false); migrate_enable(); } } __bpf_kfunc_start_defs(); __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) { struct btf_struct_meta *meta = meta__ign; u64 size = local_type_id__k; void *p; p = bpf_mem_alloc(&bpf_global_ma, size); if (!p) return NULL; if (meta) bpf_obj_init(meta->record, p); return p; } __bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign) { u64 size = local_type_id__k; /* The verifier has ensured that meta__ign must be NULL */ return bpf_mem_alloc(&bpf_global_percpu_ma, size); } /* Must be called under migrate_disable(), as required by bpf_mem_free */ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu) { struct bpf_mem_alloc *ma; if (rec && rec->refcount_off >= 0 && !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) { /* Object is refcounted and refcount_dec didn't result in 0 * refcount. 
Return without freeing the object */ return; } if (rec) bpf_obj_free_fields(rec, p); if (percpu) ma = &bpf_global_percpu_ma; else ma = &bpf_global_ma; if (rec && rec->refcount_off >= 0) bpf_mem_free_rcu(ma, p); else bpf_mem_free(ma, p); } __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) { struct btf_struct_meta *meta = meta__ign; void *p = p__alloc; __bpf_obj_drop_impl(p, meta ? meta->record : NULL, false); } __bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign) { /* The verifier has ensured that meta__ign must be NULL */ bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc); } __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign) { struct btf_struct_meta *meta = meta__ign; struct bpf_refcount *ref; /* Could just cast directly to refcount_t *, but need some code using * bpf_refcount type so that it is emitted in vmlinux BTF */ ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off); if (!refcount_inc_not_zero((refcount_t *)ref)) return NULL; /* Verifier strips KF_RET_NULL if input is owned ref, see is_kfunc_ret_null * in verifier.c */ return (void *)p__refcounted_kptr; } static int __bpf_list_add(struct bpf_list_node_kern *node, struct bpf_list_head *head, bool tail, struct btf_record *rec, u64 off) { struct list_head *n = &node->list_head, *h = (void *)head; /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't * called on its fields, so init here */ if (unlikely(!h->next)) INIT_LIST_HEAD(h); /* node->owner != NULL implies !list_empty(n), no need to separately * check the latter */ if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) { /* Only called from BPF prog, no need to migrate_disable */ __bpf_obj_drop_impl((void *)n - off, rec, false); return -EINVAL; } tail ? 
list_add_tail(n, h) : list_add(n, h); WRITE_ONCE(node->owner, head); return 0; } __bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head, struct bpf_list_node *node, void *meta__ign, u64 off) { struct bpf_list_node_kern *n = (void *)node; struct btf_struct_meta *meta = meta__ign; return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off); } __bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head, struct bpf_list_node *node, void *meta__ign, u64 off) { struct bpf_list_node_kern *n = (void *)node; struct btf_struct_meta *meta = meta__ign; return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off); } static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail) { struct list_head *n, *h = (void *)head; struct bpf_list_node_kern *node; /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't * called on its fields, so init here */ if (unlikely(!h->next)) INIT_LIST_HEAD(h); if (list_empty(h)) return NULL; n = tail ? h->prev : h->next; node = container_of(n, struct bpf_list_node_kern, list_head); if (WARN_ON_ONCE(READ_ONCE(node->owner) != head)) return NULL; list_del_init(n); WRITE_ONCE(node->owner, NULL); return (struct bpf_list_node *)n; } __bpf_kfunc struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) { return __bpf_list_del(head, false); } __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) { return __bpf_list_del(head, true); } __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, struct bpf_rb_node *node) { struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node; struct rb_root_cached *r = (struct rb_root_cached *)root; struct rb_node *n = &node_internal->rb_node; /* node_internal->owner != root implies either RB_EMPTY_NODE(n) or * n is owned by some other tree. 
No need to check RB_EMPTY_NODE(n) */
	if (READ_ONCE(node_internal->owner) != root)
		return NULL;

	rb_erase_cached(n, r);
	RB_CLEAR_NODE(n);
	WRITE_ONCE(node_internal->owner, NULL);
	return (struct bpf_rb_node *)n;
}

/* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
 * program
 *
 * Insert @node into @root, ordering with the callback @less. If @node already
 * has an owner, the object containing it (found @off bytes before the node)
 * is dropped and -EINVAL is returned; otherwise returns 0.
 */
static int __bpf_rbtree_add(struct bpf_rb_root *root,
			    struct bpf_rb_node_kern *node,
			    void *less, struct btf_record *rec, u64 off)
{
	struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
	struct rb_node *parent = NULL, *n = &node->rb_node;
	bpf_callback_t cb = (bpf_callback_t)less;
	bool leftmost = true;

	/* node->owner != NULL implies !RB_EMPTY_NODE(n), no need to separately
	 * check the latter
	 */
	if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
		/* Only called from BPF prog, no need to migrate_disable */
		__bpf_obj_drop_impl((void *)n - off, rec, false);
		return -EINVAL;
	}

	/* Descend to the insertion point: go left when the callback returns
	 * non-zero for (node, parent), right otherwise. Any step to the right
	 * means the new node is not the leftmost one.
	 */
	while (*link) {
		parent = *link;
		if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = false;
		}
	}

	rb_link_node(n, parent, link);
	rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost);
	/* Replace the BPF_PTR_POISON placeholder with the real owner */
	WRITE_ONCE(node->owner, root);
	return 0;
}

/* kfunc wrapper around __bpf_rbtree_add(); forwards the record of @meta__ign
 * when metadata is present, NULL otherwise.
 */
__bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
				    bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
				    void *meta__ign, u64 off)
{
	struct btf_struct_meta *meta = meta__ign;
	struct bpf_rb_node_kern *n = (void *)node;

	return __bpf_rbtree_add(root, n, (void *)less, meta ? meta->record : NULL, off);
}

/* Return the result of rb_first_cached() on @root, cast to a bpf_rb_node */
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
{
	struct rb_root_cached *r = (struct rb_root_cached *)root;

	return (struct bpf_rb_node *)rb_first_cached(r);
}

/**
 * bpf_task_acquire - Acquire a reference to a task. A task acquired by this
 * kfunc which is not stored in a map as a kptr, must be released by calling
 * bpf_task_release().
 * @p: The task on which a reference is being acquired.
*/ __bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p) { if (refcount_inc_not_zero(&p->rcu_users)) return p; return NULL; } /** * bpf_task_release - Release the reference acquired on a task. * @p: The task on which a reference is being released. */ __bpf_kfunc void bpf_task_release(struct task_struct *p) { put_task_struct_rcu_user(p); } #ifdef CONFIG_CGROUPS /** * bpf_cgroup_acquire - Acquire a reference to a cgroup. A cgroup acquired by * this kfunc which is not stored in a map as a kptr, must be released by * calling bpf_cgroup_release(). * @cgrp: The cgroup on which a reference is being acquired. */ __bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp) { return cgroup_tryget(cgrp) ? cgrp : NULL; } /** * bpf_cgroup_release - Release the reference acquired on a cgroup. * If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to * not be freed until the current grace period has ended, even if its refcount * drops to 0. * @cgrp: The cgroup on which a reference is being released. */ __bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp) { cgroup_put(cgrp); } /** * bpf_cgroup_ancestor - Perform a lookup on an entry in a cgroup's ancestor * array. A cgroup returned by this kfunc which is not subsequently stored in a * map, must be released by calling bpf_cgroup_release(). * @cgrp: The cgroup for which we're performing a lookup. * @level: The level of ancestor to look up. */ __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) { struct cgroup *ancestor; if (level > cgrp->level || level < 0) return NULL; /* cgrp's refcnt could be 0 here, but ancestors can still be accessed */ ancestor = cgrp->ancestors[level]; if (!cgroup_tryget(ancestor)) return NULL; return ancestor; } /** * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this * kfunc which is not subsequently stored in a map, must be released by calling * bpf_cgroup_release(). * @cgid: cgroup id. 
*/ __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid) { struct cgroup *cgrp; cgrp = cgroup_get_from_id(cgid); if (IS_ERR(cgrp)) return NULL; return cgrp; } /** * bpf_task_under_cgroup - wrap task_under_cgroup_hierarchy() as a kfunc, test * task's membership of cgroup ancestry. * @task: the task to be tested * @ancestor: possible ancestor of @task's cgroup * * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor. * It follows all the same rules as cgroup_is_descendant, and only applies * to the default hierarchy. */ __bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) { long ret; rcu_read_lock(); ret = task_under_cgroup_hierarchy(task, ancestor); rcu_read_unlock(); return ret; } #endif /* CONFIG_CGROUPS */ /** * bpf_task_from_pid - Find a struct task_struct from its pid by looking it up * in the root pid namespace idr. If a task is returned, it must either be * stored in a map, or released with bpf_task_release(). * @pid: The pid of the task being looked up. */ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) { struct task_struct *p; rcu_read_lock(); p = find_task_by_pid_ns(pid, &init_pid_ns); if (p) p = bpf_task_acquire(p); rcu_read_unlock(); return p; } /** * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data. * @ptr: The dynptr whose data slice to retrieve * @offset: Offset into the dynptr * @buffer__opt: User-provided buffer to copy contents into. May be NULL * @buffer__szk: Size (in bytes) of the buffer if present. This is the * length of the requested slice. This must be a constant. * * For non-skb and non-xdp type dynptrs, there is no difference between * bpf_dynptr_slice and bpf_dynptr_data. * * If buffer__opt is NULL, the call will fail if buffer_opt was needed. * * If the intention is to write to the data slice, please use * bpf_dynptr_slice_rdwr. * * The user must check that the returned pointer is not null before using it. 
*
 * Please note that in the case of skb and xdp dynptrs, bpf_dynptr_slice
 * does not change the underlying packet data pointers, so a call to
 * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in
 * the bpf program.
 *
 * Return: NULL if the call failed (eg invalid dynptr), pointer to a read-only
 * data slice (can be either direct pointer to the data or a pointer to the user
 * provided buffer, with its contents containing the data, if unable to obtain
 * direct pointer)
 */
__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset,
				   void *buffer__opt, u32 buffer__szk)
{
	enum bpf_dynptr_type type;
	u32 len = buffer__szk;
	int err;

	if (!ptr->data)
		return NULL;

	/* The requested slice must lie entirely inside the dynptr */
	err = bpf_dynptr_check_off_len(ptr, offset, len);
	if (err)
		return NULL;

	type = bpf_dynptr_get_type(ptr);

	switch (type) {
	case BPF_DYNPTR_TYPE_LOCAL:
	case BPF_DYNPTR_TYPE_RINGBUF:
		/* Direct pointer into the dynptr's memory */
		return ptr->data + ptr->offset + offset;
	case BPF_DYNPTR_TYPE_SKB:
		/* With a buffer, skb_header_pointer() is given it as a place
		 * to copy into; without one only skb_pointer_if_linear() is
		 * tried.
		 */
		if (buffer__opt)
			return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt);
		else
			return skb_pointer_if_linear(ptr->data, ptr->offset + offset, len);
	case BPF_DYNPTR_TYPE_XDP:
	{
		void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
		if (!IS_ERR_OR_NULL(xdp_ptr))
			return xdp_ptr;

		/* No direct pointer available: fall back to copying into the
		 * caller's buffer, which is therefore required here.
		 */
		if (!buffer__opt)
			return NULL;
		bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false);
		return buffer__opt;
	}
	default:
		WARN_ONCE(true, "unknown dynptr type %d\n", type);
		return NULL;
	}
}

/**
 * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
 * @ptr: The dynptr whose data slice to retrieve
 * @offset: Offset into the dynptr
 * @buffer__opt: User-provided buffer to copy contents into. May be NULL
 * @buffer__szk: Size (in bytes) of the buffer if present. This is the
 *               length of the requested slice. This must be a constant.
 *
 * For non-skb and non-xdp type dynptrs, there is no difference between
 * bpf_dynptr_slice and bpf_dynptr_data.
*
 * If buffer__opt is NULL, the call will fail if buffer__opt was needed.
 *
 * The returned pointer is writable and may point to either directly the dynptr
 * data at the requested offset or to the buffer if unable to obtain a direct
 * data pointer to (example: the requested slice is to the paged area of an skb
 * packet). In the case where the returned pointer is to the buffer, the user
 * is responsible for persisting writes through calling bpf_dynptr_write(). This
 * usually looks something like this pattern:
 *
 * struct eth_hdr *eth = bpf_dynptr_slice_rdwr(&dynptr, 0, buffer, sizeof(buffer));
 * if (!eth)
 *	return TC_ACT_SHOT;
 *
 * // mutate eth header //
 *
 * if (eth == buffer)
 *	bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0);
 *
 * Please note that, as in the example above, the user must check that the
 * returned pointer is not null before using it.
 *
 * Please also note that in the case of skb and xdp dynptrs, bpf_dynptr_slice_rdwr
 * does not change the underlying packet data pointers, so a call to
 * bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in
 * the bpf program.
 *
 * Return: NULL if the call failed (eg invalid dynptr), pointer to a
 * data slice (can be either direct pointer to the data or a pointer to the user
 * provided buffer, with its contents containing the data, if unable to obtain
 * direct pointer)
 */
__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset,
					void *buffer__opt, u32 buffer__szk)
{
	/* A null or read-only dynptr never yields a writable slice */
	if (!ptr->data || __bpf_dynptr_is_rdonly(ptr))
		return NULL;

	/* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice.
	 *
	 * For skb-type dynptrs, it is safe to write into the returned pointer
	 * if the bpf program allows skb data writes. There are two possibilities
	 * that may occur when calling bpf_dynptr_slice_rdwr:
	 *
	 * 1) The requested slice is in the head of the skb.
In this case, the * returned pointer is directly to skb data, and if the skb is cloned, the * verifier will have uncloned it (see bpf_unclone_prologue()) already. * The pointer can be directly written into. * * 2) Some portion of the requested slice is in the paged buffer area. * In this case, the requested data will be copied out into the buffer * and the returned pointer will be a pointer to the buffer. The skb * will not be pulled. To persist the write, the user will need to call * bpf_dynptr_write(), which will pull the skb and commit the write. * * Similarly for xdp programs, if the requested slice is not across xdp * fragments, then a direct pointer will be returned, otherwise the data * will be copied out into the buffer and the user will need to call * bpf_dynptr_write() to commit changes. */ return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk); } __bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 end) { u32 size; if (!ptr->data || start > end) return -EINVAL; size = __bpf_dynptr_size(ptr); if (start > size || end > size) return -ERANGE; ptr->offset += start; bpf_dynptr_set_size(ptr, end - start); return 0; } __bpf_kfunc bool bpf_dynptr_is_null(struct bpf_dynptr_kern *ptr) { return !ptr->data; } __bpf_kfunc bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) { if (!ptr->data) return false; return __bpf_dynptr_is_rdonly(ptr); } __bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) { if (!ptr->data) return -EINVAL; return __bpf_dynptr_size(ptr); } __bpf_kfunc int bpf_dynptr_clone(struct bpf_dynptr_kern *ptr, struct bpf_dynptr_kern *clone__uninit) { if (!ptr->data) { bpf_dynptr_set_null(clone__uninit); return -EINVAL; } *clone__uninit = *ptr; return 0; } __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj) { return obj; } __bpf_kfunc void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k) { return obj__ign; } __bpf_kfunc void bpf_rcu_read_lock(void) { rcu_read_lock(); } __bpf_kfunc void bpf_rcu_read_unlock(void) 
{
	rcu_read_unlock();
}

/* State collected by bpf_stack_walker() on behalf of bpf_throw():
 * @aux: aux of the first BPF frame found that is not a subprog
 * @sp:  stack pointer reported for that frame
 * @bp:  frame pointer reported for that frame
 * @cnt: number of BPF frames visited so far
 */
struct bpf_throw_ctx {
	struct bpf_prog_aux *aux;
	u64 sp;
	u64 bp;
	int cnt;
};

/* Stack-walk callback used by bpf_throw(). Returning true continues the walk,
 * false stops it.
 *
 * Non-BPF frames are skipped only until the first BPF frame has been seen.
 * BPF subprog frames are counted and skipped; the first BPF frame that is
 * not a subprog is recorded in the context and ends the walk.
 */
static bool bpf_stack_walker(void *cookie, u64 ip, u64 sp, u64 bp)
{
	struct bpf_throw_ctx *ctx = cookie;
	struct bpf_prog *prog;

	if (!is_bpf_text_address(ip))
		return !ctx->cnt;
	prog = bpf_prog_ksym_find(ip);
	ctx->cnt++;
	if (bpf_is_subprog(prog))
		return true;
	ctx->aux = prog->aux;
	ctx->sp = sp;
	ctx->bp = bp;
	return false;
}

/* Walk the stack to find the frame recorded by bpf_stack_walker() and call
 * that program's exception callback with @cookie and the recorded sp/bp.
 * The callback is not expected to return.
 */
__bpf_kfunc void bpf_throw(u64 cookie)
{
	struct bpf_throw_ctx ctx = {};

	arch_bpf_stack_walk(bpf_stack_walker, &ctx);
	WARN_ON_ONCE(!ctx.aux);
	if (ctx.aux)
		WARN_ON_ONCE(!ctx.aux->exception_boundary);
	WARN_ON_ONCE(!ctx.bp);
	WARN_ON_ONCE(!ctx.cnt);
	/* Prevent KASAN false positives for CONFIG_KASAN_STACK by unpoisoning
	 * deeper stack depths than ctx.sp as we do not return from bpf_throw,
	 * which skips compiler generated instrumentation to do the same.
	 */
	kasan_unpoison_task_stack_below((void *)(long)ctx.sp);
	/* NOTE(review): ctx.aux is dereferenced here even when the warning
	 * above found it to be NULL - confirm that case cannot happen.
	 */
	ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp);
	WARN(1, "A call to BPF exception callback should never return\n");
}

__bpf_kfunc_end_defs();

/* kfuncs of the "generic" set together with their KF_* flags */
BTF_SET8_START(generic_btf_ids)
#ifdef CONFIG_KEXEC_CORE
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
#ifdef CONFIG_CGROUPS BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_throw) BTF_SET8_END(generic_btf_ids) static const struct btf_kfunc_id_set generic_kfunc_set = { .owner = THIS_MODULE, .set = &generic_btf_ids, }; BTF_ID_LIST(generic_dtor_ids) BTF_ID(struct, task_struct) BTF_ID(func, bpf_task_release) #ifdef CONFIG_CGROUPS BTF_ID(struct, cgroup) BTF_ID(func, bpf_cgroup_release) #endif BTF_SET8_START(common_btf_ids) BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) BTF_ID_FLAGS(func, bpf_rdonly_cast) BTF_ID_FLAGS(func, bpf_rcu_read_lock) BTF_ID_FLAGS(func, bpf_rcu_read_unlock) BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW | KF_RCU) BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY) #ifdef CONFIG_CGROUPS BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED) BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY) #endif BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED) BTF_ID_FLAGS(func, bpf_iter_task_next, 
/*
 * Register the generic kfunc set for the tracing, sched_cls and struct_ops
 * program types, then the destructor kfuncs, and finally the common kfunc
 * set for BPF_PROG_TYPE_UNSPEC. Registration stops at the first failure and
 * that error code is returned; 0 means everything was registered.
 */
static int __init kfunc_init(void)
{
	/* Each entry pairs a struct BTF id with the BTF id of its release kfunc. */
	const struct btf_id_dtor_kfunc generic_dtors[] = {
		{
			.btf_id       = generic_dtor_ids[0],
			.kfunc_btf_id = generic_dtor_ids[1]
		},
#ifdef CONFIG_CGROUPS
		{
			.btf_id       = generic_dtor_ids[2],
			.kfunc_btf_id = generic_dtor_ids[3]
		},
#endif
	};
	int err;

	err = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set);
	if (err)
		return err;

	err = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set);
	if (err)
		return err;

	err = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set);
	if (err)
		return err;

	err = register_btf_id_dtor_kfuncs(generic_dtors,
					  ARRAY_SIZE(generic_dtors),
					  THIS_MODULE);
	if (err)
		return err;

	return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}
1 22 1 1 3 1 3 1 3 27 5 22 20 2 3 1 1 21 9 9 9 9 2 1 1 9 2 1 1 1 51 9 37 7 14 46 8 41 52 1 45 7 47 51 51 51 20 51 1 14 25 3 33 48 2 1 1 6 2 3 1 1 6 10 4 17 2 1 15 16 61 58 3 55 1 46 16 38 2 1 7 1 1 1 1 13 2 7 6 1 2 1 1 2 1 1 1 1 2 1 14 1 1 1 4 1 13 1 1 29 1 17 2 6 6 1 1 23 1 6 18 3 30 4 4 1 5 12 1 1 1 6 5 1 3 4 2 7 1 1 1 1 283 2 21 341 64 278 || // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/ioctl.c * * Copyright (C) 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) * Laboratoire MASI - Institut Blaise Pascal * Universite Pierre et Marie Curie (Paris VI) */ #include <linux/fs.h> #include <linux/capability.h> #include <linux/time.h> #include <linux/compat.h> #include <linux/mount.h> #include <linux/file.h> #include <linux/quotaops.h> #include <linux/random.h> #include <linux/uaccess.h> #include <linux/delay.h> #include <linux/iversion.h> #include <linux/fileattr.h> #include <linux/uuid.h> #include "ext4_jbd2.h" #include "ext4.h" #include <linux/fsmap.h> #include "fsmap.h" #include <trace/events/ext4.h> typedef void ext4_update_sb_callback(struct ext4_super_block *es, const void *arg); /* * Superblock modification callback function for changing file system * label */ static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg) { /* Sanity check, this should never happen */ BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX); memcpy(es->s_volume_name, (char *)arg, EXT4_LABEL_MAX); } /* * Superblock modification callback function for changing file system * UUID. 
/*
 * Apply the callback 'func' with argument 'arg' to the primary superblock
 * (the buffer and superblock image cached in the ext4_sb_info) under the
 * journal handle 'handle', recompute the superblock checksum, mark the
 * buffer dirty through the journal and write it out synchronously.
 *
 * Returns 0 on success or the error from the first failing step; the
 * result is also passed to ext4_std_error().
 */
static
int ext4_update_primary_sb(struct super_block *sb, handle_t *handle,
			   ext4_update_sb_callback func,
			   const void *arg)
{
	int err = 0;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct buffer_head *bh = sbi->s_sbh;
	struct ext4_super_block *es = sbi->s_es;

	trace_ext4_update_sb(sb, bh->b_blocknr, 1);

	BUFFER_TRACE(bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, sb,
					    bh,
					    EXT4_JTR_NONE);
	if (err)
		goto out_err;

	/* Modify the superblock and refresh its checksum under the buffer lock. */
	lock_buffer(bh);
	func(es, arg);
	ext4_superblock_csum_set(sb);
	unlock_buffer(bh);

	/*
	 * If the buffer carries a write error or is not up to date, log it,
	 * clear the error and mark the buffer up to date before dirtying it.
	 */
	if (buffer_write_io_error(bh) || !buffer_uptodate(bh)) {
		ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
			 "superblock detected");
		clear_buffer_write_io_error(bh);
		set_buffer_uptodate(bh);
	}

	err = ext4_handle_dirty_metadata(handle, NULL, bh);
	if (err)
		goto out_err;
	err = sync_dirty_buffer(bh);
out_err:
	/* Called on every path, including success (err == 0). */
	ext4_std_error(sb, err);
	return err;
}
1 : 0); bh = ext4_sb_bread(sb, sb_block, 0); if (IS_ERR(bh)) return PTR_ERR(bh); if (handle) { BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE); if (err) goto out_bh; } es = (struct ext4_super_block *) (bh->b_data + offset); lock_buffer(bh); if (ext4_has_metadata_csum(sb) && es->s_checksum != ext4_superblock_csum(sb, es)) { ext4_msg(sb, KERN_ERR, "Invalid checksum for backup " "superblock %llu", sb_block); unlock_buffer(bh); goto out_bh; } func(es, arg); if (ext4_has_metadata_csum(sb)) es->s_checksum = ext4_superblock_csum(sb, es); set_buffer_uptodate(bh); unlock_buffer(bh); if (handle) { err = ext4_handle_dirty_metadata(handle, NULL, bh); if (err) goto out_bh; } else { BUFFER_TRACE(bh, "marking dirty"); mark_buffer_dirty(bh); } err = sync_dirty_buffer(bh); out_bh: brelse(bh); ext4_std_error(sb, err); return (err) ? err : 1; } /* * Update primary and backup superblocks using the provided function * func and argument arg. * * Only the primary superblock and at most two backup superblock * modifications are journalled; the rest is modified without journal. * This is safe because e2fsck will re-write them if there is a problem, * and we're very unlikely to ever need more than two backups. */ static int ext4_update_superblocks_fn(struct super_block *sb, ext4_update_sb_callback func, const void *arg) { handle_t *handle; ext4_group_t ngroups; unsigned int three = 1; unsigned int five = 5; unsigned int seven = 7; int err = 0, ret, i; ext4_group_t grp, primary_grp; struct ext4_sb_info *sbi = EXT4_SB(sb); /* * We can't update superblocks while the online resize is running */ if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING, &sbi->s_ext4_flags)) { ext4_msg(sb, KERN_ERR, "Can't modify superblock while" "performing online resize"); return -EBUSY; } /* * We're only going to update primary superblock and two * backup superblocks in this transaction. 
*/ handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 3); if (IS_ERR(handle)) { err = PTR_ERR(handle); goto out; } /* Update primary superblock */ err = ext4_update_primary_sb(sb, handle, func, arg); if (err) { ext4_msg(sb, KERN_ERR, "Failed to update primary " "superblock"); goto out_journal; } primary_grp = ext4_get_group_number(sb, sbi->s_sbh->b_blocknr); ngroups = ext4_get_groups_count(sb); /* * Update backup superblocks. We have to start from group 0 * because it might not be where the primary superblock is * if the fs is mounted with -o sb=<backup_sb_block> */ i = 0; grp = 0; while (grp < ngroups) { /* Skip primary superblock */ if (grp == primary_grp) goto next_grp; ret = ext4_update_backup_sb(sb, handle, grp, func, arg); if (ret < 0) { /* Ignore bad checksum; try to update next sb */ if (ret == -EFSBADCRC) goto next_grp; err = ret; goto out_journal; } i += ret; if (handle && i > 1) { /* * We're only journalling primary superblock and * two backup superblocks; the rest is not * journalled. */ err = ext4_journal_stop(handle); if (err) goto out; handle = NULL; } next_grp: grp = ext4_list_backups(sb, &three, &five, &seven); } out_journal: if (handle) { ret = ext4_journal_stop(handle); if (ret && !err) err = ret; } out: clear_bit_unlock(EXT4_FLAGS_RESIZING, &sbi->s_ext4_flags); smp_mb__after_atomic(); return err ? err : 0; } /* * Swap memory between @a and @b for @len bytes. * * @a: pointer to first memory area * @b: pointer to second memory area * @len: number of bytes to swap * */ static void memswap(void *a, void *b, size_t len) { unsigned char *ap, *bp; ap = (unsigned char *)a; bp = (unsigned char *)b; while (len-- > 0) { swap(*ap, *bp); ap++; bp++; } } /* * Swap i_data and associated attributes between @inode1 and @inode2. * This function is used for the primary swap between inode1 and inode2 * and also to revert this primary swap in case of errors. * * Therefore you have to make sure, that calling this method twice * will revert all changes. 
* * @inode1: pointer to first inode * @inode2: pointer to second inode */ static void swap_inode_data(struct inode *inode1, struct inode *inode2) { loff_t isize; struct ext4_inode_info *ei1; struct ext4_inode_info *ei2; unsigned long tmp; struct timespec64 ts1, ts2; ei1 = EXT4_I(inode1); ei2 = EXT4_I(inode2); swap(inode1->i_version, inode2->i_version); ts1 = inode_get_atime(inode1); ts2 = inode_get_atime(inode2); inode_set_atime_to_ts(inode1, ts2); inode_set_atime_to_ts(inode2, ts1); ts1 = inode_get_mtime(inode1); ts2 = inode_get_mtime(inode2); inode_set_mtime_to_ts(inode1, ts2); inode_set_mtime_to_ts(inode2, ts1); memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); tmp = ei1->i_flags & EXT4_FL_SHOULD_SWAP; ei1->i_flags = (ei2->i_flags & EXT4_FL_SHOULD_SWAP) | (ei1->i_flags & ~EXT4_FL_SHOULD_SWAP); ei2->i_flags = tmp | (ei2->i_flags & ~EXT4_FL_SHOULD_SWAP); swap(ei1->i_disksize, ei2->i_disksize); ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); isize = i_size_read(inode1); i_size_write(inode1, i_size_read(inode2)); i_size_write(inode2, isize); } void ext4_reset_inode_seed(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); __le32 inum = cpu_to_le32(inode->i_ino); __le32 gen = cpu_to_le32(inode->i_generation); __u32 csum; if (!ext4_has_metadata_csum(inode->i_sb)) return; csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, sizeof(inum)); ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, sizeof(gen)); } /* * Swap the information from the given @inode and the inode * EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other * important fields of the inodes. 
* * @sb: the super block of the filesystem * @idmap: idmap of the mount the inode was found from * @inode: the inode to swap with EXT4_BOOT_LOADER_INO * */ static long swap_inode_boot_loader(struct super_block *sb, struct mnt_idmap *idmap, struct inode *inode) { handle_t *handle; int err; struct inode *inode_bl; struct ext4_inode_info *ei_bl; qsize_t size, size_bl, diff; blkcnt_t blocks; unsigned short bytes; inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL | EXT4_IGET_BAD); if (IS_ERR(inode_bl)) return PTR_ERR(inode_bl); ei_bl = EXT4_I(inode_bl); /* Protect orig inodes against a truncate and make sure, * that only 1 swap_inode_boot_loader is running. */ lock_two_nondirectories(inode, inode_bl); if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode) || IS_SWAPFILE(inode) || IS_ENCRYPTED(inode) || (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) || ext4_has_inline_data(inode)) { err = -EINVAL; goto journal_err_out; } if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) || !inode_owner_or_capable(idmap, inode) || !capable(CAP_SYS_ADMIN)) { err = -EPERM; goto journal_err_out; } filemap_invalidate_lock(inode->i_mapping); err = filemap_write_and_wait(inode->i_mapping); if (err) goto err_out; err = filemap_write_and_wait(inode_bl->i_mapping); if (err) goto err_out; /* Wait for all existing dio workers */ inode_dio_wait(inode); inode_dio_wait(inode_bl); truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode_bl->i_data, 0); handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2); if (IS_ERR(handle)) { err = -EINVAL; goto err_out; } ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle); /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(inode, inode_bl); if (is_bad_inode(inode_bl) || !S_ISREG(inode_bl->i_mode)) { /* this inode has never been used as a BOOT_LOADER */ set_nlink(inode_bl, 1); i_uid_write(inode_bl, 0); i_gid_write(inode_bl, 0); inode_bl->i_flags = 0; ei_bl->i_flags 
= 0; inode_set_iversion(inode_bl, 1); i_size_write(inode_bl, 0); EXT4_I(inode_bl)->i_disksize = inode_bl->i_size; inode_bl->i_mode = S_IFREG; if (ext4_has_feature_extents(sb)) { ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS); ext4_ext_tree_init(handle, inode_bl); } else memset(ei_bl->i_data, 0, sizeof(ei_bl->i_data)); } err = dquot_initialize(inode); if (err) goto err_out1; size = (qsize_t)(inode->i_blocks) * (1 << 9) + inode->i_bytes; size_bl = (qsize_t)(inode_bl->i_blocks) * (1 << 9) + inode_bl->i_bytes; diff = size - size_bl; swap_inode_data(inode, inode_bl); inode_set_ctime_current(inode); inode_set_ctime_current(inode_bl); inode_inc_iversion(inode); inode->i_generation = get_random_u32(); inode_bl->i_generation = get_random_u32(); ext4_reset_inode_seed(inode); ext4_reset_inode_seed(inode_bl); ext4_discard_preallocations(inode, 0); err = ext4_mark_inode_dirty(handle, inode); if (err < 0) { /* No need to update quota information. */ ext4_warning(inode->i_sb, "couldn't mark inode #%lu dirty (err %d)", inode->i_ino, err); /* Revert all changes: */ swap_inode_data(inode, inode_bl); ext4_mark_inode_dirty(handle, inode); goto err_out1; } blocks = inode_bl->i_blocks; bytes = inode_bl->i_bytes; inode_bl->i_blocks = inode->i_blocks; inode_bl->i_bytes = inode->i_bytes; err = ext4_mark_inode_dirty(handle, inode_bl); if (err < 0) { /* No need to update quota information. */ ext4_warning(inode_bl->i_sb, "couldn't mark inode #%lu dirty (err %d)", inode_bl->i_ino, err); goto revert; } /* Bootloader inode should not be counted into quota information. 
*/ if (diff > 0) dquot_free_space(inode, diff); else err = dquot_alloc_space(inode, -1 * diff); if (err < 0) { revert: /* Revert all changes: */ inode_bl->i_blocks = blocks; inode_bl->i_bytes = bytes; swap_inode_data(inode, inode_bl); ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode_bl); } err_out1: ext4_journal_stop(handle); ext4_double_up_write_data_sem(inode, inode_bl); err_out: filemap_invalidate_unlock(inode->i_mapping); journal_err_out: unlock_two_nondirectories(inode, inode_bl); iput(inode_bl); return err; } /* * If immutable is set and we are not clearing it, we're not allowed to change * anything else in the inode. Don't error out if we're only trying to set * immutable on an immutable file. */ static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid, unsigned int flags) { struct ext4_inode_info *ei = EXT4_I(inode); unsigned int oldflags = ei->i_flags; if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL)) return 0; if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL)) return -EPERM; if (ext4_has_feature_project(inode->i_sb) && __kprojid_val(ei->i_projid) != new_projid) return -EPERM; return 0; } static void ext4_dax_dontcache(struct inode *inode, unsigned int flags) { struct ext4_inode_info *ei = EXT4_I(inode); if (S_ISDIR(inode->i_mode)) return; if (test_opt2(inode->i_sb, DAX_NEVER) || test_opt(inode->i_sb, DAX_ALWAYS)) return; if ((ei->i_flags ^ flags) & EXT4_DAX_FL) d_mark_dontcache(inode); } static bool dax_compatible(struct inode *inode, unsigned int oldflags, unsigned int flags) { /* Allow the DAX flag to be changed on inline directories */ if (S_ISDIR(inode->i_mode)) { flags &= ~EXT4_INLINE_DATA_FL; oldflags &= ~EXT4_INLINE_DATA_FL; } if (flags & EXT4_DAX_FL) { if ((oldflags & EXT4_DAX_MUT_EXCL) || ext4_test_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS)) { return false; } } if ((flags & EXT4_DAX_MUT_EXCL) && (oldflags & EXT4_DAX_FL)) return false; return true; } static int 
ext4_ioctl_setflags(struct inode *inode, unsigned int flags) { struct ext4_inode_info *ei = EXT4_I(inode); handle_t *handle = NULL; int err = -EPERM, migrate = 0; struct ext4_iloc iloc; unsigned int oldflags, mask, i; struct super_block *sb = inode->i_sb; /* Is it quota file? Do not allow user to mess with it */ if (ext4_is_quota_file(inode)) goto flags_out; oldflags = ei->i_flags; /* * The JOURNAL_DATA flag can only be changed by * the relevant capability. */ if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { if (!capable(CAP_SYS_RESOURCE)) goto flags_out; } if (!dax_compatible(inode, oldflags, flags)) { err = -EOPNOTSUPP; goto flags_out; } if ((flags ^ oldflags) & EXT4_EXTENTS_FL) migrate = 1; if ((flags ^ oldflags) & EXT4_CASEFOLD_FL) { if (!ext4_has_feature_casefold(sb)) { err = -EOPNOTSUPP; goto flags_out; } if (!S_ISDIR(inode->i_mode)) { err = -ENOTDIR; goto flags_out; } if (!ext4_empty_dir(inode)) { err = -ENOTEMPTY; goto flags_out; } } /* * Wait for all pending directio and then flush all the dirty pages * for this file. The flush marks all the pages readonly, so any * subsequent attempt to write to the file (particularly mmap pages) * will come through the filesystem and fail. 
#ifdef CONFIG_QUOTA
/*
 * Change the project ID of @inode to @projid.
 *
 * Without the project feature only EXT4_DEF_PROJID is accepted (as a
 * no-op); any other value yields -EOPNOTSUPP. Filesystems whose inode size
 * does not exceed EXT4_GOOD_OLD_INODE_SIZE are rejected with -EOPNOTSUPP,
 * and quota files with -EPERM. Setting the ID the inode already has
 * returns 0 without touching anything.
 *
 * Returns 0 on success or a negative error code.
 */
static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
{
	struct super_block *sb = inode->i_sb;
	struct ext4_inode_info *ei = EXT4_I(inode);
	int err, rc;
	handle_t *handle;
	kprojid_t kprojid;
	struct ext4_iloc iloc;
	struct ext4_inode *raw_inode;
	struct dquot *transfer_to[MAXQUOTAS] = { };

	if (!ext4_has_feature_project(sb)) {
		if (projid != EXT4_DEF_PROJID)
			return -EOPNOTSUPP;
		else
			return 0;
	}

	if (EXT4_INODE_SIZE(sb) <= EXT4_GOOD_OLD_INODE_SIZE)
		return -EOPNOTSUPP;

	kprojid = make_kprojid(&init_user_ns, (projid_t)projid);

	/* Nothing to do if the project ID is unchanged. */
	if (projid_eq(kprojid, EXT4_I(inode)->i_projid))
		return 0;

	err = -EPERM;
	/* Is it quota file? Do not allow user to mess with it */
	if (ext4_is_quota_file(inode))
		return err;

	err = dquot_initialize(inode);
	if (err)
		return err;

	err = ext4_get_inode_loc(inode, &iloc);
	if (err)
		return err;

	/*
	 * Grow the inode's extra space if i_projid does not fit yet;
	 * otherwise just drop the buffer reference taken above.
	 * NOTE(review): presumably ext4_expand_extra_isize() releases
	 * iloc.bh itself on both success and failure - confirm.
	 */
	raw_inode = ext4_raw_inode(&iloc);
	if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) {
		err = ext4_expand_extra_isize(inode,
					      EXT4_SB(sb)->s_want_extra_isize,
					      &iloc);
		if (err)
			return err;
	} else {
		brelse(iloc.bh);
	}

	handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
		EXT4_QUOTA_INIT_BLOCKS(sb) +
		EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (err)
		goto out_stop;

	/*
	 * A dqget() failure is not treated as an error: the quota transfer
	 * is skipped and the project ID is still updated below.
	 */
	transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
	if (!IS_ERR(transfer_to[PRJQUOTA])) {

		/* __dquot_transfer() calls back ext4_get_inode_usage() which
		 * counts xattr inode references.
		 */
		down_read(&EXT4_I(inode)->xattr_sem);
		err = __dquot_transfer(inode, transfer_to);
		up_read(&EXT4_I(inode)->xattr_sem);
		dqput(transfer_to[PRJQUOTA]);
		if (err)
			goto out_dirty;
	}

	EXT4_I(inode)->i_projid = kprojid;
	inode_set_ctime_current(inode);
	inode_inc_iversion(inode);
out_dirty:
	/* Always pair the reserve above; keep the earlier error if any. */
	rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
	if (!err)
		err = rc;
out_stop:
	ext4_journal_stop(handle);
	return err;
}
#else
/*
 * Without CONFIG_QUOTA only the default project ID can be "set" (a no-op);
 * any other value is rejected with -EOPNOTSUPP.
 */
static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
{
	if (projid != EXT4_DEF_PROJID)
		return -EOPNOTSUPP;
	return 0;
}
#endif
&sbi->s_ext4_flags); if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) { (void) ext4_force_commit(sb); jbd2_journal_abort(sbi->s_journal, -ESHUTDOWN); } break; case EXT4_GOING_FLAGS_NOLOGFLUSH: set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) jbd2_journal_abort(sbi->s_journal, -ESHUTDOWN); break; default: return -EINVAL; } clear_opt(sb, DISCARD); return 0; } static int ext4_ioctl_shutdown(struct super_block *sb, unsigned long arg) { u32 flags; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(flags, (__u32 __user *)arg)) return -EFAULT; return ext4_force_shutdown(sb, flags); } struct getfsmap_info { struct super_block *gi_sb; struct fsmap_head __user *gi_data; unsigned int gi_idx; __u32 gi_last_flags; }; static int ext4_getfsmap_format(struct ext4_fsmap *xfm, void *priv) { struct getfsmap_info *info = priv; struct fsmap fm; trace_ext4_getfsmap_mapping(info->gi_sb, xfm); info->gi_last_flags = xfm->fmr_flags; ext4_fsmap_from_internal(info->gi_sb, &fm, xfm); if (copy_to_user(&info->gi_data->fmh_recs[info->gi_idx++], &fm, sizeof(struct fsmap))) return -EFAULT; return 0; } static int ext4_ioc_getfsmap(struct super_block *sb, struct fsmap_head __user *arg) { struct getfsmap_info info = { NULL }; struct ext4_fsmap_head xhead = {0}; struct fsmap_head head; bool aborted = false; int error; if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) return -EFAULT; if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) || memchr_inv(head.fmh_keys[0].fmr_reserved, 0, sizeof(head.fmh_keys[0].fmr_reserved)) || memchr_inv(head.fmh_keys[1].fmr_reserved, 0, sizeof(head.fmh_keys[1].fmr_reserved))) return -EINVAL; /* * ext4 doesn't report file extents at all, so the only valid * file offsets are the magic ones (all zeroes or all ones). 
*/ if (head.fmh_keys[0].fmr_offset || (head.fmh_keys[1].fmr_offset != 0 && head.fmh_keys[1].fmr_offset != -1ULL)) return -EINVAL; xhead.fmh_iflags = head.fmh_iflags; xhead.fmh_count = head.fmh_count; ext4_fsmap_to_internal(sb, &xhead.fmh_keys[0], &head.fmh_keys[0]); ext4_fsmap_to_internal(sb, &xhead.fmh_keys[1], &head.fmh_keys[1]); trace_ext4_getfsmap_low_key(sb, &xhead.fmh_keys[0]); trace_ext4_getfsmap_high_key(sb, &xhead.fmh_keys[1]); info.gi_sb = sb; info.gi_data = arg; error = ext4_getfsmap(sb, &xhead, ext4_getfsmap_format, &info); if (error == EXT4_QUERY_RANGE_ABORT) aborted = true; else if (error) return error; /* If we didn't abort, set the "last" flag in the last fmx */ if (!aborted && info.gi_idx) { info.gi_last_flags |= FMR_OF_LAST; if (copy_to_user(&info.gi_data->fmh_recs[info.gi_idx - 1].fmr_flags, &info.gi_last_flags, sizeof(info.gi_last_flags))) return -EFAULT; } /* copy back header */ head.fmh_entries = xhead.fmh_entries; head.fmh_oflags = xhead.fmh_oflags; if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) return -EFAULT; return 0; } static long ext4_ioctl_group_add(struct file *file, struct ext4_new_group_data *input) { struct super_block *sb = file_inode(file)->i_sb; int err, err2=0; err = ext4_resize_begin(sb); if (err) return err; if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc"); err = -EOPNOTSUPP; goto group_add_out; } err = mnt_want_write_file(file); if (err) goto group_add_out; err = ext4_group_add(sb, input); if (EXT4_SB(sb)->s_journal) { jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); } if (err == 0) err = err2; mnt_drop_write_file(file); if (!err && ext4_has_group_desc_csum(sb) && test_opt(sb, INIT_INODE_TABLE)) err = ext4_register_li_request(sb, input->group); group_add_out: err2 = ext4_resize_end(sb, false); if (err == 0) err = err2; return err; } int 
ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); struct ext4_inode_info *ei = EXT4_I(inode); u32 flags = ei->i_flags & EXT4_FL_USER_VISIBLE; if (S_ISREG(inode->i_mode)) flags &= ~FS_PROJINHERIT_FL; fileattr_fill_flags(fa, flags); if (ext4_has_feature_project(inode->i_sb)) fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid); return 0; } int ext4_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); u32 flags = fa->flags; int err = -EOPNOTSUPP; if (flags & ~EXT4_FL_USER_VISIBLE) goto out; /* * chattr(1) grabs flags via GETFLAGS, modifies the result and * passes that to SETFLAGS. So we cannot easily make SETFLAGS * more restrictive than just silently masking off visible but * not settable flags as we always did. */ flags &= EXT4_FL_USER_MODIFIABLE; if (ext4_mask_flags(inode->i_mode, flags) != flags) goto out; err = ext4_ioctl_check_immutable(inode, fa->fsx_projid, flags); if (err) goto out; err = ext4_ioctl_setflags(inode, flags); if (err) goto out; err = ext4_ioctl_setproject(inode, fa->fsx_projid); out: return err; } /* So that the fiemap access checks can't overflow on 32 bit machines. 
*/ #define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg) { struct fiemap fiemap; struct fiemap __user *ufiemap = (struct fiemap __user *) arg; struct fiemap_extent_info fieinfo = { 0, }; struct inode *inode = file_inode(filp); int error; if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap))) return -EFAULT; if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) return -EINVAL; fieinfo.fi_flags = fiemap.fm_flags; fieinfo.fi_extents_max = fiemap.fm_extent_count; fieinfo.fi_extents_start = ufiemap->fm_extents; error = ext4_get_es_cache(inode, &fieinfo, fiemap.fm_start, fiemap.fm_length); fiemap.fm_flags = fieinfo.fi_flags; fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap))) error = -EFAULT; return error; } static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg) { int err = 0; __u32 flags = 0; unsigned int flush_flags = 0; struct super_block *sb = file_inode(filp)->i_sb; if (copy_from_user(&flags, (__u32 __user *)arg, sizeof(__u32))) return -EFAULT; if (!capable(CAP_SYS_ADMIN)) return -EPERM; /* check for invalid bits set */ if ((flags & ~EXT4_IOC_CHECKPOINT_FLAG_VALID) || ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && (flags & JBD2_JOURNAL_FLUSH_ZEROOUT))) return -EINVAL; if (!EXT4_SB(sb)->s_journal) return -ENODEV; if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !bdev_max_discard_sectors(EXT4_SB(sb)->s_journal->j_dev)) return -EOPNOTSUPP; if (flags & EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN) return 0; if (flags & EXT4_IOC_CHECKPOINT_FLAG_DISCARD) flush_flags |= JBD2_JOURNAL_FLUSH_DISCARD; if (flags & EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT) { flush_flags |= JBD2_JOURNAL_FLUSH_ZEROOUT; pr_info_ratelimited("warning: checkpointing journal with EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT can be slow"); } jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, flush_flags); 
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); return err; } static int ext4_ioctl_setlabel(struct file *filp, const char __user *user_label) { size_t len; int ret = 0; char new_label[EXT4_LABEL_MAX + 1]; struct super_block *sb = file_inode(filp)->i_sb; if (!capable(CAP_SYS_ADMIN)) return -EPERM; /* * Copy the maximum length allowed for ext4 label with one more to * find the required terminating null byte in order to test the * label length. The on disk label doesn't need to be null terminated. */ if (copy_from_user(new_label, user_label, EXT4_LABEL_MAX + 1)) return -EFAULT; len = strnlen(new_label, EXT4_LABEL_MAX + 1); if (len > EXT4_LABEL_MAX) return -EINVAL; /* * Clear the buffer after the new label */ memset(new_label + len, 0, EXT4_LABEL_MAX - len); ret = mnt_want_write_file(filp); if (ret) return ret; ret = ext4_update_superblocks_fn(sb, ext4_sb_setlabel, new_label); mnt_drop_write_file(filp); return ret; } static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label) { char label[EXT4_LABEL_MAX + 1]; /* * EXT4_LABEL_MAX must always be smaller than FSLABEL_MAX because * FSLABEL_MAX must include terminating null byte, while s_volume_name * does not have to. 
*/ BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX); memset(label, 0, sizeof(label)); lock_buffer(sbi->s_sbh); strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX); unlock_buffer(sbi->s_sbh); if (copy_to_user(user_label, label, sizeof(label))) return -EFAULT; return 0; } static int ext4_ioctl_getuuid(struct ext4_sb_info *sbi, struct fsuuid __user *ufsuuid) { struct fsuuid fsuuid; __u8 uuid[UUID_SIZE]; if (copy_from_user(&fsuuid, ufsuuid, sizeof(fsuuid))) return -EFAULT; if (fsuuid.fsu_len == 0) { fsuuid.fsu_len = UUID_SIZE; if (copy_to_user(&ufsuuid->fsu_len, &fsuuid.fsu_len, sizeof(fsuuid.fsu_len))) return -EFAULT; return 0; } if (fsuuid.fsu_len < UUID_SIZE || fsuuid.fsu_flags != 0) return -EINVAL; lock_buffer(sbi->s_sbh); memcpy(uuid, sbi->s_es->s_uuid, UUID_SIZE); unlock_buffer(sbi->s_sbh); fsuuid.fsu_len = UUID_SIZE; if (copy_to_user(ufsuuid, &fsuuid, sizeof(fsuuid)) || copy_to_user(&ufsuuid->fsu_uuid[0], uuid, UUID_SIZE)) return -EFAULT; return 0; } static int ext4_ioctl_setuuid(struct file *filp, const struct fsuuid __user *ufsuuid) { int ret = 0; struct super_block *sb = file_inode(filp)->i_sb; struct fsuuid fsuuid; __u8 uuid[UUID_SIZE]; if (!capable(CAP_SYS_ADMIN)) return -EPERM; /* * If any checksums (group descriptors or metadata) are being used * then the checksum seed feature is required to change the UUID. 
*/ if (((ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb)) && !ext4_has_feature_csum_seed(sb)) || ext4_has_feature_stable_inodes(sb)) return -EOPNOTSUPP; if (copy_from_user(&fsuuid, ufsuuid, sizeof(fsuuid))) return -EFAULT; if (fsuuid.fsu_len != UUID_SIZE || fsuuid.fsu_flags != 0) return -EINVAL; if (copy_from_user(uuid, &ufsuuid->fsu_uuid[0], UUID_SIZE)) return -EFAULT; ret = mnt_want_write_file(filp); if (ret) return ret; ret = ext4_update_superblocks_fn(sb, ext4_sb_setuuid, &uuid); mnt_drop_write_file(filp); return ret; } static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; struct mnt_idmap *idmap = file_mnt_idmap(filp); ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); switch (cmd) { case FS_IOC_GETFSMAP: return ext4_ioc_getfsmap(sb, (void __user *)arg); case EXT4_IOC_GETVERSION: case EXT4_IOC_GETVERSION_OLD: return put_user(inode->i_generation, (int __user *) arg); case EXT4_IOC_SETVERSION: case EXT4_IOC_SETVERSION_OLD: { handle_t *handle; struct ext4_iloc iloc; __u32 generation; int err; if (!inode_owner_or_capable(idmap, inode)) return -EPERM; if (ext4_has_metadata_csum(inode->i_sb)) { ext4_warning(sb, "Setting inode version is not " "supported with metadata_csum enabled."); return -ENOTTY; } err = mnt_want_write_file(filp); if (err) return err; if (get_user(generation, (int __user *) arg)) { err = -EFAULT; goto setversion_out; } inode_lock(inode); handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); goto unlock_out; } err = ext4_reserve_inode_write(handle, inode, &iloc); if (err == 0) { inode_set_ctime_current(inode); inode_inc_iversion(inode); inode->i_generation = generation; err = ext4_mark_iloc_dirty(handle, inode, &iloc); } ext4_journal_stop(handle); unlock_out: inode_unlock(inode); setversion_out: mnt_drop_write_file(filp); return err; } case EXT4_IOC_GROUP_EXTEND: { ext4_fsblk_t 
n_blocks_count; int err, err2=0; err = ext4_resize_begin(sb); if (err) return err; if (get_user(n_blocks_count, (__u32 __user *)arg)) { err = -EFAULT; goto group_extend_out; } if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc"); err = -EOPNOTSUPP; goto group_extend_out; } err = mnt_want_write_file(filp); if (err) goto group_extend_out; err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); if (EXT4_SB(sb)->s_journal) { jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); } if (err == 0) err = err2; mnt_drop_write_file(filp); group_extend_out: err2 = ext4_resize_end(sb, false); if (err == 0) err = err2; return err; } case EXT4_IOC_MOVE_EXT: { struct move_extent me; struct fd donor; int err; if (!(filp->f_mode & FMODE_READ) || !(filp->f_mode & FMODE_WRITE)) return -EBADF; if (copy_from_user(&me, (struct move_extent __user *)arg, sizeof(me))) return -EFAULT; me.moved_len = 0; donor = fdget(me.donor_fd); if (!donor.file) return -EBADF; if (!(donor.file->f_mode & FMODE_WRITE)) { err = -EBADF; goto mext_out; } if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online defrag not supported with bigalloc"); err = -EOPNOTSUPP; goto mext_out; } else if (IS_DAX(inode)) { ext4_msg(sb, KERN_ERR, "Online defrag not supported with DAX"); err = -EOPNOTSUPP; goto mext_out; } err = mnt_want_write_file(filp); if (err) goto mext_out; err = ext4_move_extents(filp, donor.file, me.orig_start, me.donor_start, me.len, &me.moved_len); mnt_drop_write_file(filp); if (copy_to_user((struct move_extent __user *)arg, &me, sizeof(me))) err = -EFAULT; mext_out: fdput(donor); return err; } case EXT4_IOC_GROUP_ADD: { struct ext4_new_group_data input; if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, sizeof(input))) return -EFAULT; return ext4_ioctl_group_add(filp, &input); } case EXT4_IOC_MIGRATE: { int 
err; if (!inode_owner_or_capable(idmap, inode)) return -EACCES; err = mnt_want_write_file(filp); if (err) return err; /* * inode_mutex prevent write and truncate on the file. * Read still goes through. We take i_data_sem in * ext4_ext_swap_inode_data before we switch the * inode format to prevent read. */ inode_lock((inode)); err = ext4_ext_migrate(inode); inode_unlock((inode)); mnt_drop_write_file(filp); return err; } case EXT4_IOC_ALLOC_DA_BLKS: { int err; if (!inode_owner_or_capable(idmap, inode)) return -EACCES; err = mnt_want_write_file(filp); if (err) return err; err = ext4_alloc_da_blocks(inode); mnt_drop_write_file(filp); return err; } case EXT4_IOC_SWAP_BOOT: { int err; if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; err = mnt_want_write_file(filp); if (err) return err; err = swap_inode_boot_loader(sb, idmap, inode); mnt_drop_write_file(filp); return err; } case EXT4_IOC_RESIZE_FS: { ext4_fsblk_t n_blocks_count; int err = 0, err2 = 0; ext4_group_t o_group = EXT4_SB(sb)->s_groups_count; if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, sizeof(__u64))) { return -EFAULT; } err = ext4_resize_begin(sb); if (err) return err; err = mnt_want_write_file(filp); if (err) goto resizefs_out; err = ext4_resize_fs(sb, n_blocks_count); if (EXT4_SB(sb)->s_journal) { ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); } if (err == 0) err = err2; mnt_drop_write_file(filp); if (!err && (o_group < EXT4_SB(sb)->s_groups_count) && ext4_has_group_desc_csum(sb) && test_opt(sb, INIT_INODE_TABLE)) err = ext4_register_li_request(sb, o_group); resizefs_out: err2 = ext4_resize_end(sb, true); if (err == 0) err = err2; return err; } case FITRIM: { struct fstrim_range range; int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!bdev_max_discard_sectors(sb->s_bdev)) return -EOPNOTSUPP; /* * We haven't replayed 
the journal, so we cannot use our * block-bitmap-guided storage zapping commands. */ if (test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) return -EROFS; if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; ret = ext4_trim_fs(sb, &range); if (ret < 0) return ret; if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) return -EFAULT; return 0; } case EXT4_IOC_PRECACHE_EXTENTS: return ext4_ext_precache(inode); case FS_IOC_SET_ENCRYPTION_POLICY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); case FS_IOC_GET_ENCRYPTION_PWSALT: return ext4_ioctl_get_encryption_pwsalt(filp, (void __user *)arg); case FS_IOC_GET_ENCRYPTION_POLICY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); case FS_IOC_GET_ENCRYPTION_POLICY_EX: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg); case FS_IOC_ADD_ENCRYPTION_KEY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_add_key(filp, (void __user *)arg); case FS_IOC_REMOVE_ENCRYPTION_KEY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_remove_key(filp, (void __user *)arg); case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg); case FS_IOC_GET_ENCRYPTION_KEY_STATUS: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); case FS_IOC_GET_ENCRYPTION_NONCE: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); case EXT4_IOC_CLEAR_ES_CACHE: { if (!inode_owner_or_capable(idmap, inode)) return -EACCES; ext4_clear_inode_es(inode); return 0; } case EXT4_IOC_GETSTATE: { __u32 state = 0; if 
(ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED)) state |= EXT4_STATE_FLAG_EXT_PRECACHED; if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) state |= EXT4_STATE_FLAG_NEW; if (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) state |= EXT4_STATE_FLAG_NEWENTRY; if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) state |= EXT4_STATE_FLAG_DA_ALLOC_CLOSE; return put_user(state, (__u32 __user *) arg); } case EXT4_IOC_GET_ES_CACHE: return ext4_ioctl_get_es_cache(filp, arg); case EXT4_IOC_SHUTDOWN: return ext4_ioctl_shutdown(sb, arg); case FS_IOC_ENABLE_VERITY: if (!ext4_has_feature_verity(sb)) return -EOPNOTSUPP; return fsverity_ioctl_enable(filp, (const void __user *)arg); case FS_IOC_MEASURE_VERITY: if (!ext4_has_feature_verity(sb)) return -EOPNOTSUPP; return fsverity_ioctl_measure(filp, (void __user *)arg); case FS_IOC_READ_VERITY_METADATA: if (!ext4_has_feature_verity(sb)) return -EOPNOTSUPP; return fsverity_ioctl_read_metadata(filp, (const void __user *)arg); case EXT4_IOC_CHECKPOINT: return ext4_ioctl_checkpoint(filp, arg); case FS_IOC_GETFSLABEL: return ext4_ioctl_getlabel(EXT4_SB(sb), (void __user *)arg); case FS_IOC_SETFSLABEL: return ext4_ioctl_setlabel(filp, (const void __user *)arg); case EXT4_IOC_GETFSUUID: return ext4_ioctl_getuuid(EXT4_SB(sb), (void __user *)arg); case EXT4_IOC_SETFSUUID: return ext4_ioctl_setuuid(filp, (const void __user *)arg); default: return -ENOTTY; } } long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { return __ext4_ioctl(filp, cmd, arg); } #ifdef CONFIG_COMPAT long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case EXT4_IOC32_GETVERSION: cmd = EXT4_IOC_GETVERSION; break; case EXT4_IOC32_SETVERSION: cmd = EXT4_IOC_SETVERSION; break; case EXT4_IOC32_GROUP_EXTEND: cmd = EXT4_IOC_GROUP_EXTEND; break; case EXT4_IOC32_GETVERSION_OLD: cmd = EXT4_IOC_GETVERSION_OLD; break; case 
EXT4_IOC32_SETVERSION_OLD: cmd = EXT4_IOC_SETVERSION_OLD; break; case EXT4_IOC32_GETRSVSZ: cmd = EXT4_IOC_GETRSVSZ; break; case EXT4_IOC32_SETRSVSZ: cmd = EXT4_IOC_SETRSVSZ; break; case EXT4_IOC32_GROUP_ADD: { struct compat_ext4_new_group_input __user *uinput; struct ext4_new_group_data input; int err; uinput = compat_ptr(arg); err = get_user(input.group, &uinput->group); err |= get_user(input.block_bitmap, &uinput->block_bitmap); err |= get_user(input.inode_bitmap, &uinput->inode_bitmap); err |= get_user(input.inode_table, &uinput->inode_table); err |= get_user(input.blocks_count, &uinput->blocks_count); err |= get_user(input.reserved_blocks, &uinput->reserved_blocks); if (err) return -EFAULT; return ext4_ioctl_group_add(file, &input); } case EXT4_IOC_MOVE_EXT: case EXT4_IOC_RESIZE_FS: case FITRIM: case EXT4_IOC_PRECACHE_EXTENTS: case FS_IOC_SET_ENCRYPTION_POLICY: case FS_IOC_GET_ENCRYPTION_PWSALT: case FS_IOC_GET_ENCRYPTION_POLICY: case FS_IOC_GET_ENCRYPTION_POLICY_EX: case FS_IOC_ADD_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: case FS_IOC_GET_ENCRYPTION_KEY_STATUS: case FS_IOC_GET_ENCRYPTION_NONCE: case EXT4_IOC_SHUTDOWN: case FS_IOC_GETFSMAP: case FS_IOC_ENABLE_VERITY: case FS_IOC_MEASURE_VERITY: case FS_IOC_READ_VERITY_METADATA: case EXT4_IOC_CLEAR_ES_CACHE: case EXT4_IOC_GETSTATE: case EXT4_IOC_GET_ES_CACHE: case EXT4_IOC_CHECKPOINT: case FS_IOC_GETFSLABEL: case FS_IOC_SETFSLABEL: case EXT4_IOC_GETFSUUID: case EXT4_IOC_SETFSUUID: break; default: return -ENOIOCTLCMD; } return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif static void set_overhead(struct ext4_super_block *es, const void *arg) { es->s_overhead_clusters = cpu_to_le32(*((unsigned long *) arg)); } int ext4_update_overhead(struct super_block *sb, bool force) { struct ext4_sb_info *sbi = EXT4_SB(sb); if (sb_rdonly(sb)) return 0; if (!force && (sbi->s_overhead == 0 || sbi->s_overhead == le32_to_cpu(sbi->s_es->s_overhead_clusters))) 
return 0; return ext4_update_superblocks_fn(sb, set_overhead, &sbi->s_overhead); } |
1200 1198 8 1 8 402 || // SPDX-License-Identifier: GPL-2.0-or-later /* * "TEE" target extension for Xtables * Copyright © Sebastian Claßen, 2007 * Jan Engelhardt, 2007-2010 * * based on ipt_ROUTE.c from Cédric de Launois * <delaunois@info.ucl.be> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/route.h> #include <linux/netfilter/x_tables.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/route.h> #include <net/netfilter/ipv4/nf_dup_ipv4.h> #include <net/netfilter/ipv6/nf_dup_ipv6.h> #include <linux/netfilter/xt_TEE.h> struct xt_tee_priv { struct list_head list; struct xt_tee_tginfo *tginfo; int oif; }; static unsigned int tee_net_id __read_mostly; static const union nf_inet_addr tee_zero_address; struct tee_net { struct list_head priv_list; /* lock protects the priv_list */ struct mutex lock; }; static unsigned int tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? info->priv->oif : 0; nf_dup_ipv4(xt_net(par), skb, xt_hooknum(par), &info->gw.in, oif); return XT_CONTINUE; } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static unsigned int tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? 
info->priv->oif : 0; nf_dup_ipv6(xt_net(par), skb, xt_hooknum(par), &info->gw.in6, oif); return XT_CONTINUE; } #endif static int tee_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct tee_net *tn = net_generic(net, tee_net_id); struct xt_tee_priv *priv; mutex_lock(&tn->lock); list_for_each_entry(priv, &tn->priv_list, list) { switch (event) { case NETDEV_REGISTER: if (!strcmp(dev->name, priv->tginfo->oif)) priv->oif = dev->ifindex; break; case NETDEV_UNREGISTER: if (dev->ifindex == priv->oif) priv->oif = -1; break; case NETDEV_CHANGENAME: if (!strcmp(dev->name, priv->tginfo->oif)) priv->oif = dev->ifindex; else if (dev->ifindex == priv->oif) priv->oif = -1; break; } } mutex_unlock(&tn->lock); return NOTIFY_DONE; } static int tee_tg_check(const struct xt_tgchk_param *par) { struct tee_net *tn = net_generic(par->net, tee_net_id); struct xt_tee_tginfo *info = par->targinfo; struct xt_tee_priv *priv; /* 0.0.0.0 and :: not allowed */ if (memcmp(&info->gw, &tee_zero_address, sizeof(tee_zero_address)) == 0) return -EINVAL; if (info->oif[0]) { struct net_device *dev; if (info->oif[sizeof(info->oif)-1] != '\0') return -EINVAL; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (priv == NULL) return -ENOMEM; priv->tginfo = info; priv->oif = -1; info->priv = priv; dev = dev_get_by_name(par->net, info->oif); if (dev) { priv->oif = dev->ifindex; dev_put(dev); } mutex_lock(&tn->lock); list_add(&priv->list, &tn->priv_list); mutex_unlock(&tn->lock); } else info->priv = NULL; static_key_slow_inc(&xt_tee_enabled); return 0; } static void tee_tg_destroy(const struct xt_tgdtor_param *par) { struct tee_net *tn = net_generic(par->net, tee_net_id); struct xt_tee_tginfo *info = par->targinfo; if (info->priv) { mutex_lock(&tn->lock); list_del(&info->priv->list); mutex_unlock(&tn->lock); kfree(info->priv); } static_key_slow_dec(&xt_tee_enabled); } static struct xt_target 
tee_tg_reg[] __read_mostly = { { .name = "TEE", .revision = 1, .family = NFPROTO_IPV4, .target = tee_tg4, .targetsize = sizeof(struct xt_tee_tginfo), .usersize = offsetof(struct xt_tee_tginfo, priv), .checkentry = tee_tg_check, .destroy = tee_tg_destroy, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "TEE", .revision = 1, .family = NFPROTO_IPV6, .target = tee_tg6, .targetsize = sizeof(struct xt_tee_tginfo), .usersize = offsetof(struct xt_tee_tginfo, priv), .checkentry = tee_tg_check, .destroy = tee_tg_destroy, .me = THIS_MODULE, }, #endif }; static int __net_init tee_net_init(struct net *net) { struct tee_net *tn = net_generic(net, tee_net_id); INIT_LIST_HEAD(&tn->priv_list); mutex_init(&tn->lock); return 0; } static struct pernet_operations tee_net_ops = { .init = tee_net_init, .id = &tee_net_id, .size = sizeof(struct tee_net), }; static struct notifier_block tee_netdev_notifier = { .notifier_call = tee_netdev_event, }; static int __init tee_tg_init(void) { int ret; ret = register_pernet_subsys(&tee_net_ops); if (ret < 0) return ret; ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); if (ret < 0) goto cleanup_subsys; ret = register_netdevice_notifier(&tee_netdev_notifier); if (ret < 0) goto unregister_targets; return 0; unregister_targets: xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); cleanup_subsys: unregister_pernet_subsys(&tee_net_ops); return ret; } static void __exit tee_tg_exit(void) { unregister_netdevice_notifier(&tee_netdev_notifier); xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); unregister_pernet_subsys(&tee_net_ops); } module_init(tee_tg_init); module_exit(tee_tg_exit); MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>"); MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: Reroute packet copy"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_TEE"); MODULE_ALIAS("ip6t_TEE"); |
4 4 1 5 ||
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * fs/f2fs/node.h
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
/* start node id of a node block dedicated to the given node id */
#define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)

/* node block offset on the NAT area dedicated to the given start node id */
#define NAT_BLOCK_OFFSET(start_nid) ((start_nid) / NAT_ENTRY_PER_BLOCK)

/* # of pages to perform synchronous readahead before building free nids */
#define FREE_NID_PAGES	8
#define MAX_FREE_NIDS	(NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)

/* size of free nid batch when shrinking */
#define SHRINK_NID_BATCH_SIZE	8

#define DEF_RA_NID_PAGES	0	/* # of nid pages to be readaheaded */

/* maximum readahead size for node during getting data blocks */
#define MAX_RA_NODE		128

/* control the memory footprint threshold (10MB per 1GB ram) */
#define DEF_RAM_THRESHOLD	1

/* control dirty nats ratio threshold (default: 10% over max nid count) */
#define DEF_DIRTY_NAT_RATIO_THRESHOLD		10
/* control total # of nats */
#define DEF_NAT_CACHE_THRESHOLD			100000

/* control total # of node writes used for roll-forward recovery */
#define DEF_RF_NODE_BLOCKS			0

/* vector size for gang look-up from nat cache that consists of radix tree */
#define NAT_VEC_SIZE	32

/* return value for read_node_page */
#define LOCKED_PAGE	1

/* check pinned file's alignment status of physical blocks */
#define FILE_NOT_ALIGNED	1

/*
 * For flag in struct node_info.
 * Each enumerator is a bit position; set_nat_flag()/get_nat_flag() apply
 * BIT() to it.
 */
enum {
	IS_CHECKPOINTED,	/* is it checkpointed before? */
	HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
	HAS_LAST_FSYNC,		/* has the latest node fsync mark? */
	IS_DIRTY,		/* this nat entry is dirty? */
	IS_PREALLOC,		/* nat entry is preallocated */
};

/*
 * For node information
 */
struct node_info {
	nid_t nid;		/* node id */
	nid_t ino;		/* inode number of the node's owner */
	block_t	blk_addr;	/* block address of the node */
	unsigned char version;	/* version of the node */
	unsigned char flag;	/* for node information bits */
};

struct nat_entry {
	struct list_head list;	/* for clean or dirty nat list */
	struct node_info ni;	/* in-memory node information */
};

/* field accessors for the node_info embedded in a struct nat_entry */
#define nat_get_nid(nat)		((nat)->ni.nid)
#define nat_set_nid(nat, n)		((nat)->ni.nid = (n))
#define nat_get_blkaddr(nat)		((nat)->ni.blk_addr)
#define nat_set_blkaddr(nat, b)		((nat)->ni.blk_addr = (b))
#define nat_get_ino(nat)		((nat)->ni.ino)
#define nat_set_ino(nat, i)		((nat)->ni.ino = (i))
#define nat_get_version(nat)		((nat)->ni.version)
#define nat_set_version(nat, v)		((nat)->ni.version = (v))

/* pre-increments its argument in place, so it must be an lvalue */
#define inc_node_version(version)	(++(version))

/* Copy nid, ino, blk_addr and version from @src to @dst; flag is left alone. */
static inline void copy_node_info(struct node_info *dst,
						struct node_info *src)
{
	dst->nid = src->nid;
	dst->ino = src->ino;
	dst->blk_addr = src->blk_addr;
	dst->version = src->version;
	/* should not copy flag here */
}

/* Set (@set true) or clear (@set false) bit @type in the entry's flag byte. */
static inline void set_nat_flag(struct nat_entry *ne,
				unsigned int type, bool set)
{
	if (set)
		ne->ni.flag |= BIT(type);
	else
		ne->ni.flag &= ~BIT(type);
}

/* Return whether bit @type is set in the entry's flag byte. */
static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
{
	return ne->ni.flag & BIT(type);
}

/*
 * Put the entry's flags into the post-checkpoint state: IS_CHECKPOINTED and
 * HAS_LAST_FSYNC set, HAS_FSYNCED_INODE cleared. Other bits are untouched.
 */
static inline void nat_reset_flag(struct nat_entry *ne)
{
	/* these states can be set only after checkpoint was done */
	set_nat_flag(ne, IS_CHECKPOINTED, true);
	set_nat_flag(ne, HAS_FSYNCED_INODE, false);
	set_nat_flag(ne, HAS_LAST_FSYNC, true);
}

/*
 * Fill ino, blk_addr and version of @ni from a raw NAT entry, converting
 * the 32-bit fields from little-endian. nid and flag are not written.
 */
static inline void node_info_from_raw_nat(struct node_info *ni,
						struct f2fs_nat_entry *raw_ne)
{
	ni->ino = le32_to_cpu(raw_ne->ino);
	ni->blk_addr = le32_to_cpu(raw_ne->block_addr);
	ni->version = raw_ne->version;
}

/* Inverse of node_info_from_raw_nat(): store @ni's fields into @raw_ne. */
static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
						struct node_info *ni)
{
	raw_ne->ino = cpu_to_le32(ni->ino);
	raw_ne->block_addr = cpu_to_le32(ni->blk_addr);
	raw_ne->version = ni->version;
}

/*
 * True once the dirty NAT count reaches dirty_nats_ratio percent of
 * max_nid (integer arithmetic: multiply first, then divide by 100).
 */
static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi)
{
	return NM_I(sbi)->nat_cnt[DIRTY_NAT] >= NM_I(sbi)->max_nid *
					NM_I(sbi)->dirty_nats_ratio / 100;
}

/* True once the total NAT count reaches DEF_NAT_CACHE_THRESHOLD. */
static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
{
	return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD;
}

enum mem_type {
	FREE_NIDS,	/* indicates the free nid list */
	NAT_ENTRIES,	/* indicates the cached nat entry */
	DIRTY_DENTS,	/* indicates dirty dentry pages */
	INO_ENTRIES,	/* indicates inode entries */
	READ_EXTENT_CACHE,	/* indicates read extent cache */
	AGE_EXTENT_CACHE,	/* indicates age extent cache */
	DISCARD_CACHE,	/* indicates memory of cached discard cmds */
	COMPRESS_PAGE,	/* indicates memory of cached compressed pages */
	BASE_CHECK,	/* check kernel status */
};

struct nat_entry_set {
	struct list_head set_list;	/* link with other nat sets */
	struct list_head entry_list;	/* link with dirty nat entries */
	nid_t set;			/* set number */
	unsigned int entry_cnt;		/* the # of nat entries in set */
};

struct free_nid {
	struct list_head list;	/* for free node id list */
	nid_t nid;		/* node id */
	int state;		/* in use or not: FREE_NID or PREALLOC_NID */
};

/*
 * Store the nid of the first entry on the free nid list into *@nid.
 * Runs under nid_list_lock. When the FREE_NID count is not positive the
 * function returns without writing *@nid, so the caller's previous value
 * is what remains.
 */
static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *fnid;

	spin_lock(&nm_i->nid_list_lock);
	if (nm_i->nid_cnt[FREE_NID] <= 0) {
		spin_unlock(&nm_i->nid_list_lock);
		return;
	}
	fnid = list_first_entry(&nm_i->free_nid_list, struct free_nid, list);
	*nid = fnid->nid;
	spin_unlock(&nm_i->nid_list_lock);
}

/*
 * inline functions
 */

/*
 * Copy the NAT bitmap (bitmap_size bytes) into @addr. With
 * CONFIG_F2FS_CHECK_FS, first trip f2fs_bug_on() if the bitmap and its
 * mirror copy differ.
 */
static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

#ifdef CONFIG_F2FS_CHECK_FS
	if (memcmp(nm_i->nat_bitmap, nm_i->nat_bitmap_mir,
						nm_i->bitmap_size))
		f2fs_bug_on(sbi, 1);
#endif
	memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size);
}

/*
 * Return the block address of the NAT block covering start nid @start.
 * The base address is computed from the NAT block offset as described in
 * the comment below; if that offset's bit is set in nat_bitmap, the address
 * is advanced by one segment (blocks_per_seg).
 */
static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	pgoff_t block_off;
	pgoff_t block_addr;

	/*
	 * block_off = segment_off * 512 + off_in_segment
	 * OLD = (segment_off * 512) * 2 + off_in_segment
	 * NEW = 2 * (segment_off * 512 + off_in_segment) - off_in_segment
	 */
	block_off = NAT_BLOCK_OFFSET(start);

	block_addr = (pgoff_t)(nm_i->nat_blkaddr +
		(block_off << 1) -
		(block_off & (sbi->blocks_per_seg - 1)));

	if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
		block_addr += sbi->blocks_per_seg;

	return block_addr;
}

/*
 * Return the counterpart of @block_addr: the address, taken relative to
 * nat_blkaddr, with bit log_blocks_per_seg toggled.
 */
static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi,
						pgoff_t block_addr)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	block_addr -= nm_i->nat_blkaddr;
	block_addr ^= BIT(sbi->log_blocks_per_seg);
	return block_addr + nm_i->nat_blkaddr;
}

/*
 * Flip the nat_bitmap bit for the NAT block covering @start_nid (and the
 * mirror bitmap's bit too under CONFIG_F2FS_CHECK_FS).
 */
static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
{
	unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);

	f2fs_change_bit(block_off, nm_i->nat_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
	f2fs_change_bit(block_off, nm_i->nat_bitmap_mir);
#endif
}

/* Inode number recorded in the node page's footer. */
static inline nid_t ino_of_node(struct page *node_page)
{
	struct f2fs_node *rn = F2FS_NODE(node_page);
	return le32_to_cpu(rn->footer.ino);
}

/* Node id recorded in the node page's footer. */
static inline nid_t nid_of_node(struct page *node_page)
{
	struct f2fs_node *rn = F2FS_NODE(node_page);
	return le32_to_cpu(rn->footer.nid);
}

/* Node offset: the footer flag word shifted right by OFFSET_BIT_SHIFT. */
static inline unsigned int ofs_of_node(struct page *node_page)
{
	struct f2fs_node *rn = F2FS_NODE(node_page);
	unsigned flag = le32_to_cpu(rn->footer.flag);
	return flag >> OFFSET_BIT_SHIFT;
}

/* 64-bit cp_ver value recorded in the node page's footer. */
static inline __u64 cpver_of_node(struct page *node_page)
{
	struct f2fs_node *rn = F2FS_NODE(node_page);
	return le64_to_cpu(rn->footer.cp_ver);
}

/* next_blkaddr value recorded in the node page's footer. */
static inline block_t next_blkaddr_of_node(struct page *node_page)
{
	struct f2fs_node *rn = F2FS_NODE(node_page);
	return le32_to_cpu(rn->footer.next_blkaddr);
}

/*
 * Write @nid, @ino and the node offset @ofs into the page's footer.
 * With @reset the whole node block is zeroed first; otherwise the bits of
 * the previous flag word selected by OFFSET_BIT_MASK are carried over.
 */
static inline void fill_node_footer(struct page *page, nid_t nid,
				nid_t ino, unsigned int ofs, bool reset)
{
	struct f2fs_node *rn = F2FS_NODE(page);
	unsigned int old_flag = 0;

	if (reset)
		memset(rn, 0, sizeof(*rn));
	else
		old_flag = le32_to_cpu(rn->footer.flag);

	rn->footer.nid = cpu_to_le32(nid);
	rn->footer.ino = cpu_to_le32(ino);

	/* should remain old flag bits such as COLD_BIT_SHIFT */
	rn->footer.flag = cpu_to_le32((ofs << OFFSET_BIT_SHIFT) |
					(old_flag & OFFSET_BIT_MASK));
}

/* Copy the whole node footer from @src's node block to @dst's. */
static inline void copy_node_footer(struct page *dst, struct page *src)
{
	struct f2fs_node *src_rn = F2FS_NODE(src);
	struct f2fs_node *dst_rn = F2FS_NODE(dst);
	memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer));
}

/*
 * Stamp the footer with the current checkpoint version and @blkaddr as
 * next_blkaddr. When CP_CRC_RECOVERY_FLAG is set, the checkpoint crc is
 * OR-ed into the upper 32 bits of the stored version.
 */
static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
	struct f2fs_node *rn = F2FS_NODE(page);
	__u64 cp_ver = cur_cp_version(ckpt);

	if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
		cp_ver |= (cur_cp_crc(ckpt) << 32);

	rn->footer.cp_ver = cpu_to_le64(cp_ver);
	rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
}

/*
 * Return whether the version stored in the page's footer matches the
 * current checkpoint. With CP_NOCRC_RECOVERY_FLAG only the low 32 bits are
 * compared; with CP_CRC_RECOVERY_FLAG the crc is folded into the upper 32
 * bits before a full 64-bit comparison.
 */
static inline bool is_recoverable_dnode(struct page *page)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
	__u64 cp_ver = cur_cp_version(ckpt);

	/* Don't care crc part, if fsck.f2fs sets it. */
	if (__is_set_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG))
		return (cp_ver << 32) == (cpver_of_node(page) << 32);

	if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
		cp_ver |= (cur_cp_crc(ckpt) << 32);

	return cp_ver == cpver_of_node(page);
}

/*
 * f2fs assigns the following node offsets described as (num).
 * N = NIDS_PER_BLOCK
 *
 *  Inode block (0)
 *    |- direct node (1)
 *    |- direct node (2)
 *    |- indirect node (3)
 *    |            `- direct node (4 => 4 + N - 1)
 *    |- indirect node (4 + N)
 *    |            `- direct node (5 + N => 5 + 2N - 1)
 *    `- double indirect node (5 + 2N)
 *                 `- indirect node (6 + 2N)
 *                       `- direct node
 *                 ......
 *                 `- indirect node ((6 + 2N) + x(N + 1))
 *                       `- direct node
 *                 ......
 *                 `- indirect node ((6 + 2N) + (N - 1)(N + 1))
 *                       `- direct node
 */
/*
 * Return true unless the page's node offset is one of the indirect or
 * double-indirect positions in the layout above. An offset for which
 * f2fs_has_xattr_block() holds is reported as true.
 */
static inline bool IS_DNODE(struct page *node_page)
{
	unsigned int ofs = ofs_of_node(node_page);

	if (f2fs_has_xattr_block(ofs))
		return true;

	if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
			ofs == 5 + 2 * NIDS_PER_BLOCK)
		return false;
	if (ofs >= 6 + 2 * NIDS_PER_BLOCK) {
		/* every (N + 1)-th slot past 6 + 2N is an indirect node */
		ofs -= 6 + 2 * NIDS_PER_BLOCK;
		if (!((long int)ofs % (NIDS_PER_BLOCK + 1)))
			return false;
	}
	return true;
}

/*
 * Store @nid at slot @off of node page @p after waiting for writeback.
 * When @i is true the page is treated as an inode block and the slot is
 * i_nid[@off - NODE_DIR1_BLOCK]; otherwise it is in.nid[@off].
 * Returns the result of set_page_dirty().
 */
static inline int set_nid(struct page *p, int off, nid_t nid, bool i)
{
	struct f2fs_node *rn = F2FS_NODE(p);

	f2fs_wait_on_page_writeback(p, NODE, true, true);

	if (i)
		rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
	else
		rn->in.nid[off] = cpu_to_le32(nid);
	return set_page_dirty(p);
}

/* Read the nid at slot @off; @i selects the slot the same way as set_nid(). */
static inline nid_t get_nid(struct page *p, int off, bool i)
{
	struct f2fs_node *rn = F2FS_NODE(p);

	if (i)
		return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]);
	return le32_to_cpu(rn->in.nid[off]);
}

/*
 * Coldness identification:
 *  - Mark cold files in f2fs_inode_info
 *  - Mark cold node blocks in their node footer
 *  - Mark cold data pages in page cache
 */

/* Non-zero when bit @type is set in the page's footer flag word. */
static inline int is_node(struct page *page, int type)
{
	struct f2fs_node *rn = F2FS_NODE(page);
	return le32_to_cpu(rn->footer.flag) & BIT(type);
}

#define is_cold_node(page)	is_node(page, COLD_BIT_SHIFT)
#define is_fsync_dnode(page)	is_node(page, FSYNC_BIT_SHIFT)
#define is_dent_dnode(page)	is_node(page, DENT_BIT_SHIFT)

/*
 * Update the cold bit in the footer flag word: cleared when @is_dir is
 * true, set otherwise.
 */
static inline void set_cold_node(struct page *page, bool is_dir)
{
	struct f2fs_node *rn = F2FS_NODE(page);
	unsigned int flag = le32_to_cpu(rn->footer.flag);

	if (is_dir)
		flag &= ~BIT(COLD_BIT_SHIFT);
	else
		flag |= BIT(COLD_BIT_SHIFT);
	rn->footer.flag = cpu_to_le32(flag);
}

/*
 * Set (@mark non-zero) or clear bit @type in the footer flag word. Under
 * CONFIG_F2FS_CHECK_FS, f2fs_inode_chksum_set() is called afterwards.
 */
static inline void set_mark(struct page *page, int mark, int type)
{
	struct f2fs_node *rn = F2FS_NODE(page);
	unsigned int flag = le32_to_cpu(rn->footer.flag);
	if (mark)
		flag |= BIT(type);
	else
		flag &= ~BIT(type);
	rn->footer.flag = cpu_to_le32(flag);

#ifdef CONFIG_F2FS_CHECK_FS
	f2fs_inode_chksum_set(F2FS_P_SB(page), page);
#endif
}
#define set_dentry_mark(page, mark)	set_mark(page, mark, DENT_BIT_SHIFT)
#define set_fsync_mark(page, mark)	set_mark(page, mark, FSYNC_BIT_SHIFT)
|
1 || /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * i2c.h - definitions for the Linux i2c bus interface * Copyright (C) 1995-2000 Simon G. Vogl * Copyright (C) 2013-2019 Wolfram Sang <wsa@kernel.org> * * With some changes from Kyösti Mälkki <kmalkki@cc.hut.fi> and * Frodo Looijaard <frodol@dds.nl> */ #ifndef _LINUX_I2C_H #define _LINUX_I2C_H #include <linux/acpi.h> /* for acpi_handle */ #include <linux/bits.h> #include <linux/mod_devicetable.h> #include <linux/device.h> /* for struct device */ #include <linux/sched.h> /* for completion */ #include <linux/mutex.h> #include <linux/regulator/consumer.h> #include <linux/rtmutex.h> #include <linux/irqdomain.h> /* for Host Notify IRQ */ #include <linux/of.h> /* for struct device_node */ #include <linux/swab.h> /* for swab16 */ #include <uapi/linux/i2c.h> extern struct bus_type i2c_bus_type; extern struct device_type i2c_adapter_type; extern struct device_type i2c_client_type; /* --- General options ------------------------------------------------ */ struct i2c_msg; struct i2c_algorithm; struct i2c_adapter; struct i2c_client; struct i2c_driver; struct i2c_device_identity; union i2c_smbus_data; struct i2c_board_info; enum i2c_slave_event; typedef int (*i2c_slave_cb_t)(struct i2c_client *client, enum i2c_slave_event event, u8 *val); /* I2C Frequency Modes */ #define I2C_MAX_STANDARD_MODE_FREQ 100000 #define I2C_MAX_FAST_MODE_FREQ 400000 #define I2C_MAX_FAST_MODE_PLUS_FREQ 1000000 #define I2C_MAX_TURBO_MODE_FREQ 1400000 #define I2C_MAX_HIGH_SPEED_MODE_FREQ 3400000 #define I2C_MAX_ULTRA_FAST_MODE_FREQ 5000000 struct module; struct property_entry; #if IS_ENABLED(CONFIG_I2C) /* Return the Frequency mode string based on the bus frequency */ const char *i2c_freq_mode_string(u32 bus_freq_hz); /* * The master routines are the ones normally used to transmit data to devices * on a bus (or read from them). 
Apart from two basic transfer functions to * transmit one message at a time, a more complex version can be used to * transmit an arbitrary number of messages without interruption. * @count must be less than 64k since msg.len is u16. */ int i2c_transfer_buffer_flags(const struct i2c_client *client, char *buf, int count, u16 flags); /** * i2c_master_recv - issue a single I2C message in master receive mode * @client: Handle to slave device * @buf: Where to store data read from slave * @count: How many bytes to read, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes read. */ static inline int i2c_master_recv(const struct i2c_client *client, char *buf, int count) { return i2c_transfer_buffer_flags(client, buf, count, I2C_M_RD); } /** * i2c_master_recv_dmasafe - issue a single I2C message in master receive mode * using a DMA safe buffer * @client: Handle to slave device * @buf: Where to store data read from slave, must be safe to use with DMA * @count: How many bytes to read, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes read. */ static inline int i2c_master_recv_dmasafe(const struct i2c_client *client, char *buf, int count) { return i2c_transfer_buffer_flags(client, buf, count, I2C_M_RD | I2C_M_DMA_SAFE); } /** * i2c_master_send - issue a single I2C message in master transmit mode * @client: Handle to slave device * @buf: Data that will be written to the slave * @count: How many bytes to write, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes written. 
*/ static inline int i2c_master_send(const struct i2c_client *client, const char *buf, int count) { /* the cast drops const only to match the char * parameter of the shared helper */ return i2c_transfer_buffer_flags(client, (char *)buf, count, 0); } /** * i2c_master_send_dmasafe - issue a single I2C message in master transmit mode * using a DMA safe buffer * @client: Handle to slave device * @buf: Data that will be written to the slave, must be safe to use with DMA * @count: How many bytes to write, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes written. */ static inline int i2c_master_send_dmasafe(const struct i2c_client *client, const char *buf, int count) { /* the cast drops const only to match the char * parameter of the shared helper */ return i2c_transfer_buffer_flags(client, (char *)buf, count, I2C_M_DMA_SAFE); } /* Transfer num messages. */ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); /* Unlocked flavor */ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); /* This is the very generalized SMBus access routine. You probably do not want to use this, though; one of the functions below may be much easier, and probably just as fast. Note that we use i2c_adapter here, because you do not need a specific smbus adapter to call this function. */ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, union i2c_smbus_data *data); /* Unlocked flavor */ s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, union i2c_smbus_data *data); /* Now follow the 'nice' access routines. These also document the calling conventions of i2c_smbus_xfer. 
*/ u8 i2c_smbus_pec(u8 crc, u8 *p, size_t count); s32 i2c_smbus_read_byte(const struct i2c_client *client); s32 i2c_smbus_write_byte(const struct i2c_client *client, u8 value); s32 i2c_smbus_read_byte_data(const struct i2c_client *client, u8 command); s32 i2c_smbus_write_byte_data(const struct i2c_client *client, u8 command, u8 value); s32 i2c_smbus_read_word_data(const struct i2c_client *client, u8 command); s32 i2c_smbus_write_word_data(const struct i2c_client *client, u8 command, u16 value); /* Read a word with i2c_smbus_read_word_data() and byte-swap it with swab16(); a negative result is returned unchanged. */ static inline s32 i2c_smbus_read_word_swapped(const struct i2c_client *client, u8 command) { s32 value = i2c_smbus_read_word_data(client, command); return (value < 0) ? value : swab16(value); } /* Byte-swap @value with swab16() and pass it to i2c_smbus_write_word_data(), returning that call's result. */ static inline s32 i2c_smbus_write_word_swapped(const struct i2c_client *client, u8 command, u16 value) { return i2c_smbus_write_word_data(client, command, swab16(value)); } /* Returns the number of read bytes */ s32 i2c_smbus_read_block_data(const struct i2c_client *client, u8 command, u8 *values); s32 i2c_smbus_write_block_data(const struct i2c_client *client, u8 command, u8 length, const u8 *values); /* Returns the number of read bytes */ s32 i2c_smbus_read_i2c_block_data(const struct i2c_client *client, u8 command, u8 length, u8 *values); s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, u8 command, u8 length, const u8 *values); s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client, u8 command, u8 length, u8 *values); int i2c_get_device_id(const struct i2c_client *client, struct i2c_device_identity *id); const struct i2c_device_id *i2c_client_get_device_id(const struct i2c_client *client); #endif /* I2C */ /** * struct i2c_device_identity - i2c client device identification * @manufacturer_id: 0 - 4095, database maintained by NXP * @part_id: 0 - 511, according to manufacturer * @die_revision: 0 - 7, according to manufacturer */ struct i2c_device_identity { u16 manufacturer_id; #define I2C_DEVICE_ID_NXP_SEMICONDUCTORS 0 #define 
I2C_DEVICE_ID_NXP_SEMICONDUCTORS_1 1 #define I2C_DEVICE_ID_NXP_SEMICONDUCTORS_2 2 #define I2C_DEVICE_ID_NXP_SEMICONDUCTORS_3 3 #define I2C_DEVICE_ID_RAMTRON_INTERNATIONAL 4 #define I2C_DEVICE_ID_ANALOG_DEVICES 5 #define I2C_DEVICE_ID_STMICROELECTRONICS 6 #define I2C_DEVICE_ID_ON_SEMICONDUCTOR 7 #define I2C_DEVICE_ID_SPRINTEK_CORPORATION 8 #define I2C_DEVICE_ID_ESPROS_PHOTONICS_AG 9 #define I2C_DEVICE_ID_FUJITSU_SEMICONDUCTOR 10 #define I2C_DEVICE_ID_FLIR 11 #define I2C_DEVICE_ID_O2MICRO 12 #define I2C_DEVICE_ID_ATMEL 13 #define I2C_DEVICE_ID_NONE 0xffff u16 part_id; u8 die_revision; }; /* Alert protocol selector; this is the type of the protocol argument of the i2c_driver alert callback. */ enum i2c_alert_protocol { I2C_PROTOCOL_SMBUS_ALERT, I2C_PROTOCOL_SMBUS_HOST_NOTIFY, }; /** * enum i2c_driver_flags - Flags for an I2C device driver * * @I2C_DRV_ACPI_WAIVE_D0_PROBE: Don't put the device in D0 state for probe */ enum i2c_driver_flags { I2C_DRV_ACPI_WAIVE_D0_PROBE = BIT(0), }; /** * struct i2c_driver - represent an I2C device driver * @class: What kind of i2c device we instantiate (for detect) * @probe: Callback for device binding * @remove: Callback for device unbinding * @shutdown: Callback for device shutdown * @alert: Alert callback, for example for the SMBus alert protocol * @command: Callback for bus-wide signaling (optional) * @driver: Device driver model driver * @id_table: List of I2C devices supported by this driver * @detect: Callback for device detection * @address_list: The I2C addresses to probe (for detect) * @clients: List of detected clients we created (for i2c-core use only) * @flags: A bitmask of flags defined in &enum i2c_driver_flags * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver. * * For automatic device detection, both @detect and @address_list must * be defined. @class should also be set, otherwise only devices forced * with module parameters will be created. 
The detect function must * fill at least the name field of the i2c_board_info structure it is * handed upon successful detection, and possibly also the flags field. * * If @detect is missing, the driver will still work fine for enumerated * devices. Detected devices simply won't be supported. This is expected * for the many I2C/SMBus devices which can't be detected reliably, and * the ones which can always be enumerated in practice. * * The i2c_client structure which is handed to the @detect callback is * not a real i2c_client. It is initialized just enough so that you can * call i2c_smbus_read_byte_data and friends on it. Don't do anything * else with it. In particular, calling dev_dbg and friends on it is * not allowed. */ struct i2c_driver { unsigned int class; /* Standard driver model interfaces */ int (*probe)(struct i2c_client *client); void (*remove)(struct i2c_client *client); /* driver model interfaces that don't relate to enumeration */ void (*shutdown)(struct i2c_client *client); /* Alert callback, for example for the SMBus alert protocol. * The format and meaning of the data value depends on the protocol. * For the SMBus alert protocol, there is a single bit of data passed * as the alert response's low bit ("event flag"). * For the SMBus Host Notify protocol, the data corresponds to the * 16-bit payload data reported by the slave device acting as master. */ void (*alert)(struct i2c_client *client, enum i2c_alert_protocol protocol, unsigned int data); /* an ioctl-like command that can be used to perform specific functions * with the device. 
*/ int (*command)(struct i2c_client *client, unsigned int cmd, void *arg); struct device_driver driver; const struct i2c_device_id *id_table; /* Device detection callback for automatic device creation */ int (*detect)(struct i2c_client *client, struct i2c_board_info *info); const unsigned short *address_list; struct list_head clients; u32 flags; }; /* Map a pointer to the embedded struct device_driver back to its containing struct i2c_driver. */ #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) /** * struct i2c_client - represent an I2C slave device * @flags: see I2C_CLIENT_* for possible flags * @addr: Address used on the I2C bus connected to the parent adapter. * @name: Indicates the type of the device, usually a chip name that's * generic enough to hide second-sourcing and compatible revisions. * @adapter: manages the bus segment hosting this I2C device * @dev: Driver model device node for the slave. * @init_irq: IRQ that was set at initialization * @irq: indicates the IRQ generated by this device (if any) * @detected: member of an i2c_driver.clients list or i2c-core's * userspace_devices list * @slave_cb: Callback when I2C slave mode of an adapter is used. The adapter * calls it to pass on slave events to the slave driver. * @devres_group_id: id of the devres group that will be created for resources * acquired when probing this device. * * An i2c_client identifies a single device (i.e. chip) connected to an * i2c bus. The behaviour exposed to Linux is defined by the driver * managing the device. 
*/ struct i2c_client { unsigned short flags; /* div., see below */ #define I2C_CLIENT_PEC 0x04 /* Use Packet Error Checking */ #define I2C_CLIENT_TEN 0x10 /* we have a ten bit chip address */ /* Must equal I2C_M_TEN below */ #define I2C_CLIENT_SLAVE 0x20 /* we are the slave */ #define I2C_CLIENT_HOST_NOTIFY 0x40 /* We want to use I2C host notify */ #define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */ #define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */ /* Must match I2C_M_STOP|IGNORE_NAK */ unsigned short addr; /* chip address - NOTE: 7bit */ /* addresses are stored in the */ /* _LOWER_ 7 bits */ char name[I2C_NAME_SIZE]; struct i2c_adapter *adapter; /* the adapter we sit on */ struct device dev; /* the device structure */ int init_irq; /* irq set at initialization */ int irq; /* irq issued by device */ struct list_head detected; #if IS_ENABLED(CONFIG_I2C_SLAVE) i2c_slave_cb_t slave_cb; /* callback for slave mode */ #endif void *devres_group_id; /* ID of probe devres group */ }; /* Map a pointer to the embedded struct device back to its containing struct i2c_client. */ #define to_i2c_client(d) container_of(d, struct i2c_client, dev) struct i2c_adapter *i2c_verify_adapter(struct device *dev); const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id, const struct i2c_client *client); const void *i2c_get_match_data(const struct i2c_client *client); /* Convert a kobject to its i2c_client by way of kobj_to_dev() and to_i2c_client(). */ static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) { struct device * const dev = kobj_to_dev(kobj); return to_i2c_client(dev); } /* Return the driver data pointer of client->dev, i.e. what i2c_set_clientdata() stored. */ static inline void *i2c_get_clientdata(const struct i2c_client *client) { return dev_get_drvdata(&client->dev); } /* Store @data as the driver data pointer of client->dev. */ static inline void i2c_set_clientdata(struct i2c_client *client, void *data) { dev_set_drvdata(&client->dev, data); } /* I2C slave support */ enum i2c_slave_event { I2C_SLAVE_READ_REQUESTED, I2C_SLAVE_WRITE_REQUESTED, I2C_SLAVE_READ_PROCESSED, I2C_SLAVE_WRITE_RECEIVED, I2C_SLAVE_STOP, }; int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb); int i2c_slave_unregister(struct i2c_client 
*client); int i2c_slave_event(struct i2c_client *client, enum i2c_slave_event event, u8 *val); #if IS_ENABLED(CONFIG_I2C_SLAVE) bool i2c_detect_slave_mode(struct device *dev); #else /* Stub for builds without CONFIG_I2C_SLAVE: slave mode is never reported. */ static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } #endif /** * struct i2c_board_info - template for device creation * @type: chip type, to initialize i2c_client.name * @flags: to initialize i2c_client.flags * @addr: stored in i2c_client.addr * @dev_name: Overrides the default <busnr>-<addr> dev_name if set * @platform_data: stored in i2c_client.dev.platform_data * @of_node: pointer to OpenFirmware device node * @fwnode: device node supplied by the platform firmware * @swnode: software node for the device * @resources: resources associated with the device * @num_resources: number of resources in the @resources array * @irq: stored in i2c_client.irq * * I2C doesn't actually support hardware probing, although controllers and * devices may be able to use I2C_SMBUS_QUICK to tell whether or not there's * a device at a given address. Drivers commonly need more information than * that, such as chip type, configuration, associated IRQ, and so on. * * i2c_board_info is used to build tables of information listing I2C devices * that are present. This information is used to grow the driver model tree. * For mainboards this is done statically using i2c_register_board_info(); * bus numbers identify adapters that aren't yet available. For add-on boards, * i2c_new_client_device() does this dynamically with the adapter already known. 
*/ struct i2c_board_info { char type[I2C_NAME_SIZE]; unsigned short flags; unsigned short addr; const char *dev_name; void *platform_data; struct device_node *of_node; struct fwnode_handle *fwnode; const struct software_node *swnode; const struct resource *resources; unsigned int num_resources; int irq; }; /** * I2C_BOARD_INFO - macro used to list an i2c device and its address * @dev_type: identifies the device type * @dev_addr: the device's address on the bus. * * This macro initializes essential fields of a struct i2c_board_info, * declaring what has been provided on a particular board. Optional * fields (such as associated irq, or device-specific platform_data) * are provided using conventional syntax. */ #define I2C_BOARD_INFO(dev_type, dev_addr) \ .type = dev_type, .addr = (dev_addr) #if IS_ENABLED(CONFIG_I2C) /* * Add-on boards should register/unregister their devices; e.g. a board * with integrated I2C, a config eeprom, sensors, and a codec that's * used in conjunction with the primary hardware. */ struct i2c_client * i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *info); /* If you don't know the exact address of an I2C device, use this variant * instead, which can probe for device presence in a list of possible * addresses. The "probe" callback function is optional. If it is provided, * it must return 1 on successful probe, 0 otherwise. If it is not provided, * a default probing method is used. 
*/ struct i2c_client * i2c_new_scanned_device(struct i2c_adapter *adap, struct i2c_board_info *info, unsigned short const *addr_list, int (*probe)(struct i2c_adapter *adap, unsigned short addr)); /* Common custom probe functions */ int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr); struct i2c_client * i2c_new_dummy_device(struct i2c_adapter *adapter, u16 address); struct i2c_client * devm_i2c_new_dummy_device(struct device *dev, struct i2c_adapter *adap, u16 address); struct i2c_client * i2c_new_ancillary_device(struct i2c_client *client, const char *name, u16 default_addr); void i2c_unregister_device(struct i2c_client *client); struct i2c_client *i2c_verify_client(struct device *dev); #else /* Stub for builds without CONFIG_I2C: always returns NULL. */ static inline struct i2c_client *i2c_verify_client(struct device *dev) { return NULL; } #endif /* I2C */ /* Mainboard arch_initcall() code should register all its I2C devices. * This is done at arch_initcall time, before declaring any i2c adapters. * Modules for add-on boards must use other calls. */ #ifdef CONFIG_I2C_BOARDINFO int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned n); #else /* Stub for builds without CONFIG_I2C_BOARDINFO: ignores its arguments and returns 0. */ static inline int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned n) { return 0; } #endif /* I2C_BOARDINFO */ /** * struct i2c_algorithm - represent I2C transfer method * @master_xfer: Issue a set of i2c transactions to the given I2C adapter * defined by the msgs array, with num messages available to transfer via * the adapter specified by adap. * @master_xfer_atomic: same as @master_xfer. Yet, only using atomic context * so e.g. PMICs can be accessed very late before shutdown. Optional. * @smbus_xfer: Issue smbus transactions to the given I2C adapter. If this * is not present, then the bus layer will try and convert the SMBus calls * into I2C transfers instead. * @smbus_xfer_atomic: same as @smbus_xfer. Yet, only using atomic context * so e.g. PMICs can be accessed very late before shutdown. Optional. 
* @functionality: Return the flags that this algorithm/adapter pair supports * from the ``I2C_FUNC_*`` flags. * @reg_slave: Register given client to I2C slave mode of this adapter * @unreg_slave: Unregister given client from I2C slave mode of this adapter * * The following structs are for those who like to implement new bus drivers: * i2c_algorithm is the interface to a class of hardware solutions which can * be addressed using the same bus algorithms - i.e. bit-banging or the PCF8584 * to name two of the most common. * * The return codes from the ``master_xfer{_atomic}`` fields should indicate the * type of error code that occurred during the transfer, as documented in the * Kernel Documentation file Documentation/i2c/fault-codes.rst. Otherwise, the * number of messages executed should be returned. */ struct i2c_algorithm { /* * If an adapter algorithm can't do I2C-level access, set master_xfer * to NULL. If an adapter algorithm can do SMBus access, set * smbus_xfer. If set to NULL, the SMBus protocol is simulated * using common I2C messages. 
* * master_xfer should return the number of messages successfully * processed, or a negative value on error */ int (*master_xfer)(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); int (*master_xfer_atomic)(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); int (*smbus_xfer)(struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int size, union i2c_smbus_data *data); int (*smbus_xfer_atomic)(struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int size, union i2c_smbus_data *data); /* To determine what the adapter supports */ u32 (*functionality)(struct i2c_adapter *adap); #if IS_ENABLED(CONFIG_I2C_SLAVE) int (*reg_slave)(struct i2c_client *client); int (*unreg_slave)(struct i2c_client *client); #endif }; /** * struct i2c_lock_operations - represent I2C locking operations * @lock_bus: Get exclusive access to an I2C bus segment * @trylock_bus: Try to get exclusive access to an I2C bus segment * @unlock_bus: Release exclusive access to an I2C bus segment * * The main operations are wrapped by i2c_lock_bus and i2c_unlock_bus. 
*/ struct i2c_lock_operations { void (*lock_bus)(struct i2c_adapter *adapter, unsigned int flags); int (*trylock_bus)(struct i2c_adapter *adapter, unsigned int flags); void (*unlock_bus)(struct i2c_adapter *adapter, unsigned int flags); }; /** * struct i2c_timings - I2C timing information * @bus_freq_hz: the bus frequency in Hz * @scl_rise_ns: time SCL signal takes to rise in ns; t(r) in the I2C specification * @scl_fall_ns: time SCL signal takes to fall in ns; t(f) in the I2C specification * @scl_int_delay_ns: time IP core additionally needs to setup SCL in ns * @sda_fall_ns: time SDA signal takes to fall in ns; t(f) in the I2C specification * @sda_hold_ns: time IP core additionally needs to hold SDA in ns * @digital_filter_width_ns: width in ns of spikes on i2c lines that the IP core * digital filter can filter out * @analog_filter_cutoff_freq_hz: threshold frequency for the low pass IP core * analog filter */ struct i2c_timings { u32 bus_freq_hz; u32 scl_rise_ns; u32 scl_fall_ns; u32 scl_int_delay_ns; u32 sda_fall_ns; u32 sda_hold_ns; u32 digital_filter_width_ns; u32 analog_filter_cutoff_freq_hz; }; /** * struct i2c_bus_recovery_info - I2C bus recovery information * @recover_bus: Recover routine. Either pass driver's recover_bus() routine, or * i2c_generic_scl_recovery(). * @get_scl: This gets current value of SCL line. Mandatory for generic SCL * recovery. Populated internally for generic GPIO recovery. * @set_scl: This sets/clears the SCL line. Mandatory for generic SCL recovery. * Populated internally for generic GPIO recovery. * @get_sda: This gets current value of SDA line. This or set_sda() is mandatory * for generic SCL recovery. Populated internally, if sda_gpio is a valid * GPIO, for generic GPIO recovery. * @set_sda: This sets/clears the SDA line. This or get_sda() is mandatory for * generic SCL recovery. Populated internally, if sda_gpio is a valid GPIO, * for generic GPIO recovery. 
* @get_bus_free: Returns the bus free state as seen from the IP core in case it * has a more complex internal logic than just reading SDA. Optional. * @prepare_recovery: This will be called before starting recovery. Platform may * configure padmux here for SDA/SCL line or something else they want. * @unprepare_recovery: This will be called after completing recovery. Platform * may configure padmux here for SDA/SCL line or something else they want. * @scl_gpiod: gpiod of the SCL line. Only required for GPIO recovery. * @sda_gpiod: gpiod of the SDA line. Only required for GPIO recovery. * @pinctrl: pinctrl used by GPIO recovery to change the state of the I2C pins. * Optional. * @pins_default: default pinctrl state of SCL/SDA lines, when they are assigned * to the I2C bus. Optional. Populated internally for GPIO recovery, if * state with the name PINCTRL_STATE_DEFAULT is found and pinctrl is valid. * @pins_gpio: recovery pinctrl state of SCL/SDA lines, when they are used as * GPIOs. Optional. Populated internally for GPIO recovery, if this state * is called "gpio" or "recovery" and pinctrl is valid. 
*/ struct i2c_bus_recovery_info { int (*recover_bus)(struct i2c_adapter *adap); int (*get_scl)(struct i2c_adapter *adap); void (*set_scl)(struct i2c_adapter *adap, int val); int (*get_sda)(struct i2c_adapter *adap); void (*set_sda)(struct i2c_adapter *adap, int val); int (*get_bus_free)(struct i2c_adapter *adap); void (*prepare_recovery)(struct i2c_adapter *adap); void (*unprepare_recovery)(struct i2c_adapter *adap); /* gpio recovery */ struct gpio_desc *scl_gpiod; struct gpio_desc *sda_gpiod; struct pinctrl *pinctrl; struct pinctrl_state *pins_default; struct pinctrl_state *pins_gpio; }; int i2c_recover_bus(struct i2c_adapter *adap); /* Generic recovery routines */ int i2c_generic_scl_recovery(struct i2c_adapter *adap); /** * struct i2c_adapter_quirks - describe flaws of an i2c adapter * @flags: see I2C_AQ_* for possible flags and read below * @max_num_msgs: maximum number of messages per transfer * @max_write_len: maximum length of a write message * @max_read_len: maximum length of a read message * @max_comb_1st_msg_len: maximum length of the first msg in a combined message * @max_comb_2nd_msg_len: maximum length of the second msg in a combined message * * Note about combined messages: Some I2C controllers can only send one message * per transfer, plus something called combined message or write-then-read. * This is (usually) a small write message followed by a read message and * barely enough to access register based devices like EEPROMs. There is a flag * to support this mode. It implies max_num_msgs = 2 and does the length checks * with max_comb_*_len because combined message mode usually has its own * limitations. Because of HW implementations, some controllers can actually do * write-then-anything or other variants. To support that, write-then-read has * been broken out into smaller bits like write-first and read-second which can * be combined as needed. 
*/ struct i2c_adapter_quirks { u64 flags; int max_num_msgs; u16 max_write_len; u16 max_read_len; u16 max_comb_1st_msg_len; u16 max_comb_2nd_msg_len; }; /* enforce max_num_msgs = 2 and use max_comb_*_len for length checks */ #define I2C_AQ_COMB BIT(0) /* first combined message must be write */ #define I2C_AQ_COMB_WRITE_FIRST BIT(1) /* second combined message must be read */ #define I2C_AQ_COMB_READ_SECOND BIT(2) /* both combined messages must have the same target address */ #define I2C_AQ_COMB_SAME_ADDR BIT(3) /* convenience macro for the typical write-then-read case */ #define I2C_AQ_COMB_WRITE_THEN_READ (I2C_AQ_COMB | I2C_AQ_COMB_WRITE_FIRST | \ I2C_AQ_COMB_READ_SECOND | I2C_AQ_COMB_SAME_ADDR) /* clock stretching is not supported */ #define I2C_AQ_NO_CLK_STRETCH BIT(4) /* message cannot have length of 0 */ #define I2C_AQ_NO_ZERO_LEN_READ BIT(5) #define I2C_AQ_NO_ZERO_LEN_WRITE BIT(6) #define I2C_AQ_NO_ZERO_LEN (I2C_AQ_NO_ZERO_LEN_READ | I2C_AQ_NO_ZERO_LEN_WRITE) /* adapter cannot do repeated START */ #define I2C_AQ_NO_REP_START BIT(7) /* * i2c_adapter is the structure used to identify a physical i2c bus along * with the access algorithms necessary to access it. 
*/ struct i2c_adapter { struct module *owner; unsigned int class; /* classes to allow probing for */ const struct i2c_algorithm *algo; /* the algorithm to access the bus */ void *algo_data; /* data fields that are valid for all devices */ const struct i2c_lock_operations *lock_ops; struct rt_mutex bus_lock; struct rt_mutex mux_lock; int timeout; /* in jiffies */ int retries; struct device dev; /* the adapter device */ unsigned long locked_flags; /* owned by the I2C core */ #define I2C_ALF_IS_SUSPENDED 0 #define I2C_ALF_SUSPEND_REPORTED 1 int nr; char name[48]; struct completion dev_released; struct mutex userspace_clients_lock; struct list_head userspace_clients; struct i2c_bus_recovery_info *bus_recovery_info; const struct i2c_adapter_quirks *quirks; struct irq_domain *host_notify_domain; struct regulator *bus_regulator; }; /* Map a pointer to the embedded struct device back to its containing struct i2c_adapter. */ #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) /* Return the driver data pointer of adap->dev, i.e. what i2c_set_adapdata() stored. */ static inline void *i2c_get_adapdata(const struct i2c_adapter *adap) { return dev_get_drvdata(&adap->dev); } /* Store @data as the driver data pointer of adap->dev. */ static inline void i2c_set_adapdata(struct i2c_adapter *adap, void *data) { dev_set_drvdata(&adap->dev, data); } /* With CONFIG_I2C_MUX: return the parent adapter when the parent device has type i2c_adapter_type, else NULL. Without CONFIG_I2C_MUX: always NULL. */ static inline struct i2c_adapter * i2c_parent_is_i2c_adapter(const struct i2c_adapter *adapter) { #if IS_ENABLED(CONFIG_I2C_MUX) struct device *parent = adapter->dev.parent; if (parent != NULL && parent->type == &i2c_adapter_type) return to_i2c_adapter(parent); else #endif return NULL; } int i2c_for_each_dev(void *data, int (*fn)(struct device *dev, void *data)); /* Adapter locking functions, exported for shared pin cases */ #define I2C_LOCK_ROOT_ADAPTER BIT(0) #define I2C_LOCK_SEGMENT BIT(1) /** * i2c_lock_bus - Get exclusive access to an I2C bus segment * @adapter: Target I2C bus segment * @flags: I2C_LOCK_ROOT_ADAPTER locks the root i2c adapter, I2C_LOCK_SEGMENT * locks only this branch in the adapter tree */ static inline void i2c_lock_bus(struct i2c_adapter *adapter, unsigned int flags) { adapter->lock_ops->lock_bus(adapter, flags); } /** * i2c_trylock_bus 
- Try to get exclusive access to an I2C bus segment * @adapter: Target I2C bus segment * @flags: I2C_LOCK_ROOT_ADAPTER tries to lock the root i2c adapter, * I2C_LOCK_SEGMENT tries to lock only this branch in the adapter tree * * Return: the int result of the adapter's lock_ops->trylock_bus() callback; * true if the I2C bus segment is locked, false otherwise */ static inline int i2c_trylock_bus(struct i2c_adapter *adapter, unsigned int flags) { return adapter->lock_ops->trylock_bus(adapter, flags); } /** * i2c_unlock_bus - Release exclusive access to an I2C bus segment * @adapter: Target I2C bus segment * @flags: I2C_LOCK_ROOT_ADAPTER unlocks the root i2c adapter, I2C_LOCK_SEGMENT * unlocks only this branch in the adapter tree */ static inline void i2c_unlock_bus(struct i2c_adapter *adapter, unsigned int flags) { adapter->lock_ops->unlock_bus(adapter, flags); } /** * i2c_mark_adapter_suspended - Report suspended state of the adapter to the core * @adap: Adapter to mark as suspended * * When using this helper to mark an adapter as suspended, the core will reject * further transfers to this adapter. The usage of this helper is optional but * recommended for devices having distinct handlers for system suspend and * runtime suspend. More complex devices are free to implement custom solutions * to reject transfers when suspended. */ static inline void i2c_mark_adapter_suspended(struct i2c_adapter *adap) { /* the flag is changed while holding the root adapter lock */ i2c_lock_bus(adap, I2C_LOCK_ROOT_ADAPTER); set_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags); i2c_unlock_bus(adap, I2C_LOCK_ROOT_ADAPTER); } /** * i2c_mark_adapter_resumed - Report resumed state of the adapter to the core * @adap: Adapter to mark as resumed * * When using this helper to mark an adapter as resumed, the core will allow * further transfers to this adapter. See also further notes to * @i2c_mark_adapter_suspended(). 
*/ static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap) { /* the flag is changed while holding the root adapter lock */ i2c_lock_bus(adap, I2C_LOCK_ROOT_ADAPTER); clear_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags); i2c_unlock_bus(adap, I2C_LOCK_ROOT_ADAPTER); } /* i2c adapter classes (bitmask) */ #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ #define I2C_CLASS_SPD (1<<7) /* Memory modules */ /* Warn users that the adapter doesn't support classes anymore */ #define I2C_CLASS_DEPRECATED (1<<8) /* Internal numbers to terminate lists */ #define I2C_CLIENT_END 0xfffeU /* Construct an I2C_CLIENT_END-terminated array of i2c addresses */ #define I2C_ADDRS(addr, addrs...) \ ((const unsigned short []){ addr, ## addrs, I2C_CLIENT_END }) /* ----- functions exported by i2c.o */ /* administration... */ #if IS_ENABLED(CONFIG_I2C) int i2c_add_adapter(struct i2c_adapter *adap); int devm_i2c_add_adapter(struct device *dev, struct i2c_adapter *adapter); void i2c_del_adapter(struct i2c_adapter *adap); int i2c_add_numbered_adapter(struct i2c_adapter *adap); int i2c_register_driver(struct module *owner, struct i2c_driver *driver); void i2c_del_driver(struct i2c_driver *driver); /* use a define to avoid include chaining to get THIS_MODULE */ #define i2c_add_driver(driver) \ i2c_register_driver(THIS_MODULE, driver) /* True when @client is neither NULL nor an error pointer and client->dev.driver is non-NULL. */ static inline bool i2c_client_has_driver(struct i2c_client *client) { return !IS_ERR_OR_NULL(client) && client->dev.driver; } /* call the i2c_client->command() of all attached clients with * the given arguments */ void i2c_clients_command(struct i2c_adapter *adap, unsigned int cmd, void *arg); struct i2c_adapter *i2c_get_adapter(int nr); void i2c_put_adapter(struct i2c_adapter *adap); unsigned int i2c_adapter_depth(struct i2c_adapter *adapter); void i2c_parse_fw_timings(struct device *dev, struct i2c_timings *t, bool use_defaults); /* Return the functionality mask */ static inline u32 i2c_get_functionality(struct i2c_adapter *adap) { return 
adap->algo->functionality(adap); } /* Return 1 if adapter supports everything we need, 0 if not. */ static inline int i2c_check_functionality(struct i2c_adapter *adap, u32 func) { return (func & i2c_get_functionality(adap)) == func; } /** * i2c_check_quirks() - Function for checking the quirk flags in an i2c adapter * @adap: i2c adapter * @quirks: quirk flags * * Return: true if the adapter has all the specified quirk flags, false if not */ static inline bool i2c_check_quirks(struct i2c_adapter *adap, u64 quirks) { /* an adapter without a quirks table has no quirk flags set */ if (!adap->quirks) return false; return (adap->quirks->flags & quirks) == quirks; } /* Return the adapter number for a specific adapter */ static inline int i2c_adapter_id(struct i2c_adapter *adap) { return adap->nr; } /* Build the 8-bit address byte for @msg: msg->addr shifted left by one, with bit 0 set when I2C_M_RD is set in msg->flags. */ static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg) { return (msg->addr << 1) | (msg->flags & I2C_M_RD ? 1 : 0); } u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold); void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred); int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr); /** * module_i2c_driver() - Helper macro for registering a modular I2C driver * @__i2c_driver: i2c_driver struct * * Helper macro for I2C drivers which do not do anything special in module * init/exit. This eliminates a lot of boilerplate. Each module may only * use this macro once, and calling it replaces module_init() and module_exit() */ #define module_i2c_driver(__i2c_driver) \ module_driver(__i2c_driver, i2c_add_driver, \ i2c_del_driver) /** * builtin_i2c_driver() - Helper macro for registering a builtin I2C driver * @__i2c_driver: i2c_driver struct * * Helper macro for I2C drivers which do not do anything special in their * init. This eliminates a lot of boilerplate. Each driver may only * use this macro once, and calling it replaces device_initcall(). 
*/ #define builtin_i2c_driver(__i2c_driver) \ builtin_driver(__i2c_driver, i2c_add_driver) #endif /* I2C */ /* must call put_device() when done with returned i2c_client device */ struct i2c_client *i2c_find_device_by_fwnode(struct fwnode_handle *fwnode); /* must call put_device() when done with returned i2c_adapter device */ struct i2c_adapter *i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode); /* must call i2c_put_adapter() when done with returned i2c_adapter device */ struct i2c_adapter *i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode); #if IS_ENABLED(CONFIG_OF) /* The of_* lookups below are thin wrappers: they convert the device_node with of_fwnode_handle() and call the matching *_by_fwnode() function. */ /* must call put_device() when done with returned i2c_client device */ static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node) { return i2c_find_device_by_fwnode(of_fwnode_handle(node)); } /* must call put_device() when done with returned i2c_adapter device */ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node) { return i2c_find_adapter_by_fwnode(of_fwnode_handle(node)); } /* must call i2c_put_adapter() when done with returned i2c_adapter device */ static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node) { return i2c_get_adapter_by_fwnode(of_fwnode_handle(node)); } const struct of_device_id *i2c_of_match_device(const struct of_device_id *matches, struct i2c_client *client); int of_i2c_get_board_info(struct device *dev, struct device_node *node, struct i2c_board_info *info); #else /* Stubs for builds without CONFIG_OF: the lookups and the match helper return NULL, of_i2c_get_board_info() returns -ENOTSUPP. */ static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node) { return NULL; } static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node) { return NULL; } static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node) { return NULL; } static inline const struct of_device_id *i2c_of_match_device(const struct of_device_id *matches, struct i2c_client *client) { return NULL; } static inline int of_i2c_get_board_info(struct device *dev, struct 
device_node *node, struct i2c_board_info *info) { return -ENOTSUPP; } #endif /* CONFIG_OF */ struct acpi_resource; struct acpi_resource_i2c_serialbus; #if IS_ENABLED(CONFIG_ACPI) bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); u32 i2c_acpi_find_bus_speed(struct device *dev); struct i2c_client *i2c_acpi_new_device_by_fwnode(struct fwnode_handle *fwnode, int index, struct i2c_board_info *info); struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle); bool i2c_acpi_waive_d0_probe(struct device *dev); #else /* Stubs for builds without CONFIG_ACPI: they return false, 0, 0, ERR_PTR(-ENODEV), NULL and false respectively. */ static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c) { return false; } static inline int i2c_acpi_client_count(struct acpi_device *adev) { return 0; } static inline u32 i2c_acpi_find_bus_speed(struct device *dev) { return 0; } static inline struct i2c_client *i2c_acpi_new_device_by_fwnode( struct fwnode_handle *fwnode, int index, struct i2c_board_info *info) { return ERR_PTR(-ENODEV); } static inline struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) { return NULL; } static inline bool i2c_acpi_waive_d0_probe(struct device *dev) { return false; } #endif /* CONFIG_ACPI */ /* Same as i2c_acpi_new_device_by_fwnode(), using the fwnode obtained from dev_fwnode(@dev). */ static inline struct i2c_client *i2c_acpi_new_device(struct device *dev, int index, struct i2c_board_info *info) { return i2c_acpi_new_device_by_fwnode(dev_fwnode(dev), index, info); } #endif /* _LINUX_I2C_H */ |
17 17 507 5 1 4 2 3 53 19 1 1 2 20 20 4 3 28 11 17 || // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 1995 Linus Torvalds * * Pentium III FXSR, SSE support * Gareth Hughes <gareth@valinux.com>, May 2000 * * X86-64 port * Andi Kleen. * * CPU hotplug support - ashok.raj@intel.com */ /* * This file handles the architecture-dependent parts of process handling.. */ #include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/sched/task_stack.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/elfcore.h> #include <linux/smp.h> #include <linux/slab.h> #include <linux/user.h> #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/ptrace.h> #include <linux/notifier.h> #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/prctl.h> #include <linux/uaccess.h> #include <linux/io.h> #include <linux/ftrace.h> #include <linux/syscalls.h> #include <linux/iommu.h> #include <asm/processor.h> #include <asm/pkru.h> #include <asm/fpu/sched.h> #include <asm/mmu_context.h> #include <asm/prctl.h> #include <asm/desc.h> #include <asm/proto.h> #include <asm/ia32.h> #include <asm/debugreg.h> #include <asm/switch_to.h> #include <asm/xen/hypervisor.h> #include <asm/vdso.h> #include <asm/resctrl.h> #include <asm/unistd.h> #include <asm/fsgsbase.h> #ifdef CONFIG_IA32_EMULATION /* Not included via unistd.h */ #include <asm/unistd_32_ia32.h> #endif #include "process.h" /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, const char *log_lvl) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; unsigned long d0, d1, d2, d3, d6, d7; unsigned int fsindex, gsindex; unsigned int ds, es; show_iret_regs(regs, log_lvl); if (regs->orig_ax != -1) pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); else pr_cont("\n"); printk("%sRAX: %016lx RBX: %016lx RCX: 
%016lx\n", log_lvl, regs->ax, regs->bx, regs->cx); printk("%sRDX: %016lx RSI: %016lx RDI: %016lx\n", log_lvl, regs->dx, regs->si, regs->di); printk("%sRBP: %016lx R08: %016lx R09: %016lx\n", log_lvl, regs->bp, regs->r8, regs->r9); printk("%sR10: %016lx R11: %016lx R12: %016lx\n", log_lvl, regs->r10, regs->r11, regs->r12); printk("%sR13: %016lx R14: %016lx R15: %016lx\n", log_lvl, regs->r13, regs->r14, regs->r15); if (mode == SHOW_REGS_SHORT) return; if (mode == SHOW_REGS_USER) { rdmsrl(MSR_FS_BASE, fs); rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); printk("%sFS: %016lx GS: %016lx\n", log_lvl, fs, shadowgs); return; } asm("movl %%ds,%0" : "=r" (ds)); asm("movl %%es,%0" : "=r" (es)); asm("movl %%fs,%0" : "=r" (fsindex)); asm("movl %%gs,%0" : "=r" (gsindex)); rdmsrl(MSR_FS_BASE, fs); rdmsrl(MSR_GS_BASE, gs); rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); cr0 = read_cr0(); cr2 = read_cr2(); cr3 = __read_cr3(); cr4 = __read_cr4(); printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", log_lvl, fs, fsindex, gs, gsindex, shadowgs); printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n", log_lvl, regs->cs, ds, es, cr0); printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n", log_lvl, cr2, cr3, cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); get_debugreg(d2, 2); get_debugreg(d3, 3); get_debugreg(d6, 6); get_debugreg(d7, 7); /* Only print out debug registers if they are in their non-default state. */ if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && (d6 == DR6_RESERVED) && (d7 == 0x400))) { printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n", log_lvl, d0, d1, d2); printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n", log_lvl, d3, d6, d7); } if (cpu_feature_enabled(X86_FEATURE_OSPKE)) printk("%sPKRU: %08x\n", log_lvl, read_pkru()); } void release_thread(struct task_struct *dead_task) { WARN_ON(dead_task->mm); } enum which_selector { FS, GS }; /* * Out of line to be protected from kprobes and tracing. 
If this would be
 * traced or probed then any access to a per CPU variable happens with
 * the wrong GS.
 *
 * It is not used on Xen paravirt. When paravirt support is needed, it
 * needs to be renamed with native_ prefix.
 *
 * Returns the inactive GS base. Callers must have interrupts disabled
 * (asserted via lockdep).
 */
static noinstr unsigned long __rdgsbase_inactive(void)
{
	unsigned long gsbase;

	lockdep_assert_irqs_disabled();

	if (!cpu_feature_enabled(X86_FEATURE_XENPV)) {
		/* Swap GS bases, read the now-active one, then swap back. */
		native_swapgs();
		gsbase = rdgsbase();
		native_swapgs();
	} else {
		/* On XENPV read MSR_KERNEL_GS_BASE instead of using swapgs. */
		instrumentation_begin();
		rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
		instrumentation_end();
	}

	return gsbase;
}

/*
 * Out of line to be protected from kprobes and tracing. If this would be
 * traced or probed then any access to a per CPU variable happens with
 * the wrong GS.
 *
 * It is not used on Xen paravirt. When paravirt support is needed, it
 * needs to be renamed with native_ prefix.
 *
 * Writes @gsbase as the inactive GS base. Callers must have interrupts
 * disabled (asserted via lockdep).
 */
static noinstr void __wrgsbase_inactive(unsigned long gsbase)
{
	lockdep_assert_irqs_disabled();

	if (!cpu_feature_enabled(X86_FEATURE_XENPV)) {
		/* Swap GS bases, write the now-active one, then swap back. */
		native_swapgs();
		wrgsbase(gsbase);
		native_swapgs();
	} else {
		/* On XENPV write MSR_KERNEL_GS_BASE instead of using swapgs. */
		instrumentation_begin();
		wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
		instrumentation_end();
	}
}

/*
 * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
 * not available. The goal is to be reasonably fast on non-FSGSBASE systems.
 * It's forcibly inlined because it'll generate better code and this function
 * is hot.
 *
 * @prev_p:   outgoing task whose thread.fsbase/gsbase may be updated
 * @selector: the selector value already saved for that segment
 * @which:    FS or GS
 */
static __always_inline void save_base_legacy(struct task_struct *prev_p, unsigned short selector, enum which_selector which)
{
	if (likely(selector == 0)) {
		/*
		 * On Intel (without X86_BUG_NULL_SEG), the segment base could
		 * be the pre-existing saved base or it could be zero. On AMD
		 * (with X86_BUG_NULL_SEG), the segment base could be almost
		 * anything.
		 *
		 * This branch is very hot (it's hit twice on almost every
		 * context switch between 64-bit programs), and avoiding
		 * the RDMSR helps a lot, so we just assume that whatever
		 * value is already saved is correct. This matches historical
		 * Linux behavior, so it won't break existing applications.
		 *
		 * To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
		 * report that the base is zero, it needs to actually be zero:
		 * see the corresponding logic in load_seg_legacy.
		 */
	} else {
		/*
		 * If the selector is 1, 2, or 3, then the base is zero on
		 * !X86_BUG_NULL_SEG CPUs and could be anything on
		 * X86_BUG_NULL_SEG CPUs. In the latter case, Linux
		 * has never attempted to preserve the base across context
		 * switches.
		 *
		 * If selector > 3, then it refers to a real segment, and
		 * saving the base isn't necessary.
		 */
		if (which == FS)
			prev_p->thread.fsbase = 0;
		else
			prev_p->thread.gsbase = 0;
	}
}

/*
 * Save the FS/GS selectors of the running CPU into @task->thread, and
 * the bases either directly (FSGSBASE) or via save_base_legacy().
 */
static __always_inline void save_fsgs(struct task_struct *task)
{
	savesegment(fs, task->thread.fsindex);
	savesegment(gs, task->thread.gsindex);
	if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
		/*
		 * If FSGSBASE is enabled, we can't make any useful guesses
		 * about the base, and user code expects us to save the current
		 * value. Fortunately, reading the base directly is efficient.
		 */
		task->thread.fsbase = rdfsbase();
		task->thread.gsbase = __rdgsbase_inactive();
	} else {
		save_base_legacy(task, task->thread.fsindex, FS);
		save_base_legacy(task, task->thread.gsindex, GS);
	}
}

/*
 * While a process is running,current->thread.fsbase and current->thread.gsbase
 * may not match the corresponding CPU registers (see save_base_legacy()).
*/ void current_save_fsgs(void) { unsigned long flags; /* Interrupts need to be off for FSGSBASE */ local_irq_save(flags); save_fsgs(current); local_irq_restore(flags); } #if IS_ENABLED(CONFIG_KVM) EXPORT_SYMBOL_GPL(current_save_fsgs); #endif static __always_inline void loadseg(enum which_selector which, unsigned short sel) { if (which == FS) loadsegment(fs, sel); else load_gs_index(sel); } static __always_inline void load_seg_legacy(unsigned short prev_index, unsigned long prev_base, unsigned short next_index, unsigned long next_base, enum which_selector which) { if (likely(next_index <= 3)) { /* * The next task is using 64-bit TLS, is not using this * segment at all, or is having fun with arcane CPU features. */ if (next_base == 0) { /* * Nasty case: on AMD CPUs, we need to forcibly zero * the base. */ if (static_cpu_has_bug(X86_BUG_NULL_SEG)) { loadseg(which, __USER_DS); loadseg(which, next_index); } else { /* * We could try to exhaustively detect cases * under which we can skip the segment load, * but there's really only one case that matters * for performance: if both the previous and * next states are fully zeroed, we can skip * the load. * * (This assumes that prev_base == 0 has no * false positives. This is the case on * Intel-style CPUs.) */ if (likely(prev_index | next_index | prev_base)) loadseg(which, next_index); } } else { if (prev_index != next_index) loadseg(which, next_index); wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE, next_base); } } else { /* * The next task is using a real segment. Loading the selector * is sufficient. */ loadseg(which, next_index); } } /* * Store prev's PKRU value and load next's PKRU value if they differ. PKRU * is not XSTATE managed on context switch because that would require a * lookup in the task's FPU xsave buffer and require to keep that updated * in various places. 
*/ static __always_inline void x86_pkru_load(struct thread_struct *prev, struct thread_struct *next) { if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return; /* Stash the prev task's value: */ prev->pkru = rdpkru(); /* * PKRU writes are slightly expensive. Avoid them when not * strictly necessary: */ if (prev->pkru != next->pkru) wrpkru(next->pkru); } static __always_inline void x86_fsgsbase_load(struct thread_struct *prev, struct thread_struct *next) { if (static_cpu_has(X86_FEATURE_FSGSBASE)) { /* Update the FS and GS selectors if they could have changed. */ if (unlikely(prev->fsindex || next->fsindex)) loadseg(FS, next->fsindex); if (unlikely(prev->gsindex || next->gsindex)) loadseg(GS, next->gsindex); /* Update the bases. */ wrfsbase(next->fsbase); __wrgsbase_inactive(next->gsbase); } else { load_seg_legacy(prev->fsindex, prev->fsbase, next->fsindex, next->fsbase, FS); load_seg_legacy(prev->gsindex, prev->gsbase, next->gsindex, next->gsbase, GS); } } unsigned long x86_fsgsbase_read_task(struct task_struct *task, unsigned short selector) { unsigned short idx = selector >> 3; unsigned long base; if (likely((selector & SEGMENT_TI_MASK) == 0)) { if (unlikely(idx >= GDT_ENTRIES)) return 0; /* * There are no user segments in the GDT with nonzero bases * other than the TLS segments. */ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return 0; idx -= GDT_ENTRY_TLS_MIN; base = get_desc_base(&task->thread.tls_array[idx]); } else { #ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; /* * If performance here mattered, we could protect the LDT * with RCU. This is a slow path, though, so we can just * take the mutex. 
*/ mutex_lock(&task->mm->context.lock); ldt = task->mm->context.ldt; if (unlikely(!ldt || idx >= ldt->nr_entries)) base = 0; else base = get_desc_base(ldt->entries + idx); mutex_unlock(&task->mm->context.lock); #else base = 0; #endif } return base; } unsigned long x86_gsbase_read_cpu_inactive(void) { unsigned long gsbase; if (boot_cpu_has(X86_FEATURE_FSGSBASE)) { unsigned long flags; local_irq_save(flags); gsbase = __rdgsbase_inactive(); local_irq_restore(flags); } else { rdmsrl(MSR_KERNEL_GS_BASE, gsbase); } return gsbase; } void x86_gsbase_write_cpu_inactive(unsigned long gsbase) { if (boot_cpu_has(X86_FEATURE_FSGSBASE)) { unsigned long flags; local_irq_save(flags); __wrgsbase_inactive(gsbase); local_irq_restore(flags); } else { wrmsrl(MSR_KERNEL_GS_BASE, gsbase); } } unsigned long x86_fsbase_read_task(struct task_struct *task) { unsigned long fsbase; if (task == current) fsbase = x86_fsbase_read_cpu(); else if (boot_cpu_has(X86_FEATURE_FSGSBASE) || (task->thread.fsindex == 0)) fsbase = task->thread.fsbase; else fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex); return fsbase; } unsigned long x86_gsbase_read_task(struct task_struct *task) { unsigned long gsbase; if (task == current) gsbase = x86_gsbase_read_cpu_inactive(); else if (boot_cpu_has(X86_FEATURE_FSGSBASE) || (task->thread.gsindex == 0)) gsbase = task->thread.gsbase; else gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex); return gsbase; } void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase) { WARN_ON_ONCE(task == current); task->thread.fsbase = fsbase; } void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase) { WARN_ON_ONCE(task == current); task->thread.gsbase = gsbase; } static void start_thread_common(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp, unsigned int _cs, unsigned int _ss, unsigned int _ds) { WARN_ON_ONCE(regs != current_pt_regs()); if (static_cpu_has(X86_BUG_NULL_SEG)) { /* Loading zero below won't clear the 
base. */ loadsegment(fs, __USER_DS); load_gs_index(__USER_DS); } reset_thread_features(); loadsegment(fs, 0); loadsegment(es, _ds); loadsegment(ds, _ds); load_gs_index(0); regs->ip = new_ip; regs->sp = new_sp; regs->cs = _cs; regs->ss = _ss; regs->flags = X86_EFLAGS_IF; } void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) { start_thread_common(regs, new_ip, new_sp, __USER_CS, __USER_DS, 0); } EXPORT_SYMBOL_GPL(start_thread); #ifdef CONFIG_COMPAT void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp, bool x32) { start_thread_common(regs, new_ip, new_sp, x32 ? __USER_CS : __USER32_CS, __USER_DS, __USER_DS); } #endif /* * switch_to(x,y) should switch tasks from x to y. * * This could still be optimized: * - fold all the options into a flag word and test it with a single test. * - could test fs/gs bitsliced * * Kprobes not supported here. Set the probe on schedule instead. * Function graph tracer not supported too. */ __no_kmsan_checks __visible __notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; struct fpu *prev_fpu = &prev->fpu; int cpu = smp_processor_id(); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(pcpu_hot.hardirq_stack_inuse)); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) switch_fpu_prepare(prev_fpu, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). * * (e.g. xen_load_tls()) */ save_fsgs(prev_p); /* * Load TLS before restoring any segments so that segment loads * reference the correct GDT entries. */ load_TLS(next, cpu); /* * Leave lazy mode, flushing any hypercalls made here. This * must be done after loading TLS entries in the GDT but before * loading segments that might reference them. */ arch_end_context_switch(next_p); /* Switch DS and ES. 
* * Reading them only returns the selectors, but writing them (if * nonzero) loads the full descriptor from the GDT or LDT. The * LDT for next is loaded in switch_mm, and the GDT is loaded * above. * * We therefore need to write new values to the segment * registers on every context switch unless both the new and old * values are zero. * * Note that we don't need to do anything for CS and SS, as * those are saved and restored as part of pt_regs. */ savesegment(es, prev->es); if (unlikely(next->es | prev->es)) loadsegment(es, next->es); savesegment(ds, prev->ds); if (unlikely(next->ds | prev->ds)) loadsegment(ds, next->ds); x86_fsgsbase_load(prev, next); x86_pkru_load(prev, next); /* * Switch the PDA and FPU contexts. */ raw_cpu_write(pcpu_hot.current_task, next_p); raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p)); switch_fpu_finish(); /* Reload sp0. */ update_task_stack(next_p); switch_to_extra(prev_p, next_p); if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) { /* * AMD CPUs have a misfeature: SYSRET sets the SS selector but * does not update the cached descriptor. As a result, if we * do SYSRET while SS is NULL, we'll end up in user mode with * SS apparently equal to __USER_DS but actually unusable. * * The straightforward workaround would be to fix it up just * before SYSRET, but that would slow down the system call * fast paths. Instead, we ensure that SS is never NULL in * system call context. We do this by replacing NULL SS * selectors at every context switch. SYSCALL sets up a valid * SS, so the only way to get NULL is to re-enter the kernel * from CPL 3 through an interrupt. Since that can't happen * in the same task as a running syscall, we are guaranteed to * context switch between every interrupt vector entry and a * subsequent SYSRET. * * We read SS first because SS reads are much faster than * writes. Out of caution, we force SS to __KERNEL_DS even if * it previously had a different non-NULL value. 
*/ unsigned short ss_sel; savesegment(ss, ss_sel); if (ss_sel != __KERNEL_DS) loadsegment(ss, __KERNEL_DS); } /* Load the Intel cache allocation PQR MSR. */ resctrl_sched_in(next_p); return prev_p; } void set_personality_64bit(void) { /* inherit personality from parent */ /* Make sure to be in 64bit mode */ clear_thread_flag(TIF_ADDR32); /* Pretend that this comes from a 64bit execve */ task_pt_regs(current)->orig_ax = __NR_execve; current_thread_info()->status &= ~TS_COMPAT; if (current->mm) __set_bit(MM_CONTEXT_HAS_VSYSCALL, ¤t->mm->context.flags); /* TBD: overwrites user setup. Should have two bits. But 64bit processes have always behaved this way, so it's not too bad. The main problem is just that 32bit children are affected again. */ current->personality &= ~READ_IMPLIES_EXEC; } static void __set_personality_x32(void) { #ifdef CONFIG_X86_X32_ABI if (current->mm) current->mm->context.flags = 0; current->personality &= ~READ_IMPLIES_EXEC; /* * in_32bit_syscall() uses the presence of the x32 syscall bit * flag to determine compat status. The x86 mmap() code relies on * the syscall bitness so set x32 syscall bit right here to make * in_32bit_syscall() work during exec(). * * Pretend to come from a x32 execve. */ task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT; current_thread_info()->status &= ~TS_COMPAT; #endif } static void __set_personality_ia32(void) { #ifdef CONFIG_IA32_EMULATION if (current->mm) { /* * uprobes applied to this MM need to know this and * cannot use user_64bit_mode() at that time. 
*/ __set_bit(MM_CONTEXT_UPROBE_IA32, ¤t->mm->context.flags); } current->personality |= force_personality32; /* Prepare the first "return" to user space */ task_pt_regs(current)->orig_ax = __NR_ia32_execve; current_thread_info()->status |= TS_COMPAT; #endif } void set_personality_ia32(bool x32) { /* Make sure to be in 32bit mode */ set_thread_flag(TIF_ADDR32); if (x32) __set_personality_x32(); else __set_personality_ia32(); } EXPORT_SYMBOL_GPL(set_personality_ia32); #ifdef CONFIG_CHECKPOINT_RESTORE static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr) { int ret; ret = map_vdso_once(image, addr); if (ret) return ret; return (long)image->size; } #endif #ifdef CONFIG_ADDRESS_MASKING #define LAM_U57_BITS 6 static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits) { if (!cpu_feature_enabled(X86_FEATURE_LAM)) return -ENODEV; /* PTRACE_ARCH_PRCTL */ if (current->mm != mm) return -EINVAL; if (mm_valid_pasid(mm) && !test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags)) return -EINVAL; if (mmap_write_lock_killable(mm)) return -EINTR; if (test_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags)) { mmap_write_unlock(mm); return -EBUSY; } if (!nr_bits) { mmap_write_unlock(mm); return -EINVAL; } else if (nr_bits <= LAM_U57_BITS) { mm->context.lam_cr3_mask = X86_CR3_LAM_U57; mm->context.untag_mask = ~GENMASK(62, 57); } else { mmap_write_unlock(mm); return -EINVAL; } write_cr3(__read_cr3() | mm->context.lam_cr3_mask); set_tlbstate_lam_mode(mm); set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags); mmap_write_unlock(mm); return 0; } #endif long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) { int ret = 0; switch (option) { case ARCH_SET_GS: { if (unlikely(arg2 >= TASK_SIZE_MAX)) return -EPERM; preempt_disable(); /* * ARCH_SET_GS has always overwritten the index * and the base. Zero is the most sensible value * to put in the index, and is the only value that * makes any sense if FSGSBASE is unavailable. 
*/ if (task == current) { loadseg(GS, 0); x86_gsbase_write_cpu_inactive(arg2); /* * On non-FSGSBASE systems, save_base_legacy() expects * that we also fill in thread.gsbase. */ task->thread.gsbase = arg2; } else { task->thread.gsindex = 0; x86_gsbase_write_task(task, arg2); } preempt_enable(); break; } case ARCH_SET_FS: { /* * Not strictly needed for %fs, but do it for symmetry * with %gs */ if (unlikely(arg2 >= TASK_SIZE_MAX)) return -EPERM; preempt_disable(); /* * Set the selector to 0 for the same reason * as %gs above. */ if (task == current) { loadseg(FS, 0); x86_fsbase_write_cpu(arg2); /* * On non-FSGSBASE systems, save_base_legacy() expects * that we also fill in thread.fsbase. */ task->thread.fsbase = arg2; } else { task->thread.fsindex = 0; x86_fsbase_write_task(task, arg2); } preempt_enable(); break; } case ARCH_GET_FS: { unsigned long base = x86_fsbase_read_task(task); ret = put_user(base, (unsigned long __user *)arg2); break; } case ARCH_GET_GS: { unsigned long base = x86_gsbase_read_task(task); ret = put_user(base, (unsigned long __user *)arg2); break; } #ifdef CONFIG_CHECKPOINT_RESTORE # ifdef CONFIG_X86_X32_ABI case ARCH_MAP_VDSO_X32: return prctl_map_vdso(&vdso_image_x32, arg2); # endif # if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION case ARCH_MAP_VDSO_32: return prctl_map_vdso(&vdso_image_32, arg2); # endif case ARCH_MAP_VDSO_64: return prctl_map_vdso(&vdso_image_64, arg2); #endif #ifdef CONFIG_ADDRESS_MASKING case ARCH_GET_UNTAG_MASK: return put_user(task->mm->context.untag_mask, (unsigned long __user *)arg2); case ARCH_ENABLE_TAGGED_ADDR: return prctl_enable_tagged_addr(task->mm, arg2); case ARCH_FORCE_TAGGED_SVA: if (current != task) return -EINVAL; set_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &task->mm->context.flags); return 0; case ARCH_GET_MAX_TAG_BITS: if (!cpu_feature_enabled(X86_FEATURE_LAM)) return put_user(0, (unsigned long __user *)arg2); else return put_user(LAM_U57_BITS, (unsigned long __user *)arg2); #endif case 
ARCH_SHSTK_ENABLE: case ARCH_SHSTK_DISABLE: case ARCH_SHSTK_LOCK: case ARCH_SHSTK_UNLOCK: case ARCH_SHSTK_STATUS: return shstk_prctl(task, option, arg2); default: ret = -EINVAL; break; } return ret; } SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) { long ret; ret = do_arch_prctl_64(current, option, arg2); if (ret == -EINVAL) ret = do_arch_prctl_common(option, arg2); return ret; } #ifdef CONFIG_IA32_EMULATION COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) { return do_arch_prctl_common(option, arg2); } #endif unsigned long KSTK_ESP(struct task_struct *task) { return task_pt_regs(task)->sp; } |
33 3078 3073 33 || // SPDX-License-Identifier: GPL-2.0 /* * bio-integrity.c - bio data integrity extensions * * Copyright (C) 2007, 2008, 2009 Oracle Corporation * Written by: Martin K. Petersen <martin.petersen@oracle.com> */ #include <linux/blk-integrity.h> #include <linux/mempool.h> #include <linux/export.h> #include <linux/bio.h> #include <linux/workqueue.h> #include <linux/slab.h> #include "blk.h" static struct kmem_cache *bip_slab; static struct workqueue_struct *kintegrityd_wq; void blk_flush_integrity(void) { flush_workqueue(kintegrityd_wq); } static void __bio_integrity_free(struct bio_set *bs, struct bio_integrity_payload *bip) { if (bs && mempool_initialized(&bs->bio_integrity_pool)) { if (bip->bip_vec) bvec_free(&bs->bvec_integrity_pool, bip->bip_vec, bip->bip_max_vcnt); mempool_free(bip, &bs->bio_integrity_pool); } else { kfree(bip); } } /** * bio_integrity_alloc - Allocate integrity payload and attach it to bio * @bio: bio to attach integrity metadata to * @gfp_mask: Memory allocation mask * @nr_vecs: Number of integrity metadata scatter-gather elements * * Description: This function prepares a bio for attaching integrity * metadata. nr_vecs specifies the maximum number of pages containing * integrity metadata that can be attached. 
*/ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp_mask, unsigned int nr_vecs) { struct bio_integrity_payload *bip; struct bio_set *bs = bio->bi_pool; unsigned inline_vecs; if (WARN_ON_ONCE(bio_has_crypt_ctx(bio))) return ERR_PTR(-EOPNOTSUPP); if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) { bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask); inline_vecs = nr_vecs; } else { bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask); inline_vecs = BIO_INLINE_VECS; } if (unlikely(!bip)) return ERR_PTR(-ENOMEM); memset(bip, 0, sizeof(*bip)); if (nr_vecs > inline_vecs) { bip->bip_max_vcnt = nr_vecs; bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool, &bip->bip_max_vcnt, gfp_mask); if (!bip->bip_vec) goto err; } else { bip->bip_vec = bip->bip_inline_vecs; bip->bip_max_vcnt = inline_vecs; } bip->bip_bio = bio; bio->bi_integrity = bip; bio->bi_opf |= REQ_INTEGRITY; return bip; err: __bio_integrity_free(bs, bip); return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(bio_integrity_alloc); /** * bio_integrity_free - Free bio integrity payload * @bio: bio containing bip to be freed * * Description: Used to free the integrity portion of a bio. Usually * called from bio_free(). */ void bio_integrity_free(struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_set *bs = bio->bi_pool; if (bip->bip_flags & BIP_BLOCK_INTEGRITY) kfree(bvec_virt(bip->bip_vec)); __bio_integrity_free(bs, bip); bio->bi_integrity = NULL; bio->bi_opf &= ~REQ_INTEGRITY; } /** * bio_integrity_add_page - Attach integrity metadata * @bio: bio to update * @page: page containing integrity metadata * @len: number of bytes of integrity metadata in page * @offset: start offset within page * * Description: Attach a page containing integrity metadata to bio. 
*/
int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	struct bio_integrity_payload *bip = bio_integrity(bio);

	/* Refuse to grow the payload past the queue's max_hw_sectors. */
	if (((bip->bip_iter.bi_size + len) >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
		return 0;

	if (bip->bip_vcnt > 0) {
		struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
		bool same_page = false;

		/* First try to merge into the last vector instead of adding one. */
		if (bvec_try_merge_hw_page(q, bv, page, len, offset, &same_page)) {
			bip->bip_iter.bi_size += len;
			return len;
		}

		/* No room for another vector in the payload or on the queue. */
		if (bip->bip_vcnt >= min(bip->bip_max_vcnt, queue_max_integrity_segments(q)))
			return 0;

		/*
		 * If the queue doesn't support SG gaps and adding this segment
		 * would create a gap, disallow it.
		 */
		if (bvec_gap_to_prev(&q->limits, bv, offset))
			return 0;
	}

	bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset);
	bip->bip_vcnt++;
	bip->bip_iter.bi_size += len;

	/* Returns @len on success; every failure path above returns 0. */
	return len;
}
EXPORT_SYMBOL(bio_integrity_add_page);

/**
 * bio_integrity_process - Process integrity metadata for a bio
 * @bio:	bio to generate/verify integrity metadata for
 * @proc_iter:  iterator to process
 * @proc_fn:	Pointer to the relevant processing function
 *
 * Calls @proc_fn once per data segment and stops at the first non-zero
 * status, which is returned.
 */
static blk_status_t bio_integrity_process(struct bio *bio, struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn)
{
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	struct blk_integrity_iter iter;
	struct bvec_iter bviter;
	struct bio_vec bv;
	struct bio_integrity_payload *bip = bio_integrity(bio);
	blk_status_t ret = BLK_STS_OK;

	iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
	iter.interval = 1 << bi->interval_exp;
	iter.tuple_size = bi->tuple_size;
	iter.seed = proc_iter->bi_sector;
	/* Protection buffer is addressed through the first integrity vector. */
	iter.prot_buf = bvec_virt(bip->bip_vec);

	__bio_for_each_segment(bv, bio, bviter, *proc_iter) {
		void *kaddr = bvec_kmap_local(&bv);

		iter.data_buf = kaddr;
		iter.data_size = bv.bv_len;
		ret = proc_fn(&iter);
		kunmap_local(kaddr);

		if (ret)
			break;

	}
	return ret;
}

/**
 * bio_integrity_prep - Prepare bio for integrity I/O
 * @bio:	bio to
prepare
 *
 * Description:  Checks if the bio already has an integrity payload attached.
 * If it does, the payload has been generated by another kernel subsystem,
 * and we just pass it through. Otherwise allocates integrity payload.
 * The bio must have data direction, target device and start sector set prior
 * to calling.  In the WRITE case, integrity metadata will be generated using
 * the block device's integrity function.  In the READ case, the buffer
 * will be prepared for DMA and a suitable end_io handler set up.
 *
 * Return: true when the bio may proceed; false when an error occurred, in
 * which case the bio has been completed with BLK_STS_RESOURCE.
 */
bool bio_integrity_prep(struct bio *bio)
{
	struct bio_integrity_payload *bip;
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	void *buf;
	unsigned long start, end;
	unsigned int len, nr_pages;
	unsigned int bytes, offset, i;

	if (!bi)
		return true;

	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
		return true;

	if (!bio_sectors(bio))
		return true;

	/* Already protected? */
	if (bio_integrity(bio))
		return true;

	/* Nothing to do unless the profile can verify (READ) or generate (WRITE). */
	if (bio_data_dir(bio) == READ) {
		if (!bi->profile->verify_fn || !(bi->flags & BLK_INTEGRITY_VERIFY))
			return true;
	} else {
		if (!bi->profile->generate_fn || !(bi->flags & BLK_INTEGRITY_GENERATE))
			return true;
	}

	/* Allocate kernel buffer for protection data */
	len = bio_integrity_bytes(bi, bio_sectors(bio));
	buf = kmalloc(len, GFP_NOIO);
	if (unlikely(buf == NULL)) {
		printk(KERN_ERR "could not allocate integrity buffer\n");
		goto err_end_io;
	}

	/* Number of pages the buffer touches, rounding the end address up. */
	end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	start = ((unsigned long) buf) >> PAGE_SHIFT;
	nr_pages = end - start;

	/* Allocate bio integrity payload and integrity vectors */
	bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
	if (IS_ERR(bip)) {
		printk(KERN_ERR "could not allocate data integrity bioset\n");
		kfree(buf);
		goto err_end_io;
	}

	bip->bip_flags |= BIP_BLOCK_INTEGRITY;
	bip_set_seed(bip, bio->bi_iter.bi_sector);

	if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
		bip->bip_flags |= BIP_IP_CHECKSUM;

	/* Map it */
	offset = offset_in_page(buf);
	for (i = 0; i < nr_pages && len > 0; i++) {
		/* Add at most the remainder of the current page per iteration. */
		bytes = PAGE_SIZE - offset;

		if (bytes > len)
			bytes = len;

		if (bio_integrity_add_page(bio, virt_to_page(buf), bytes, offset) < bytes) {
			printk(KERN_ERR "could not attach integrity payload\n");
			goto err_end_io;
		}

		buf += bytes;
		len -= bytes;
		offset = 0;
	}

	/* Auto-generate integrity metadata if this is a write */
	if (bio_data_dir(bio) == WRITE) {
		bio_integrity_process(bio, &bio->bi_iter, bi->profile->generate_fn);
	} else {
		/* Remember the data iterator for bio_integrity_verify_fn(). */
		bip->bio_iter = bio->bi_iter;
	}
	return true;

err_end_io:
	bio->bi_status = BLK_STS_RESOURCE;
	bio_endio(bio);
	return false;
}
EXPORT_SYMBOL(bio_integrity_prep);

/**
 * bio_integrity_verify_fn - Integrity I/O completion worker
 * @work:	Work struct stored in bio to be verified
 *
 * Description: This workqueue function is called to complete a READ
 * request.  The function verifies the transferred integrity metadata
 * and then calls the original bio end_io function.
 */
static void bio_integrity_verify_fn(struct work_struct *work)
{
	struct bio_integrity_payload *bip = container_of(work, struct bio_integrity_payload, bip_work);
	struct bio *bio = bip->bip_bio;
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);

	/*
	 * At the moment verify is called bio's iterator was advanced
	 * during split and completion, we need to rewind iterator to
	 * its original position.
	 */
	bio->bi_status = bio_integrity_process(bio, &bip->bio_iter, bi->profile->verify_fn);
	bio_integrity_free(bio);
	bio_endio(bio);
}

/**
 * __bio_integrity_endio - Integrity I/O completion function
 * @bio:	Protected bio
 *
 * Description: Completion for integrity I/O
 *
 * Normally I/O completion is done in interrupt context.  However,
 * verifying I/O integrity is a time-consuming task which must be run
 * in process context.	This function postpones completion
 * accordingly.
*/
bool __bio_integrity_endio(struct bio *bio)
{
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	struct bio_integrity_payload *bip = bio_integrity(bio);

	/*
	 * A READ that finished without error, carries a payload flagged
	 * BIP_BLOCK_INTEGRITY and has a verify callback is handed to the
	 * kintegrityd workqueue.  Returning false here leaves completion
	 * to bio_integrity_verify_fn(), which calls bio_endio() itself.
	 */
	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
	    (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->profile->verify_fn) {
		INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
		queue_work(kintegrityd_wq, &bip->bip_work);
		return false;
	}

	/* Nothing to verify: drop the payload and report true to the caller */
	bio_integrity_free(bio);
	return true;
}

/**
 * bio_integrity_advance - Advance integrity vector
 * @bio:	bio whose integrity vector to update
 * @bytes_done:	number of data bytes that have been completed
 *
 * Description: This function calculates how many integrity bytes the
 * number of completed data bytes correspond to and advances the
 * integrity vector accordingly.
 */
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	/* bytes_done >> 9 expresses the completed data in 512-byte units */
	unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);

	bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9);
	bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
}

/**
 * bio_integrity_trim - Trim integrity vector
 * @bio:	bio whose integrity vector to update
 *
 * Description: Used to trim the integrity vector in a cloned bio.
*/
void bio_integrity_trim(struct bio *bio)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);

	/* Resize the integrity iterator to match the bio's current sectors */
	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
}
EXPORT_SYMBOL(bio_integrity_trim);

/**
 * bio_integrity_clone - Callback for cloning bios with integrity metadata
 * @bio:	New bio
 * @bio_src:	Original bio
 * @gfp_mask:	Memory allocation mask
 *
 * Description:	Called to allocate a bip when cloning a bio
 *
 * Return: 0 on success, otherwise the error code carried by the pointer
 * returned from bio_integrity_alloc().  @bio_src must already have an
 * integrity payload (enforced with BUG_ON).
 */
int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
			gfp_t gfp_mask)
{
	struct bio_integrity_payload *bip_src = bio_integrity(bio_src);
	struct bio_integrity_payload *bip;

	BUG_ON(bip_src == NULL);

	bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
	if (IS_ERR(bip))
		return PTR_ERR(bip);

	/* Copy the vector entries themselves; the pages they point to are shared */
	memcpy(bip->bip_vec, bip_src->bip_vec,
	       bip_src->bip_vcnt * sizeof(struct bio_vec));

	bip->bip_vcnt = bip_src->bip_vcnt;
	bip->bip_iter = bip_src->bip_iter;
	/*
	 * NOTE(review): BIP_BLOCK_INTEGRITY is masked out of the clone,
	 * presumably so that freeing the clone does not release the buffer
	 * owned by the source payload -- confirm against bio_integrity_free().
	 */
	bip->bip_flags = bip_src->bip_flags & ~BIP_BLOCK_INTEGRITY;

	return 0;
}

/*
 * Set up the payload and vector mempools of @bs with @pool_size elements.
 * Returns 0 on success or when the payload pool is already initialized,
 * -1 if either pool could not be created (the payload pool is torn down
 * again if the vector pool fails).
 */
int bioset_integrity_create(struct bio_set *bs, int pool_size)
{
	if (mempool_initialized(&bs->bio_integrity_pool))
		return 0;

	if (mempool_init_slab_pool(&bs->bio_integrity_pool,
				   pool_size, bip_slab))
		return -1;

	if (biovec_init_pool(&bs->bvec_integrity_pool, pool_size)) {
		mempool_exit(&bs->bio_integrity_pool);
		return -1;
	}

	return 0;
}
EXPORT_SYMBOL(bioset_integrity_create);

/* Tear down both integrity mempools of @bs */
void bioset_integrity_free(struct bio_set *bs)
{
	mempool_exit(&bs->bio_integrity_pool);
	mempool_exit(&bs->bvec_integrity_pool);
}

/* Boot-time setup: create the kintegrityd workqueue and the payload slab */
void __init bio_integrity_init(void)
{
	/*
	 * kintegrityd won't block much but may burn a lot of CPU cycles.
	 * Make it highpri CPU intensive wq with max concurrency of 1.
	 */
	kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
					 WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
	if (!kintegrityd_wq)
		panic("Failed to create kintegrityd\n");

	/* Each slab object holds the payload plus BIO_INLINE_VECS inline vectors */
	bip_slab = kmem_cache_create("bio_integrity_payload",
				     sizeof(struct bio_integrity_payload) +
				     sizeof(struct bio_vec) * BIO_INLINE_VECS,
				     0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
}
|
9 3 4 2 3 20 49 68 69 1 2 2 2 2 18 18 3 3 3 7 13 8 5 8 6 6 8 5 7 3 3 6 3 13 18 2 1 4 4 13 3 15 18 || // SPDX-License-Identifier: GPL-2.0 /* * NTP state machine interfaces and logic. * * This code was mainly moved from kernel/timer.c and kernel/time.c * Please see those files for relevant copyright info and historical * changelogs. */ #include <linux/capability.h> #include <linux/clocksource.h> #include <linux/workqueue.h> #include <linux/hrtimer.h> #include <linux/jiffies.h> #include <linux/math64.h> #include <linux/timex.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/rtc.h> #include <linux/audit.h> #include "ntp_internal.h" #include "timekeeping_internal.h" /* * NTP timekeeping variables: * * Note: All of the NTP state is protected by the timekeeping locks. */ /* USER_HZ period (usecs): */ unsigned long tick_usec = USER_TICK_USEC; /* SHIFTED_HZ period (nsecs): */ unsigned long tick_nsec; static u64 tick_length; static u64 tick_length_base; #define SECS_PER_DAY 86400 #define MAX_TICKADJ 500LL /* usecs */ #define MAX_TICKADJ_SCALED \ (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) #define MAX_TAI_OFFSET 100000 /* * phase-lock loop variables */ /* * clock synchronization status * * (TIME_ERROR prevents overwriting the CMOS clock) */ static int time_state = TIME_OK; /* clock status bits: */ static int time_status = STA_UNSYNC; /* time adjustment (nsecs): */ static s64 time_offset; /* pll time constant: */ static long time_constant = 2; /* maximum error (usecs): */ static long time_maxerror = NTP_PHASE_LIMIT; /* estimated error (usecs): */ static long time_esterror = NTP_PHASE_LIMIT; /* frequency offset (scaled nsecs/secs): */ static s64 time_freq; /* time at last adjustment (secs): */ static time64_t time_reftime; static long time_adjust; /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ static s64 ntp_tick_adj; /* second value of the next pending leapsecond, or TIME64_MAX if no 
leap */
static time64_t			ntp_next_leap_sec = TIME64_MAX;

#ifdef CONFIG_NTP_PPS

/*
 * The following variables are used when a pulse-per-second (PPS) signal
 * is available. They establish the engineering parameters of the clock
 * discipline loop when controlled by the PPS signal.
 */
#define PPS_VALID	10	/* PPS signal watchdog max (s) */
#define PPS_POPCORN	4	/* popcorn spike threshold (shift) */
#define PPS_INTMIN	2	/* min freq interval (s) (shift) */
#define PPS_INTMAX	8	/* max freq interval (s) (shift) */
#define PPS_INTCOUNT	4	/* number of consecutive good intervals to
				   increase pps_shift or consecutive bad
				   intervals to decrease it */
#define PPS_MAXWANDER	100000	/* max PPS freq wander (ns/s) */

static int pps_valid;		/* signal watchdog counter */
static long pps_tf[3];		/* phase median filter; [0] is the newest sample */
static long pps_jitter;		/* current jitter (ns) */
static struct timespec64 pps_fbase; /* beginning of the last freq interval */
static int pps_shift;		/* current interval duration (s) (shift),
				   i.e. the interval is 1 << pps_shift seconds */
static int pps_intcnt;		/* interval counter: counts up on good and
				   down on bad intervals, bounded by
				   +/-PPS_INTCOUNT */
static s64 pps_freq;		/* frequency offset (scaled ns/s) */
static long pps_stabil;		/* current stability (scaled ns/s) */

/*
 * PPS signal quality monitors
 */
static long pps_calcnt;		/* calibration intervals */
static long pps_jitcnt;		/* jitter limit exceeded */
static long pps_stbcnt;		/* stability limit exceeded */
static long pps_errcnt;		/* calibration errors */


/* PPS kernel consumer compensates the whole phase error immediately.
 * Otherwise, reduce the offset by a fixed factor times the time constant.
*/
static inline s64 ntp_offset_chunk(s64 offset)
{
	if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)
		return offset;
	else
		return shift_right(offset, SHIFT_PLL + time_constant);
}

/* Restart PPS frequency calibration with the shortest interval */
static inline void pps_reset_freq_interval(void)
{
	/* the PPS calibration interval may end
	   surprisingly early */
	pps_shift = PPS_INTMIN;
	pps_intcnt = 0;
}

/**
 * pps_clear - Clears the PPS state variables
 */
static inline void pps_clear(void)
{
	pps_reset_freq_interval();
	pps_tf[0] = 0;
	pps_tf[1] = 0;
	pps_tf[2] = 0;
	pps_fbase.tv_sec = pps_fbase.tv_nsec = 0;
	pps_freq = 0;
}

/* Decrease pps_valid to indicate that another second has passed since
 * the last PPS signal. When it reaches 0, indicate that PPS signal is
 * missing.
 */
static inline void pps_dec_valid(void)
{
	if (pps_valid > 0)
		pps_valid--;
	else {
		time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
				 STA_PPSWANDER | STA_PPSERROR);
		pps_clear();
	}
}

/* Record @freq as the current PPS frequency offset */
static inline void pps_set_freq(s64 freq)
{
	pps_freq = freq;
}

/*
 * Returns non-zero when @status describes an error condition, taking the
 * requested PPS modes into account.
 */
static inline int is_error_status(int status)
{
	return (status & (STA_UNSYNC|STA_CLOCKERR))
		/* PPS signal lost when either PPS time or
		 * PPS frequency synchronization requested
		 */
		|| ((status & (STA_PPSFREQ|STA_PPSTIME))
			&& !(status & STA_PPSSIGNAL))
		/* PPS jitter exceeded when
		 * PPS time synchronization requested */
		|| ((status & (STA_PPSTIME|STA_PPSJITTER))
			== (STA_PPSTIME|STA_PPSJITTER))
		/* PPS wander exceeded or calibration error when
		 * PPS frequency synchronization requested
		 */
		|| ((status & STA_PPSFREQ)
			&& (status & (STA_PPSWANDER|STA_PPSERROR)));
}

/* Copy the PPS state into the PPS fields of @txc */
static inline void pps_fill_timex(struct __kernel_timex *txc)
{
	txc->ppsfreq	   = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) *
					 PPM_SCALE_INV, NTP_SCALE_SHIFT);
	txc->jitter	   = pps_jitter;
	/* Without STA_NANO the jitter is reported in microseconds */
	if (!(time_status & STA_NANO))
		txc->jitter = pps_jitter / NSEC_PER_USEC;
	txc->shift	   = pps_shift;
	txc->stabil	   = pps_stabil;
	txc->jitcnt	   = pps_jitcnt;
	txc->calcnt	   = pps_calcnt;
	txc->errcnt	   = pps_errcnt;
	txc->stbcnt	   = pps_stbcnt;
}

#else /* !CONFIG_NTP_PPS */

/* Without PPS support the offset is always reduced gradually */
static inline s64 ntp_offset_chunk(s64 offset)
{
	return shift_right(offset, SHIFT_PLL + time_constant);
}

/* No PPS state to maintain: the following helpers are empty stubs */
static inline void pps_reset_freq_interval(void) {}
static inline void pps_clear(void) {}
static inline void pps_dec_valid(void) {}
static inline void pps_set_freq(s64 freq) {}

static inline int is_error_status(int status)
{
	return status & (STA_UNSYNC|STA_CLOCKERR);
}

static inline void pps_fill_timex(struct __kernel_timex *txc)
{
	/* PPS is not implemented, so these are zero */
	txc->ppsfreq	   = 0;
	txc->jitter	   = 0;
	txc->shift	   = 0;
	txc->stabil	   = 0;
	txc->jitcnt	   = 0;
	txc->calcnt	   = 0;
	txc->errcnt	   = 0;
	txc->stbcnt	   = 0;
}

#endif /* CONFIG_NTP_PPS */


/**
 * ntp_synced - Returns 1 if the NTP status is not UNSYNC
 *
 */
static inline int ntp_synced(void)
{
	return !(time_status & STA_UNSYNC);
}


/*
 * NTP methods:
 */

/*
 * Update (tick_length, tick_length_base, tick_nsec), based
 * on (tick_usec, ntp_tick_adj, time_freq):
 */
static void ntp_update_frequency(void)
{
	u64 second_length;
	u64 new_base;

	/* Length of one second in nsecs, scaled up by NTP_SCALE_SHIFT */
	second_length		 = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
						<< NTP_SCALE_SHIFT;

	second_length		+= ntp_tick_adj;
	second_length		+= time_freq;

	tick_nsec		 = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT;
	new_base		 = div_u64(second_length, NTP_INTERVAL_FREQ);

	/*
	 * Don't wait for the next second_overflow, apply
	 * the change to the tick length immediately:
	 */
	tick_length		+= new_base - tick_length_base;
	tick_length_base	 = new_base;
}

/*
 * Compute the FLL contribution to the frequency adjustment for @offset64
 * measured over @secs seconds.  STA_MODE is set in time_status only when
 * a non-trivial FLL term is returned; otherwise it is cleared and 0 is
 * returned.
 */
static inline s64 ntp_update_offset_fll(s64 offset64, long secs)
{
	time_status &= ~STA_MODE;

	if (secs < MINSEC)
		return 0;

	if (!(time_status & STA_FLL) && (secs <= MAXSEC))
		return 0;

	time_status |= STA_MODE;

	return div64_long(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs);
}

/*
 * Apply a new phase @offset: update time_freq (clamped to
 * +/-MAXFREQ_SCALED), time_reftime and time_offset.  Does nothing unless
 * STA_PLL is set.  @offset is taken as microseconds unless STA_NANO is set.
 */
static void ntp_update_offset(long offset)
{
	s64 freq_adj;
	s64 offset64;
	long secs;

	if (!(time_status & STA_PLL))
		return;

	if (!(time_status & STA_NANO)) {
		/* Make sure the multiplication below won't overflow */
		offset = clamp(offset, -USEC_PER_SEC, USEC_PER_SEC);
		offset *= NSEC_PER_USEC;
	}

	/*
	 * Scale the phase adjustment and
	 * clamp to the operating range.
	 */
	offset = clamp(offset, -MAXPHASE, MAXPHASE);

	/*
	 * Select how the frequency is to be controlled
	 * and in which mode (PLL or FLL).
	 */
	secs = (long)(__ktime_get_real_seconds() - time_reftime);
	/* With STA_FREQHOLD the elapsed time is treated as zero */
	if (unlikely(time_status & STA_FREQHOLD))
		secs = 0;

	time_reftime = __ktime_get_real_seconds();

	offset64    = offset;
	freq_adj    = ntp_update_offset_fll(offset64, secs);

	/*
	 * Clamp update interval to reduce PLL gain with low
	 * sampling rate (e.g. intermittent network connection)
	 * to avoid instability.
	 */
	if (unlikely(secs > 1 << (SHIFT_PLL + 1 + time_constant)))
		secs = 1 << (SHIFT_PLL + 1 + time_constant);

	freq_adj    += (offset64 * secs) <<
			(NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant));

	/* Clamp the resulting frequency to [-MAXFREQ_SCALED, MAXFREQ_SCALED] */
	freq_adj    = min(freq_adj + time_freq, MAXFREQ_SCALED);

	time_freq   = max(freq_adj, -MAXFREQ_SCALED);

	time_offset = div_s64(offset64 << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ);
}

/**
 * ntp_clear - Clears the NTP state variables
 */
void ntp_clear(void)
{
	time_adjust	= 0;		/* stop active adjtime() */
	time_status	|= STA_UNSYNC;
	time_maxerror	= NTP_PHASE_LIMIT;
	time_esterror	= NTP_PHASE_LIMIT;

	ntp_update_frequency();

	tick_length	= tick_length_base;
	time_offset	= 0;

	ntp_next_leap_sec = TIME64_MAX;
	/* Clear PPS state variables */
	pps_clear();
}


/* Returns the current value of tick_length */
u64 ntp_tick_length(void)
{
	return tick_length;
}

/**
 * ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime_t
 *
 * Provides the time of the next leapsecond against CLOCK_REALTIME in
 * a ktime_t format. Returns KTIME_MAX if no leapsecond is pending.
 */
ktime_t ntp_get_next_leap(void)
{
	ktime_t ret;

	/* Only an armed leap second insertion is reported */
	if ((time_state == TIME_INS) && (time_status & STA_INS))
		return ktime_set(ntp_next_leap_sec, 0);
	ret = KTIME_MAX;
	return ret;
}

/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
* All the kudos should go to Dave for this stuff.
 *
 * Also handles leap second processing, and returns leap offset
 *
 * @secs: current seconds value; compared against ntp_next_leap_sec.
 *
 * Returns -1 when a leap second is inserted, 1 when one is deleted and
 * 0 otherwise.
 */
int second_overflow(time64_t secs)
{
	s64 delta;
	int leap = 0;
	s32 rem;

	/*
	 * Leap second processing. If in leap-insert state at the end of the
	 * day, the system clock is set back one second; if in leap-delete
	 * state, the system clock is set ahead one second.
	 */
	switch (time_state) {
	case TIME_OK:
		/* Arm the state machine and compute the end-of-day boundary */
		if (time_status & STA_INS) {
			time_state = TIME_INS;
			div_s64_rem(secs, SECS_PER_DAY, &rem);
			ntp_next_leap_sec = secs + SECS_PER_DAY - rem;
		} else if (time_status & STA_DEL) {
			time_state = TIME_DEL;
			div_s64_rem(secs + 1, SECS_PER_DAY, &rem);
			ntp_next_leap_sec = secs + SECS_PER_DAY - rem;
		}
		break;
	case TIME_INS:
		/* Request withdrawn, or boundary reached */
		if (!(time_status & STA_INS)) {
			ntp_next_leap_sec = TIME64_MAX;
			time_state = TIME_OK;
		} else if (secs == ntp_next_leap_sec) {
			leap = -1;
			time_state = TIME_OOP;
			printk(KERN_NOTICE
				"Clock: inserting leap second 23:59:60 UTC\n");
		}
		break;
	case TIME_DEL:
		/* Request withdrawn, or boundary reached */
		if (!(time_status & STA_DEL)) {
			ntp_next_leap_sec = TIME64_MAX;
			time_state = TIME_OK;
		} else if (secs == ntp_next_leap_sec) {
			leap = 1;
			ntp_next_leap_sec = TIME64_MAX;
			time_state = TIME_WAIT;
			printk(KERN_NOTICE
				"Clock: deleting leap second 23:59:59 UTC\n");
		}
		break;
	case TIME_OOP:
		ntp_next_leap_sec = TIME64_MAX;
		time_state = TIME_WAIT;
		break;
	case TIME_WAIT:
		/* Stay here until both leap request bits are cleared */
		if (!(time_status & (STA_INS | STA_DEL)))
			time_state = TIME_OK;
		break;
	}


	/* Bump the maxerror field */
	time_maxerror += MAXFREQ / NSEC_PER_USEC;
	if (time_maxerror > NTP_PHASE_LIMIT) {
		time_maxerror = NTP_PHASE_LIMIT;
		time_status |= STA_UNSYNC;
	}

	/* Compute the phase adjustment for the next second */
	tick_length	 = tick_length_base;

	delta		 = ntp_offset_chunk(time_offset);
	time_offset	-= delta;
	tick_length	+= delta;

	/* Check PPS signal */
	pps_dec_valid();

	if (!time_adjust)
		goto out;

	/* Slew at most MAX_TICKADJ usecs of a pending adjtime() per call */
	if (time_adjust > MAX_TICKADJ) {
		time_adjust -= MAX_TICKADJ;
		tick_length += MAX_TICKADJ_SCALED;
		goto out;
	}

	if (time_adjust < -MAX_TICKADJ) {
		time_adjust += MAX_TICKADJ;
		tick_length -= MAX_TICKADJ_SCALED;
		goto out;
	}

	/* Remaining adjustment is within the limit: apply it and finish */
	tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
							 << NTP_SCALE_SHIFT;
	time_adjust = 0;

out:
	return leap;
}

#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
static void sync_hw_clock(struct work_struct *work);
static DECLARE_WORK(sync_work, sync_hw_clock);
static struct hrtimer sync_hrtimer;
#define SYNC_PERIOD_NS (11ULL * 60 * NSEC_PER_SEC)

/* Timer callback: queue the RTC sync work; the work rearms the timer */
static enum hrtimer_restart sync_timer_callback(struct hrtimer *timer)
{
	queue_work(system_freezable_power_efficient_wq, &sync_work);

	return HRTIMER_NORESTART;
}

/*
 * Arm sync_hrtimer relative to the current full second: 2 seconds ahead
 * when @retry is set, SYNC_PERIOD_NS ahead otherwise, in both cases
 * minus @offset_nsec.
 */
static void sched_sync_hw_clock(unsigned long offset_nsec, bool retry)
{
	ktime_t exp = ktime_set(ktime_get_real_seconds(), 0);

	if (retry)
		exp = ktime_add_ns(exp, 2ULL * NSEC_PER_SEC - offset_nsec);
	else
		exp = ktime_add_ns(exp, SYNC_PERIOD_NS - offset_nsec);

	hrtimer_start(&sync_hrtimer, exp, HRTIMER_MODE_ABS);
}

/*
 * Check whether @now is correct versus the required time to update the RTC
 * and calculate the value which needs to be written to the RTC so that the
 * next seconds increment of the RTC after the write is aligned with the next
 * seconds increment of clock REALTIME.
 *
 * tsched     t1 write(t2.tv_sec - 1sec))	t2 RTC increments seconds
 *
 * t2.tv_nsec == 0
 * tsched = t2 - set_offset_nsec
 * newval = t2 - NSEC_PER_SEC
 *
 * ==> newval = tsched + set_offset_nsec - NSEC_PER_SEC
 *
 * As the execution of this code is not guaranteed to happen exactly at
 * tsched this allows it to happen within a fuzzy region:
 *
 *	abs(now - tsched) < FUZZ
 *
 * If @now is not inside the allowed window the function returns false.
 */
static inline bool rtc_tv_nsec_ok(unsigned long set_offset_nsec,
				  struct timespec64 *to_set,
				  const struct timespec64 *now)
{
	/* Allowed error in tv_nsec, arbitrarily set to 5 jiffies in ns. */
	const unsigned long TIME_SET_NSEC_FUZZ = TICK_NSEC * 5;
	struct timespec64 delay = {.tv_sec = -1,
				   .tv_nsec = set_offset_nsec};

	*to_set = timespec64_add(*now, delay);

	/* Slightly late: round down to the full second */
	if (to_set->tv_nsec < TIME_SET_NSEC_FUZZ) {
		to_set->tv_nsec = 0;
		return true;
	}

	/* Slightly early: round up to the next full second */
	if (to_set->tv_nsec > NSEC_PER_SEC - TIME_SET_NSEC_FUZZ) {
		to_set->tv_sec++;
		to_set->tv_nsec = 0;
		return true;
	}
	return false;
}

#ifdef CONFIG_GENERIC_CMOS_UPDATE
/* Weak default; returns -ENODEV unless overridden by a strong definition */
int __weak update_persistent_clock64(struct timespec64 now64)
{
	return -ENODEV;
}
#else
static inline int update_persistent_clock64(struct timespec64 now64)
{
	return -ENODEV;
}
#endif

#ifdef CONFIG_RTC_SYSTOHC
/* Save NTP synchronized time to the RTC */
static int update_rtc(struct timespec64 *to_set, unsigned long *offset_nsec)
{
	struct rtc_device *rtc;
	struct rtc_time tm;
	int err = -ENODEV;

	rtc = rtc_class_open(CONFIG_RTC_SYSTOHC_DEVICE);
	if (!rtc)
		return -ENODEV;

	/* An RTC without a set_time operation is reported as -ENODEV */
	if (!rtc->ops || !rtc->ops->set_time)
		goto out_close;

	/* First call might not have the correct offset */
	if (*offset_nsec == rtc->set_offset_nsec) {
		rtc_time64_to_tm(to_set->tv_sec, &tm);
		err = rtc_set_time(rtc, &tm);
	} else {
		/* Store the update offset and let the caller try again */
		*offset_nsec = rtc->set_offset_nsec;
		err = -EAGAIN;
	}
out_close:
	rtc_class_close(rtc);
	return err;
}
#else
static inline int update_rtc(struct timespec64 *to_set, unsigned long *offset_nsec)
{
	return -ENODEV;
}
#endif

/*
 * If we have an externally synchronized Linux clock, then update RTC clock
 * accordingly every ~11 minutes. Generally RTCs can only store second
 * precision, but many RTCs will adjust the phase of their second tick to
 * match the moment of update. This infrastructure arranges to call to the RTC
 * set at the correct moment to phase synchronize the RTC second tick over
 * with the kernel clock.
*/ static void sync_hw_clock(struct work_struct *work) { /* * The default synchronization offset is 500ms for the deprecated * update_persistent_clock64() under the assumption that it uses * the infamous CMOS clock (MC146818). */ static unsigned long offset_nsec = NSEC_PER_SEC / 2; struct timespec64 now, to_set; int res = -EAGAIN; /* * Don't update if STA_UNSYNC is set and if ntp_notify_cmos_timer() * managed to schedule the work between the timer firing and the * work being able to rearm the timer. Wait for the timer to expire. */ if (!ntp_synced() || hrtimer_is_queued(&sync_hrtimer)) return; ktime_get_real_ts64(&now); /* If @now is not in the allowed window, try again */ if (!rtc_tv_nsec_ok(offset_nsec, &to_set, &now)) goto rearm; /* Take timezone adjusted RTCs into account */ if (persistent_clock_is_local) to_set.tv_sec -= (sys_tz.tz_minuteswest * 60); /* Try the legacy RTC first. */ res = update_persistent_clock64(to_set); if (res != -ENODEV) goto rearm; /* Try the RTC class */ res = update_rtc(&to_set, &offset_nsec); if (res == -ENODEV) return; rearm: sched_sync_hw_clock(offset_nsec, res != 0); } void ntp_notify_cmos_timer(void) { /* * When the work is currently executed but has not yet the timer * rearmed this queues the work immediately again. No big issue, * just a pointless work scheduled. 
*/ if (ntp_synced() && !hrtimer_is_queued(&sync_hrtimer)) queue_work(system_freezable_power_efficient_wq, &sync_work); } static void __init ntp_init_cmos_sync(void) { hrtimer_init(&sync_hrtimer, CLOCK_REALTIME, HRTIMER_MODE_ABS); sync_hrtimer.function = sync_timer_callback; } #else /* CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) */ static inline void __init ntp_init_cmos_sync(void) { } #endif /* !CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) */ /* * Propagate a new txc->status value into the NTP state: */ static inline void process_adj_status(const struct __kernel_timex *txc) { if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { time_state = TIME_OK; time_status = STA_UNSYNC; ntp_next_leap_sec = TIME64_MAX; /* restart PPS frequency calibration */ pps_reset_freq_interval(); } /* * If we turn on PLL adjustments then reset the * reference time to current time. */ if (!(time_status & STA_PLL) && (txc->status & STA_PLL)) time_reftime = __ktime_get_real_seconds(); /* only set allowed bits */ time_status &= STA_RONLY; time_status |= txc->status & ~STA_RONLY; } static inline void process_adjtimex_modes(const struct __kernel_timex *txc, s32 *time_tai) { if (txc->modes & ADJ_STATUS) process_adj_status(txc); if (txc->modes & ADJ_NANO) time_status |= STA_NANO; if (txc->modes & ADJ_MICRO) time_status &= ~STA_NANO; if (txc->modes & ADJ_FREQUENCY) { time_freq = txc->freq * PPM_SCALE; time_freq = min(time_freq, MAXFREQ_SCALED); time_freq = max(time_freq, -MAXFREQ_SCALED); /* update pps_freq */ pps_set_freq(time_freq); } if (txc->modes & ADJ_MAXERROR) time_maxerror = txc->maxerror; if (txc->modes & ADJ_ESTERROR) time_esterror = txc->esterror; if (txc->modes & ADJ_TIMECONST) { time_constant = txc->constant; if (!(time_status & STA_NANO)) time_constant += 4; time_constant = min(time_constant, (long)MAXTC); time_constant = max(time_constant, 0l); } if (txc->modes & ADJ_TAI && txc->constant >= 0 && txc->constant <= MAX_TAI_OFFSET) *time_tai = 
txc->constant; if (txc->modes & ADJ_OFFSET) ntp_update_offset(txc->offset); if (txc->modes & ADJ_TICK) tick_usec = txc->tick; if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET)) ntp_update_frequency(); } /* * adjtimex mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. */ int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, s32 *time_tai, struct audit_ntp_data *ad) { int result; if (txc->modes & ADJ_ADJTIME) { long save_adjust = time_adjust; if (!(txc->modes & ADJ_OFFSET_READONLY)) { /* adjtime() is independent from ntp_adjtime() */ time_adjust = txc->offset; ntp_update_frequency(); audit_ntp_set_old(ad, AUDIT_NTP_ADJUST, save_adjust); audit_ntp_set_new(ad, AUDIT_NTP_ADJUST, time_adjust); } txc->offset = save_adjust; } else { /* If there are input parameters, then process them: */ if (txc->modes) { audit_ntp_set_old(ad, AUDIT_NTP_OFFSET, time_offset); audit_ntp_set_old(ad, AUDIT_NTP_FREQ, time_freq); audit_ntp_set_old(ad, AUDIT_NTP_STATUS, time_status); audit_ntp_set_old(ad, AUDIT_NTP_TAI, *time_tai); audit_ntp_set_old(ad, AUDIT_NTP_TICK, tick_usec); process_adjtimex_modes(txc, time_tai); audit_ntp_set_new(ad, AUDIT_NTP_OFFSET, time_offset); audit_ntp_set_new(ad, AUDIT_NTP_FREQ, time_freq); audit_ntp_set_new(ad, AUDIT_NTP_STATUS, time_status); audit_ntp_set_new(ad, AUDIT_NTP_TAI, *time_tai); audit_ntp_set_new(ad, AUDIT_NTP_TICK, tick_usec); } txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, NTP_SCALE_SHIFT); if (!(time_status & STA_NANO)) txc->offset = (u32)txc->offset / NSEC_PER_USEC; } result = time_state; /* mostly `TIME_OK' */ /* check for errors */ if (is_error_status(time_status)) result = TIME_ERROR; txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) * PPM_SCALE_INV, NTP_SCALE_SHIFT); txc->maxerror = time_maxerror; txc->esterror = time_esterror; txc->status = time_status; txc->constant = time_constant; txc->precision = 1; txc->tolerance = MAXFREQ_SCALED / PPM_SCALE; 
txc->tick = tick_usec; txc->tai = *time_tai; /* fill PPS status fields */ pps_fill_timex(txc); txc->time.tv_sec = ts->tv_sec; txc->time.tv_usec = ts->tv_nsec; if (!(time_status & STA_NANO)) txc->time.tv_usec = ts->tv_nsec / NSEC_PER_USEC; /* Handle leapsec adjustments */ if (unlikely(ts->tv_sec >= ntp_next_leap_sec)) { if ((time_state == TIME_INS) && (time_status & STA_INS)) { result = TIME_OOP; txc->tai++; txc->time.tv_sec--; } if ((time_state == TIME_DEL) && (time_status & STA_DEL)) { result = TIME_WAIT; txc->tai--; txc->time.tv_sec++; } if ((time_state == TIME_OOP) && (ts->tv_sec == ntp_next_leap_sec)) { result = TIME_WAIT; } } return result; } #ifdef CONFIG_NTP_PPS /* actually struct pps_normtime is good old struct timespec, but it is * semantically different (and it is the reason why it was invented): * pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC) */ struct pps_normtime { s64 sec; /* seconds */ long nsec; /* nanoseconds */ }; /* normalize the timestamp so that nsec is in the ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval */ static inline struct pps_normtime pps_normalize_ts(struct timespec64 ts) { struct pps_normtime norm = { .sec = ts.tv_sec, .nsec = ts.tv_nsec }; if (norm.nsec > (NSEC_PER_SEC >> 1)) { norm.nsec -= NSEC_PER_SEC; norm.sec++; } return norm; } /* get current phase correction and jitter */ static inline long pps_phase_filter_get(long *jitter) { *jitter = pps_tf[0] - pps_tf[1]; if (*jitter < 0) *jitter = -*jitter; /* TODO: test various filters */ return pps_tf[0]; } /* add the sample to the phase filter */ static inline void pps_phase_filter_add(long err) { pps_tf[2] = pps_tf[1]; pps_tf[1] = pps_tf[0]; pps_tf[0] = err; } /* decrease frequency calibration interval length. * It is halved after four consecutive unstable intervals. 
*/
static inline void pps_dec_freq_interval(void)
{
	if (--pps_intcnt <= -PPS_INTCOUNT) {
		pps_intcnt = -PPS_INTCOUNT;
		/* Shorten the interval unless already at the minimum */
		if (pps_shift > PPS_INTMIN) {
			pps_shift--;
			pps_intcnt = 0;
		}
	}
}

/* increase frequency calibration interval length.
 * It is doubled after four consecutive stable intervals.
 */
static inline void pps_inc_freq_interval(void)
{
	if (++pps_intcnt >= PPS_INTCOUNT) {
		pps_intcnt = PPS_INTCOUNT;
		/* Lengthen the interval unless already at the maximum */
		if (pps_shift < PPS_INTMAX) {
			pps_shift++;
			pps_intcnt = 0;
		}
	}
}

/* update clock frequency based on MONOTONIC_RAW clock PPS signal
 * timestamps
 *
 * At the end of the calibration interval the difference between the
 * first and last MONOTONIC_RAW clock timestamps divided by the length
 * of the interval becomes the frequency update. If the interval was
 * too long, the data are discarded.
 * Returns the difference between old and new frequency values.
 */
static long hardpps_update_freq(struct pps_normtime freq_norm)
{
	long delta, delta_mod;
	s64 ftemp;

	/* check if the frequency interval was too long */
	if (freq_norm.sec > (2 << pps_shift)) {
		time_status |= STA_PPSERROR;
		pps_errcnt++;
		pps_dec_freq_interval();
		printk_deferred(KERN_ERR
			"hardpps: PPSERROR: interval too long - %lld s\n",
			freq_norm.sec);
		return 0;
	}

	/* here the raw frequency offset and wander (stability) is
	 * calculated. If the wander is less than the wander threshold
	 * the interval is increased; otherwise it is decreased.
	 */
	ftemp = div_s64(((s64)(-freq_norm.nsec)) << NTP_SCALE_SHIFT,
			freq_norm.sec);
	delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT);
	pps_freq = ftemp;
	if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) {
		printk_deferred(KERN_WARNING
				"hardpps: PPSWANDER: change=%ld\n", delta);
		time_status |= STA_PPSWANDER;
		pps_stbcnt++;
		pps_dec_freq_interval();
	} else {	/* good sample */
		pps_inc_freq_interval();
	}

	/* the stability metric is calculated as the average of recent
	 * frequency changes, but is used only for performance
	 * monitoring
	 */
	delta_mod = delta;
	if (delta_mod < 0)
		delta_mod = -delta_mod;
	pps_stabil += (div_s64(((s64)delta_mod) <<
				(NTP_SCALE_SHIFT - SHIFT_USEC),
				NSEC_PER_USEC) - pps_stabil) >> PPS_INTMIN;

	/* if enabled, the system clock frequency is updated */
	if ((time_status & STA_PPSFREQ) != 0 &&
	    (time_status & STA_FREQHOLD) == 0) {
		time_freq = pps_freq;
		ntp_update_frequency();
	}

	return delta;
}

/* correct REALTIME clock phase error against PPS signal */
static void hardpps_update_phase(long error)
{
	long correction = -error;
	long jitter;

	/* add the sample to the median filter */
	pps_phase_filter_add(correction);
	correction = pps_phase_filter_get(&jitter);

	/* Nominal jitter is due to PPS signal noise. If it exceeds the
	 * threshold, the sample is discarded; otherwise, if so enabled,
	 * the time offset is updated.
	 */
	if (jitter > (pps_jitter << PPS_POPCORN)) {
		printk_deferred(KERN_WARNING
				"hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
				jitter, (pps_jitter << PPS_POPCORN));
		time_status |= STA_PPSJITTER;
		pps_jitcnt++;
	} else if (time_status & STA_PPSTIME) {
		/* correct the time using the phase offset */
		time_offset = div_s64(((s64)correction) << NTP_SCALE_SHIFT,
				NTP_INTERVAL_FREQ);
		/* cancel running adjtime() */
		time_adjust = 0;
	}
	/* update jitter */
	pps_jitter += (jitter - pps_jitter) >> PPS_INTMIN;
}

/*
 * __hardpps() - discipline CPU clock oscillator to external PPS signal
 *
 * This routine is called at each PPS signal arrival in order to
 * discipline the CPU clock oscillator to the PPS signal. It takes two
 * parameters: REALTIME and MONOTONIC_RAW clock timestamps. The former
 * is used to correct clock phase error and the latter is used to
 * correct the frequency.
 *
 * This code is based on David Mills's reference nanokernel
 * implementation. It was mostly rewritten but keeps the same idea.
 */
void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
{
	struct pps_normtime pts_norm, freq_norm;

	pts_norm = pps_normalize_ts(*phase_ts);

	/* clear the error bits, they will be set again if needed */
	time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);

	/* indicate signal presence */
	time_status |= STA_PPSSIGNAL;
	pps_valid = PPS_VALID;

	/* when called for the first time,
	 * just start the frequency interval */
	if (unlikely(pps_fbase.tv_sec == 0)) {
		pps_fbase = *raw_ts;
		return;
	}

	/* ok, now we have a base for frequency calculation */
	freq_norm = pps_normalize_ts(timespec64_sub(*raw_ts, pps_fbase));

	/* check that the signal is in the range
	 * [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it */
	if ((freq_norm.sec == 0) ||
			(freq_norm.nsec > MAXFREQ * freq_norm.sec) ||
			(freq_norm.nsec < -MAXFREQ * freq_norm.sec)) {
		time_status |= STA_PPSJITTER;
		/* restart the frequency calibration interval */
		pps_fbase = *raw_ts;
		printk_deferred(KERN_ERR "hardpps: PPSJITTER: bad pulse\n");
		return;
	}

	/* signal is ok */

	/* check if the current frequency interval is finished */
	if (freq_norm.sec >= (1 << pps_shift)) {
		pps_calcnt++;
		/* restart the frequency calibration interval */
		pps_fbase = *raw_ts;
		hardpps_update_freq(freq_norm);
	}

	hardpps_update_phase(pts_norm.nsec);

}
#endif	/* CONFIG_NTP_PPS */

/*
 * Parse the "ntp_tick_adj=" boot parameter into ntp_tick_adj and scale it
 * up by NTP_SCALE_SHIFT.  Returns the kstrtos64() error on a malformed
 * value, 1 otherwise.
 */
static int __init ntp_tick_adj_setup(char *str)
{
	int rc = kstrtos64(str, 0, &ntp_tick_adj);
	if (rc)
		return rc;

	ntp_tick_adj <<= NTP_SCALE_SHIFT;
	return 1;
}

__setup("ntp_tick_adj=", ntp_tick_adj_setup);

/* Boot-time initialization: reset the NTP state and set up the RTC sync */
void __init ntp_init(void)
{
	ntp_clear();
	ntp_init_cmos_sync();
}
|
21 21 21 21 21 8 8 8 || // SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
 *
 * This file contains power management functions related to interrupts.
 */

#include <linux/irq.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/suspend.h>
#include <linux/syscore_ops.h>

#include "internals.h"

/*
 * irq_pm_check_wakeup - handle an interrupt on a line armed for wakeup
 *
 * If IRQD_WAKEUP_ARMED is set on @desc, clear it, mark the descriptor
 * IRQS_SUSPENDED | IRQS_PENDING, bump the disable depth, disable the line
 * and report the wakeup to the PM core.
 *
 * Returns true when the line was armed (and has now been disabled),
 * false otherwise.
 */
bool irq_pm_check_wakeup(struct irq_desc *desc)
{
	if (irqd_is_wakeup_armed(&desc->irq_data)) {
		irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED);
		desc->istate |= IRQS_SUSPENDED | IRQS_PENDING;
		desc->depth++;
		irq_disable(desc);
		pm_system_irq_wakeup(irq_desc_get_irq(desc));
		return true;
	}
	return false;
}

/*
 * Called from __setup_irq() with desc->lock held after @action has
 * been installed in the action chain.
 *
 * Updates the per-descriptor counters (nr_actions, force_resume_depth,
 * no_suspend_depth, cond_suspend_depth) and warns once when the actions
 * sharing the line disagree about IRQF_FORCE_RESUME, or when
 * IRQF_NO_SUSPEND actions are mixed with actions that are neither
 * IRQF_NO_SUSPEND nor IRQF_COND_SUSPEND.
 */
void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action)
{
	desc->nr_actions++;

	if (action->flags & IRQF_FORCE_RESUME)
		desc->force_resume_depth++;

	WARN_ON_ONCE(desc->force_resume_depth &&
		     desc->force_resume_depth != desc->nr_actions);

	if (action->flags & IRQF_NO_SUSPEND)
		desc->no_suspend_depth++;
	else if (action->flags & IRQF_COND_SUSPEND)
		desc->cond_suspend_depth++;

	WARN_ON_ONCE(desc->no_suspend_depth &&
		     (desc->no_suspend_depth +
			desc->cond_suspend_depth) != desc->nr_actions);
}

/*
 * Called from __free_irq() with desc->lock held after @action has
 * been removed from the action chain.
 *
 * Reverses the counter updates done by irq_pm_install_action().
 */
void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action)
{
	desc->nr_actions--;

	if (action->flags & IRQF_FORCE_RESUME)
		desc->force_resume_depth--;

	if (action->flags & IRQF_NO_SUSPEND)
		desc->no_suspend_depth--;
	else if (action->flags & IRQF_COND_SUSPEND)
		desc->cond_suspend_depth--;
}

/*
 * Suspend a single interrupt line. The caller, suspend_device_irqs(),
 * holds desc->lock around this call.
 *
 * Lines with no action, chained lines and lines with a nonzero
 * no_suspend_depth are left alone. Lines configured for wakeup are armed
 * instead of disabled. Everything else is marked IRQS_SUSPENDED and
 * disabled.
 *
 * Returns true when the caller has to issue synchronize_irq() for the
 * line, false when nothing was changed.
 */
static bool suspend_device_irq(struct irq_desc *desc)
{
	unsigned long chipflags = irq_desc_get_chip(desc)->flags;
	struct irq_data *irqd = &desc->irq_data;

	if (!desc->action || irq_desc_is_chained(desc) ||
	    desc->no_suspend_depth)
		return false;

	if (irqd_is_wakeup_set(irqd)) {
		irqd_set(irqd, IRQD_WAKEUP_ARMED);

		if ((chipflags & IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND) &&
		     irqd_irq_disabled(irqd)) {
			/*
			 * Interrupt marked for wakeup is in disabled state.
			 * Enable interrupt here to unmask/enable in irqchip
			 * to be able to resume with such interrupts.
			 */
			__enable_irq(desc);
			irqd_set(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND);
		}
		/*
		 * We return true here to force the caller to issue
		 * synchronize_irq(). We need to make sure that the
		 * IRQD_WAKEUP_ARMED is visible before we return from
		 * suspend_device_irqs().
		 */
		return true;
	}

	desc->istate |= IRQS_SUSPENDED;
	__disable_irq(desc);

	/*
	 * Hardware which has no wakeup source configuration facility
	 * requires that the non wakeup interrupts are masked at the
	 * chip level. The chip implementation indicates that with
	 * IRQCHIP_MASK_ON_SUSPEND.
	 */
	if (chipflags & IRQCHIP_MASK_ON_SUSPEND)
		mask_irq(desc);
	return true;
}

/**
 * suspend_device_irqs - disable all currently enabled interrupt lines
 *
 * During system-wide suspend or hibernation device drivers need to be
 * prevented from receiving interrupts and this function is provided
 * for this purpose.
 *
 * So we disable all interrupts and mark them IRQS_SUSPENDED except
 * for those which are unused, those which are marked as not
 * suspendable via an interrupt request with the flag IRQF_NO_SUSPEND
 * set and those which are marked as active wakeup sources.
 *
 * The active wakeup sources are handled by the flow handler entry
 * code which checks for the IRQD_WAKEUP_ARMED flag, suspends the
 * interrupt and notifies the pm core about the wakeup.
 */
void suspend_device_irqs(void)
{
	struct irq_desc *desc;
	int irq;

	for_each_irq_desc(irq, desc) {
		unsigned long flags;
		bool sync;

		/* Nested-thread interrupts are not handled here. */
		if (irq_settings_is_nested_thread(desc))
			continue;
		raw_spin_lock_irqsave(&desc->lock, flags);
		sync = suspend_device_irq(desc);
		raw_spin_unlock_irqrestore(&desc->lock, flags);

		/* Synchronize only after desc->lock has been dropped. */
		if (sync)
			synchronize_irq(irq);
	}
}

/*
 * Resume a single interrupt line. The caller, resume_irqs(), holds
 * desc->lock around this call.
 *
 * Disarms the wakeup flag, undoes an enable done at suspend time for a
 * wakeup line, and re-enables the line if it was marked IRQS_SUSPENDED.
 * A line that is not marked suspended is only touched when
 * force_resume_depth is nonzero; it is then made to look disabled first
 * so that __enable_irq() performs a full enable.
 */
static void resume_irq(struct irq_desc *desc)
{
	struct irq_data *irqd = &desc->irq_data;

	irqd_clear(irqd, IRQD_WAKEUP_ARMED);

	if (irqd_is_enabled_on_suspend(irqd)) {
		/*
		 * Interrupt marked for wakeup was enabled during suspend
		 * entry. Disable such interrupts to restore them back to
		 * original state.
		 */
		__disable_irq(desc);
		irqd_clear(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND);
	}

	if (desc->istate & IRQS_SUSPENDED)
		goto resume;

	/* Force resume the interrupt? */
	if (!desc->force_resume_depth)
		return;

	/* Pretend that it got disabled ! */
	desc->depth++;
	irq_state_set_disabled(desc);
	irq_state_set_masked(desc);
resume:
	desc->istate &= ~IRQS_SUSPENDED;
	__enable_irq(desc);
}

/*
 * Walk all interrupt descriptors and resume them under desc->lock.
 *
 * @want_early: when true, descriptors whose ->action does not carry
 *		IRQF_EARLY_RESUME are skipped; when false, no descriptor is
 *		skipped on that basis.
 *
 * Nested-thread interrupts are always skipped, mirroring
 * suspend_device_irqs().
 */
static void resume_irqs(bool want_early)
{
	struct irq_desc *desc;
	int irq;

	for_each_irq_desc(irq, desc) {
		unsigned long flags;
		bool is_early = desc->action &&
			desc->action->flags & IRQF_EARLY_RESUME;

		if (!is_early && want_early)
			continue;
		if (irq_settings_is_nested_thread(desc))
			continue;

		raw_spin_lock_irqsave(&desc->lock, flags);
		resume_irq(desc);
		raw_spin_unlock_irqrestore(&desc->lock, flags);
	}
}

/**
 * rearm_wake_irq - rearm a wakeup interrupt line after signaling wakeup
 * @irq: Interrupt to rearm
 *
 * Does nothing when no descriptor is found for @irq, when the line is not
 * marked IRQS_SUSPENDED, or when it is not configured for wakeup.
 * Otherwise the suspended mark is cleared, IRQD_WAKEUP_ARMED is set again
 * and the line is re-enabled.
 */
void rearm_wake_irq(unsigned int irq)
{
	unsigned long flags;
	struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);

	if (!desc)
		return;

	if (!(desc->istate & IRQS_SUSPENDED) ||
	    !irqd_is_wakeup_set(&desc->irq_data))
		goto unlock;

	desc->istate &= ~IRQS_SUSPENDED;
	irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED);
	__enable_irq(desc);

unlock:
	irq_put_desc_busunlock(desc, flags);
}

/**
 * irq_pm_syscore_resume - enable interrupt lines early
 *
 * Enable all interrupt lines with %IRQF_EARLY_RESUME set.
 */
static void irq_pm_syscore_resume(void)
{
	resume_irqs(true);
}

/* Syscore hooks: only a resume callback is provided. */
static struct syscore_ops irq_pm_syscore_ops = {
	.resume		= irq_pm_syscore_resume,
};

/* Register the syscore resume hook at device_initcall time. */
static int __init irq_pm_init_ops(void)
{
	register_syscore_ops(&irq_pm_syscore_ops);
	return 0;
}

device_initcall(irq_pm_init_ops);

/**
 * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs()
 *
 * Enable all non-%IRQF_EARLY_RESUME interrupt lines previously
 * disabled by suspend_device_irqs() that have the IRQS_SUSPENDED flag
 * set as well as those with %IRQF_FORCE_RESUME.
 */
void resume_device_irqs(void)
{
	resume_irqs(false);
}
 |
21 2 2 658 3 3 2 3 1 9 9 3 3 3 1 9 9 10 637 637 635 635 47 47 73 73 74 73 6 621 618 777 776 6 6 6 12 7 2 3 2 6 7 6 15 2 11 11 6 1 1 1 14 1 13 13 1 1 21 13 19 20 1 6 18 11 18 1 18 4 18 1 1 3 2 3 2 1 1 2 1 1 1 1 1 1 1 1 3 1 1 1 3 1 1 1 1 1 2 1 1 3 1 1 21 1 1 5 4 75 4 2 4 8 1 7 9 2 11 || // SPDX-License-Identifier: GPL-2.0-only
/*
 *  fs/userfaultfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *  Copyright (C) 2008-2009 Red Hat, Inc.
 *  Copyright (C) 2015  Red Hat, Inc.
 *
 *  Some part derived from fs/eventfd.c (anon inode setup) and
 *  mm/ksm.c (mm hashing).
 */

#include <linux/list.h>
#include <linux/hashtable.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/mmu_notifier.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/file.h>
#include <linux/bug.h>
#include <linux/anon_inodes.h>
#include <linux/syscalls.h>
#include <linux/userfaultfd_k.h>
#include <linux/mempolicy.h>
#include <linux/ioctl.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/swapops.h>
#include <linux/miscdevice.h>

/* Backing integer for the "unprivileged_userfaultfd" sysctl entry. */
static int sysctl_unprivileged_userfaultfd __read_mostly;

#ifdef CONFIG_SYSCTL
/*
 * sysctl table exposing sysctl_unprivileged_userfaultfd as
 * "unprivileged_userfaultfd" (mode 0644). The value is handled by
 * proc_dointvec_minmax with bounds SYSCTL_ZERO..SYSCTL_ONE. The empty
 * entry terminates the table.
 */
static struct ctl_table vm_userfaultfd_table[] = {
	{
		.procname	= "unprivileged_userfaultfd",
		.data		= &sysctl_unprivileged_userfaultfd,
		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{ }
};
#endif

/* Slab cache used to allocate and free struct userfaultfd_ctx objects. */
static struct kmem_cache *userfaultfd_ctx_cachep __ro_after_init;

/*
 * Start with fault_pending_wqh and fault_wqh so they're more likely
 * to be in the same cacheline.
 *
 * Locking order:
 *	fd_wqh.lock
 *		fault_pending_wqh.lock
 *			fault_wqh.lock
 *		event_wqh.lock
 *
 * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
 * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
 * also taken in IRQ context.
*/
struct userfaultfd_ctx {
	/* waitqueue head for the pending (i.e. not read) userfaults */
	wait_queue_head_t fault_pending_wqh;
	/* waitqueue head for the userfaults */
	wait_queue_head_t fault_wqh;
	/* waitqueue head for the pseudo fd to wakeup poll/read */
	wait_queue_head_t fd_wqh;
	/* waitqueue head for events */
	wait_queue_head_t event_wqh;
	/* a refile sequence protected by fault_pending_wqh lock */
	seqcount_spinlock_t refile_seq;
	/* pseudo fd refcounting */
	refcount_t refcount;
	/* userfaultfd syscall flags */
	unsigned int flags;
	/* features requested from the userspace */
	unsigned int features;
	/* released */
	bool released;
	/* memory mappings are changing because of non-cooperative event */
	atomic_t mmap_changing;
	/* mm with one or more vmas attached to this userfaultfd_ctx */
	struct mm_struct *mm;
};

/*
 * Bookkeeping for one context duplicated at fork: @orig is the context
 * found on the vma being copied, @new the context created for the copy.
 * Entries are linked through @list on the list handed to
 * dup_userfaultfd().
 */
struct userfaultfd_fork_ctx {
	struct userfaultfd_ctx *orig;
	struct userfaultfd_ctx *new;
	struct list_head list;
};

/*
 * One pending unmap notification: the context to notify and the
 * [start, end) values to report, linked through @list.
 */
struct userfaultfd_unmap_ctx {
	struct userfaultfd_ctx *ctx;
	unsigned long start;
	unsigned long end;
	struct list_head list;
};

/*
 * A waiter queued on one of the context's waitqueues: the message to
 * deliver to the reader, the waitqueue entry itself, the owning context
 * and a flag set by userfaultfd_wake_function() when the waiter is woken.
 */
struct userfaultfd_wait_queue {
	struct uffd_msg msg;
	wait_queue_entry_t wq;
	struct userfaultfd_ctx *ctx;
	bool waken;
};

/* Address range passed as wake-up key; len == 0 means wake all. */
struct userfaultfd_wake_range {
	unsigned long start;
	unsigned long len;
};

/* internal indication that UFFD_API ioctl was successfully executed */
#define UFFD_FEATURE_INITIALIZED		(1u << 31)

/* True once UFFD_FEATURE_INITIALIZED has been set in ctx->features. */
static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx)
{
	return ctx->features & UFFD_FEATURE_INITIALIZED;
}

/* True when @ctx is non-NULL and has UFFD_FEATURE_WP_ASYNC enabled. */
static bool userfaultfd_wp_async_ctx(struct userfaultfd_ctx *ctx)
{
	return ctx && (ctx->features & UFFD_FEATURE_WP_ASYNC);
}

/*
 * Whether WP_UNPOPULATED is enabled on the uffd context.  It is only
 * meaningful when userfaultfd_wp()==true on the vma and when it's
 * anonymous.
*/
bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma)
{
	struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;

	/* A vma without a uffd context never has the feature. */
	if (!ctx)
		return false;

	return ctx->features & UFFD_FEATURE_WP_UNPOPULATED;
}

/*
 * Replace the vma's flags with @flags and, for shared mappings whose
 * VM_UFFD_WP bit changed, recompute vma->vm_page_prot.
 */
static void userfaultfd_set_vm_flags(struct vm_area_struct *vma,
				     vm_flags_t flags)
{
	/* Sampled before vm_flags_reset() overwrites the old flags. */
	const bool uffd_wp_changed = (vma->vm_flags ^ flags) & VM_UFFD_WP;

	vm_flags_reset(vma, flags);
	/*
	 * For shared mappings, we want to enable writenotify while
	 * userfaultfd-wp is enabled (see vma_wants_writenotify()). We'll simply
	 * recalculate vma->vm_page_prot whenever userfaultfd-wp changes.
	 */
	if ((vma->vm_flags & VM_SHARED) && uffd_wp_changed)
		vma_set_page_prot(vma);
}

/*
 * Wake callback installed on fault waitqueue entries.
 *
 * @key points to a struct userfaultfd_wake_range. An entry whose fault
 * address lies outside [start, start + len) is skipped, unless len is 0,
 * which matches every entry. A matching entry gets ->waken set and its
 * task woken. When the wake-up succeeds the entry is unlinked from the
 * queue.
 *
 * Returns the result of wake_up_state(), or 0 for a skipped entry.
 */
static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
				     int wake_flags, void *key)
{
	struct userfaultfd_wake_range *range = key;
	int ret;
	struct userfaultfd_wait_queue *uwq;
	unsigned long start, len;

	uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
	ret = 0;
	/* len == 0 means wake all */
	start = range->start;
	len = range->len;
	if (len && (start > uwq->msg.arg.pagefault.address ||
		    start + len <= uwq->msg.arg.pagefault.address))
		goto out;
	WRITE_ONCE(uwq->waken, true);
	/*
	 * The Program-Order guarantees provided by the scheduler
	 * ensure uwq->waken is visible before the task is woken.
	 */
	ret = wake_up_state(wq->private, mode);
	if (ret) {
		/*
		 * Wake only once, autoremove behavior.
		 *
		 * After the effect of list_del_init is visible to the other
		 * CPUs, the waitqueue may disappear from under us, see the
		 * !list_empty_careful() in handle_userfault().
		 *
		 * try_to_wake_up() has an implicit smp_mb(), and the
		 * wq->private is read before calling the extern function
		 * "wake_up_state" (which in turns calls try_to_wake_up).
		 */
		list_del_init(&wq->entry);
	}
out:
	return ret;
}

/**
 * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd
 * context.
 * @ctx: [in] Pointer to the userfaultfd context.
*/
static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
{
	refcount_inc(&ctx->refcount);
}

/**
 * userfaultfd_ctx_put - Releases a reference to the internal userfaultfd
 * context.
 * @ctx: [in] Pointer to userfaultfd context.
 *
 * The userfaultfd context reference must have been previously acquired either
 * with userfaultfd_ctx_get() or userfaultfd_ctx_fdget().
 *
 * When the last reference is dropped, the context's four waitqueues are
 * asserted to be unlocked and empty, the mm reference is dropped with
 * mmdrop() and the context is returned to userfaultfd_ctx_cachep.
 */
static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
{
	if (refcount_dec_and_test(&ctx->refcount)) {
		VM_BUG_ON(spin_is_locked(&ctx->fault_pending_wqh.lock));
		VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh));
		VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock));
		VM_BUG_ON(waitqueue_active(&ctx->fault_wqh));
		VM_BUG_ON(spin_is_locked(&ctx->event_wqh.lock));
		VM_BUG_ON(waitqueue_active(&ctx->event_wqh));
		VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
		VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));
		mmdrop(ctx->mm);
		kmem_cache_free(userfaultfd_ctx_cachep, ctx);
	}
}

/* Zero a uffd_msg, padding included, before it is filled in. */
static inline void msg_init(struct uffd_msg *msg)
{
	BUILD_BUG_ON(sizeof(struct uffd_msg) != 32);
	/*
	 * Must use memset to zero out the paddings or kernel data is
	 * leaked to userland.
	 */
	memset(msg, 0, sizeof(struct uffd_msg));
}

/*
 * Build the UFFD_EVENT_PAGEFAULT message for a fault.
 *
 * @address:      fault address reported unless UFFD_FEATURE_EXACT_ADDRESS
 *                is enabled
 * @real_address: fault address reported when UFFD_FEATURE_EXACT_ADDRESS
 *                is enabled
 * @flags:        fault flags; only FAULT_FLAG_WRITE is examined
 * @reason:       VM_UFFD_* reason; VM_UFFD_WP and VM_UFFD_MINOR are examined
 * @features:     features enabled on the context
 *
 * Returns the message by value.
 */
static inline struct uffd_msg userfault_msg(unsigned long address,
					    unsigned long real_address,
					    unsigned int flags,
					    unsigned long reason,
					    unsigned int features)
{
	struct uffd_msg msg;

	msg_init(&msg);
	msg.event = UFFD_EVENT_PAGEFAULT;

	msg.arg.pagefault.address = (features & UFFD_FEATURE_EXACT_ADDRESS) ?
				    real_address : address;

	/*
	 * These flags indicate why the userfault occurred:
	 * - UFFD_PAGEFAULT_FLAG_WP indicates a write protect fault.
	 * - UFFD_PAGEFAULT_FLAG_MINOR indicates a minor fault.
	 * - Neither of these flags being set indicates a MISSING fault.
	 *
	 * Separately, UFFD_PAGEFAULT_FLAG_WRITE indicates it was a write
	 * fault. Otherwise, it was a read fault.
	 */
	if (flags & FAULT_FLAG_WRITE)
		msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WRITE;
	if (reason & VM_UFFD_WP)
		msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP;
	if (reason & VM_UFFD_MINOR)
		msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_MINOR;
	/* Report the faulting task's pid only when the feature asks for it. */
	if (features & UFFD_FEATURE_THREAD_ID)
		msg.arg.pagefault.feat.ptid = task_pid_vnr(current);
	return msg;
}

#ifdef CONFIG_HUGETLB_PAGE
/*
 * Same functionality as userfaultfd_must_wait below with modifications for
 * hugepmd ranges.
 *
 * Returns true when no page table entry is found, when the entry is
 * none (or a marker), or when it is not writable and the reason is
 * VM_UFFD_WP; false otherwise.
 */
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
					      struct vm_fault *vmf,
					      unsigned long reason)
{
	struct vm_area_struct *vma = vmf->vma;
	pte_t *ptep, pte;
	bool ret = true;

	assert_fault_locked(vmf);

	ptep = hugetlb_walk(vma, vmf->address, vma_mmu_pagesize(vma));
	if (!ptep)
		goto out;

	ret = false;
	pte = huge_ptep_get(ptep);

	/*
	 * Lockless access: we're in a wait_event so it's ok if it
	 * changes under us.  PTE markers should be handled the same as none
	 * ptes here.
	 */
	if (huge_pte_none_mostly(pte))
		ret = true;
	if (!huge_pte_write(pte) && (reason & VM_UFFD_WP))
		ret = true;
out:
	return ret;
}
#else
/* Stub used when hugetlb support is not built in. */
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
					      struct vm_fault *vmf,
					      unsigned long reason)
{
	return false;	/* should never get here */
}
#endif /* CONFIG_HUGETLB_PAGE */

/*
 * Verify the pagetables are still not ok after having registered into
 * the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any
 * userfault that has already been resolved, if userfaultfd_read and
 * UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different
 * threads.
*/
static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
					 struct vm_fault *vmf,
					 unsigned long reason)
{
	struct mm_struct *mm = ctx->mm;
	unsigned long address = vmf->address;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd, _pmd;
	pte_t *pte;
	pte_t ptent;
	/* Default answer while no populated entry has been reached. */
	bool ret = true;

	assert_fault_locked(vmf);

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		goto out;
	p4d = p4d_offset(pgd, address);
	if (!p4d_present(*p4d))
		goto out;
	pud = pud_offset(p4d, address);
	if (!pud_present(*pud))
		goto out;
	pmd = pmd_offset(pud, address);
again:
	_pmd = pmdp_get_lockless(pmd);
	if (pmd_none(_pmd))
		goto out;

	/* From here on something is mapped: wait only if found wanting. */
	ret = false;
	if (!pmd_present(_pmd) || pmd_devmap(_pmd))
		goto out;

	if (pmd_trans_huge(_pmd)) {
		if (!pmd_write(_pmd) && (reason & VM_UFFD_WP))
			ret = true;
		goto out;
	}

	pte = pte_offset_map(pmd, address);
	if (!pte) {
		/* Mapping the pte failed: re-read the pmd and start over. */
		ret = true;
		goto again;
	}
	/*
	 * Lockless access: we're in a wait_event so it's ok if it
	 * changes under us.  PTE markers should be handled the same as none
	 * ptes here.
	 */
	ptent = ptep_get(pte);
	if (pte_none_mostly(ptent))
		ret = true;
	if (!pte_write(ptent) && (reason & VM_UFFD_WP))
		ret = true;
	pte_unmap(pte);

out:
	return ret;
}

/*
 * Map fault flags to the task state used while waiting:
 * FAULT_FLAG_INTERRUPTIBLE takes precedence over FAULT_FLAG_KILLABLE;
 * with neither set the wait is uninterruptible.
 */
static inline unsigned int userfaultfd_get_blocking_state(unsigned int flags)
{
	if (flags & FAULT_FLAG_INTERRUPTIBLE)
		return TASK_INTERRUPTIBLE;

	if (flags & FAULT_FLAG_KILLABLE)
		return TASK_KILLABLE;

	return TASK_UNINTERRUPTIBLE;
}

/*
 * The locking rules involved in returning VM_FAULT_RETRY depending on
 * FAULT_FLAG_ALLOW_RETRY, FAULT_FLAG_RETRY_NOWAIT and
 * FAULT_FLAG_KILLABLE are not straightforward. The "Caution"
 * recommendation in __lock_page_or_retry is not an understatement.
 *
 * If FAULT_FLAG_ALLOW_RETRY is set, the mmap_lock must be released
 * before returning VM_FAULT_RETRY only if FAULT_FLAG_RETRY_NOWAIT is
 * not set.
*
 * If FAULT_FLAG_ALLOW_RETRY is set but FAULT_FLAG_KILLABLE is not
 * set, VM_FAULT_RETRY can still be returned if and only if there are
 * fatal_signal_pending()s, and the mmap_lock must be released before
 * returning it.
 */
vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
{
	struct vm_area_struct *vma = vmf->vma;
	struct mm_struct *mm = vma->vm_mm;
	struct userfaultfd_ctx *ctx;
	struct userfaultfd_wait_queue uwq;
	vm_fault_t ret = VM_FAULT_SIGBUS;
	bool must_wait;
	unsigned int blocking_state;

	/*
	 * We don't do userfault handling for the final child pid update.
	 *
	 * We also don't do userfault handling during
	 * coredumping. hugetlbfs has the special
	 * hugetlb_follow_page_mask() to skip missing pages in the
	 * FOLL_DUMP case, anon memory also checks for FOLL_DUMP with
	 * the no_page_table() helper in follow_page_mask(), but the
	 * shmem_vm_ops->fault method is invoked even during
	 * coredumping and it ends up here.
	 */
	if (current->flags & (PF_EXITING|PF_DUMPCORE))
		goto out;

	assert_fault_locked(vmf);

	ctx = vma->vm_userfaultfd_ctx.ctx;
	if (!ctx)
		goto out;

	BUG_ON(ctx->mm != mm);

	/* Any unrecognized flag is a bug. */
	VM_BUG_ON(reason & ~__VM_UFFD_FLAGS);
	/* 0 or > 1 flags set is a bug; we expect exactly 1. */
	VM_BUG_ON(!reason || (reason & (reason - 1)));

	if (ctx->features & UFFD_FEATURE_SIGBUS)
		goto out;
	if (!(vmf->flags & FAULT_FLAG_USER) && (ctx->flags & UFFD_USER_MODE_ONLY))
		goto out;

	/*
	 * If it's already released don't get it. This avoids to loop
	 * in __get_user_pages if userfaultfd_release waits on the
	 * caller of handle_userfault to release the mmap_lock.
	 */
	if (unlikely(READ_ONCE(ctx->released))) {
		/*
		 * Don't return VM_FAULT_SIGBUS in this case, so a non
		 * cooperative manager can close the uffd after the
		 * last UFFDIO_COPY, without risking to trigger an
		 * involuntary SIGBUS if the process was starting the
		 * userfaultfd while the userfaultfd was still armed
		 * (but after the last UFFDIO_COPY). If the uffd
		 * wasn't already closed when the userfault reached
		 * this point, that would normally be solved by
		 * userfaultfd_must_wait returning 'false'.
		 *
		 * If we were to return VM_FAULT_SIGBUS here, the non
		 * cooperative manager would be instead forced to
		 * always call UFFDIO_UNREGISTER before it can safely
		 * close the uffd.
		 */
		ret = VM_FAULT_NOPAGE;
		goto out;
	}

	/*
	 * Check that we can return VM_FAULT_RETRY.
	 *
	 * NOTE: it should become possible to return VM_FAULT_RETRY
	 * even if FAULT_FLAG_TRIED is set without leading to gup()
	 * -EBUSY failures, if the userfaultfd is to be extended for
	 * VM_UFFD_WP tracking and we intend to arm the userfault
	 * without first stopping userland access to the memory. For
	 * VM_UFFD_MISSING userfaults this is enough for now.
	 */
	if (unlikely(!(vmf->flags & FAULT_FLAG_ALLOW_RETRY))) {
		/*
		 * Validate the invariant that nowait must allow retry
		 * to be sure not to return SIGBUS erroneously on
		 * nowait invocations.
		 */
		BUG_ON(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
#ifdef CONFIG_DEBUG_VM
		if (printk_ratelimit()) {
			printk(KERN_WARNING
			       "FAULT_FLAG_ALLOW_RETRY missing %x\n",
			       vmf->flags);
			dump_stack();
		}
#endif
		goto out;
	}

	/*
	 * Handle nowait, not much to do other than tell it to retry
	 * and wait.
	 */
	ret = VM_FAULT_RETRY;
	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
		goto out;

	/* take the reference before dropping the mmap_lock */
	userfaultfd_ctx_get(ctx);

	init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
	uwq.wq.private = current;
	uwq.msg = userfault_msg(vmf->address, vmf->real_address, vmf->flags,
				reason, ctx->features);
	uwq.ctx = ctx;
	uwq.waken = false;

	blocking_state = userfaultfd_get_blocking_state(vmf->flags);

        /*
         * Take the vma lock now, in order to safely call
         * userfaultfd_huge_must_wait() later. Since acquiring the
         * (sleepable) vma lock can modify the current task state, that
         * must be before explicitly calling set_current_state().
         */
	if (is_vm_hugetlb_page(vma))
		hugetlb_vma_lock_read(vma);

	spin_lock_irq(&ctx->fault_pending_wqh.lock);
	/*
	 * After the __add_wait_queue the uwq is visible to userland
	 * through poll/read().
	 */
	__add_wait_queue(&ctx->fault_pending_wqh, &uwq.wq);
	/*
	 * The smp_mb() after __set_current_state prevents the reads
	 * following the spin_unlock to happen before the list_add in
	 * __add_wait_queue.
	 */
	set_current_state(blocking_state);
	spin_unlock_irq(&ctx->fault_pending_wqh.lock);

	if (!is_vm_hugetlb_page(vma))
		must_wait = userfaultfd_must_wait(ctx, vmf, reason);
	else
		must_wait = userfaultfd_huge_must_wait(ctx, vmf, reason);
	if (is_vm_hugetlb_page(vma))
		hugetlb_vma_unlock_read(vma);
	release_fault_lock(vmf);

	if (likely(must_wait && !READ_ONCE(ctx->released))) {
		wake_up_poll(&ctx->fd_wqh, EPOLLIN);
		schedule();
	}

	__set_current_state(TASK_RUNNING);

	/*
	 * Here we race with the list_del; list_add in
	 * userfaultfd_ctx_read(), however because we don't ever run
	 * list_del_init() to refile across the two lists, the prev
	 * and next pointers will never point to self. list_add also
	 * would never let any of the two pointers to point to
	 * self. So list_empty_careful won't risk to see both pointers
	 * pointing to self at any time during the list refile. The
	 * only case where list_del_init() is called is the full
	 * removal in the wake function and there we don't re-list_add
	 * and it's fine not to block on the spinlock. The uwq on this
	 * kernel stack can be released after the list_del_init.
	 */
	if (!list_empty_careful(&uwq.wq.entry)) {
		spin_lock_irq(&ctx->fault_pending_wqh.lock);
		/*
		 * No need of list_del_init(), the uwq on the stack
		 * will be freed shortly anyway.
		 */
		list_del(&uwq.wq.entry);
		spin_unlock_irq(&ctx->fault_pending_wqh.lock);
	}

	/*
	 * ctx may go away after this if the userfault pseudo fd is
	 * already released.
	 */
	userfaultfd_ctx_put(ctx);

out:
	return ret;
}

/*
 * Queue the event @ewq on ctx->event_wqh and sleep (TASK_KILLABLE) until
 * the reader has consumed it, which userfaultfd_event_complete() signals
 * by zeroing ewq->msg.event. The wait is abandoned when the context has
 * been released or a fatal signal is pending; for an abandoned fork event
 * the new context carried in the message is detached from every vma and
 * its reference dropped.
 *
 * On every path this decrements ctx->mmap_changing and drops one
 * reference on @ctx, so callers take both before calling.
 */
static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
					      struct userfaultfd_wait_queue *ewq)
{
	struct userfaultfd_ctx *release_new_ctx;

	if (WARN_ON_ONCE(current->flags & PF_EXITING))
		goto out;

	ewq->ctx = ctx;
	init_waitqueue_entry(&ewq->wq, current);
	release_new_ctx = NULL;

	spin_lock_irq(&ctx->event_wqh.lock);
	/*
	 * After the __add_wait_queue the uwq is visible to userland
	 * through poll/read().
	 */
	__add_wait_queue(&ctx->event_wqh, &ewq->wq);
	for (;;) {
		set_current_state(TASK_KILLABLE);
		if (ewq->msg.event == 0)
			break;
		if (READ_ONCE(ctx->released) ||
		    fatal_signal_pending(current)) {
			/*
			 * &ewq->wq may be queued in fork_event, but
			 * __remove_wait_queue ignores the head
			 * parameter. It would be a problem if it
			 * didn't.
			 */
			__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
			if (ewq->msg.event == UFFD_EVENT_FORK) {
				struct userfaultfd_ctx *new;

				new = (struct userfaultfd_ctx *)
					(unsigned long)
					ewq->msg.arg.reserved.reserved1;
				release_new_ctx = new;
			}
			break;
		}

		spin_unlock_irq(&ctx->event_wqh.lock);

		wake_up_poll(&ctx->fd_wqh, EPOLLIN);
		schedule();

		spin_lock_irq(&ctx->event_wqh.lock);
	}
	__set_current_state(TASK_RUNNING);
	spin_unlock_irq(&ctx->event_wqh.lock);

	if (release_new_ctx) {
		struct vm_area_struct *vma;
		struct mm_struct *mm = release_new_ctx->mm;
		VMA_ITERATOR(vmi, mm, 0);

		/* the various vma->vm_userfaultfd_ctx still points to it */
		mmap_write_lock(mm);
		for_each_vma(vmi, vma) {
			if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
				vma_start_write(vma);
				vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
				userfaultfd_set_vm_flags(vma,
							 vma->vm_flags & ~__VM_UFFD_FLAGS);
			}
		}
		mmap_write_unlock(mm);

		userfaultfd_ctx_put(release_new_ctx);
	}

	/*
	 * ctx may go away after this if the userfault pseudo fd is
	 * already released.
	 */
out:
	atomic_dec(&ctx->mmap_changing);
	VM_BUG_ON(atomic_read(&ctx->mmap_changing) < 0);
	userfaultfd_ctx_put(ctx);
}

/*
 * Mark the event @ewq as consumed: zero its event code, wake the waiters
 * on event_wqh and unlink the entry. The wake-up uses the _locked
 * variant, so this is meant to run with ctx->event_wqh.lock held.
 */
static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx,
				       struct userfaultfd_wait_queue *ewq)
{
	ewq->msg.event = 0;
	wake_up_locked(&ctx->event_wqh);
	__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
}

/*
 * Set up the userfaultfd state of @vma, a vma being duplicated.
 *
 * When the vma has no context, or its context lacks
 * UFFD_FEATURE_EVENT_FORK, the uffd state is stripped from @vma.
 * Otherwise @vma is pointed at a new context: either the one already
 * recorded in @fcs for the same original context, or a freshly allocated
 * one that inherits flags and features and is appended to @fcs. Creating
 * a new context also takes a reference on the original and bumps its
 * mmap_changing count.
 *
 * Returns 0 on success, -ENOMEM when an allocation fails.
 */
int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
{
	struct userfaultfd_ctx *ctx = NULL, *octx;
	struct userfaultfd_fork_ctx *fctx;

	octx = vma->vm_userfaultfd_ctx.ctx;
	if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
		vma_start_write(vma);
		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
		userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
		return 0;
	}

	/* Reuse the new context if this original was already duplicated. */
	list_for_each_entry(fctx, fcs, list)
		if (fctx->orig == octx) {
			ctx = fctx->new;
			break;
		}

	if (!ctx) {
		fctx = kmalloc(sizeof(*fctx), GFP_KERNEL);
		if (!fctx)
			return -ENOMEM;

		ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL);
		if (!ctx) {
			kfree(fctx);
			return -ENOMEM;
		}

		refcount_set(&ctx->refcount, 1);
		ctx->flags = octx->flags;
		ctx->features = octx->features;
		ctx->released = false;
		atomic_set(&ctx->mmap_changing, 0);
		ctx->mm = vma->vm_mm;
		mmgrab(ctx->mm);

		userfaultfd_ctx_get(octx);
		atomic_inc(&octx->mmap_changing);
		fctx->orig = octx;
		fctx->new = ctx;
		list_add_tail(&fctx->list, fcs);
	}

	vma->vm_userfaultfd_ctx.ctx = ctx;
	return 0;
}

/*
 * Deliver the UFFD_EVENT_FORK event for one duplicated context on the
 * original context and wait for it to complete. The new context pointer
 * travels in the message's reserved1 field.
 */
static void dup_fctx(struct userfaultfd_fork_ctx *fctx)
{
	struct userfaultfd_ctx *ctx = fctx->orig;
	struct userfaultfd_wait_queue ewq;

	msg_init(&ewq.msg);

	ewq.msg.event = UFFD_EVENT_FORK;
	ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new;

	userfaultfd_event_wait_completion(ctx, &ewq);
}

/*
 * Deliver the fork event for every entry collected in @fcs by
 * dup_userfaultfd(), then unlink and free each entry.
 */
void dup_userfaultfd_complete(struct list_head *fcs)
{
	struct userfaultfd_fork_ctx *fctx, *n;

	list_for_each_entry_safe(fctx, n, fcs, list) {
		dup_fctx(fctx);
		list_del(&fctx->list);
		kfree(fctx);
	}
}

/*
 * First half of the remap notification.
 *
 * If @vma's context has UFFD_FEATURE_EVENT_REMAP, store it in @vm_ctx,
 * take a reference and bump mmap_changing. If the context lacks the
 * feature, strip the uffd state from @vma. A vma without a context is
 * left untouched.
 */
void mremap_userfaultfd_prep(struct vm_area_struct *vma,
			     struct vm_userfaultfd_ctx *vm_ctx)
{
	struct userfaultfd_ctx *ctx;

	ctx = vma->vm_userfaultfd_ctx.ctx;

	if (!ctx)
		return;

	if (ctx->features & UFFD_FEATURE_EVENT_REMAP) {
		vm_ctx->ctx = ctx;
		userfaultfd_ctx_get(ctx);
		atomic_inc(&ctx->mmap_changing);
	} else {
		/* Drop uffd context if remap feature not enabled */
		vma_start_write(vma);
		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
		userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
	}
}

/*
 * Second half of the remap notification: deliver UFFD_EVENT_REMAP with
 * @from, @to and @len on the context saved in @vm_ctx and wait for it to
 * complete. Does nothing when no context was saved.
 */
void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
				 unsigned long from, unsigned long to,
				 unsigned long len)
{
	struct userfaultfd_ctx *ctx = vm_ctx->ctx;
	struct userfaultfd_wait_queue ewq;

	if (!ctx)
		return;

	/*
	 * A @to that is not page aligned is not reported; only the
	 * reference is dropped.
	 * NOTE(review): mmap_changing is not decremented on this path -
	 * confirm this is intended.
	 */
	if (to & ~PAGE_MASK) {
		userfaultfd_ctx_put(ctx);
		return;
	}

	msg_init(&ewq.msg);

	ewq.msg.event = UFFD_EVENT_REMAP;
	ewq.msg.arg.remap.from = from;
	ewq.msg.arg.remap.to = to;
	ewq.msg.arg.remap.len = len;

	userfaultfd_event_wait_completion(ctx, &ewq);
}

/*
 * Deliver UFFD_EVENT_REMOVE for [@start, @end) if @vma's context has
 * UFFD_FEATURE_EVENT_REMOVE.
 *
 * Returns true when nothing was done and the mmap read lock is untouched.
 * Returns false after the event has been delivered; in that case the mmap
 * read lock has been dropped here and is not re-taken.
 */
bool userfaultfd_remove(struct vm_area_struct *vma,
			unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	struct userfaultfd_ctx *ctx;
	struct userfaultfd_wait_queue ewq;

	ctx = vma->vm_userfaultfd_ctx.ctx;
	if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
		return true;

	userfaultfd_ctx_get(ctx);
	atomic_inc(&ctx->mmap_changing);
	mmap_read_unlock(mm);

	msg_init(&ewq.msg);

	ewq.msg.event = UFFD_EVENT_REMOVE;
	ewq.msg.arg.remove.start = start;
	ewq.msg.arg.remove.end = end;

	userfaultfd_event_wait_completion(ctx, &ewq);

	return false;
}

/*
 * True when @unmaps already holds an entry for the same context and the
 * same start/end values.
 */
static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
			  unsigned long start, unsigned long end)
{
	struct userfaultfd_unmap_ctx *unmap_ctx;

	list_for_each_entry(unmap_ctx, unmaps, list)
		if (unmap_ctx->ctx == ctx && unmap_ctx->start == start &&
		    unmap_ctx->end == end)
			return true;

	return false;
}

/*
 * Record a pending unmap notification for @vma on @unmaps.
 *
 * Nothing is recorded when the vma has no context, the context lacks
 * UFFD_FEATURE_EVENT_UNMAP, or an identical entry is already queued.
 * Recording an entry takes a context reference and bumps mmap_changing.
 *
 * Returns 0 on success (including the nothing-to-do cases), -ENOMEM when
 * the entry cannot be allocated.
 */
int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start,
			   unsigned long end, struct list_head *unmaps)
{
	struct userfaultfd_unmap_ctx *unmap_ctx;
	struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;

	if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
	    has_unmap_ctx(ctx, unmaps, start, end))
		return 0;

	unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
	if (!unmap_ctx)
		return -ENOMEM;

	userfaultfd_ctx_get(ctx);
	atomic_inc(&ctx->mmap_changing);
	unmap_ctx->ctx = ctx;
	unmap_ctx->start = start;
	unmap_ctx->end = end;
	list_add_tail(&unmap_ctx->list, unmaps);

	return 0;
}

/*
 * Deliver UFFD_EVENT_UNMAP for every entry on @uf, waiting for each to
 * complete, then unlink and free the entry. @mm is not used here.
 */
void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
{
	struct userfaultfd_unmap_ctx *ctx, *n;
	struct userfaultfd_wait_queue ewq;

	list_for_each_entry_safe(ctx, n, uf, list) {
		msg_init(&ewq.msg);

		ewq.msg.event = UFFD_EVENT_UNMAP;
		ewq.msg.arg.remove.start = ctx->start;
		ewq.msg.arg.remove.end = ctx->end;

		userfaultfd_event_wait_completion(ctx->ctx, &ewq);

		list_del(&ctx->list);
		kfree(ctx);
	}
}

/*
 * Release handler for the userfaultfd file.
 *
 * Marks the context released, detaches it from every vma of its mm (when
 * the mm can still be pinned), wakes all fault and event waiters, signals
 * EPOLLHUP on the fd waitqueue and drops the file's context reference.
 * Always returns 0.
 */
static int userfaultfd_release(struct inode *inode, struct file *file)
{
	struct userfaultfd_ctx *ctx = file->private_data;
	struct mm_struct *mm = ctx->mm;
	struct vm_area_struct *vma, *prev;
	/* len == 0 means wake all */
	struct userfaultfd_wake_range range = { .len = 0, };
	unsigned long new_flags;
	VMA_ITERATOR(vmi, mm, 0);

	WRITE_ONCE(ctx->released, true);

	/* No users of the mm left: skip the vma walk and only wake waiters. */
	if (!mmget_not_zero(mm))
		goto wakeup;

	/*
	 * Flush page faults out of all CPUs. NOTE: all page faults
	 * must be retried without returning VM_FAULT_SIGBUS if
	 * userfaultfd_ctx_get() succeeds but vma->vma_userfault_ctx
	 * changes while handle_userfault released the mmap_lock. So
	 * it's critical that released is set to true (above), before
	 * taking the mmap_lock for writing.
	 */
	mmap_write_lock(mm);
	prev = NULL;
	for_each_vma(vmi, vma) {
		cond_resched();
		BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^
		       !!(vma->vm_flags & __VM_UFFD_FLAGS));
		if (vma->vm_userfaultfd_ctx.ctx != ctx) {
			prev = vma;
			continue;
		}
		new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
		vma = vma_modify_flags_uffd(&vmi, prev, vma, vma->vm_start,
					    vma->vm_end, new_flags,
					    NULL_VM_UFFD_CTX);

		vma_start_write(vma);
		userfaultfd_set_vm_flags(vma, new_flags);
		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;

		prev = vma;
	}
	mmap_write_unlock(mm);
	mmput(mm);
wakeup:
	/*
	 * After no new page faults can wait on this fault_*wqh, flush
	 * the last page faults that may have been already waiting on
	 * the fault_*wqh.
	 */
	spin_lock_irq(&ctx->fault_pending_wqh.lock);
	__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
	__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
	spin_unlock_irq(&ctx->fault_pending_wqh.lock);

	/* Flush pending events that may still wait on event_wqh */
	wake_up_all(&ctx->event_wqh);

	wake_up_poll(&ctx->fd_wqh, EPOLLHUP);
	userfaultfd_ctx_put(ctx);
	return 0;
}

/*
 * Return the waiter at the tail of @wqh, or NULL when the queue is empty.
 *
 * fault_pending_wqh.lock must be held by the caller
 */
static inline struct userfaultfd_wait_queue *find_userfault_in(
		wait_queue_head_t *wqh)
{
	wait_queue_entry_t *wq;
	struct userfaultfd_wait_queue *uwq;

	lockdep_assert_held(&wqh->lock);

	uwq = NULL;
	if (!waitqueue_active(wqh))
		goto out;
	/* walk in reverse to provide FIFO behavior to read userfaults */
	wq = list_last_entry(&wqh->head, typeof(*wq), entry);
	uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
out:
	return uwq;
}

/* Oldest pending (not yet read) fault of @ctx, or NULL. */
static inline struct userfaultfd_wait_queue *find_userfault(
		struct userfaultfd_ctx *ctx)
{
	return find_userfault_in(&ctx->fault_pending_wqh);
}

/* Oldest queued event of @ctx, or NULL. */
static inline struct userfaultfd_wait_queue *find_userfault_evt(
		struct userfaultfd_ctx *ctx)
{
	return find_userfault_in(&ctx->event_wqh);
}

/*
 * Poll handler for the userfaultfd file.
 *
 * Returns EPOLLERR when the context is not initialized or the file was
 * not opened with O_NONBLOCK. Otherwise returns EPOLLIN when a pending
 * fault or an event is queued, and 0 when neither is.
 */
static __poll_t userfaultfd_poll(struct file *file, poll_table *wait)
{
	struct userfaultfd_ctx *ctx = file->private_data;
	__poll_t ret;

	poll_wait(file, &ctx->fd_wqh, wait);

	if (!userfaultfd_is_initialized(ctx))
		return EPOLLERR;

	/*
	 * poll() never guarantees that read won't block.
	 * userfaults can be waken before they're read().
	 */
	if (unlikely(!(file->f_flags & O_NONBLOCK)))
		return EPOLLERR;
	/*
	 * lockless access to see if there are pending faults
	 * __pollwait last action is the add_wait_queue but
	 * the spin_unlock would allow the waitqueue_active to
	 * pass above the actual list_add inside
	 * add_wait_queue critical section. So use a full
	 * memory barrier to serialize the list_add write of
	 * add_wait_queue() with the waitqueue_active read
	 * below.
	 */
	ret = 0;
	smp_mb();
	if (waitqueue_active(&ctx->fault_pending_wqh))
		ret = EPOLLIN;
	else if (waitqueue_active(&ctx->event_wqh))
		ret = EPOLLIN;

	return ret;
}

static const struct file_operations userfaultfd_fops;

/*
 * Create the file descriptor for the context @new produced by a fork
 * event and store it in @msg, clearing the reserved1 field that carried
 * the context pointer.
 *
 * Returns 0 on success or the negative value returned by
 * anon_inode_getfd_secure().
 */
static int resolve_userfault_fork(struct userfaultfd_ctx *new,
				  struct inode *inode,
				  struct uffd_msg *msg)
{
	int fd;

	fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, new,
			O_RDONLY | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
	if (fd < 0)
		return fd;

	msg->arg.reserved.reserved1 = 0;
	msg->arg.fork.ufd = fd;
	return 0;
}

static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
				    struct uffd_msg *msg, struct inode *inode)
{
	ssize_t ret;
	DECLARE_WAITQUEUE(wait, current);
	struct userfaultfd_wait_queue *uwq;
	/*
	 * Handling fork event requires sleeping operations, so
	 * we drop the event_wqh lock, then do these ops, then
	 * lock it back and wake up the waiter. While the lock is
	 * dropped the ewq may go away so we keep track of it
	 * carefully.
*/ LIST_HEAD(fork_event); struct userfaultfd_ctx *fork_nctx = NULL; /* always take the fd_wqh lock before the fault_pending_wqh lock */ spin_lock_irq(&ctx->fd_wqh.lock); __add_wait_queue(&ctx->fd_wqh, &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); spin_lock(&ctx->fault_pending_wqh.lock); uwq = find_userfault(ctx); if (uwq) { /* * Use a seqcount to repeat the lockless check * in wake_userfault() to avoid missing * wakeups because during the refile both * waitqueue could become empty if this is the * only userfault. */ write_seqcount_begin(&ctx->refile_seq); /* * The fault_pending_wqh.lock prevents the uwq * to disappear from under us. * * Refile this userfault from * fault_pending_wqh to fault_wqh, it's not * pending anymore after we read it. * * Use list_del() by hand (as * userfaultfd_wake_function also uses * list_del_init() by hand) to be sure nobody * changes __remove_wait_queue() to use * list_del_init() in turn breaking the * !list_empty_careful() check in * handle_userfault(). The uwq->wq.head list * must never be empty at any time during the * refile, or the waitqueue could disappear * from under us. The "wait_queue_head_t" * parameter of __remove_wait_queue() is unused * anyway. */ list_del(&uwq->wq.entry); add_wait_queue(&ctx->fault_wqh, &uwq->wq); write_seqcount_end(&ctx->refile_seq); /* careful to always initialize msg if ret == 0 */ *msg = uwq->msg; spin_unlock(&ctx->fault_pending_wqh.lock); ret = 0; break; } spin_unlock(&ctx->fault_pending_wqh.lock); spin_lock(&ctx->event_wqh.lock); uwq = find_userfault_evt(ctx); if (uwq) { *msg = uwq->msg; if (uwq->msg.event == UFFD_EVENT_FORK) { fork_nctx = (struct userfaultfd_ctx *) (unsigned long) uwq->msg.arg.reserved.reserved1; list_move(&uwq->wq.entry, &fork_event); /* * fork_nctx can be freed as soon as * we drop the lock, unless we take a * reference on it. 
*/ userfaultfd_ctx_get(fork_nctx); spin_unlock(&ctx->event_wqh.lock); ret = 0; break; } userfaultfd_event_complete(ctx, uwq); spin_unlock(&ctx->event_wqh.lock); ret = 0; break; } spin_unlock(&ctx->event_wqh.lock); if (signal_pending(current)) { ret = -ERESTARTSYS; break; } if (no_wait) { ret = -EAGAIN; break; } spin_unlock_irq(&ctx->fd_wqh.lock); schedule(); spin_lock_irq(&ctx->fd_wqh.lock); } __remove_wait_queue(&ctx->fd_wqh, &wait); __set_current_state(TASK_RUNNING); spin_unlock_irq(&ctx->fd_wqh.lock); if (!ret && msg->event == UFFD_EVENT_FORK) { ret = resolve_userfault_fork(fork_nctx, inode, msg); spin_lock_irq(&ctx->event_wqh.lock); if (!list_empty(&fork_event)) { /* * The fork thread didn't abort, so we can * drop the temporary refcount. */ userfaultfd_ctx_put(fork_nctx); uwq = list_first_entry(&fork_event, typeof(*uwq), wq.entry); /* * If fork_event list wasn't empty and in turn * the event wasn't already released by fork * (the event is allocated on fork kernel * stack), put the event back to its place in * the event_wq. fork_event head will be freed * as soon as we return so the event cannot * stay queued there no matter the current * "ret" value. */ list_del(&uwq->wq.entry); __add_wait_queue(&ctx->event_wqh, &uwq->wq); /* * Leave the event in the waitqueue and report * error to userland if we failed to resolve * the userfault fork. */ if (likely(!ret)) userfaultfd_event_complete(ctx, uwq); } else { /* * Here the fork thread aborted and the * refcount from the fork thread on fork_nctx * has already been released. We still hold * the reference we took before releasing the * lock above. If resolve_userfault_fork * failed we've to drop it because the * fork_nctx has to be freed in such case. If * it succeeded we'll hold it because the new * uffd references it. 
*/ if (ret) userfaultfd_ctx_put(fork_nctx); } spin_unlock_irq(&ctx->event_wqh.lock); } return ret; } static ssize_t userfaultfd_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct userfaultfd_ctx *ctx = file->private_data; ssize_t _ret, ret = 0; struct uffd_msg msg; int no_wait = file->f_flags & O_NONBLOCK; struct inode *inode = file_inode(file); if (!userfaultfd_is_initialized(ctx)) return -EINVAL; for (;;) { if (count < sizeof(msg)) return ret ? ret : -EINVAL; _ret = userfaultfd_ctx_read(ctx, no_wait, &msg, inode); if (_ret < 0) return ret ? ret : _ret; if (copy_to_user((__u64 __user *) buf, &msg, sizeof(msg))) return ret ? ret : -EFAULT; ret += sizeof(msg); buf += sizeof(msg); count -= sizeof(msg); /* * Allow to read more than one fault at time but only * block if waiting for the very first one. */ no_wait = O_NONBLOCK; } } static void __wake_userfault(struct userfaultfd_ctx *ctx, struct userfaultfd_wake_range *range) { spin_lock_irq(&ctx->fault_pending_wqh.lock); /* wake all in the range and autoremove */ if (waitqueue_active(&ctx->fault_pending_wqh)) __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, range); if (waitqueue_active(&ctx->fault_wqh)) __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range); spin_unlock_irq(&ctx->fault_pending_wqh.lock); } static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, struct userfaultfd_wake_range *range) { unsigned seq; bool need_wakeup; /* * To be sure waitqueue_active() is not reordered by the CPU * before the pagetable update, use an explicit SMP memory * barrier here. PT lock release or mmap_read_unlock(mm) still * have release semantics that can allow the * waitqueue_active() to be reordered before the pte update. */ smp_mb(); /* * Use waitqueue_active because it's very frequent to * change the address space atomically even if there are no * userfaults yet. So we take the spinlock only when we're * sure we've userfaults to wake. 
*/ do { seq = read_seqcount_begin(&ctx->refile_seq); need_wakeup = waitqueue_active(&ctx->fault_pending_wqh) || waitqueue_active(&ctx->fault_wqh); cond_resched(); } while (read_seqcount_retry(&ctx->refile_seq, seq)); if (need_wakeup) __wake_userfault(ctx, range); } static __always_inline int validate_unaligned_range( struct mm_struct *mm, __u64 start, __u64 len) { __u64 task_size = mm->task_size; if (len & ~PAGE_MASK) return -EINVAL; if (!len) return -EINVAL; if (start < mmap_min_addr) return -EINVAL; if (start >= task_size) return -EINVAL; if (len > task_size - start) return -EINVAL; if (start + len <= start) return -EINVAL; return 0; } static __always_inline int validate_range(struct mm_struct *mm, __u64 start, __u64 len) { if (start & ~PAGE_MASK) return -EINVAL; return validate_unaligned_range(mm, start, len); } static int userfaultfd_register(struct userfaultfd_ctx *ctx, unsigned long arg) { struct mm_struct *mm = ctx->mm; struct vm_area_struct *vma, *prev, *cur; int ret; struct uffdio_register uffdio_register; struct uffdio_register __user *user_uffdio_register; unsigned long vm_flags, new_flags; bool found; bool basic_ioctls; unsigned long start, end, vma_end; struct vma_iterator vmi; bool wp_async = userfaultfd_wp_async_ctx(ctx); user_uffdio_register = (struct uffdio_register __user *) arg; ret = -EFAULT; if (copy_from_user(&uffdio_register, user_uffdio_register, sizeof(uffdio_register)-sizeof(__u64))) goto out; ret = -EINVAL; if (!uffdio_register.mode) goto out; if (uffdio_register.mode & ~UFFD_API_REGISTER_MODES) goto out; vm_flags = 0; if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MISSING) vm_flags |= VM_UFFD_MISSING; if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP) { #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP goto out; #endif vm_flags |= VM_UFFD_WP; } if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR) { #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR goto out; #endif vm_flags |= VM_UFFD_MINOR; } ret = validate_range(mm, uffdio_register.range.start, 
uffdio_register.range.len); if (ret) goto out; start = uffdio_register.range.start; end = start + uffdio_register.range.len; ret = -ENOMEM; if (!mmget_not_zero(mm)) goto out; ret = -EINVAL; mmap_write_lock(mm); vma_iter_init(&vmi, mm, start); vma = vma_find(&vmi, end); if (!vma) goto out_unlock; /* * If the first vma contains huge pages, make sure start address * is aligned to huge page size. */ if (is_vm_hugetlb_page(vma)) { unsigned long vma_hpagesize = vma_kernel_pagesize(vma); if (start & (vma_hpagesize - 1)) goto out_unlock; } /* * Search for not compatible vmas. */ found = false; basic_ioctls = false; cur = vma; do { cond_resched(); BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^ !!(cur->vm_flags & __VM_UFFD_FLAGS)); /* check not compatible vmas */ ret = -EINVAL; if (!vma_can_userfault(cur, vm_flags, wp_async)) goto out_unlock; /* * UFFDIO_COPY will fill file holes even without * PROT_WRITE. This check enforces that if this is a * MAP_SHARED, the process has write permission to the backing * file. If VM_MAYWRITE is set it also enforces that on a * MAP_SHARED vma: there is no F_WRITE_SEAL and no further * F_WRITE_SEAL can be taken until the vma is destroyed. */ ret = -EPERM; if (unlikely(!(cur->vm_flags & VM_MAYWRITE))) goto out_unlock; /* * If this vma contains ending address, and huge pages * check alignment. */ if (is_vm_hugetlb_page(cur) && end <= cur->vm_end && end > cur->vm_start) { unsigned long vma_hpagesize = vma_kernel_pagesize(cur); ret = -EINVAL; if (end & (vma_hpagesize - 1)) goto out_unlock; } if ((vm_flags & VM_UFFD_WP) && !(cur->vm_flags & VM_MAYWRITE)) goto out_unlock; /* * Check that this vma isn't already owned by a * different userfaultfd. We can't allow more than one * userfaultfd to own a single vma simultaneously or we * wouldn't know which one to deliver the userfaults to. 
*/ ret = -EBUSY; if (cur->vm_userfaultfd_ctx.ctx && cur->vm_userfaultfd_ctx.ctx != ctx) goto out_unlock; /* * Note vmas containing huge pages */ if (is_vm_hugetlb_page(cur)) basic_ioctls = true; found = true; } for_each_vma_range(vmi, cur, end); BUG_ON(!found); vma_iter_set(&vmi, start); prev = vma_prev(&vmi); if (vma->vm_start < start) prev = vma; ret = 0; for_each_vma_range(vmi, vma, end) { cond_resched(); BUG_ON(!vma_can_userfault(vma, vm_flags, wp_async)); BUG_ON(vma->vm_userfaultfd_ctx.ctx && vma->vm_userfaultfd_ctx.ctx != ctx); WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); /* * Nothing to do: this vma is already registered into this * userfaultfd and with the right tracking mode too. */ if (vma->vm_userfaultfd_ctx.ctx == ctx && (vma->vm_flags & vm_flags) == vm_flags) goto skip; if (vma->vm_start > start) start = vma->vm_start; vma_end = min(end, vma->vm_end); new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags; vma = vma_modify_flags_uffd(&vmi, prev, vma, start, vma_end, new_flags, (struct vm_userfaultfd_ctx){ctx}); if (IS_ERR(vma)) { ret = PTR_ERR(vma); break; } /* * In the vma_merge() successful mprotect-like case 8: * the next vma was merged into the current one and * the current one has not been updated yet. */ vma_start_write(vma); userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx.ctx = ctx; if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma)) hugetlb_unshare_all_pmds(vma); skip: prev = vma; start = vma->vm_end; } out_unlock: mmap_write_unlock(mm); mmput(mm); if (!ret) { __u64 ioctls_out; ioctls_out = basic_ioctls ? UFFD_API_RANGE_IOCTLS_BASIC : UFFD_API_RANGE_IOCTLS; /* * Declare the WP ioctl only if the WP mode is * specified and all checks passed with the range */ if (!(uffdio_register.mode & UFFDIO_REGISTER_MODE_WP)) ioctls_out &= ~((__u64)1 << _UFFDIO_WRITEPROTECT); /* CONTINUE ioctl is only supported for MINOR ranges. 
*/ if (!(uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR)) ioctls_out &= ~((__u64)1 << _UFFDIO_CONTINUE); /* * Now that we scanned all vmas we can already tell * userland which ioctls methods are guaranteed to * succeed on this range. */ if (put_user(ioctls_out, &user_uffdio_register->ioctls)) ret = -EFAULT; } out: return ret; } static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, unsigned long arg) { struct mm_struct *mm = ctx->mm; struct vm_area_struct *vma, *prev, *cur; int ret; struct uffdio_range uffdio_unregister; unsigned long new_flags; bool found; unsigned long start, end, vma_end; const void __user *buf = (void __user *)arg; struct vma_iterator vmi; bool wp_async = userfaultfd_wp_async_ctx(ctx); ret = -EFAULT; if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister))) goto out; ret = validate_range(mm, uffdio_unregister.start, uffdio_unregister.len); if (ret) goto out; start = uffdio_unregister.start; end = start + uffdio_unregister.len; ret = -ENOMEM; if (!mmget_not_zero(mm)) goto out; mmap_write_lock(mm); ret = -EINVAL; vma_iter_init(&vmi, mm, start); vma = vma_find(&vmi, end); if (!vma) goto out_unlock; /* * If the first vma contains huge pages, make sure start address * is aligned to huge page size. */ if (is_vm_hugetlb_page(vma)) { unsigned long vma_hpagesize = vma_kernel_pagesize(vma); if (start & (vma_hpagesize - 1)) goto out_unlock; } /* * Search for not compatible vmas. */ found = false; cur = vma; do { cond_resched(); BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^ !!(cur->vm_flags & __VM_UFFD_FLAGS)); /* * Check not compatible vmas, not strictly required * here as not compatible vmas cannot have an * userfaultfd_ctx registered on them, but this * provides for more strict behavior to notice * unregistration errors. 
*/ if (!vma_can_userfault(cur, cur->vm_flags, wp_async)) goto out_unlock; found = true; } for_each_vma_range(vmi, cur, end); BUG_ON(!found); vma_iter_set(&vmi, start); prev = vma_prev(&vmi); if (vma->vm_start < start) prev = vma; ret = 0; for_each_vma_range(vmi, vma, end) { cond_resched(); BUG_ON(!vma_can_userfault(vma, vma->vm_flags, wp_async)); /* * Nothing to do: this vma is already registered into this * userfaultfd and with the right tracking mode too. */ if (!vma->vm_userfaultfd_ctx.ctx) goto skip; WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); if (vma->vm_start > start) start = vma->vm_start; vma_end = min(end, vma->vm_end); if (userfaultfd_missing(vma)) { /* * Wake any concurrent pending userfault while * we unregister, so they will not hang * permanently and it avoids userland to call * UFFDIO_WAKE explicitly. */ struct userfaultfd_wake_range range; range.start = start; range.len = vma_end - start; wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range); } /* Reset ptes for the whole vma range if wr-protected */ if (userfaultfd_wp(vma)) uffd_wp_range(vma, start, vma_end - start, false); new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS; vma = vma_modify_flags_uffd(&vmi, prev, vma, start, vma_end, new_flags, NULL_VM_UFFD_CTX); if (IS_ERR(vma)) { ret = PTR_ERR(vma); break; } /* * In the vma_merge() successful mprotect-like case 8: * the next vma was merged into the current one and * the current one has not been updated yet. */ vma_start_write(vma); userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; skip: prev = vma; start = vma->vm_end; } out_unlock: mmap_write_unlock(mm); mmput(mm); out: return ret; } /* * userfaultfd_wake may be used in combination with the * UFFDIO_*_MODE_DONTWAKE to wakeup userfaults in batches. 
*/ static int userfaultfd_wake(struct userfaultfd_ctx *ctx, unsigned long arg) { int ret; struct uffdio_range uffdio_wake; struct userfaultfd_wake_range range; const void __user *buf = (void __user *)arg; ret = -EFAULT; if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake))) goto out; ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len); if (ret) goto out; range.start = uffdio_wake.start; range.len = uffdio_wake.len; /* * len == 0 means wake all and we don't want to wake all here, * so check it again to be sure. */ VM_BUG_ON(!range.len); wake_userfault(ctx, &range); ret = 0; out: return ret; } static int userfaultfd_copy(struct userfaultfd_ctx *ctx, unsigned long arg) { __s64 ret; struct uffdio_copy uffdio_copy; struct uffdio_copy __user *user_uffdio_copy; struct userfaultfd_wake_range range; uffd_flags_t flags = 0; user_uffdio_copy = (struct uffdio_copy __user *) arg; ret = -EAGAIN; if (atomic_read(&ctx->mmap_changing)) goto out; ret = -EFAULT; if (copy_from_user(&uffdio_copy, user_uffdio_copy, /* don't copy "copy" last field */ sizeof(uffdio_copy)-sizeof(__s64))) goto out; ret = validate_unaligned_range(ctx->mm, uffdio_copy.src, uffdio_copy.len); if (ret) goto out; ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len); if (ret) goto out; ret = -EINVAL; if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP)) goto out; if (uffdio_copy.mode & UFFDIO_COPY_MODE_WP) flags |= MFILL_ATOMIC_WP; if (mmget_not_zero(ctx->mm)) { ret = mfill_atomic_copy(ctx->mm, uffdio_copy.dst, uffdio_copy.src, uffdio_copy.len, &ctx->mmap_changing, flags); mmput(ctx->mm); } else { return -ESRCH; } if (unlikely(put_user(ret, &user_uffdio_copy->copy))) return -EFAULT; if (ret < 0) goto out; BUG_ON(!ret); /* len == 0 would wake all */ range.len = ret; if (!(uffdio_copy.mode & UFFDIO_COPY_MODE_DONTWAKE)) { range.start = uffdio_copy.dst; wake_userfault(ctx, &range); } ret = range.len == uffdio_copy.len ? 
0 : -EAGAIN; out: return ret; } static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, unsigned long arg) { __s64 ret; struct uffdio_zeropage uffdio_zeropage; struct uffdio_zeropage __user *user_uffdio_zeropage; struct userfaultfd_wake_range range; user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg; ret = -EAGAIN; if (atomic_read(&ctx->mmap_changing)) goto out; ret = -EFAULT; if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage, /* don't copy "zeropage" last field */ sizeof(uffdio_zeropage)-sizeof(__s64))) goto out; ret = validate_range(ctx->mm, uffdio_zeropage.range.start, uffdio_zeropage.range.len); if (ret) goto out; ret = -EINVAL; if (uffdio_zeropage.mode & ~UFFDIO_ZEROPAGE_MODE_DONTWAKE) goto out; if (mmget_not_zero(ctx->mm)) { ret = mfill_atomic_zeropage(ctx->mm, uffdio_zeropage.range.start, uffdio_zeropage.range.len, &ctx->mmap_changing); mmput(ctx->mm); } else { return -ESRCH; } if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage))) return -EFAULT; if (ret < 0) goto out; /* len == 0 would wake all */ BUG_ON(!ret); range.len = ret; if (!(uffdio_zeropage.mode & UFFDIO_ZEROPAGE_MODE_DONTWAKE)) { range.start = uffdio_zeropage.range.start; wake_userfault(ctx, &range); } ret = range.len == uffdio_zeropage.range.len ? 
0 : -EAGAIN; out: return ret; } static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx, unsigned long arg) { int ret; struct uffdio_writeprotect uffdio_wp; struct uffdio_writeprotect __user *user_uffdio_wp; struct userfaultfd_wake_range range; bool mode_wp, mode_dontwake; if (atomic_read(&ctx->mmap_changing)) return -EAGAIN; user_uffdio_wp = (struct uffdio_writeprotect __user *) arg; if (copy_from_user(&uffdio_wp, user_uffdio_wp, sizeof(struct uffdio_writeprotect))) return -EFAULT; ret = validate_range(ctx->mm, uffdio_wp.range.start, uffdio_wp.range.len); if (ret) return ret; if (uffdio_wp.mode & ~(UFFDIO_WRITEPROTECT_MODE_DONTWAKE | UFFDIO_WRITEPROTECT_MODE_WP)) return -EINVAL; mode_wp = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WP; mode_dontwake = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_DONTWAKE; if (mode_wp && mode_dontwake) return -EINVAL; if (mmget_not_zero(ctx->mm)) { ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start, uffdio_wp.range.len, mode_wp, &ctx->mmap_changing); mmput(ctx->mm); } else { return -ESRCH; } if (ret) return ret; if (!mode_wp && !mode_dontwake) { range.start = uffdio_wp.range.start; range.len = uffdio_wp.range.len; wake_userfault(ctx, &range); } return ret; } static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) { __s64 ret; struct uffdio_continue uffdio_continue; struct uffdio_continue __user *user_uffdio_continue; struct userfaultfd_wake_range range; uffd_flags_t flags = 0; user_uffdio_continue = (struct uffdio_continue __user *)arg; ret = -EAGAIN; if (atomic_read(&ctx->mmap_changing)) goto out; ret = -EFAULT; if (copy_from_user(&uffdio_continue, user_uffdio_continue, /* don't copy the output fields */ sizeof(uffdio_continue) - (sizeof(__s64)))) goto out; ret = validate_range(ctx->mm, uffdio_continue.range.start, uffdio_continue.range.len); if (ret) goto out; ret = -EINVAL; if (uffdio_continue.mode & ~(UFFDIO_CONTINUE_MODE_DONTWAKE | UFFDIO_CONTINUE_MODE_WP)) goto out; if (uffdio_continue.mode & 
UFFDIO_CONTINUE_MODE_WP) flags |= MFILL_ATOMIC_WP; if (mmget_not_zero(ctx->mm)) { ret = mfill_atomic_continue(ctx->mm, uffdio_continue.range.start, uffdio_continue.range.len, &ctx->mmap_changing, flags); mmput(ctx->mm); } else { return -ESRCH; } if (unlikely(put_user(ret, &user_uffdio_continue->mapped))) return -EFAULT; if (ret < 0) goto out; /* len == 0 would wake all */ BUG_ON(!ret); range.len = ret; if (!(uffdio_continue.mode & UFFDIO_CONTINUE_MODE_DONTWAKE)) { range.start = uffdio_continue.range.start; wake_userfault(ctx, &range); } ret = range.len == uffdio_continue.range.len ? 0 : -EAGAIN; out: return ret; } static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long arg) { __s64 ret; struct uffdio_poison uffdio_poison; struct uffdio_poison __user *user_uffdio_poison; struct userfaultfd_wake_range range; user_uffdio_poison = (struct uffdio_poison __user *)arg; ret = -EAGAIN; if (atomic_read(&ctx->mmap_changing)) goto out; ret = -EFAULT; if (copy_from_user(&uffdio_poison, user_uffdio_poison, /* don't copy the output fields */ sizeof(uffdio_poison) - (sizeof(__s64)))) goto out; ret = validate_range(ctx->mm, uffdio_poison.range.start, uffdio_poison.range.len); if (ret) goto out; ret = -EINVAL; if (uffdio_poison.mode & ~UFFDIO_POISON_MODE_DONTWAKE) goto out; if (mmget_not_zero(ctx->mm)) { ret = mfill_atomic_poison(ctx->mm, uffdio_poison.range.start, uffdio_poison.range.len, &ctx->mmap_changing, 0); mmput(ctx->mm); } else { return -ESRCH; } if (unlikely(put_user(ret, &user_uffdio_poison->updated))) return -EFAULT; if (ret < 0) goto out; /* len == 0 would wake all */ BUG_ON(!ret); range.len = ret; if (!(uffdio_poison.mode & UFFDIO_POISON_MODE_DONTWAKE)) { range.start = uffdio_poison.range.start; wake_userfault(ctx, &range); } ret = range.len == uffdio_poison.range.len ? 
0 : -EAGAIN; out: return ret; } bool userfaultfd_wp_async(struct vm_area_struct *vma) { return userfaultfd_wp_async_ctx(vma->vm_userfaultfd_ctx.ctx); } static inline unsigned int uffd_ctx_features(__u64 user_features) { /* * For the current set of features the bits just coincide. Set * UFFD_FEATURE_INITIALIZED to mark the features as enabled. */ return (unsigned int)user_features | UFFD_FEATURE_INITIALIZED; } /* * userland asks for a certain API version and we return which bits * and ioctl commands are implemented in this kernel for such API * version or -EINVAL if unknown. */ static int userfaultfd_api(struct userfaultfd_ctx *ctx, unsigned long arg) { struct uffdio_api uffdio_api; void __user *buf = (void __user *)arg; unsigned int ctx_features; int ret; __u64 features; ret = -EFAULT; if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api))) goto out; features = uffdio_api.features; ret = -EINVAL; if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES)) goto err_out; ret = -EPERM; if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE)) goto err_out; /* WP_ASYNC relies on WP_UNPOPULATED, choose it unconditionally */ if (features & UFFD_FEATURE_WP_ASYNC) features |= UFFD_FEATURE_WP_UNPOPULATED; /* report all available features and ioctls to userland */ uffdio_api.features = UFFD_API_FEATURES; #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR uffdio_api.features &= ~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM); #endif #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP; #endif #ifndef CONFIG_PTE_MARKER_UFFD_WP uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM; uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED; uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC; #endif uffdio_api.ioctls = UFFD_API_IOCTLS; ret = -EFAULT; if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) goto out; /* only enable the requested features for this uffd context */ ctx_features = uffd_ctx_features(features); ret 
= -EINVAL; if (cmpxchg(&ctx->features, 0, ctx_features) != 0) goto err_out; ret = 0; out: return ret; err_out: memset(&uffdio_api, 0, sizeof(uffdio_api)); if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) ret = -EFAULT; goto out; } static long userfaultfd_ioctl(struct file *file, unsigned cmd, unsigned long arg) { int ret = -EINVAL; struct userfaultfd_ctx *ctx = file->private_data; if (cmd != UFFDIO_API && !userfaultfd_is_initialized(ctx)) return -EINVAL; switch(cmd) { case UFFDIO_API: ret = userfaultfd_api(ctx, arg); break; case UFFDIO_REGISTER: ret = userfaultfd_register(ctx, arg); break; case UFFDIO_UNREGISTER: ret = userfaultfd_unregister(ctx, arg); break; case UFFDIO_WAKE: ret = userfaultfd_wake(ctx, arg); break; case UFFDIO_COPY: ret = userfaultfd_copy(ctx, arg); break; case UFFDIO_ZEROPAGE: ret = userfaultfd_zeropage(ctx, arg); break; case UFFDIO_WRITEPROTECT: ret = userfaultfd_writeprotect(ctx, arg); break; case UFFDIO_CONTINUE: ret = userfaultfd_continue(ctx, arg); break; case UFFDIO_POISON: ret = userfaultfd_poison(ctx, arg); break; } return ret; } #ifdef CONFIG_PROC_FS static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) { struct userfaultfd_ctx *ctx = f->private_data; wait_queue_entry_t *wq; unsigned long pending = 0, total = 0; spin_lock_irq(&ctx->fault_pending_wqh.lock); list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) { pending++; total++; } list_for_each_entry(wq, &ctx->fault_wqh.head, entry) { total++; } spin_unlock_irq(&ctx->fault_pending_wqh.lock); /* * If more protocols will be added, there will be all shown * separated by a space. Like this: * protocols: aa:... bb:... 
*/ seq_printf(m, "pending:\t%lu\ntotal:\t%lu\nAPI:\t%Lx:%x:%Lx\n", pending, total, UFFD_API, ctx->features, UFFD_API_IOCTLS|UFFD_API_RANGE_IOCTLS); } #endif static const struct file_operations userfaultfd_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = userfaultfd_show_fdinfo, #endif .release = userfaultfd_release, .poll = userfaultfd_poll, .read = userfaultfd_read, .unlocked_ioctl = userfaultfd_ioctl, .compat_ioctl = compat_ptr_ioctl, .llseek = noop_llseek, }; static void init_once_userfaultfd_ctx(void *mem) { struct userfaultfd_ctx *ctx = (struct userfaultfd_ctx *) mem; init_waitqueue_head(&ctx->fault_pending_wqh); init_waitqueue_head(&ctx->fault_wqh); init_waitqueue_head(&ctx->event_wqh); init_waitqueue_head(&ctx->fd_wqh); seqcount_spinlock_init(&ctx->refile_seq, &ctx->fault_pending_wqh.lock); } static int new_userfaultfd(int flags) { struct userfaultfd_ctx *ctx; int fd; BUG_ON(!current->mm); /* Check the UFFD_* constants for consistency. */ BUILD_BUG_ON(UFFD_USER_MODE_ONLY & UFFD_SHARED_FCNTL_FLAGS); BUILD_BUG_ON(UFFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(UFFD_NONBLOCK != O_NONBLOCK); if (flags & ~(UFFD_SHARED_FCNTL_FLAGS | UFFD_USER_MODE_ONLY)) return -EINVAL; ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL); if (!ctx) return -ENOMEM; refcount_set(&ctx->refcount, 1); ctx->flags = flags; ctx->features = 0; ctx->released = false; atomic_set(&ctx->mmap_changing, 0); ctx->mm = current->mm; /* prevent the mm struct to be freed */ mmgrab(ctx->mm); fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx, O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL); if (fd < 0) { mmdrop(ctx->mm); kmem_cache_free(userfaultfd_ctx_cachep, ctx); } return fd; } static inline bool userfaultfd_syscall_allowed(int flags) { /* Userspace-only page faults are always allowed */ if (flags & UFFD_USER_MODE_ONLY) return true; /* * The user is requesting a userfaultfd which can handle kernel faults. * Privileged users are always allowed to do this. 
*/ if (capable(CAP_SYS_PTRACE)) return true; /* Otherwise, access to kernel fault handling is sysctl controlled. */ return sysctl_unprivileged_userfaultfd; } SYSCALL_DEFINE1(userfaultfd, int, flags) { if (!userfaultfd_syscall_allowed(flags)) return -EPERM; return new_userfaultfd(flags); } static long userfaultfd_dev_ioctl(struct file *file, unsigned int cmd, unsigned long flags) { if (cmd != USERFAULTFD_IOC_NEW) return -EINVAL; return new_userfaultfd(flags); } static const struct file_operations userfaultfd_dev_fops = { .unlocked_ioctl = userfaultfd_dev_ioctl, .compat_ioctl = userfaultfd_dev_ioctl, .owner = THIS_MODULE, .llseek = noop_llseek, }; static struct miscdevice userfaultfd_misc = { .minor = MISC_DYNAMIC_MINOR, .name = "userfaultfd", .fops = &userfaultfd_dev_fops }; static int __init userfaultfd_init(void) { int ret; ret = misc_register(&userfaultfd_misc); if (ret) return ret; userfaultfd_ctx_cachep = kmem_cache_create("userfaultfd_ctx_cache", sizeof(struct userfaultfd_ctx), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, init_once_userfaultfd_ctx); #ifdef CONFIG_SYSCTL register_sysctl_init("vm", vm_userfaultfd_table); #endif return 0; } __initcall(userfaultfd_init); |
1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | // SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/errno.h> #include <linux/tty.h> #include <linux/module.h> /* * n_null.c - Null line discipline used in the failure path * * Copyright (C) Intel 2017 */ static ssize_t n_null_read(struct tty_struct *tty, struct file *file, u8 *buf, size_t nr, void **cookie, unsigned long offset) { return -EOPNOTSUPP; } static ssize_t n_null_write(struct tty_struct *tty, struct file *file, const u8 *buf, size_t nr) { return -EOPNOTSUPP; } static struct tty_ldisc_ops null_ldisc = { .owner = THIS_MODULE, .num = N_NULL, .name = "n_null", .read = n_null_read, .write = n_null_write, }; static int __init n_null_init(void) { BUG_ON(tty_register_ldisc(&null_ldisc)); return 0; } static void __exit n_null_exit(void) { tty_unregister_ldisc(&null_ldisc); } module_init(n_null_init); module_exit(n_null_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alan Cox"); MODULE_ALIAS_LDISC(N_NULL); MODULE_DESCRIPTION("Null ldisc driver"); |
3 1 1 3 2 5 3 6 1 8 12 8 3 8 4 3 5 5 2 8 12 8 14 2 12 8 5 1 8 7 1 4 4 4 1 3 5 5 5 8 4 4 4 4 4 4 1 1 1 1 1 1 1 || // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. * * The io_pagetable is the top of datastructure that maps IOVA's to PFNs. The * PFNs can be placed into an iommu_domain, or returned to the caller as a page * list for access by an in-kernel user. * * The datastructure uses the iopt_pages to optimize the storage of the PFNs * between the domains and xarray. */ #include <linux/iommufd.h> #include <linux/lockdep.h> #include <linux/iommu.h> #include <linux/sched/mm.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/errno.h> #include <uapi/linux/iommufd.h> #include "io_pagetable.h" #include "double_span.h" struct iopt_pages_list { struct iopt_pages *pages; struct iopt_area *area; struct list_head next; unsigned long start_byte; unsigned long length; }; struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter, struct io_pagetable *iopt, unsigned long iova, unsigned long last_iova) { lockdep_assert_held(&iopt->iova_rwsem); iter->cur_iova = iova; iter->last_iova = last_iova; iter->area = iopt_area_iter_first(iopt, iova, iova); if (!iter->area) return NULL; if (!iter->area->pages) { iter->area = NULL; return NULL; } return iter->area; } struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter) { unsigned long last_iova; if (!iter->area) return NULL; last_iova = iopt_area_last_iova(iter->area); if (iter->last_iova <= last_iova) return NULL; iter->cur_iova = last_iova + 1; iter->area = iopt_area_iter_next(iter->area, iter->cur_iova, iter->last_iova); if (!iter->area) return NULL; if (iter->cur_iova != iopt_area_iova(iter->area) || !iter->area->pages) { iter->area = NULL; return NULL; } return iter->area; } static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, unsigned long length, unsigned long iova_alignment, unsigned long page_offset) { if 
(span->is_used || span->last_hole - span->start_hole < length - 1) return false; span->start_hole = ALIGN(span->start_hole, iova_alignment) | page_offset; if (span->start_hole > span->last_hole || span->last_hole - span->start_hole < length - 1) return false; return true; } static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, unsigned long length, unsigned long iova_alignment, unsigned long page_offset) { if (span->is_hole || span->last_used - span->start_used < length - 1) return false; span->start_used = ALIGN(span->start_used, iova_alignment) | page_offset; if (span->start_used > span->last_used || span->last_used - span->start_used < length - 1) return false; return true; } /* * Automatically find a block of IOVA that is not being used and not reserved. * Does not return a 0 IOVA even if it is valid. */ static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova, unsigned long uptr, unsigned long length) { unsigned long page_offset = uptr % PAGE_SIZE; struct interval_tree_double_span_iter used_span; struct interval_tree_span_iter allowed_span; unsigned long iova_alignment; lockdep_assert_held(&iopt->iova_rwsem); /* Protect roundup_pow-of_two() from overflow */ if (length == 0 || length >= ULONG_MAX / 2) return -EOVERFLOW; /* * Keep alignment present in the uptr when building the IOVA, this * increases the chance we can map a THP. 
*/ if (!uptr) iova_alignment = roundup_pow_of_two(length); else iova_alignment = min_t(unsigned long, roundup_pow_of_two(length), 1UL << __ffs64(uptr)); if (iova_alignment < iopt->iova_alignment) return -EINVAL; interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree, PAGE_SIZE, ULONG_MAX - PAGE_SIZE) { if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) { allowed_span.start_used = PAGE_SIZE; allowed_span.last_used = ULONG_MAX - PAGE_SIZE; allowed_span.is_hole = false; } if (!__alloc_iova_check_used(&allowed_span, length, iova_alignment, page_offset)) continue; interval_tree_for_each_double_span( &used_span, &iopt->reserved_itree, &iopt->area_itree, allowed_span.start_used, allowed_span.last_used) { if (!__alloc_iova_check_hole(&used_span, length, iova_alignment, page_offset)) continue; *iova = used_span.start_hole; return 0; } } return -ENOSPC; } static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova, unsigned long length) { unsigned long last; lockdep_assert_held(&iopt->iova_rwsem); if ((iova & (iopt->iova_alignment - 1))) return -EINVAL; if (check_add_overflow(iova, length - 1, &last)) return -EOVERFLOW; /* No reserved IOVA intersects the range */ if (iopt_reserved_iter_first(iopt, iova, last)) return -EINVAL; /* Check that there is not already a mapping in the range */ if (iopt_area_iter_first(iopt, iova, last)) return -EEXIST; return 0; } /* * The area takes a slice of the pages from start_bytes to start_byte + length */ static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area, struct iopt_pages *pages, unsigned long iova, unsigned long start_byte, unsigned long length, int iommu_prot) { lockdep_assert_held_write(&iopt->iova_rwsem); if ((iommu_prot & IOMMU_WRITE) && !pages->writable) return -EPERM; area->iommu_prot = iommu_prot; area->page_offset = start_byte % PAGE_SIZE; if (area->page_offset & (iopt->iova_alignment - 1)) return -EINVAL; area->node.start = iova; if (check_add_overflow(iova, length - 1, 
&area->node.last)) return -EOVERFLOW; area->pages_node.start = start_byte / PAGE_SIZE; if (check_add_overflow(start_byte, length - 1, &area->pages_node.last)) return -EOVERFLOW; area->pages_node.last = area->pages_node.last / PAGE_SIZE; if (WARN_ON(area->pages_node.last >= pages->npages)) return -EOVERFLOW; /* * The area is inserted with a NULL pages indicating it is not fully * initialized yet. */ area->iopt = iopt; interval_tree_insert(&area->node, &iopt->area_itree); return 0; } static struct iopt_area *iopt_area_alloc(void) { struct iopt_area *area; area = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT); if (!area) return NULL; RB_CLEAR_NODE(&area->node.rb); RB_CLEAR_NODE(&area->pages_node.rb); return area; } static int iopt_alloc_area_pages(struct io_pagetable *iopt, struct list_head *pages_list, unsigned long length, unsigned long *dst_iova, int iommu_prot, unsigned int flags) { struct iopt_pages_list *elm; unsigned long iova; int rc = 0; list_for_each_entry(elm, pages_list, next) { elm->area = iopt_area_alloc(); if (!elm->area) return -ENOMEM; } down_write(&iopt->iova_rwsem); if ((length & (iopt->iova_alignment - 1)) || !length) { rc = -EINVAL; goto out_unlock; } if (flags & IOPT_ALLOC_IOVA) { /* Use the first entry to guess the ideal IOVA alignment */ elm = list_first_entry(pages_list, struct iopt_pages_list, next); rc = iopt_alloc_iova( iopt, dst_iova, (uintptr_t)elm->pages->uptr + elm->start_byte, length); if (rc) goto out_unlock; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) { rc = -EINVAL; goto out_unlock; } } else { rc = iopt_check_iova(iopt, *dst_iova, length); if (rc) goto out_unlock; } /* * Areas are created with a NULL pages so that the IOVA space is * reserved and we can unlock the iova_rwsem. 
*/ iova = *dst_iova; list_for_each_entry(elm, pages_list, next) { rc = iopt_insert_area(iopt, elm->area, elm->pages, iova, elm->start_byte, elm->length, iommu_prot); if (rc) goto out_unlock; iova += elm->length; } out_unlock: up_write(&iopt->iova_rwsem); return rc; } static void iopt_abort_area(struct iopt_area *area) { if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(area->pages); if (area->iopt) { down_write(&area->iopt->iova_rwsem); interval_tree_remove(&area->node, &area->iopt->area_itree); up_write(&area->iopt->iova_rwsem); } kfree(area); } void iopt_free_pages_list(struct list_head *pages_list) { struct iopt_pages_list *elm; while ((elm = list_first_entry_or_null(pages_list, struct iopt_pages_list, next))) { if (elm->area) iopt_abort_area(elm->area); if (elm->pages) iopt_put_pages(elm->pages); list_del(&elm->next); kfree(elm); } } static int iopt_fill_domains_pages(struct list_head *pages_list) { struct iopt_pages_list *undo_elm; struct iopt_pages_list *elm; int rc; list_for_each_entry(elm, pages_list, next) { rc = iopt_area_fill_domains(elm->area, elm->pages); if (rc) goto err_undo; } return 0; err_undo: list_for_each_entry(undo_elm, pages_list, next) { if (undo_elm == elm) break; iopt_area_unfill_domains(undo_elm->area, undo_elm->pages); } return rc; } int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list, unsigned long length, unsigned long *dst_iova, int iommu_prot, unsigned int flags) { struct iopt_pages_list *elm; int rc; rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova, iommu_prot, flags); if (rc) return rc; down_read(&iopt->domains_rwsem); rc = iopt_fill_domains_pages(pages_list); if (rc) goto out_unlock_domains; down_write(&iopt->iova_rwsem); list_for_each_entry(elm, pages_list, next) { /* * area->pages must be set inside the domains_rwsem to ensure * any newly added domains will get filled. Moves the reference * in from the list. 
*/ elm->area->pages = elm->pages; elm->pages = NULL; elm->area = NULL; } up_write(&iopt->iova_rwsem); out_unlock_domains: up_read(&iopt->domains_rwsem); return rc; } /** * iopt_map_user_pages() - Map a user VA to an iova in the io page table * @ictx: iommufd_ctx the iopt is part of * @iopt: io_pagetable to act on * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains * the chosen iova on output. Otherwise is the iova to map to on input * @uptr: User VA to map * @length: Number of bytes to map * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping * @flags: IOPT_ALLOC_IOVA or zero * * iova, uptr, and length must be aligned to iova_alignment. For domain backed * page tables this will pin the pages and load them into the domain at iova. * For non-domain page tables this will only setup a lazy reference and the * caller must use iopt_access_pages() to touch them. * * iopt_unmap_iova() must be called to undo this before the io_pagetable can be * destroyed. */ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, unsigned long *iova, void __user *uptr, unsigned long length, int iommu_prot, unsigned int flags) { struct iopt_pages_list elm = {}; LIST_HEAD(pages_list); int rc; elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE); if (IS_ERR(elm.pages)) return PTR_ERR(elm.pages); if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM && elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER) elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM; elm.start_byte = uptr - elm.pages->uptr; elm.length = length; list_add(&elm.next, &pages_list); rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags); if (rc) { if (elm.area) iopt_abort_area(elm.area); if (elm.pages) iopt_put_pages(elm.pages); return rc; } return 0; } struct iova_bitmap_fn_arg { unsigned long flags; struct io_pagetable *iopt; struct iommu_domain *domain; struct iommu_dirty_bitmap *dirty; }; static int __iommu_read_and_clear_dirty(struct 
iova_bitmap *bitmap, unsigned long iova, size_t length, void *opaque) { struct iopt_area *area; struct iopt_area_contig_iter iter; struct iova_bitmap_fn_arg *arg = opaque; struct iommu_domain *domain = arg->domain; struct iommu_dirty_bitmap *dirty = arg->dirty; const struct iommu_dirty_ops *ops = domain->dirty_ops; unsigned long last_iova = iova + length - 1; unsigned long flags = arg->flags; int ret; iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) { unsigned long last = min(last_iova, iopt_area_last_iova(area)); ret = ops->read_and_clear_dirty(domain, iter.cur_iova, last - iter.cur_iova + 1, flags, dirty); if (ret) return ret; } if (!iopt_area_contig_done(&iter)) return -EINVAL; return 0; } static int iommu_read_and_clear_dirty(struct iommu_domain *domain, struct io_pagetable *iopt, unsigned long flags, struct iommu_hwpt_get_dirty_bitmap *bitmap) { const struct iommu_dirty_ops *ops = domain->dirty_ops; struct iommu_iotlb_gather gather; struct iommu_dirty_bitmap dirty; struct iova_bitmap_fn_arg arg; struct iova_bitmap *iter; int ret = 0; if (!ops || !ops->read_and_clear_dirty) return -EOPNOTSUPP; iter = iova_bitmap_alloc(bitmap->iova, bitmap->length, bitmap->page_size, u64_to_user_ptr(bitmap->data)); if (IS_ERR(iter)) return -ENOMEM; iommu_dirty_bitmap_init(&dirty, iter, &gather); arg.flags = flags; arg.iopt = iopt; arg.domain = domain; arg.dirty = &dirty; iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty); if (!(flags & IOMMU_DIRTY_NO_CLEAR)) iommu_iotlb_sync(domain, &gather); iova_bitmap_free(iter); return ret; } int iommufd_check_iova_range(struct io_pagetable *iopt, struct iommu_hwpt_get_dirty_bitmap *bitmap) { size_t iommu_pgsize = iopt->iova_alignment; u64 last_iova; if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova)) return -EOVERFLOW; if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX) return -EOVERFLOW; if ((bitmap->iova & (iommu_pgsize - 1)) || ((last_iova + 1) & (iommu_pgsize - 1))) return -EINVAL; if 
(!bitmap->page_size) return -EINVAL; if ((bitmap->iova & (bitmap->page_size - 1)) || ((last_iova + 1) & (bitmap->page_size - 1))) return -EINVAL; return 0; } int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt, struct iommu_domain *domain, unsigned long flags, struct iommu_hwpt_get_dirty_bitmap *bitmap) { int ret; ret = iommufd_check_iova_range(iopt, bitmap); if (ret) return ret; down_read(&iopt->iova_rwsem); ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap); up_read(&iopt->iova_rwsem); return ret; } static int iopt_clear_dirty_data(struct io_pagetable *iopt, struct iommu_domain *domain) { const struct iommu_dirty_ops *ops = domain->dirty_ops; struct iommu_iotlb_gather gather; struct iommu_dirty_bitmap dirty; struct iopt_area *area; int ret = 0; lockdep_assert_held_read(&iopt->iova_rwsem); iommu_dirty_bitmap_init(&dirty, NULL, &gather); for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; area = iopt_area_iter_next(area, 0, ULONG_MAX)) { if (!area->pages) continue; ret = ops->read_and_clear_dirty(domain, iopt_area_iova(area), iopt_area_length(area), 0, &dirty); if (ret) break; } iommu_iotlb_sync(domain, &gather); return ret; } int iopt_set_dirty_tracking(struct io_pagetable *iopt, struct iommu_domain *domain, bool enable) { const struct iommu_dirty_ops *ops = domain->dirty_ops; int ret = 0; if (!ops) return -EOPNOTSUPP; down_read(&iopt->iova_rwsem); /* Clear dirty bits from PTEs to ensure a clean snapshot */ if (enable) { ret = iopt_clear_dirty_data(iopt, domain); if (ret) goto out_unlock; } ret = ops->set_dirty_tracking(domain, enable); out_unlock: up_read(&iopt->iova_rwsem); return ret; } int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova, unsigned long length, struct list_head *pages_list) { struct iopt_area_contig_iter iter; unsigned long last_iova; struct iopt_area *area; int rc; if (!length) return -EINVAL; if (check_add_overflow(iova, length - 1, &last_iova)) return -EOVERFLOW; down_read(&iopt->iova_rwsem); 
iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { struct iopt_pages_list *elm; unsigned long last = min(last_iova, iopt_area_last_iova(area)); elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT); if (!elm) { rc = -ENOMEM; goto err_free; } elm->start_byte = iopt_area_start_byte(area, iter.cur_iova); elm->pages = area->pages; elm->length = (last - iter.cur_iova) + 1; kref_get(&elm->pages->kref); list_add_tail(&elm->next, pages_list); } if (!iopt_area_contig_done(&iter)) { rc = -ENOENT; goto err_free; } up_read(&iopt->iova_rwsem); return 0; err_free: up_read(&iopt->iova_rwsem); iopt_free_pages_list(pages_list); return rc; } static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, unsigned long last, unsigned long *unmapped) { struct iopt_area *area; unsigned long unmapped_bytes = 0; unsigned int tries = 0; int rc = -ENOENT; /* * The domains_rwsem must be held in read mode any time any area->pages * is NULL. This prevents domain attach/detatch from running * concurrently with cleaning up the area. */ again: down_read(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); while ((area = iopt_area_iter_first(iopt, start, last))) { unsigned long area_last = iopt_area_last_iova(area); unsigned long area_first = iopt_area_iova(area); struct iopt_pages *pages; /* Userspace should not race map/unmap's of the same area */ if (!area->pages) { rc = -EBUSY; goto out_unlock_iova; } if (area_first < start || area_last > last) { rc = -ENOENT; goto out_unlock_iova; } if (area_first != start) tries = 0; /* * num_accesses writers must hold the iova_rwsem too, so we can * safely read it under the write side of the iovam_rwsem * without the pages->mutex. */ if (area->num_accesses) { size_t length = iopt_area_length(area); start = area_first; area->prevent_access = true; up_write(&iopt->iova_rwsem); up_read(&iopt->domains_rwsem); iommufd_access_notify_unmap(iopt, area_first, length); /* Something is not responding to unmap requests. 
*/ tries++; if (WARN_ON(tries > 100)) return -EDEADLOCK; goto again; } pages = area->pages; area->pages = NULL; up_write(&iopt->iova_rwsem); iopt_area_unfill_domains(area, pages); iopt_abort_area(area); iopt_put_pages(pages); unmapped_bytes += area_last - area_first + 1; down_write(&iopt->iova_rwsem); } if (unmapped_bytes) rc = 0; out_unlock_iova: up_write(&iopt->iova_rwsem); up_read(&iopt->domains_rwsem); if (unmapped) *unmapped = unmapped_bytes; return rc; } /** * iopt_unmap_iova() - Remove a range of iova * @iopt: io_pagetable to act on * @iova: Starting iova to unmap * @length: Number of bytes to unmap * @unmapped: Return number of bytes unmapped * * The requested range must be a superset of existing ranges. * Splitting/truncating IOVA mappings is not allowed. */ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, unsigned long length, unsigned long *unmapped) { unsigned long iova_last; if (!length) return -EINVAL; if (check_add_overflow(iova, length - 1, &iova_last)) return -EOVERFLOW; return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped); } int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped) { int rc; rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); /* If the IOVAs are empty then unmap all succeeds */ if (rc == -ENOENT) return 0; return rc; } /* The caller must always free all the nodes in the allowed_iova rb_root. 
*/
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova)
{
	struct iopt_allowed *allowed;

	down_write(&iopt->iova_rwsem);
	/* Install the new tree; *allowed_iova now holds the previous one */
	swap(*allowed_iova, iopt->allowed_itree);

	for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed;
	     allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) {
		if (iopt_reserved_iter_first(iopt, allowed->node.start,
					     allowed->node.last)) {
			/* Overlaps a reserved range: swap the old tree back in */
			swap(*allowed_iova, iopt->allowed_itree);
			up_write(&iopt->iova_rwsem);
			return -EADDRINUSE;
		}
	}
	up_write(&iopt->iova_rwsem);
	return 0;
}

/*
 * Add [start, last] to the reserved tree on behalf of @owner. Fails with
 * -EADDRINUSE if the range intersects an existing area or an allowed range.
 * The iova_rwsem must be held for write.
 */
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner)
{
	struct iopt_reserved *reserved;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iopt_area_iter_first(iopt, start, last) ||
	    iopt_allowed_iter_first(iopt, start, last))
		return -EADDRINUSE;

	reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT);
	if (!reserved)
		return -ENOMEM;
	reserved->node.start = start;
	reserved->node.last = last;
	reserved->owner = owner;
	interval_tree_insert(&reserved->node, &iopt->reserved_itree);
	return 0;
}

/*
 * Remove and free every reserved range whose owner is @owner. The iova_rwsem
 * must be held for write.
 */
static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	struct iopt_reserved *reserved, *next;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved;
	     reserved = next) {
		/* Fetch the successor first, reserved may be freed below */
		next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX);

		if (reserved->owner == owner) {
			interval_tree_remove(&reserved->node,
					     &iopt->reserved_itree);
			kfree(reserved);
		}
	}
}

/* Locked wrapper around __iopt_remove_reserved_iova(). */
void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	down_write(&iopt->iova_rwsem);
	__iopt_remove_reserved_iova(iopt, owner);
	up_write(&iopt->iova_rwsem);
}

/*
 * Initialise the locks, the three (empty) interval trees and the two xarrays of
 * @iopt, and set its iova_alignment to 1.
 */
void iopt_init_table(struct io_pagetable *iopt)
{
	init_rwsem(&iopt->iova_rwsem);
	init_rwsem(&iopt->domains_rwsem);
	iopt->area_itree = RB_ROOT_CACHED;
	iopt->allowed_itree = RB_ROOT_CACHED;
	iopt->reserved_itree = RB_ROOT_CACHED;
	xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT);
	xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC);

	/*
	 * iopt's start as SW tables that can use the entire size_t IOVA space
	 * due to the use of size_t in the APIs. They have no alignment
	 * restriction.
	 */
	iopt->iova_alignment = 1;
}

/*
 * Free all allowed ranges and warn if any reserved range, domain, access or
 * area is still present. With CONFIG_IOMMUFD_TEST, reserved ranges with a NULL
 * owner are removed first.
 */
void iopt_destroy_table(struct io_pagetable *iopt)
{
	struct interval_tree_node *node;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		iopt_remove_reserved_iova(iopt, NULL);

	while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0,
						ULONG_MAX))) {
		interval_tree_remove(node, &iopt->allowed_itree);
		kfree(container_of(node, struct iopt_allowed, node));
	}

	WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root));
	WARN_ON(!xa_empty(&iopt->domains));
	WARN_ON(!xa_empty(&iopt->access_list));
	WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root));
}

/**
 * iopt_unfill_domain() - Unfill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to unfill
 *
 * This is used when removing a domain from the iopt. Every area in the iopt
 * will be unmapped from the domain. The domain must already be removed from the
 * domains xarray.
 */
static void iopt_unfill_domain(struct io_pagetable *iopt,
			       struct iommu_domain *domain)
{
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	/*
	 * Some other domain is holding all the pfns still, rapidly unmap this
	 * domain.
	 */
	if (iopt->next_domain_id != 0) {
		/* Pick an arbitrary remaining domain to act as storage */
		struct iommu_domain *storage_domain =
			xa_load(&iopt->domains, 0);

		for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
		     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
			struct iopt_pages *pages = area->pages;

			/* Areas without pages set are skipped */
			if (!pages)
				continue;

			mutex_lock(&pages->mutex);
			if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
				WARN_ON(!area->storage_domain);
			/* Hand storage duty over if this domain was the storage */
			if (area->storage_domain == domain)
				area->storage_domain = storage_domain;
			mutex_unlock(&pages->mutex);

			iopt_area_unmap_domain(area, domain);
		}
		return;
	}

	/* No domain remains (next_domain_id == 0): drop the storage domain too */
	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		WARN_ON(area->storage_domain != domain);
		area->storage_domain = NULL;
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
}

/**
 * iopt_fill_domain() - Fill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to fill
 *
 * Fill the domain with PFNs from every area in the iopt. On failure the domain
 * is left unchanged.
*/ static int iopt_fill_domain(struct io_pagetable *iopt, struct iommu_domain *domain) { struct iopt_area *end_area; struct iopt_area *area; int rc; lockdep_assert_held(&iopt->iova_rwsem); lockdep_assert_held_write(&iopt->domains_rwsem); for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; area = iopt_area_iter_next(area, 0, ULONG_MAX)) { struct iopt_pages *pages = area->pages; if (!pages) continue; mutex_lock(&pages->mutex); rc = iopt_area_fill_domain(area, domain); if (rc) { mutex_unlock(&pages->mutex); goto out_unfill; } if (!area->storage_domain) { WARN_ON(iopt->next_domain_id != 0); area->storage_domain = domain; interval_tree_insert(&area->pages_node, &pages->domains_itree); } mutex_unlock(&pages->mutex); } return 0; out_unfill: end_area = area; for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; area = iopt_area_iter_next(area, 0, ULONG_MAX)) { struct iopt_pages *pages = area->pages; if (area == end_area) break; if (!pages) continue; mutex_lock(&pages->mutex); if (iopt->next_domain_id == 0) { interval_tree_remove(&area->pages_node, &pages->domains_itree); area->storage_domain = NULL; } iopt_area_unfill_domain(area, pages, domain); mutex_unlock(&pages->mutex); } return rc; } /* All existing area's conform to an increased page size */ static int iopt_check_iova_alignment(struct io_pagetable *iopt, unsigned long new_iova_alignment) { unsigned long align_mask = new_iova_alignment - 1; struct iopt_area *area; lockdep_assert_held(&iopt->iova_rwsem); lockdep_assert_held(&iopt->domains_rwsem); for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; area = iopt_area_iter_next(area, 0, ULONG_MAX)) if ((iopt_area_iova(area) & align_mask) || (iopt_area_length(area) & align_mask) || (area->page_offset & align_mask)) return -EADDRINUSE; if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) { struct iommufd_access *access; unsigned long index; xa_for_each(&iopt->access_list, index, access) if (WARN_ON(access->iova_alignment > new_iova_alignment)) return -EADDRINUSE; } 
return 0; } int iopt_table_add_domain(struct io_pagetable *iopt, struct iommu_domain *domain) { const struct iommu_domain_geometry *geometry = &domain->geometry; struct iommu_domain *iter_domain; unsigned int new_iova_alignment; unsigned long index; int rc; down_write(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); xa_for_each(&iopt->domains, index, iter_domain) { if (WARN_ON(iter_domain == domain)) { rc = -EEXIST; goto out_unlock; } } /* * The io page size drives the iova_alignment. Internally the iopt_pages * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE * objects into the iommu_domain. * * A iommu_domain must always be able to accept PAGE_SIZE to be * compatible as we can't guarantee higher contiguity. */ new_iova_alignment = max_t(unsigned long, 1UL << __ffs(domain->pgsize_bitmap), iopt->iova_alignment); if (new_iova_alignment > PAGE_SIZE) { rc = -EINVAL; goto out_unlock; } if (new_iova_alignment != iopt->iova_alignment) { rc = iopt_check_iova_alignment(iopt, new_iova_alignment); if (rc) goto out_unlock; } /* No area exists that is outside the allowed domain aperture */ if (geometry->aperture_start != 0) { rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1, domain); if (rc) goto out_reserved; } if (geometry->aperture_end != ULONG_MAX) { rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1, ULONG_MAX, domain); if (rc) goto out_reserved; } rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL); if (rc) goto out_reserved; rc = iopt_fill_domain(iopt, domain); if (rc) goto out_release; iopt->iova_alignment = new_iova_alignment; xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL); iopt->next_domain_id++; up_write(&iopt->iova_rwsem); up_write(&iopt->domains_rwsem); return 0; out_release: xa_release(&iopt->domains, iopt->next_domain_id); out_reserved: __iopt_remove_reserved_iova(iopt, domain); out_unlock: up_write(&iopt->iova_rwsem); up_write(&iopt->domains_rwsem); return rc; } static int 
iopt_calculate_iova_alignment(struct io_pagetable *iopt) { unsigned long new_iova_alignment; struct iommufd_access *access; struct iommu_domain *domain; unsigned long index; lockdep_assert_held_write(&iopt->iova_rwsem); lockdep_assert_held(&iopt->domains_rwsem); /* See batch_iommu_map_small() */ if (iopt->disable_large_pages) new_iova_alignment = PAGE_SIZE; else new_iova_alignment = 1; xa_for_each(&iopt->domains, index, domain) new_iova_alignment = max_t(unsigned long, 1UL << __ffs(domain->pgsize_bitmap), new_iova_alignment); xa_for_each(&iopt->access_list, index, access) new_iova_alignment = max_t(unsigned long, access->iova_alignment, new_iova_alignment); if (new_iova_alignment > iopt->iova_alignment) { int rc; rc = iopt_check_iova_alignment(iopt, new_iova_alignment); if (rc) return rc; } iopt->iova_alignment = new_iova_alignment; return 0; } void iopt_table_remove_domain(struct io_pagetable *iopt, struct iommu_domain *domain) { struct iommu_domain *iter_domain = NULL; unsigned long index; down_write(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); xa_for_each(&iopt->domains, index, iter_domain) if (iter_domain == domain) break; if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id) goto out_unlock; /* * Compress the xarray to keep it linear by swapping the entry to erase * with the tail entry and shrinking the tail. */ iopt->next_domain_id--; iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id); if (index != iopt->next_domain_id) xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL); iopt_unfill_domain(iopt, domain); __iopt_remove_reserved_iova(iopt, domain); WARN_ON(iopt_calculate_iova_alignment(iopt)); out_unlock: up_write(&iopt->iova_rwsem); up_write(&iopt->domains_rwsem); } /** * iopt_area_split - Split an area into two parts at iova * @area: The area to split * @iova: Becomes the last of a new area * * This splits an area into two. It is part of the VFIO compatibility to allow * poking a hole in the mapping. 
The two areas continue to point at the same * iopt_pages, just with different starting bytes. */ static int iopt_area_split(struct iopt_area *area, unsigned long iova) { unsigned long alignment = area->iopt->iova_alignment; unsigned long last_iova = iopt_area_last_iova(area); unsigned long start_iova = iopt_area_iova(area); unsigned long new_start = iova + 1; struct io_pagetable *iopt = area->iopt; struct iopt_pages *pages = area->pages; struct iopt_area *lhs; struct iopt_area *rhs; int rc; lockdep_assert_held_write(&iopt->iova_rwsem); if (iova == start_iova || iova == last_iova) return 0; if (!pages || area->prevent_access) return -EBUSY; if (new_start & (alignment - 1) || iopt_area_start_byte(area, new_start) & (alignment - 1)) return -EINVAL; lhs = iopt_area_alloc(); if (!lhs) return -ENOMEM; rhs = iopt_area_alloc(); if (!rhs) { rc = -ENOMEM; goto err_free_lhs; } mutex_lock(&pages->mutex); /* * Splitting is not permitted if an access exists, we don't track enough * information to split existing accesses. */ if (area->num_accesses) { rc = -EINVAL; goto err_unlock; } /* * Splitting is not permitted if a domain could have been mapped with * huge pages. */ if (area->storage_domain && !iopt->disable_large_pages) { rc = -EINVAL; goto err_unlock; } interval_tree_remove(&area->node, &iopt->area_itree); rc = iopt_insert_area(iopt, lhs, area->pages, start_iova, iopt_area_start_byte(area, start_iova), (new_start - 1) - start_iova + 1, area->iommu_prot); if (WARN_ON(rc)) goto err_insert; rc = iopt_insert_area(iopt, rhs, area->pages, new_start, iopt_area_start_byte(area, new_start), last_iova - new_start + 1, area->iommu_prot); if (WARN_ON(rc)) goto err_remove_lhs; /* * If the original area has filled a domain, domains_itree has to be * updated. 
*/ if (area->storage_domain) { interval_tree_remove(&area->pages_node, &pages->domains_itree); interval_tree_insert(&lhs->pages_node, &pages->domains_itree); interval_tree_insert(&rhs->pages_node, &pages->domains_itree); } lhs->storage_domain = area->storage_domain; lhs->pages = area->pages; rhs->storage_domain = area->storage_domain; rhs->pages = area->pages; kref_get(&rhs->pages->kref); kfree(area); mutex_unlock(&pages->mutex); /* * No change to domains or accesses because the pages hasn't been * changed */ return 0; err_remove_lhs: interval_tree_remove(&lhs->node, &iopt->area_itree); err_insert: interval_tree_insert(&area->node, &iopt->area_itree); err_unlock: mutex_unlock(&pages->mutex); kfree(rhs); err_free_lhs: kfree(lhs); return rc; } int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas, size_t num_iovas) { int rc = 0; int i; down_write(&iopt->iova_rwsem); for (i = 0; i < num_iovas; i++) { struct iopt_area *area; area = iopt_area_iter_first(iopt, iovas[i], iovas[i]); if (!area) continue; rc = iopt_area_split(area, iovas[i]); if (rc) break; } up_write(&iopt->iova_rwsem); return rc; } void iopt_enable_large_pages(struct io_pagetable *iopt) { int rc; down_write(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); WRITE_ONCE(iopt->disable_large_pages, false); rc = iopt_calculate_iova_alignment(iopt); WARN_ON(rc); up_write(&iopt->iova_rwsem); up_write(&iopt->domains_rwsem); } int iopt_disable_large_pages(struct io_pagetable *iopt) { int rc = 0; down_write(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); if (iopt->disable_large_pages) goto out_unlock; /* Won't do it if domains already have pages mapped in them */ if (!xa_empty(&iopt->domains) && !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) { rc = -EINVAL; goto out_unlock; } WRITE_ONCE(iopt->disable_large_pages, true); rc = iopt_calculate_iova_alignment(iopt); if (rc) WRITE_ONCE(iopt->disable_large_pages, false); out_unlock: up_write(&iopt->iova_rwsem); up_write(&iopt->domains_rwsem); return rc; } 
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access) { int rc; down_write(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access, xa_limit_16b, GFP_KERNEL_ACCOUNT); if (rc) goto out_unlock; rc = iopt_calculate_iova_alignment(iopt); if (rc) { xa_erase(&iopt->access_list, access->iopt_access_list_id); goto out_unlock; } out_unlock: up_write(&iopt->iova_rwsem); up_write(&iopt->domains_rwsem); return rc; } void iopt_remove_access(struct io_pagetable *iopt, struct iommufd_access *access, u32 iopt_access_list_id) { down_write(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access); WARN_ON(iopt_calculate_iova_alignment(iopt)); up_write(&iopt->iova_rwsem); up_write(&iopt->domains_rwsem); } /* Narrow the valid_iova_itree to include reserved ranges from a device. */ int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt, struct device *dev, phys_addr_t *sw_msi_start) { struct iommu_resv_region *resv; LIST_HEAD(resv_regions); unsigned int num_hw_msi = 0; unsigned int num_sw_msi = 0; int rc; if (iommufd_should_fail()) return -EINVAL; down_write(&iopt->iova_rwsem); /* FIXME: drivers allocate memory but there is no failure propogated */ iommu_get_resv_regions(dev, &resv_regions); list_for_each_entry(resv, &resv_regions, list) { if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE) continue; if (sw_msi_start && resv->type == IOMMU_RESV_MSI) num_hw_msi++; if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) { *sw_msi_start = resv->start; num_sw_msi++; } rc = iopt_reserve_iova(iopt, resv->start, resv->length - 1 + resv->start, dev); if (rc) goto out_reserved; } /* Drivers must offer sane combinations of regions */ if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) { rc = -EINVAL; goto out_reserved; } rc = 0; goto out_free_resv; out_reserved: __iopt_remove_reserved_iova(iopt, dev); out_free_resv: 
iommu_put_resv_regions(dev, &resv_regions); up_write(&iopt->iova_rwsem); return rc; } |
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#ifndef __XFS_LINUX__
#define __XFS_LINUX__

#include <linux/types.h>
#include <linux/uuid.h>

/*
 * Kernel specific type declarations for XFS
 */

typedef __s64			xfs_off_t;	/* <file offset> type */
typedef unsigned long long	xfs_ino_t;	/* <inode> type */
typedef __s64			xfs_daddr_t;	/* <disk address> type */
typedef __u32			xfs_dev_t;
typedef __u32			xfs_nlink_t;

#include "xfs_types.h"

#include "kmem.h"
#include "mrlock.h"

#include <linux/semaphore.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/file.h>
#include <linux/filelock.h>
#include <linux/swap.h>
#include <linux/errno.h>
#include <linux/sched/signal.h>
#include <linux/bitops.h>
#include <linux/major.h>
#include <linux/pagemap.h>
#include <linux/vfs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/proc_fs.h>
#include <linux/sort.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/delay.h>
#include <linux/log2.h>
#include <linux/spinlock.h>
#include <linux/random.h>
#include <linux/ctype.h>
#include <linux/writeback.h>
#include <linux/capability.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/list_sort.h>
#include <linux/ratelimit.h>
#include <linux/rhashtable.h>
#include <linux/xattr.h>
#include <linux/mnt_idmapping.h>
#include <linux/debugfs.h>

#include <asm/page.h>
#include <asm/div64.h>
#include <asm/param.h>
#include <linux/uaccess.h>
#include <asm/byteorder.h>
#include <asm/unaligned.h>

#include "xfs_fs.h"
#include "xfs_stats.h"
#include "xfs_sysctl.h"
#include "xfs_iops.h"
#include "xfs_aops.h"
#include "xfs_super.h"
#include "xfs_cksum.h"
#include "xfs_buf.h"
#include "xfs_message.h"
#include "xfs_drain.h"

/* XFS_NATIVE_HOST is defined only when building for a big-endian host. */
#ifdef __BIG_ENDIAN
#define XFS_NATIVE_HOST 1
#else
#undef XFS_NATIVE_HOST
#endif

/* Short aliases for the .val members of the xfs_params tunables. */
#define irix_sgid_inherit	xfs_params.sgid_inherit.val
#define irix_symlink_mode	xfs_params.symlink_mode.val
#define xfs_panic_mask		xfs_params.panic_mask.val
#define xfs_error_level		xfs_params.error_level.val
#define xfs_syncd_centisecs	xfs_params.syncd_timer.val
#define xfs_stats_clear		xfs_params.stats_clear.val
#define xfs_inherit_sync	xfs_params.inherit_sync.val
#define xfs_inherit_nodump	xfs_params.inherit_nodump.val
#define xfs_inherit_noatime	xfs_params.inherit_noatim.val
#define xfs_inherit_nosymlinks	xfs_params.inherit_nosym.val
#define xfs_rotorstep		xfs_params.rotorstep.val
#define xfs_inherit_nodefrag	xfs_params.inherit_nodfrg.val
#define xfs_fstrm_centisecs	xfs_params.fstrm_timer.val
#define xfs_blockgc_secs	xfs_params.blockgc_timer.val

#define current_cpu()		(raw_smp_processor_id())
/* Save current->flags into *sp, then OR the bits in f into current->flags. */
#define current_set_flags_nested(sp, f)		\
		(*(sp) = current->flags, current->flags |= (f))
/* Restore only the bits in f from the value saved in *sp. */
#define current_restore_flags_nested(sp, f)	\
		(current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))

#define NBBY		8		/* number of bits per byte */

/*
 * Size of block device i/o is parameterized here.
 * Currently the system supports page-sized i/o.
 */
#define	BLKDEV_IOSHIFT		PAGE_SHIFT
#define	BLKDEV_IOSIZE		(1<<BLKDEV_IOSHIFT)
/* number of BB's per block device block */
#define	BLKDEV_BB		BTOBB(BLKDEV_IOSIZE)

#define ENOATTR		ENODATA		/* Attribute not found */
#define EWRONGFS	EINVAL		/* Mount with wrong filesystem type */
#define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */
#define EFSBADCRC	EBADMSG		/* Bad CRC detected */

#define __return_address __builtin_return_address(0)

/*
 * Return the address of a label.  Use barrier() so that the optimizer
 * won't reorder code to refactor the error jumpouts into a single
 * return, which throws off the reported address.
 */
#define __this_address	({ __label__ __here; __here: barrier(); &&__here; })

#define XFS_PROJID_DEFAULT	0

/* Ceiling division; note that y is evaluated more than once. */
#define howmany(x, y)	(((x)+((y)-1))/(y))

/* Sleep uninterruptibly for @ticks, as passed to schedule_timeout_uninterruptible(). */
static inline void delay(long ticks)
{
	schedule_timeout_uninterruptible(ticks);
}

/*
 * XFS wrapper structure for sysfs support. It depends on external data
 * structures and is embedded in various internal data structures to implement
 * the XFS sysfs object hierarchy. Define it here for broad access throughout
 * the codebase.
 */
struct xfs_kobj {
	struct kobject		kobject;
	struct completion	complete;
};

/* Per-CPU statistics pointer paired with the sysfs object that exposes it. */
struct xstats {
	struct xfsstats __percpu	*xs_stats;
	struct xfs_kobj			xs_kobj;
};

extern struct xstats xfsstats;

/* Build a dev_t from the sysv major (masked to 9 bits) and sysv minor of @dev. */
static inline dev_t xfs_to_linux_dev_t(xfs_dev_t dev)
{
	return MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
}

/* Encode a dev_t with sysv_encode_dev(), the inverse direction of the above. */
static inline xfs_dev_t linux_to_xfs_dev_t(dev_t dev)
{
	return sysv_encode_dev(dev);
}

/*
 * Various platform dependent calls that don't fit anywhere else
 */
#define xfs_sort(a,n,s,fn)	sort(a,n,s,fn,NULL)
#define xfs_stack_trace()	dump_stack()

/* Round @x down to a multiple of @y: divide in place with do_div(), multiply back. */
static inline uint64_t rounddown_64(uint64_t x, uint32_t y)
{
	do_div(x, y);
	return x * y;
}

/* Round @x up to a multiple of @y by adding y - 1 before the divide/multiply. */
static inline uint64_t roundup_64(uint64_t x, uint32_t y)
{
	x += y - 1;
	do_div(x, y);
	return x * y;
}

/* 64-bit counterpart of howmany(): ceiling of @x / @y. */
static inline uint64_t howmany_64(uint64_t x, uint32_t y)
{
	x += y - 1;
	do_div(x, y);
	return x;
}

/* If @b is a power of 2, return log2(b).  Else return -1. */
static inline int8_t log2_if_power2(unsigned long b)
{
	return is_power_of_2(b) ? ilog2(b) : -1;
}

/* If @b is a power of 2, return a mask of the lower bits, else return zero. */
static inline unsigned long long mask64_if_power2(unsigned long b)
{
	return is_power_of_2(b) ? b - 1 : 0;
}

int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
		char *data, enum req_op op);

/* Always compiled in: calls assfail() when @expr is false. */
#define ASSERT_ALWAYS(expr)	\
	(likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__))

/*
 * ASSERT() calls assfail() under DEBUG, asswarn() under XFS_WARN, and
 * compiles to nothing (without evaluating @expr) otherwise.
 */
#ifdef DEBUG
#define ASSERT(expr)	\
	(likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__))

#else	/* !DEBUG */

#ifdef XFS_WARN

#define ASSERT(expr)	\
	(likely(expr) ? (void)0 : asswarn(NULL, #expr, __FILE__, __LINE__))

#else	/* !DEBUG && !XFS_WARN */

#define ASSERT(expr)		((void)0)

#endif /* XFS_WARN */
#endif /* DEBUG */

/*
 * Evaluates to true when @expr is true, after reporting it through
 * xfs_corruption_error(); evaluates to false otherwise.
 */
#define XFS_IS_CORRUPT(mp, expr)	\
	(unlikely(expr) ? xfs_corruption_error(#expr, XFS_ERRLEVEL_LOW, (mp), \
					       NULL, 0, __FILE__, __LINE__, \
					       __this_address), \
			  true : false)

#define STATIC static noinline

#ifdef CONFIG_XFS_RT

/*
 * make sure we ignore the inode flag if the filesystem doesn't have a
 * configured realtime device.
 */
#define XFS_IS_REALTIME_INODE(ip)			\
	(((ip)->i_diflags & XFS_DIFLAG_REALTIME) &&	\
	 (ip)->i_mount->m_rtdev_targp)
#define XFS_IS_REALTIME_MOUNT(mp) ((mp)->m_rtdev_targp ? 1 : 0)
#else
#define XFS_IS_REALTIME_INODE(ip) (0)
#define XFS_IS_REALTIME_MOUNT(mp) (0)
#endif

/*
 * Starting in Linux 4.15, the %p (raw pointer value) printk modifier
 * prints a hashed version of the pointer to avoid leaking kernel
 * pointers into dmesg.  If we're trying to debug the kernel we want the
 * raw values, so override this behavior as best we can.
 */
#ifdef DEBUG
# define PTR_FMT "%px"
#else
# define PTR_FMT "%p"
#endif

#endif /* __XFS_LINUX__ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * include/net/dsa_stubs.h - Stubs for the Distributed Switch Architecture framework */ #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/net_tstamp.h> #include <net/dsa.h> #if IS_ENABLED(CONFIG_NET_DSA) extern const struct dsa_stubs *dsa_stubs; struct dsa_stubs { int (*conduit_hwtstamp_validate)(struct net_device *dev, const struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack); }; static inline int dsa_conduit_hwtstamp_validate(struct net_device *dev, const struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack) { if (!netdev_uses_dsa(dev)) return 0; /* rtnl_lock() is a sufficient guarantee, because as long as * netdev_uses_dsa() returns true, the dsa_core module is still * registered, and so, dsa_unregister_stubs() couldn't have run. * For netdev_uses_dsa() to start returning false, it would imply that * dsa_conduit_teardown() has executed, which requires rtnl_lock(). */ ASSERT_RTNL(); return dsa_stubs->conduit_hwtstamp_validate(dev, config, extack); } #else static inline int dsa_conduit_hwtstamp_validate(struct net_device *dev, const struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack) { return 0; } #endif |
7 5 7 9 || /* * linux/fs/nls/nls_cp737.c * * Charset cp737 translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 0x03a0, /* 0x90*/ 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 0x03b8, /* 0xa0*/ 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 0x03c0, 0x03c1, 0x03c3, 0x03c2, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 0x03c8, /* 0xb0*/ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 
0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, /* 0xc0*/ 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, /* 0xd0*/ 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, /* 0xe0*/ 0x03c9, 0x03ac, 0x03ad, 0x03ae, 0x03ca, 0x03af, 0x03cc, 0x03cd, 0x03cb, 0x03ce, 0x0386, 0x0388, 0x0389, 0x038a, 0x038c, 0x038e, /* 0xf0*/ 0x038f, 0x00b1, 0x2265, 0x2264, 0x03aa, 0x03ab, 0x00f7, 0x2248, 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0xf8, 0xf1, 0xfd, 0x00, 0x00, 0x00, 0x00, 0xfa, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, /* 0xf0-0xf7 */ }; static const unsigned char page03[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xea, 0x00, /* 0x80-0x87 */ 0xeb, 0xec, 0xed, 0x00, 0xee, 0x00, 0xef, 0xf0, /* 0x88-0x8f */ 0x00, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, /* 0x90-0x97 */ 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, /* 0x98-0x9f */ 0x8f, 0x90, 
0x00, 0x91, 0x92, 0x93, 0x94, 0x95, /* 0xa0-0xa7 */ 0x96, 0x97, 0xf4, 0xf5, 0xe1, 0xe2, 0xe3, 0xe5, /* 0xa8-0xaf */ 0x00, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, /* 0xb0-0xb7 */ 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, /* 0xb8-0xbf */ 0xa7, 0xa8, 0xaa, 0xa9, 0xab, 0xac, 0xad, 0xae, /* 0xc0-0xc7 */ 0xaf, 0xe0, 0xe4, 0xe8, 0xe6, 0xe7, 0xe9, 0x00, /* 0xc8-0xcf */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, /* 0x78-0x7f */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0xf9, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0xf3, 0xf2, 0x00, 0x00, /* 0x60-0x67 */ }; static const unsigned char page25[256] = { 0xc4, 0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xda, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0xbf, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xd9, 0x00, 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0xc5, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0xcd, 0xba, 0xd5, 0xd6, 0xc9, 0xb8, 0xb7, 0xbb, /* 0x50-0x57 */ 0xd4, 0xd3, 0xc8, 0xbe, 0xbd, 0xbc, 0xc6, 0xc7, /* 0x58-0x5f */ 0xcc, 0xb5, 0xb6, 0xb9, 0xd1, 0xd2, 0xcb, 0xcf, /* 0x60-0x67 */ 0xd0, 0xca, 0xd8, 0xd7, 0xce, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0xdb, 0x00, 0x00, 0x00, 0xdd, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0xde, 0xb0, 0xb1, 0xb2, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ }; static const unsigned char *const page_uni2charset[256] = { page00, NULL, NULL, page03, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, NULL, page22, NULL, 
NULL, page25, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x80-0x87 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0x88-0x8f */ 0xa8, 0xa9, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xe0, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xe1, 0xe2, 0xe3, 0xe5, 0xe6, 0xe7, /* 0xe8-0xef */ 
0xe9, 0xf1, 0xf2, 0xf3, 0xe4, 0xe8, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x98-0x9f */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0xa0-0xa7 */ 0x90, 0x91, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0x97, 0xea, 0xeb, 0xec, 
0xf4, 0xed, 0xee, 0xef, /* 0xe0-0xe7 */ 0xf5, 0xf0, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "cp737", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_cp737(void) { return register_nls(&table); } static void __exit exit_nls_cp737(void) { unregister_nls(&table); } module_init(init_nls_cp737) module_exit(exit_nls_cp737) MODULE_LICENSE("Dual BSD/GPL"); |
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_FIB_RULES_H
#define __NET_FIB_RULES_H

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/fib_rules.h>
#include <linux/refcount.h>
#include <net/flow.h>
#include <net/rtnetlink.h>
#include <net/fib_notifier.h>
#include <linux/indirect_call_wrapper.h>

/* Pair of kernel UIDs delimiting a range (start, end). */
struct fib_kuid_range {
	kuid_t start;
	kuid_t end;
};

/*
 * One policy routing rule. The object is reference counted through @refcnt
 * and freed via @rcu (see fib_rule_put()).
 */
struct fib_rule {
	struct list_head	list;
	int			iifindex;
	int			oifindex;
	u32			mark;
	u32			mark_mask;
	u32			flags;
	u32			table;
	u8			action;
	u8			l3mdev;
	u8                      proto;
	u8			ip_proto;
	u32			target;
	__be64			tun_id;
	struct fib_rule __rcu	*ctarget;
	struct net		*fr_net;

	refcount_t		refcnt;
	u32			pref;
	int			suppress_ifgroup;
	int			suppress_prefixlen;
	char			iifname[IFNAMSIZ];
	char			oifname[IFNAMSIZ];
	struct fib_kuid_range	uid_range;
	struct fib_rule_port_range	sport_range;
	struct fib_rule_port_range	dport_range;
	struct rcu_head		rcu;
};

/* Arguments and result slots passed through a rules lookup. */
struct fib_lookup_arg {
	void			*lookup_ptr;
	const void		*lookup_data;
	void			*result;
	struct fib_rule		*rule;
	u32			table;
	int			flags;
#define FIB_LOOKUP_NOREF		1
#define FIB_LOOKUP_IGNORE_LINKSTATE	2
};

/* Per-family rule operations: callbacks plus bookkeeping for the rule list. */
struct fib_rules_ops {
	int			family;
	struct list_head	list;
	int			rule_size;
	int			addr_size;
	int			unresolved_rules;
	int			nr_goto_rules;
	unsigned int		fib_rules_seq;

	int			(*action)(struct fib_rule *,
					  struct flowi *, int,
					  struct fib_lookup_arg *);
	bool			(*suppress)(struct fib_rule *, int,
					    struct fib_lookup_arg *);
	int			(*match)(struct fib_rule *,
					 struct flowi *, int);
	int			(*configure)(struct fib_rule *,
					     struct sk_buff *,
					     struct fib_rule_hdr *,
					     struct nlattr **,
					     struct netlink_ext_ack *);
	int			(*delete)(struct fib_rule *);
	int			(*compare)(struct fib_rule *,
					   struct fib_rule_hdr *,
					   struct nlattr **);
	int			(*fill)(struct fib_rule *, struct sk_buff *,
					struct fib_rule_hdr *);
	size_t			(*nlmsg_payload)(struct fib_rule *);

	/* Called after modifications to the rules set, must flush
	 * the route cache if one exists. */
	void			(*flush_cache)(struct fib_rules_ops *ops);

	int			nlgroup;
	struct list_head	rules_list;
	struct module		*owner;
	struct net		*fro_net;
	struct rcu_head		rcu;
};

struct fib_rule_notifier_info {
	struct fib_notifier_info info; /* must be first */
	struct fib_rule *rule;
};

/* Take an additional reference on @rule. */
static inline void fib_rule_get(struct fib_rule *rule)
{
	refcount_inc(&rule->refcnt);
}

/* Drop a reference; the last put frees @rule through kfree_rcu(). */
static inline void fib_rule_put(struct fib_rule *rule)
{
	if (refcount_dec_and_test(&rule->refcnt))
		kfree_rcu(rule, rcu);
}

/*
 * Table to use for @rule. With CONFIG_NET_L3_MASTER_DEV a rule whose l3mdev
 * field is non-zero takes the table from @arg instead of from the rule.
 */
#ifdef CONFIG_NET_L3_MASTER_DEV
static inline u32 fib_rule_get_table(struct fib_rule *rule,
				     struct fib_lookup_arg *arg)
{
	return rule->l3mdev ? arg->table : rule->table;
}
#else
static inline u32 fib_rule_get_table(struct fib_rule *rule,
				     struct fib_lookup_arg *arg)
{
	return rule->table;
}
#endif

/* Prefer the FRA_TABLE attribute when present, else the header's table field. */
static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
{
	if (nla[FRA_TABLE])
		return nla_get_u32(nla[FRA_TABLE]);
	return frh->table;
}

/* A port range counts as set only when both start and end are non-zero. */
static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range)
{
	return range->start != 0 && range->end != 0;
}

/* True when @port, converted with ntohs(), lies within [start, end] inclusive. */
static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
					 __be16 port)
{
	return ntohs(port) >= a->start &&
		ntohs(port) <= a->end;
}

/* Valid means: both ends non-zero, end below 0xffff, and start <= end. */
static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
{
	return a->start != 0 && a->end != 0 && a->end < 0xffff &&
		a->start <= a->end;
}

/* True when both ranges have identical start and end. */
static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
					       struct fib_rule_port_range *b)
{
	return a->start == b->start &&
		a->end == b->end;
}

/*
 * True when the rule's input interface is not LOOPBACK_IFINDEX and the rule
 * has ip_proto set or has either port range set.
 */
static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
{
	return rule->iifindex != LOOPBACK_IFINDEX && (rule->ip_proto ||
		fib_rule_port_range_set(&rule->sport_range) ||
		fib_rule_port_range_set(&rule->dport_range));
}

struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *,
					 struct net *);
void fib_rules_unregister(struct fib_rules_ops *);

int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
		     struct fib_lookup_arg *);
int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
			 u32 flags);
bool fib_rule_matchall(const struct fib_rule *rule);
int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
		   struct netlink_ext_ack *extack);
unsigned int fib_rules_seq_read(struct net *net, int family);

int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
		   struct netlink_ext_ack *extack);
int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
		   struct netlink_ext_ack *extack);

INDIRECT_CALLABLE_DECLARE(int fib6_rule_match(struct fib_rule *rule,
					    struct flowi *fl, int flags));
INDIRECT_CALLABLE_DECLARE(int fib4_rule_match(struct fib_rule *rule,
					    struct flowi *fl, int flags));

INDIRECT_CALLABLE_DECLARE(int fib6_rule_action(struct fib_rule *rule,
			    struct flowi *flp, int flags,
			    struct fib_lookup_arg *arg));
INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule,
			    struct flowi *flp, int flags,
			    struct fib_lookup_arg *arg));

INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule,
						int flags,
						struct fib_lookup_arg *arg));
INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule,
						int flags,
						struct fib_lookup_arg *arg));
#endif
2 1 1 || // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. */ #include <rdma/rdma_netlink.h> #include <net/addrconf.h> #include "rxe.h" #include "rxe_loc.h" MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); MODULE_DESCRIPTION("Soft RDMA transport"); MODULE_LICENSE("Dual BSD/GPL"); /* free resources for a rxe device all objects created for this device must * have been destroyed */ void rxe_dealloc(struct ib_device *ib_dev) { struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev); rxe_pool_cleanup(&rxe->uc_pool); rxe_pool_cleanup(&rxe->pd_pool); rxe_pool_cleanup(&rxe->ah_pool); rxe_pool_cleanup(&rxe->srq_pool); rxe_pool_cleanup(&rxe->qp_pool); rxe_pool_cleanup(&rxe->cq_pool); rxe_pool_cleanup(&rxe->mr_pool); rxe_pool_cleanup(&rxe->mw_pool); WARN_ON(!RB_EMPTY_ROOT(&rxe->mcg_tree)); if (rxe->tfm) crypto_free_shash(rxe->tfm); } /* initialize rxe device parameters */ static void rxe_init_device_param(struct rxe_dev *rxe) { rxe->max_inline_data = RXE_MAX_INLINE_DATA; rxe->attr.vendor_id = RXE_VENDOR_ID; rxe->attr.max_mr_size = RXE_MAX_MR_SIZE; rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP; rxe->attr.max_qp = RXE_MAX_QP; rxe->attr.max_qp_wr = RXE_MAX_QP_WR; rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; rxe->attr.kernel_cap_flags = IBK_ALLOW_USER_UNREG; rxe->attr.max_send_sge = RXE_MAX_SGE; rxe->attr.max_recv_sge = RXE_MAX_SGE; rxe->attr.max_sge_rd = RXE_MAX_SGE_RD; rxe->attr.max_cq = RXE_MAX_CQ; rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1; rxe->attr.max_mr = RXE_MAX_MR; rxe->attr.max_mw = RXE_MAX_MW; rxe->attr.max_pd = RXE_MAX_PD; rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM; rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM; rxe->attr.atomic_cap = IB_ATOMIC_HCA; rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP; rxe->attr.max_mcast_qp_attach = 
RXE_MAX_MCAST_QP_ATTACH; rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH; rxe->attr.max_ah = RXE_MAX_AH; rxe->attr.max_srq = RXE_MAX_SRQ; rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR; rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE; rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN; rxe->attr.max_pkeys = RXE_MAX_PKEYS; rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid, rxe->ndev->dev_addr); rxe->max_ucontext = RXE_MAX_UCONTEXT; } /* initialize port attributes */ static void rxe_init_port_param(struct rxe_port *port) { port->attr.state = IB_PORT_DOWN; port->attr.max_mtu = IB_MTU_4096; port->attr.active_mtu = IB_MTU_256; port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN; port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS; port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ; port->attr.bad_pkey_cntr = RXE_PORT_BAD_PKEY_CNTR; port->attr.qkey_viol_cntr = RXE_PORT_QKEY_VIOL_CNTR; port->attr.pkey_tbl_len = RXE_PORT_PKEY_TBL_LEN; port->attr.lid = RXE_PORT_LID; port->attr.sm_lid = RXE_PORT_SM_LID; port->attr.lmc = RXE_PORT_LMC; port->attr.max_vl_num = RXE_PORT_MAX_VL_NUM; port->attr.sm_sl = RXE_PORT_SM_SL; port->attr.subnet_timeout = RXE_PORT_SUBNET_TIMEOUT; port->attr.init_type_reply = RXE_PORT_INIT_TYPE_REPLY; port->attr.active_width = RXE_PORT_ACTIVE_WIDTH; port->attr.active_speed = RXE_PORT_ACTIVE_SPEED; port->attr.phys_state = RXE_PORT_PHYS_STATE; port->mtu_cap = ib_mtu_enum_to_int(IB_MTU_256); port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX); } /* initialize port state, note IB convention that HCA ports are always * numbered from 1 */ static void rxe_init_ports(struct rxe_dev *rxe) { struct rxe_port *port = &rxe->port; rxe_init_port_param(port); addrconf_addr_eui48((unsigned char *)&port->port_guid, rxe->ndev->dev_addr); spin_lock_init(&port->port_lock); } /* init pools of managed objects */ static void rxe_init_pools(struct rxe_dev *rxe) { rxe_pool_init(rxe, &rxe->uc_pool, 
RXE_TYPE_UC); rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD); rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH); rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ); rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP); rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ); rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR); rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW); } /* initialize rxe device state */ static void rxe_init(struct rxe_dev *rxe) { /* init default device parameters */ rxe_init_device_param(rxe); rxe_init_ports(rxe); rxe_init_pools(rxe); /* init pending mmap list */ spin_lock_init(&rxe->mmap_offset_lock); spin_lock_init(&rxe->pending_lock); INIT_LIST_HEAD(&rxe->pending_mmaps); /* init multicast support */ spin_lock_init(&rxe->mcg_lock); rxe->mcg_tree = RB_ROOT; mutex_init(&rxe->usdev_lock); } void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) { struct rxe_port *port = &rxe->port; enum ib_mtu mtu; mtu = eth_mtu_int_to_enum(ndev_mtu); /* Make sure that new MTU in range */ mtu = mtu ? min_t(enum ib_mtu, mtu, IB_MTU_4096) : IB_MTU_256; port->attr.active_mtu = mtu; port->mtu_cap = ib_mtu_enum_to_int(mtu); rxe_info_dev(rxe, "Set mtu to %d", port->mtu_cap); } /* called by ifc layer to create new rxe device. * The caller should allocate memory for rxe by calling ib_alloc_device. 
*/ int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name) { rxe_init(rxe); rxe_set_mtu(rxe, mtu); return rxe_register_device(rxe, ibdev_name); } static int rxe_newlink(const char *ibdev_name, struct net_device *ndev) { struct rxe_dev *rxe; int err = 0; if (is_vlan_dev(ndev)) { rxe_err("rxe creation allowed on top of a real device only"); err = -EPERM; goto err; } rxe = rxe_get_dev_from_net(ndev); if (rxe) { ib_device_put(&rxe->ib_dev); rxe_err_dev(rxe, "already configured on %s", ndev->name); err = -EEXIST; goto err; } err = rxe_net_add(ibdev_name, ndev); if (err) { rxe_err("failed to add %s\n", ndev->name); goto err; } err: return err; } static struct rdma_link_ops rxe_link_ops = { .type = "rxe", .newlink = rxe_newlink, }; static int __init rxe_module_init(void) { int err; err = rxe_alloc_wq(); if (err) return err; err = rxe_net_init(); if (err) { rxe_destroy_wq(); return err; } rdma_link_register(&rxe_link_ops); pr_info("loaded\n"); return 0; } static void __exit rxe_module_exit(void) { rdma_link_unregister(&rxe_link_ops); ib_unregister_driver(RDMA_DRIVER_RXE); rxe_net_exit(); rxe_destroy_wq(); pr_info("unloaded\n"); } late_initcall(rxe_module_init); module_exit(rxe_module_exit); MODULE_ALIAS_RDMA_LINK("rxe"); |
4 1184 44 1182 3 1181 3 3 2 2 || // SPDX-License-Identifier: GPL-2.0-only /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> */ #include <linux/types.h> #include <linux/ipv6.h> #include <linux/in6.h> #include <linux/netfilter.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/icmp.h> #include <linux/rcupdate.h> #include <linux/sysctl.h> #include <net/ipv6_frag.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> #endif #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> static DEFINE_MUTEX(defrag6_mutex); static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { u16 zone_id = NF_CT_DEFAULT_ZONE_ID; #if IS_ENABLED(CONFIG_NF_CONNTRACK) if (skb_nfct(skb)) { enum ip_conntrack_info ctinfo; const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); } #endif if (nf_bridge_in_prerouting(skb)) return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id; if (hooknum == NF_INET_PRE_ROUTING) return IP6_DEFRAG_CONNTRACK_IN + zone_id; else return IP6_DEFRAG_CONNTRACK_OUT + zone_id; } static unsigned int ipv6_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { int err; #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? */ if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) return NF_ACCEPT; if (skb->_nfct == IP_CT_UNTRACKED) return NF_ACCEPT; #endif err = nf_ct_frag6_gather(state->net, skb, nf_ct6_defrag_user(state->hook, skb)); /* queued */ if (err == -EINPROGRESS) return NF_STOLEN; return err == 0 ? 
NF_ACCEPT : NF_DROP; } static const struct nf_hook_ops ipv6_defrag_ops[] = { { .hook = ipv6_defrag, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv6_defrag, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, }, }; static void __net_exit defrag6_net_exit(struct net *net) { if (net->nf.defrag_ipv6_users) { nf_unregister_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); net->nf.defrag_ipv6_users = 0; } } static const struct nf_defrag_hook defrag_hook = { .owner = THIS_MODULE, .enable = nf_defrag_ipv6_enable, .disable = nf_defrag_ipv6_disable, }; static struct pernet_operations defrag6_net_ops = { .exit = defrag6_net_exit, }; static int __init nf_defrag_init(void) { int ret = 0; ret = nf_ct_frag6_init(); if (ret < 0) { pr_err("nf_defrag_ipv6: can't initialize frag6.\n"); return ret; } ret = register_pernet_subsys(&defrag6_net_ops); if (ret < 0) { pr_err("nf_defrag_ipv6: can't register pernet ops\n"); goto cleanup_frag6; } rcu_assign_pointer(nf_defrag_v6_hook, &defrag_hook); return ret; cleanup_frag6: nf_ct_frag6_cleanup(); return ret; } static void __exit nf_defrag_fini(void) { rcu_assign_pointer(nf_defrag_v6_hook, NULL); unregister_pernet_subsys(&defrag6_net_ops); nf_ct_frag6_cleanup(); } int nf_defrag_ipv6_enable(struct net *net) { int err = 0; mutex_lock(&defrag6_mutex); if (net->nf.defrag_ipv6_users == UINT_MAX) { err = -EOVERFLOW; goto out_unlock; } if (net->nf.defrag_ipv6_users) { net->nf.defrag_ipv6_users++; goto out_unlock; } err = nf_register_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); if (err == 0) net->nf.defrag_ipv6_users = 1; out_unlock: mutex_unlock(&defrag6_mutex); return err; } EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable); void nf_defrag_ipv6_disable(struct net *net) { mutex_lock(&defrag6_mutex); if (net->nf.defrag_ipv6_users) { net->nf.defrag_ipv6_users--; if (net->nf.defrag_ipv6_users == 0) nf_unregister_net_hooks(net, 
ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); } mutex_unlock(&defrag6_mutex); } EXPORT_SYMBOL_GPL(nf_defrag_ipv6_disable); module_init(nf_defrag_init); module_exit(nf_defrag_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6 defragmentation support"); |
1 1 1 1 72 72 1 1 || // SPDX-License-Identifier: GPL-2.0-only /* * linux/kernel/profile.c * Simple profiling. Manages a direct-mapped profile hit count buffer, * with configurable resolution, support for restricting the cpus on * which profiling is done, and switching between cpu time and * schedule() calls via kernel command line parameters passed at boot. * * Scheduler profiling support, Arjan van de Ven and Ingo Molnar, * Red Hat, July 2004 * Consolidation of architecture support code for profiling, * Nadia Yvette Chambers, Oracle, July 2004 * Amortized hit count accounting via per-cpu open-addressed hashtables * to resolve timer interrupt livelocks, Nadia Yvette Chambers, * Oracle, 2004 */ #include <linux/export.h> #include <linux/profile.h> #include <linux/memblock.h> #include <linux/notifier.h> #include <linux/mm.h> #include <linux/cpumask.h> #include <linux/cpu.h> #include <linux/highmem.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/sched/stat.h> #include <asm/sections.h> #include <asm/irq_regs.h> #include <asm/ptrace.h> struct profile_hit { u32 pc, hits; }; #define PROFILE_GRPSHIFT 3 #define PROFILE_GRPSZ (1 << PROFILE_GRPSHIFT) #define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit)) #define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ) static atomic_t *prof_buffer; static unsigned long prof_len; static unsigned short int prof_shift; int prof_on __read_mostly; EXPORT_SYMBOL_GPL(prof_on); static cpumask_var_t prof_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); static DEFINE_PER_CPU(int, cpu_profile_flip); static DEFINE_MUTEX(profile_flip_mutex); #endif /* CONFIG_SMP */ int profile_setup(char *str) { static const char schedstr[] = "schedule"; static const char sleepstr[] = "sleep"; static const char kvmstr[] = "kvm"; const char *select = NULL; int par; if (!strncmp(str, sleepstr, strlen(sleepstr))) { #ifdef CONFIG_SCHEDSTATS 
force_schedstat_enabled(); prof_on = SLEEP_PROFILING; select = sleepstr; #else pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n"); #endif /* CONFIG_SCHEDSTATS */ } else if (!strncmp(str, schedstr, strlen(schedstr))) { prof_on = SCHED_PROFILING; select = schedstr; } else if (!strncmp(str, kvmstr, strlen(kvmstr))) { prof_on = KVM_PROFILING; select = kvmstr; } else if (get_option(&str, &par)) { prof_shift = clamp(par, 0, BITS_PER_LONG - 1); prof_on = CPU_PROFILING; pr_info("kernel profiling enabled (shift: %u)\n", prof_shift); } if (select) { if (str[strlen(select)] == ',') str += strlen(select) + 1; if (get_option(&str, &par)) prof_shift = clamp(par, 0, BITS_PER_LONG - 1); pr_info("kernel %s profiling enabled (shift: %u)\n", select, prof_shift); } return 1; } __setup("profile=", profile_setup); int __ref profile_init(void) { int buffer_bytes; if (!prof_on) return 0; /* only text is profiled */ prof_len = (_etext - _stext) >> prof_shift; if (!prof_len) { pr_warn("profiling shift: %u too large\n", prof_shift); prof_on = 0; return -EINVAL; } buffer_bytes = prof_len*sizeof(atomic_t); if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) return -ENOMEM; cpumask_copy(prof_cpu_mask, cpu_possible_mask); prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL|__GFP_NOWARN); if (prof_buffer) return 0; prof_buffer = alloc_pages_exact(buffer_bytes, GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN); if (prof_buffer) return 0; prof_buffer = vzalloc(buffer_bytes); if (prof_buffer) return 0; free_cpumask_var(prof_cpu_mask); return -ENOMEM; } #if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) /* * Each cpu has a pair of open-addressed hashtables for pending * profile hits. read_profile() IPI's all cpus to request them * to flip buffers and flushes their contents to prof_buffer itself. * Flip requests are serialized by the profile_flip_mutex. 
The sole * use of having a second hashtable is for avoiding cacheline * contention that would otherwise happen during flushes of pending * profile hits required for the accuracy of reported profile hits * and so resurrect the interrupt livelock issue. * * The open-addressed hashtables are indexed by profile buffer slot * and hold the number of pending hits to that profile buffer slot on * a cpu in an entry. When the hashtable overflows, all pending hits * are accounted to their corresponding profile buffer slots with * atomic_add() and the hashtable emptied. As numerous pending hits * may be accounted to a profile buffer slot in a hashtable entry, * this amortizes a number of atomic profile buffer increments likely * to be far larger than the number of entries in the hashtable, * particularly given that the number of distinct profile buffer * positions to which hits are accounted during short intervals (e.g. * several seconds) is usually very small. Exclusion from buffer * flipping is provided by interrupt disablement (note that for * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from * process context). * The hash function is meant to be lightweight as opposed to strong, * and was vaguely inspired by ppc64 firmware-supported inverted * pagetable hash functions, but uses a full hashtable full of finite * collision chains, not just pairs of them. 
* * -- nyc */ static void __profile_flip_buffers(void *unused) { int cpu = smp_processor_id(); per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu); } static void profile_flip_buffers(void) { int i, j, cpu; mutex_lock(&profile_flip_mutex); j = per_cpu(cpu_profile_flip, get_cpu()); put_cpu(); on_each_cpu(__profile_flip_buffers, NULL, 1); for_each_online_cpu(cpu) { struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j]; for (i = 0; i < NR_PROFILE_HIT; ++i) { if (!hits[i].hits) { if (hits[i].pc) hits[i].pc = 0; continue; } atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); hits[i].hits = hits[i].pc = 0; } } mutex_unlock(&profile_flip_mutex); } static void profile_discard_flip_buffers(void) { int i, cpu; mutex_lock(&profile_flip_mutex); i = per_cpu(cpu_profile_flip, get_cpu()); put_cpu(); on_each_cpu(__profile_flip_buffers, NULL, 1); for_each_online_cpu(cpu) { struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i]; memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit)); } mutex_unlock(&profile_flip_mutex); } static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) { unsigned long primary, secondary, flags, pc = (unsigned long)__pc; int i, j, cpu; struct profile_hit *hits; pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1); i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; cpu = get_cpu(); hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)]; if (!hits) { put_cpu(); return; } /* * We buffer the global profiler buffer into a per-CPU * queue and thus reduce the number of global (and possibly * NUMA-alien) accesses. 
The write-queue is self-coalescing: */ local_irq_save(flags); do { for (j = 0; j < PROFILE_GRPSZ; ++j) { if (hits[i + j].pc == pc) { hits[i + j].hits += nr_hits; goto out; } else if (!hits[i + j].hits) { hits[i + j].pc = pc; hits[i + j].hits = nr_hits; goto out; } } i = (i + secondary) & (NR_PROFILE_HIT - 1); } while (i != primary); /* * Add the current hit(s) and flush the write-queue out * to the global buffer: */ atomic_add(nr_hits, &prof_buffer[pc]); for (i = 0; i < NR_PROFILE_HIT; ++i) { atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); hits[i].pc = hits[i].hits = 0; } out: local_irq_restore(flags); put_cpu(); } static int profile_dead_cpu(unsigned int cpu) { struct page *page; int i; if (cpumask_available(prof_cpu_mask)) cpumask_clear_cpu(cpu, prof_cpu_mask); for (i = 0; i < 2; i++) { if (per_cpu(cpu_profile_hits, cpu)[i]) { page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[i]); per_cpu(cpu_profile_hits, cpu)[i] = NULL; __free_page(page); } } return 0; } static int profile_prepare_cpu(unsigned int cpu) { int i, node = cpu_to_mem(cpu); struct page *page; per_cpu(cpu_profile_flip, cpu) = 0; for (i = 0; i < 2; i++) { if (per_cpu(cpu_profile_hits, cpu)[i]) continue; page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); if (!page) { profile_dead_cpu(cpu); return -ENOMEM; } per_cpu(cpu_profile_hits, cpu)[i] = page_address(page); } return 0; } static int profile_online_cpu(unsigned int cpu) { if (cpumask_available(prof_cpu_mask)) cpumask_set_cpu(cpu, prof_cpu_mask); return 0; } #else /* !CONFIG_SMP */ #define profile_flip_buffers() do { } while (0) #define profile_discard_flip_buffers() do { } while (0) static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) { unsigned long pc; pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]); } #endif /* !CONFIG_SMP */ void profile_hits(int type, void *__pc, unsigned int nr_hits) { if (prof_on != type || !prof_buffer) return; 
do_profile_hits(type, __pc, nr_hits); } EXPORT_SYMBOL_GPL(profile_hits); void profile_tick(int type) { struct pt_regs *regs = get_irq_regs(); if (!user_mode(regs) && cpumask_available(prof_cpu_mask) && cpumask_test_cpu(smp_processor_id(), prof_cpu_mask)) profile_hit(type, (void *)profile_pc(regs)); } #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/uaccess.h> static int prof_cpu_mask_proc_show(struct seq_file *m, void *v) { seq_printf(m, "%*pb\n", cpumask_pr_args(prof_cpu_mask)); return 0; } static int prof_cpu_mask_proc_open(struct inode *inode, struct file *file) { return single_open(file, prof_cpu_mask_proc_show, NULL); } static ssize_t prof_cpu_mask_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { cpumask_var_t new_value; int err; if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) return -ENOMEM; err = cpumask_parse_user(buffer, count, new_value); if (!err) { cpumask_copy(prof_cpu_mask, new_value); err = count; } free_cpumask_var(new_value); return err; } static const struct proc_ops prof_cpu_mask_proc_ops = { .proc_open = prof_cpu_mask_proc_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = single_release, .proc_write = prof_cpu_mask_proc_write, }; void create_prof_cpu_mask(void) { /* create /proc/irq/prof_cpu_mask */ proc_create("irq/prof_cpu_mask", 0600, NULL, &prof_cpu_mask_proc_ops); } /* * This function accesses profiling information. The returned data is * binary: the sampling step and the actual contents of the profile * buffer. Use of the program readprofile is recommended in order to * get meaningful info out of these data. 
*/ static ssize_t read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos) { unsigned long p = *ppos; ssize_t read; char *pnt; unsigned long sample_step = 1UL << prof_shift; profile_flip_buffers(); if (p >= (prof_len+1)*sizeof(unsigned int)) return 0; if (count > (prof_len+1)*sizeof(unsigned int) - p) count = (prof_len+1)*sizeof(unsigned int) - p; read = 0; while (p < sizeof(unsigned int) && count > 0) { if (put_user(*((char *)(&sample_step)+p), buf)) return -EFAULT; buf++; p++; count--; read++; } pnt = (char *)prof_buffer + p - sizeof(atomic_t); if (copy_to_user(buf, (void *)pnt, count)) return -EFAULT; read += count; *ppos += read; return read; } /* default is to not implement this call */ int __weak setup_profiling_timer(unsigned mult) { return -EINVAL; } /* * Writing to /proc/profile resets the counters * * Writing a 'profiling multiplier' value into it also re-sets the profiling * interrupt frequency, on architectures that support this. */ static ssize_t write_profile(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { #ifdef CONFIG_SMP if (count == sizeof(int)) { unsigned int multiplier; if (copy_from_user(&multiplier, buf, sizeof(int))) return -EFAULT; if (setup_profiling_timer(multiplier)) return -EINVAL; } #endif profile_discard_flip_buffers(); memset(prof_buffer, 0, prof_len * sizeof(atomic_t)); return count; } static const struct proc_ops profile_proc_ops = { .proc_read = read_profile, .proc_write = write_profile, .proc_lseek = default_llseek, }; int __ref create_proc_profile(void) { struct proc_dir_entry *entry; #ifdef CONFIG_SMP enum cpuhp_state online_state; #endif int err = 0; if (!prof_on) return 0; #ifdef CONFIG_SMP err = cpuhp_setup_state(CPUHP_PROFILE_PREPARE, "PROFILE_PREPARE", profile_prepare_cpu, profile_dead_cpu); if (err) return err; err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_PROFILE_ONLINE", profile_online_cpu, NULL); if (err < 0) goto err_state_prep; online_state = err; err = 0; #endif entry 
= proc_create("profile", S_IWUSR | S_IRUGO, NULL, &profile_proc_ops); if (!entry) goto err_state_onl; proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t)); return err; err_state_onl: #ifdef CONFIG_SMP cpuhp_remove_state(online_state); err_state_prep: cpuhp_remove_state(CPUHP_PROFILE_PREPARE); #endif return err; } subsys_initcall(create_proc_profile); #endif /* CONFIG_PROC_FS */ |
3 || // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/buffer_head.h> #include <linux/delay.h> #include <linux/sort.h> #include <linux/hash.h> #include <linux/jhash.h> #include <linux/kallsyms.h> #include <linux/gfs2_ondisk.h> #include <linux/list.h> #include <linux/wait.h> #include <linux/module.h> #include <linux/uaccess.h> #include <linux/seq_file.h> #include <linux/debugfs.h> #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/workqueue.h> #include <linux/jiffies.h> #include <linux/rcupdate.h> #include <linux/rculist_bl.h> #include <linux/bit_spinlock.h> #include <linux/percpu.h> #include <linux/list_sort.h> #include <linux/lockref.h> #include <linux/rhashtable.h> #include <linux/pid_namespace.h> #include <linux/fdtable.h> #include <linux/file.h> #include "gfs2.h" #include "incore.h" #include "glock.h" #include "glops.h" #include "inode.h" #include "lops.h" #include "meta_io.h" #include "quota.h" #include "super.h" #include "util.h" #include "bmap.h" #define CREATE_TRACE_POINTS #include "trace_gfs2.h" struct gfs2_glock_iter { struct gfs2_sbd *sdp; /* incore superblock */ struct rhashtable_iter hti; /* rhashtable iterator */ struct gfs2_glock *gl; /* current glock struct */ loff_t last_pos; /* last position */ }; typedef void (*glock_examiner) (struct gfs2_glock * gl); static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); static void __gfs2_glock_dq(struct gfs2_holder *gh); static void handle_callback(struct gfs2_glock *gl, unsigned int state, unsigned long delay, bool remote); static struct dentry *gfs2_root; static struct workqueue_struct *glock_workqueue; static LIST_HEAD(lru_list); static atomic_t lru_count = ATOMIC_INIT(0); static 
DEFINE_SPINLOCK(lru_lock); #define GFS2_GL_HASH_SHIFT 15 #define GFS2_GL_HASH_SIZE BIT(GFS2_GL_HASH_SHIFT) static const struct rhashtable_params ht_parms = { .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4, .key_len = offsetofend(struct lm_lockname, ln_type), .key_offset = offsetof(struct gfs2_glock, gl_name), .head_offset = offsetof(struct gfs2_glock, gl_node), }; static struct rhashtable gl_hash_table; #define GLOCK_WAIT_TABLE_BITS 12 #define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS) static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned; struct wait_glock_queue { struct lm_lockname *name; wait_queue_entry_t wait; }; static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key) { struct wait_glock_queue *wait_glock = container_of(wait, struct wait_glock_queue, wait); struct lm_lockname *wait_name = wait_glock->name; struct lm_lockname *wake_name = key; if (wake_name->ln_sbd != wait_name->ln_sbd || wake_name->ln_number != wait_name->ln_number || wake_name->ln_type != wait_name->ln_type) return 0; return autoremove_wake_function(wait, mode, sync, key); } static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name) { u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0); return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS); } /** * wake_up_glock - Wake up waiters on a glock * @gl: the glock */ static void wake_up_glock(struct gfs2_glock *gl) { wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name); if (waitqueue_active(wq)) __wake_up(wq, TASK_NORMAL, 1, &gl->gl_name); } static void gfs2_glock_dealloc(struct rcu_head *rcu) { struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); kfree(gl->gl_lksb.sb_lvbptr); if (gl->gl_ops->go_flags & GLOF_ASPACE) { struct gfs2_glock_aspace *gla = container_of(gl, struct gfs2_glock_aspace, glock); kmem_cache_free(gfs2_glock_aspace_cachep, gla); } else kmem_cache_free(gfs2_glock_cachep, gl); } /** * glock_blocked_by_withdraw - determine 
if we can still use a glock * @gl: the glock * * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted * when we're withdrawn. For example, to maintain metadata integrity, we should * disallow the use of inode and rgrp glocks when withdrawn. Other glocks like * the iopen or freeze glock may be safely used because none of their * metadata goes through the journal. So in general, we should disallow all * glocks that are journaled, and allow all the others. One exception is: * we need to allow our active journal to be promoted and demoted so others * may recover it and we can reacquire it when they're done. */ static bool glock_blocked_by_withdraw(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; if (likely(!gfs2_withdrawn(sdp))) return false; if (gl->gl_ops->go_flags & GLOF_NONDISK) return false; if (!sdp->sd_jdesc || gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr) return false; return true; } void gfs2_glock_free(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0); rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); smp_mb(); wake_up_glock(gl); call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); if (atomic_dec_and_test(&sdp->sd_glock_disposal)) wake_up(&sdp->sd_kill_wait); } /** * gfs2_glock_hold() - increment reference count on glock * @gl: The glock to hold * */ struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl) { GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); lockref_get(&gl->gl_lockref); return gl; } /** * demote_ok - Check to see if it's ok to unlock a glock * @gl: the glock * * Returns: 1 if it's ok */ static int demote_ok(const struct gfs2_glock *gl) { const struct gfs2_glock_operations *glops = gl->gl_ops; if (gl->gl_state == LM_ST_UNLOCKED) return 0; if (!list_empty(&gl->gl_holders)) return 0; if (glops->go_demote_ok) return glops->go_demote_ok(gl); return 1; } void gfs2_glock_add_to_lru(struct gfs2_glock *gl) { if 
(!(gl->gl_ops->go_flags & GLOF_LRU)) return; spin_lock(&lru_lock); list_move_tail(&gl->gl_lru, &lru_list); if (!test_bit(GLF_LRU, &gl->gl_flags)) { set_bit(GLF_LRU, &gl->gl_flags); atomic_inc(&lru_count); } spin_unlock(&lru_lock); } static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) { if (!(gl->gl_ops->go_flags & GLOF_LRU)) return; spin_lock(&lru_lock); if (test_bit(GLF_LRU, &gl->gl_flags)) { list_del_init(&gl->gl_lru); atomic_dec(&lru_count); clear_bit(GLF_LRU, &gl->gl_flags); } spin_unlock(&lru_lock); } /* * Enqueue the glock on the work queue. Passes one glock reference on to the * work queue. */ static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) { if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) { /* * We are holding the lockref spinlock, and the work was still * queued above. The queued work (glock_work_func) takes that * spinlock before dropping its glock reference(s), so it * cannot have dropped them in the meantime. */ GLOCK_BUG_ON(gl, gl->gl_lockref.count < 2); gl->gl_lockref.count--; } } static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) { spin_lock(&gl->gl_lockref.lock); __gfs2_glock_queue_work(gl, delay); spin_unlock(&gl->gl_lockref.lock); } static void __gfs2_glock_put(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = gfs2_glock2aspace(gl); lockref_mark_dead(&gl->gl_lockref); spin_unlock(&gl->gl_lockref.lock); gfs2_glock_remove_from_lru(gl); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); if (mapping) { truncate_inode_pages_final(mapping); if (!gfs2_withdrawn(sdp)) GLOCK_BUG_ON(gl, !mapping_empty(mapping)); } trace_gfs2_glock_put(gl); sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); } /* * Cause the glock to be put in work queue context. 
*/ void gfs2_glock_queue_put(struct gfs2_glock *gl) { gfs2_glock_queue_work(gl, 0); } /** * gfs2_glock_put() - Decrement reference count on glock * @gl: The glock to put * */ void gfs2_glock_put(struct gfs2_glock *gl) { if (lockref_put_or_lock(&gl->gl_lockref)) return; __gfs2_glock_put(gl); } /** * may_grant - check if it's ok to grant a new lock * @gl: The glock * @current_gh: One of the current holders of @gl * @gh: The lock request which we wish to grant * * With our current compatibility rules, if a glock has one or more active * holders (HIF_HOLDER flag set), any of those holders can be passed in as * @current_gh; they are all the same as far as compatibility with the new @gh * goes. * * Returns true if it's ok to grant the lock. */ static inline bool may_grant(struct gfs2_glock *gl, struct gfs2_holder *current_gh, struct gfs2_holder *gh) { if (current_gh) { GLOCK_BUG_ON(gl, !test_bit(HIF_HOLDER, ¤t_gh->gh_iflags)); switch(current_gh->gh_state) { case LM_ST_EXCLUSIVE: /* * Here we make a special exception to grant holders * who agree to share the EX lock with other holders * who also have the bit set. If the original holder * has the LM_FLAG_NODE_SCOPE bit set, we grant more * holders with the bit set. 
*/ return gh->gh_state == LM_ST_EXCLUSIVE && (current_gh->gh_flags & LM_FLAG_NODE_SCOPE) && (gh->gh_flags & LM_FLAG_NODE_SCOPE); case LM_ST_SHARED: case LM_ST_DEFERRED: return gh->gh_state == current_gh->gh_state; default: return false; } } if (gl->gl_state == gh->gh_state) return true; if (gh->gh_flags & GL_EXACT) return false; if (gl->gl_state == LM_ST_EXCLUSIVE) { return gh->gh_state == LM_ST_SHARED || gh->gh_state == LM_ST_DEFERRED; } if (gh->gh_flags & LM_FLAG_ANY) return gl->gl_state != LM_ST_UNLOCKED; return false; } static void gfs2_holder_wake(struct gfs2_holder *gh) { clear_bit(HIF_WAIT, &gh->gh_iflags); smp_mb__after_atomic(); wake_up_bit(&gh->gh_iflags, HIF_WAIT); if (gh->gh_flags & GL_ASYNC) { struct gfs2_sbd *sdp = gh->gh_gl->gl_name.ln_sbd; wake_up(&sdp->sd_async_glock_wait); } } /** * do_error - Something unexpected has happened during a lock request * @gl: The glock * @ret: The status from the DLM */ static void do_error(struct gfs2_glock *gl, const int ret) { struct gfs2_holder *gh, *tmp; list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { if (test_bit(HIF_HOLDER, &gh->gh_iflags)) continue; if (ret & LM_OUT_ERROR) gh->gh_error = -EIO; else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) gh->gh_error = GLR_TRYFAILED; else continue; list_del_init(&gh->gh_list); trace_gfs2_glock_queue(gh, 0); gfs2_holder_wake(gh); } } /** * find_first_holder - find the first "holder" gh * @gl: the glock */ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) { struct gfs2_holder *gh; if (!list_empty(&gl->gl_holders)) { gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list); if (test_bit(HIF_HOLDER, &gh->gh_iflags)) return gh; } return NULL; } /* * gfs2_instantiate - Call the glops instantiate function * @gh: The glock holder * * Returns: 0 if instantiate was successful, or error. 
*/
int gfs2_instantiate(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	int ret;

again:
	/* Nothing to instantiate: go straight to the go_held hook. */
	if (!test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags))
		goto done;

	/*
	 * Since we unlock the lockref lock, we set a flag to indicate
	 * instantiate is in progress.
	 */
	if (test_and_set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) {
		wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG,
			    TASK_UNINTERRUPTIBLE);
		/*
		 * Here we just waited for a different instantiate to finish.
		 * But that may not have been successful, as when a process
		 * locks an inode glock _before_ it has an actual inode to
		 * instantiate into. So we check again. This process might
		 * have an inode to instantiate, so might be successful.
		 */
		goto again;
	}

	ret = glops->go_instantiate(gl);
	/* GLF_INSTANTIATE_NEEDED stays set when go_instantiate fails. */
	if (!ret)
		clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags);
	clear_and_wake_up_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
	if (ret)
		return ret;

done:
	if (glops->go_held)
		return glops->go_held(gh);
	return 0;
}

/**
 * do_promote - promote as many requests as possible on the current queue
 * @gl: The glock
 *
 * Walks gl_holders and sets HIF_HOLDER on each waiter that may_grant()
 * accepts, waking it.  The walk stops at the first waiter that cannot be
 * granted.
 *
 * Returns true on success (i.e., progress was made or there are no waiters).
 */
static bool do_promote(struct gfs2_glock *gl)
{
	struct gfs2_holder *gh, *current_gh;

	current_gh = find_first_holder(gl);
	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (!may_grant(gl, current_gh, gh)) {
			/*
			 * If we get here, it means we may not grant this
			 * holder for some reason. If this holder is at the
			 * head of the list, it means we have a blocked holder
			 * at the head, so return false.
			 */
			if (list_is_first(&gh->gh_list, &gl->gl_holders))
				return false;
			do_error(gl, 0);
			break;
		}
		set_bit(HIF_HOLDER, &gh->gh_iflags);
		trace_gfs2_promote(gh);
		gfs2_holder_wake(gh);
		/* The first promoted waiter becomes the reference holder. */
		if (!current_gh)
			current_gh = gh;
	}
	return true;
}

/**
 * find_first_waiter - find the first gh that's waiting for the glock
 * @gl: the glock
 *
 * Returns the first entry of gl_holders without HIF_HOLDER set, or NULL if
 * there is none.
 */
static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
			return gh;
	}
	return NULL;
}

/**
 * state_change - record that the glock is now in a different state
 * @gl: the glock
 * @new_state: the new state
 *
 * The lockref count is adjusted by one whenever the glock changes between
 * LM_ST_UNLOCKED and any other state.  gl_tchange is set to the current
 * jiffies value.
 */
static void state_change(struct gfs2_glock *gl, unsigned int new_state)
{
	int held1, held2;

	held1 = (gl->gl_state != LM_ST_UNLOCKED);
	held2 = (new_state != LM_ST_UNLOCKED);

	if (held1 != held2) {
		GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
		if (held2)
			gl->gl_lockref.count++;
		else
			gl->gl_lockref.count--;
	}
	if (new_state != gl->gl_target)
		/* shorten our minimum hold time */
		gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
				       GL_GLOCK_MIN_HOLD);
	gl->gl_state = new_state;
	gl->gl_tchange = jiffies;
}

/*
 * gfs2_set_demote - flag the glock for demotion
 * @gl: the glock
 *
 * Sets GLF_DEMOTE and wakes the superblock's sd_async_glock_wait queue.
 */
static void gfs2_set_demote(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	set_bit(GLF_DEMOTE, &gl->gl_flags);
	smp_mb();
	wake_up(&sdp->sd_async_glock_wait);
}

/*
 * gfs2_demote_wake - clear a demote request and wake its waiters
 * @gl: the glock
 *
 * Resets gl_demote_state to LM_ST_EXCLUSIVE, clears GLF_DEMOTE and wakes
 * anybody waiting on that bit.
 */
static void gfs2_demote_wake(struct gfs2_glock *gl)
{
	gl->gl_demote_state = LM_ST_EXCLUSIVE;
	clear_bit(GLF_DEMOTE, &gl->gl_flags);
	smp_mb__after_atomic();
	wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
}

/**
 * finish_xmote - The DLM has replied to one of our lock requests
 * @gl: The glock
 * @ret: The status from the DLM
 *
 * Takes gl_lockref.lock itself and releases it again before returning.
 * GLF_LOCK is cleared on the paths that leave through the "out" label; the
 * paths that issue another do_xmote() return without clearing it.
 */
static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_holder *gh;
	unsigned state = ret & LM_OUT_ST_MASK;

	spin_lock(&gl->gl_lockref.lock);
	trace_gfs2_glock_state_change(gl, state);
	state_change(gl, state);
	gh = find_first_waiter(gl);

	/* Demote to UN request arrived during demote to SH or DF */
	if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
	    state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
		gl->gl_target = LM_ST_UNLOCKED;

	/* Check for state != intended state */
	if (unlikely(state != gl->gl_target)) {
		if (gh && (ret & LM_OUT_CANCELED))
			gfs2_holder_wake(gh);
		if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
			/* move to back of queue and try next entry */
			if (ret & LM_OUT_CANCELED) {
				list_move_tail(&gh->gh_list, &gl->gl_holders);
				/*
				 * gh was just re-queued above, so a waiter is
				 * still on the list here.
				 */
				gh = find_first_waiter(gl);
				gl->gl_target = gh->gh_state;
				if (do_promote(gl))
					goto out;
				goto retry;
			}
			/* Some error or failed "try lock" - report it */
			if ((ret & LM_OUT_ERROR) ||
			    (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
				gl->gl_target = gl->gl_state;
				do_error(gl, ret);
				goto out;
			}
		}
		switch(state) {
		/* Unlocked due to conversion deadlock, try again */
		case LM_ST_UNLOCKED:
retry:
			do_xmote(gl, gh, gl->gl_target);
			break;
		/* Conversion fails, unlock and try again */
		case LM_ST_SHARED:
		case LM_ST_DEFERRED:
			do_xmote(gl, gh, LM_ST_UNLOCKED);
			break;
		default: /* Everything else */
			fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n",
			       gl->gl_target, state);
			GLOCK_BUG_ON(gl, 1);
		}
		spin_unlock(&gl->gl_lockref.lock);
		return;
	}

	/* Fast path - we got what we asked for */
	if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
		gfs2_demote_wake(gl);
	if (state != LM_ST_UNLOCKED) {
		if (glops->go_xmote_bh) {
			int rv;

			/* go_xmote_bh is called without the lockref lock. */
			spin_unlock(&gl->gl_lockref.lock);
			rv = glops->go_xmote_bh(gl);
			spin_lock(&gl->gl_lockref.lock);
			if (rv) {
				do_error(gl, rv);
				goto out;
			}
		}
		do_promote(gl);
	}
out:
	clear_bit(GLF_LOCK, &gl->gl_flags);
	spin_unlock(&gl->gl_lockref.lock);
}

/*
 * is_system_glock - test whether @gl is the statfs inode's glock
 * @gl: the glock
 *
 * Returns true only when @gl is the i_gl of sdp->sd_statfs_inode.
 */
static bool is_system_glock(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

	if (gl == m_ip->i_gl)
		return true;
	return false;
}

/**
 * do_xmote - Calls the DLM to change the state of a lock
 * @gl: The lock
state
 * @gh: The holder (only for promotes)
 * @target: The target lock state
 *
 */
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh,
		     unsigned int target)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
	int ret;

	if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
	    gh && !(gh->gh_flags & LM_FLAG_NOEXP))
		goto skip_inval;

	/* Only these holder flags are passed on to lm_lock below. */
	lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP);
	GLOCK_BUG_ON(gl, gl->gl_state == target);
	GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
	if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
	    glops->go_inval) {
		/*
		 * If another process is already doing the invalidate, let that
		 * finish first.  The glock state machine will get back to this
		 * holder again later.
		 */
		if (test_and_set_bit(GLF_INVALIDATE_IN_PROGRESS,
				     &gl->gl_flags))
			return;
		do_error(gl, 0); /* Fail queued try locks */
	}
	gl->gl_req = target;
	set_bit(GLF_BLOCKING, &gl->gl_flags);
	if ((gl->gl_req == LM_ST_UNLOCKED) ||
	    (gl->gl_state == LM_ST_EXCLUSIVE) ||
	    (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
		clear_bit(GLF_BLOCKING, &gl->gl_flags);
	/* Everything from here to the "out" label runs without the lock. */
	spin_unlock(&gl->gl_lockref.lock);
	if (glops->go_sync) {
		ret = glops->go_sync(gl);
		/* If we had a problem syncing (due to io errors or whatever,
		 * we should not invalidate the metadata or tell dlm to
		 * release the glock to other nodes.
		 */
		if (ret) {
			if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
				fs_err(sdp, "Error %d syncing glock \n", ret);
				gfs2_dump_glock(NULL, gl, true);
			}
			goto skip_inval;
		}
	}
	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
		/*
		 * The call to go_sync should have cleared out the ail list.
		 * If there are still items, we have a problem. We ought to
		 * withdraw, but we can't because the withdraw code also uses
		 * glocks. Warn about the error, dump the glock, then fall
		 * through and wait for logd to do the withdraw for us.
		 */
		if ((atomic_read(&gl->gl_ail_count) != 0) &&
		    (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
			gfs2_glock_assert_warn(gl,
					       !atomic_read(&gl->gl_ail_count));
			gfs2_dump_glock(NULL, gl, true);
		}
		glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
		clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
	}

skip_inval:
	gfs2_glock_hold(gl);
	/*
	 * Check for an error encountered since we called go_sync and go_inval.
	 * If so, we can't withdraw from the glock code because the withdraw
	 * code itself uses glocks (see function signal_our_withdraw) to
	 * change the mount to read-only. Most importantly, we must not call
	 * dlm to unlock the glock until the journal is in a known good state
	 * (after journal replay) otherwise other nodes may use the object
	 * (rgrp or dinode) and then later, journal replay will corrupt the
	 * file system. The best we can do here is wait for the logd daemon
	 * to see sd_log_error and withdraw, and in the meantime, requeue the
	 * work for later.
	 *
	 * We make a special exception for some system glocks, such as the
	 * system statfs inode glock, which needs to be granted before the
	 * gfs2_quotad daemon can exit, and that exit needs to finish before
	 * we can unmount the withdrawn file system.
	 *
	 * However, if we're just unlocking the lock (say, for unmount, when
	 * gfs2_gl_hash_clear calls clear_glock) and recovery is complete
	 * then it's okay to tell dlm to unlock it.
	 */
	if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp)))
		gfs2_withdraw_delayed(sdp);
	if (glock_blocked_by_withdraw(gl) &&
	    (target != LM_ST_UNLOCKED ||
	     test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) {
		if (!is_system_glock(gl)) {
			handle_callback(gl, LM_ST_UNLOCKED, 0, false); /* sets demote */
			/*
			 * Ordinarily, we would call dlm and its callback would call
			 * finish_xmote, which would call state_change() to the new state.
			 * Since we withdrew, we won't call dlm, so call state_change
			 * manually, but to the UNLOCKED state we desire.
			 */
			state_change(gl, LM_ST_UNLOCKED);
			/*
			 * We skip telling dlm to do the locking, so we won't get a
			 * reply that would otherwise clear GLF_LOCK. So we clear it here.
			 */
			clear_bit(GLF_LOCK, &gl->gl_flags);
			clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
			gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
			goto out;
		} else {
			clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
		}
	}

	if (sdp->sd_lockstruct.ls_ops->lm_lock)	{
		/* lock_dlm */
		ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
		if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
		    target == LM_ST_UNLOCKED &&
		    test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) {
			finish_xmote(gl, target);
			gfs2_glock_queue_work(gl, 0);
		} else if (ret) {
			fs_err(sdp, "lm_lock ret %d\n", ret);
			GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp));
		}
	} else { /* lock_nolock */
		finish_xmote(gl, target);
		gfs2_glock_queue_work(gl, 0);
	}
out:
	spin_lock(&gl->gl_lockref.lock);
}

/**
 * run_queue - do all outstanding tasks related to a glock
 * @gl: The glock in question
 * @nonblock: True if we must not block in run_queue
 *
 * Does nothing if GLF_LOCK is already set.  Otherwise GLF_LOCK is set here
 * and either cleared again on the out_sched / out_unlock paths or left set
 * while do_xmote() is called.
 */
static void run_queue(struct gfs2_glock *gl, const int nonblock)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	struct gfs2_holder *gh = NULL;

	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
		return;

	GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));

	if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
	    gl->gl_demote_state != gl->gl_state) {
		/* A demote is wanted; it has to wait while there are holders. */
		if (find_first_holder(gl))
			goto out_unlock;
		if (nonblock)
			goto out_sched;
		set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
		GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
		gl->gl_target = gl->gl_demote_state;
	} else {
		if (test_bit(GLF_DEMOTE, &gl->gl_flags))
			gfs2_demote_wake(gl);
		if (do_promote(gl))
			goto out_unlock;
		/*
		 * do_promote() returned false, i.e. the entry at the head of
		 * the queue is a waiter that could not be granted.
		 */
		gh = find_first_waiter(gl);
		gl->gl_target = gh->gh_state;
		if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
			do_error(gl, 0); /* Fail queued try locks */
	}
	do_xmote(gl, gh, gl->gl_target);
	return;

out_sched:
	/* Defer to the work queue; the extra count goes with the work item. */
	clear_bit(GLF_LOCK, &gl->gl_flags);
	smp_mb__after_atomic();
	gl->gl_lockref.count++;
	__gfs2_glock_queue_work(gl, 0);
	return;

out_unlock:
	clear_bit(GLF_LOCK, &gl->gl_flags);
	smp_mb__after_atomic();
	return;
}

/**
 * glock_set_object - set the gl_object field of a glock
 * @gl: the glock
 * @object: the object
 *
 * Warns and dumps the glock if gl_object was not NULL beforehand; the new
 * value is stored regardless.
 */
void glock_set_object(struct gfs2_glock *gl, void *object)
{
	void *prev_object;

	spin_lock(&gl->gl_lockref.lock);
	prev_object = gl->gl_object;
	gl->gl_object = object;
	spin_unlock(&gl->gl_lockref.lock);
	if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL)) {
		pr_warn("glock=%u/%llx\n",
			gl->gl_name.ln_type,
			(unsigned long long)gl->gl_name.ln_number);
		gfs2_dump_glock(NULL, gl, true);
	}
}

/**
 * glock_clear_object - clear the gl_object field of a glock
 * @gl: the glock
 * @object: object the glock currently points at
 *
 * Warns and dumps the glock if gl_object did not equal @object; the field is
 * set to NULL regardless.
 */
void glock_clear_object(struct gfs2_glock *gl, void *object)
{
	void *prev_object;

	spin_lock(&gl->gl_lockref.lock);
	prev_object = gl->gl_object;
	gl->gl_object = NULL;
	spin_unlock(&gl->gl_lockref.lock);
	if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == object)) {
		pr_warn("glock=%u/%llx\n",
			gl->gl_name.ln_type,
			(unsigned long long)gl->gl_name.ln_number);
		gfs2_dump_glock(NULL, gl, true);
	}
}

/*
 * gfs2_inode_remember_delete - record a deleted generation in the glock's LVB
 * @gl: the glock
 * @generation: the generation number to record
 *
 * A zero ri_magic is initialised to GFS2_MAGIC first.  The generation is
 * only stored when ri_magic equals GFS2_MAGIC.
 */
void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation)
{
	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;

	if (ri->ri_magic == 0)
		ri->ri_magic = cpu_to_be32(GFS2_MAGIC);
	if (ri->ri_magic == cpu_to_be32(GFS2_MAGIC))
		ri->ri_generation_deleted = cpu_to_be64(generation);
}

/*
 * gfs2_inode_already_deleted - check the glock's LVB for a recorded delete
 * @gl: the glock
 * @generation: the generation number to check
 *
 * Returns false when ri_magic is not GFS2_MAGIC; otherwise true when
 * @generation is less than or equal to the recorded ri_generation_deleted.
 */
bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation)
{
	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;

	if (ri->ri_magic != cpu_to_be32(GFS2_MAGIC))
		return false;
	return generation <= be64_to_cpu(ri->ri_generation_deleted);
}

/*
 * gfs2_glock_poke - briefly try to take and drop a shared lock on @gl
 * @gl: the glock
 *
 * Uses an on-stack holder with LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP.  The
 * holder is dequeued only if the enqueue succeeded and is always
 * uninitialized afterwards.
 */
static void gfs2_glock_poke(struct gfs2_glock *gl)
{
	int flags = LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP;
	struct gfs2_holder gh;
	int error;

	__gfs2_holder_init(gl, LM_ST_SHARED, flags, &gh, _RET_IP_);
	error = gfs2_glock_nq(&gh);
	if (!error)
		gfs2_glock_dq(&gh);
	gfs2_holder_uninit(&gh);
}

/*
 * gfs2_try_evict - try to get the inode attached to @gl evicted
 * @gl: the glock
 *
 * Returns true if an inode was attached and gl_object no longer yields a
 * grabbable inode afterwards; false otherwise (including when no inode was
 * attached to begin with).
 */
static bool gfs2_try_evict(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip;
	bool evicted = false;

	/*
	 * If there is contention on the iopen glock and we have an inode, try
	 * to grab and release the inode so that it can be evicted.  This will
	 * allow the remote node to go ahead and delete the inode without us
	 * having to do it, which will avoid rgrp glock thrashing.
	 *
	 * The remote node is likely still holding the corresponding inode
	 * glock, so it will run before we get to verify that the delete has
	 * happened below.
	 */
	spin_lock(&gl->gl_lockref.lock);
	ip = gl->gl_object;
	if (ip && !igrab(&ip->i_inode))
		ip = NULL;
	spin_unlock(&gl->gl_lockref.lock);
	if (ip) {
		gl->gl_no_formal_ino = ip->i_no_formal_ino;
		set_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
		d_prune_aliases(&ip->i_inode);
		iput(&ip->i_inode);

		/* If the inode was evicted, gl->gl_object will now be NULL. */
		spin_lock(&gl->gl_lockref.lock);
		ip = gl->gl_object;
		if (ip) {
			clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
			if (!igrab(&ip->i_inode))
				ip = NULL;
		}
		spin_unlock(&gl->gl_lockref.lock);
		if (ip) {
			/* Still in use: poke the inode glock, then let go. */
			gfs2_glock_poke(ip->i_gl);
			iput(&ip->i_inode);
		}
		evicted = !ip;
	}
	return evicted;
}

/*
 * gfs2_queue_try_to_evict - schedule an eviction attempt for @gl
 * @gl: the glock
 *
 * Returns false if GLF_TRY_TO_EVICT was already set; otherwise the result of
 * queueing gl_delete on sd_delete_wq with no delay.
 */
bool gfs2_queue_try_to_evict(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (test_and_set_bit(GLF_TRY_TO_EVICT, &gl->gl_flags))
		return false;
	return queue_delayed_work(sdp->sd_delete_wq,
				  &gl->gl_delete, 0);
}

/*
 * gfs2_queue_verify_evict - schedule verification of an eviction for @gl
 * @gl: the glock
 *
 * Returns false if GLF_VERIFY_EVICT was already set; otherwise the result of
 * queueing gl_delete on sd_delete_wq with a delay of 5 * HZ.
 */
static bool gfs2_queue_verify_evict(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (test_and_set_bit(GLF_VERIFY_EVICT, &gl->gl_flags))
		return false;
	return queue_delayed_work(sdp->sd_delete_wq,
				  &gl->gl_delete, 5 * HZ);
}

/*
 * delete_work_func - work handler for a glock's gl_delete delayed work
 * @work: the work_struct embedded in gl_delete
 *
 * Handles a pending GLF_TRY_TO_EVICT or GLF_VERIFY_EVICT request.  A glock
 * reference is dropped at "out" unless the work was queued again, in which
 * case the function returns without the put.
 */
static void delete_work_func(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete);
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct inode *inode;
	u64 no_addr = gl->gl_name.ln_number;

	if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) {
		/*
		 * If we can evict the inode, give the remote node trying to
		 * delete the inode some time before verifying that the delete
		 * has happened.  Otherwise, if we cause contention on the inode glock
		 * immediately, the remote node will think that we still have
		 * the inode in use, and so it will give up waiting.
		 *
		 * If we can't evict the inode, signal to the remote node that
		 * the inode is still in use.  We'll later try to delete the
		 * inode locally in gfs2_evict_inode.
		 *
		 * FIXME: We only need to verify that the remote node has
		 * deleted the inode because nodes before this remote delete
		 * rework won't cooperate.  At a later time, when we no longer
		 * care about compatibility with such nodes, we can skip this
		 * step entirely.
		 */
		if (gfs2_try_evict(gl)) {
			if (test_bit(SDF_KILL, &sdp->sd_flags))
				goto out;
			if (gfs2_queue_verify_evict(gl))
				return;
		}
		goto out;
	}

	if (test_and_clear_bit(GLF_VERIFY_EVICT, &gl->gl_flags)) {
		inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
					    GFS2_BLKST_UNLINKED);
		if (IS_ERR(inode)) {
			/* Retry later on -EAGAIN unless SDF_KILL is set. */
			if (PTR_ERR(inode) == -EAGAIN &&
			    !test_bit(SDF_KILL, &sdp->sd_flags) &&
			    gfs2_queue_verify_evict(gl))
				return;
		} else {
			d_prune_aliases(inode);
			iput(inode);
		}
	}

out:
	gfs2_glock_put(gl);
}

/*
 * glock_work_func - work handler for a glock's gl_work delayed work
 * @work: the work_struct embedded in gl_work
 *
 * Processes a pending reply (GLF_REPLY_PENDING), turns a pending demote into
 * a real one once the hold time has passed, runs the queue, and finally
 * drops the references accounted for in drop_refs.
 */
static void glock_work_func(struct work_struct *work)
{
	unsigned long delay = 0;
	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
	unsigned int drop_refs = 1;

	if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
		finish_xmote(gl, gl->gl_reply);
		/* One more reference to drop for the reply just handled. */
		drop_refs++;
	}
	spin_lock(&gl->gl_lockref.lock);
	if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
	    gl->gl_state != LM_ST_UNLOCKED &&
	    gl->gl_demote_state != LM_ST_EXCLUSIVE) {
		unsigned long holdtime, now = jiffies;

		holdtime = gl->gl_tchange + gl->gl_hold_time;
		if (time_before(now, holdtime))
			delay = holdtime - now;

		if (!delay) {
			clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
			gfs2_set_demote(gl);
		}
	}
	run_queue(gl, 0);
	if (delay) {
		/* Keep one glock reference for the work we requeue. */
		drop_refs--;
		if (gl->gl_name.ln_type != LM_TYPE_INODE)
			delay = 0;
		__gfs2_glock_queue_work(gl, delay);
	}

	/*
	 * Drop the remaining glock references manually here. (Mind that
	 * __gfs2_glock_queue_work depends on the lockref spinlock being held
	 * here as well.)
	 */
	gl->gl_lockref.count -= drop_refs;
	if (!gl->gl_lockref.count) {
		__gfs2_glock_put(gl);
		return;
	}
	spin_unlock(&gl->gl_lockref.lock);
}

/*
 * find_insert_glock - look up a glock by name, optionally inserting a new one
 * @name: the lock name to look for
 * @new: glock to insert if no glock with that name exists, or NULL to only
 *       look up
 *
 * If a glock is found but a reference cannot be taken because its lockref is
 * dead, the caller sleeps on the name's wait queue and the lookup is retried.
 *
 * Returns: the glock that was found (with a reference taken), NULL if none
 * was found, or an ERR_PTR from the hash table insert.
 */
static struct gfs2_glock *find_insert_glock(struct lm_lockname *name,
					    struct gfs2_glock *new)
{
	struct wait_glock_queue wait;
	wait_queue_head_t *wq = glock_waitqueue(name);
	struct gfs2_glock *gl;

	wait.name = name;
	init_wait(&wait.wait);
	wait.wait.func = glock_wake_function;

again:
	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
	rcu_read_lock();
	if (new) {
		gl = rhashtable_lookup_get_insert_fast(&gl_hash_table,
			&new->gl_node, ht_parms);
		if (IS_ERR(gl))
			goto out;
	} else {
		gl = rhashtable_lookup_fast(&gl_hash_table,
			name, ht_parms);
	}
	if (gl && !lockref_get_not_dead(&gl->gl_lockref)) {
		rcu_read_unlock();
		schedule();
		goto again;
	}
out:
	rcu_read_unlock();
	finish_wait(wq, &wait.wait);
	return gl;
}

/**
 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
 * @sdp: The GFS2 superblock
 * @number: the lock number
 * @glops: The glock_operations to use
 * @create: If 0, don't create the glock if it doesn't exist
 * @glp: the glock is returned here
 *
 * This does not lock a glock, just finds/creates structures for one.
*
 * Returns: errno
 */
int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
		   const struct gfs2_glock_operations *glops, int create,
		   struct gfs2_glock **glp)
{
	struct super_block *s = sdp->sd_vfs;
	struct lm_lockname name = { .ln_number = number,
				    .ln_type = glops->go_type,
				    .ln_sbd = sdp };
	struct gfs2_glock *gl, *tmp;
	struct address_space *mapping;
	int ret = 0;

	/* Fast path: the glock already exists. */
	gl = find_insert_glock(&name, NULL);
	if (gl) {
		*glp = gl;
		return 0;
	}
	if (!create)
		return -ENOENT;

	/* GLOF_ASPACE glocks are allocated from their own cache. */
	if (glops->go_flags & GLOF_ASPACE) {
		struct gfs2_glock_aspace *gla =
			kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_NOFS);
		if (!gla)
			return -ENOMEM;
		gl = &gla->glock;
	} else {
		gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_NOFS);
		if (!gl)
			return -ENOMEM;
	}
	memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
	gl->gl_ops = glops;

	if (glops->go_flags & GLOF_LVB) {
		gl->gl_lksb.sb_lvbptr = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
		if (!gl->gl_lksb.sb_lvbptr) {
			gfs2_glock_dealloc(&gl->gl_rcu);
			return -ENOMEM;
		}
	}

	/* Balanced by the atomic_dec_and_test() on the out_free path. */
	atomic_inc(&sdp->sd_glock_disposal);
	gl->gl_node.next = NULL;
	gl->gl_flags = glops->go_instantiate ? BIT(GLF_INSTANTIATE_NEEDED) : 0;
	gl->gl_name = name;
	lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
	gl->gl_lockref.count = 1;
	gl->gl_state = LM_ST_UNLOCKED;
	gl->gl_target = LM_ST_UNLOCKED;
	gl->gl_demote_state = LM_ST_EXCLUSIVE;
	gl->gl_dstamp = 0;
	preempt_disable();
	/* We use the global stats to estimate the initial per-glock stats */
	gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
	preempt_enable();
	gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
	gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
	gl->gl_tchange = jiffies;
	gl->gl_object = NULL;
	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
	/* Only iopen glocks get the delete work initialised. */
	if (gl->gl_name.ln_type == LM_TYPE_IOPEN)
		INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func);

	mapping = gfs2_glock2aspace(gl);
	if (mapping) {
		mapping->a_ops = &gfs2_meta_aops;
		mapping->host = s->s_bdev->bd_inode;
		mapping->flags = 0;
		mapping_set_gfp_mask(mapping, GFP_NOFS);
		mapping->private_data = NULL;
		mapping->writeback_index = 0;
	}

	/* NULL means our new glock went into the hash table. */
	tmp = find_insert_glock(&name, gl);
	if (!tmp) {
		*glp = gl;
		goto out;
	}
	if (IS_ERR(tmp)) {
		ret = PTR_ERR(tmp);
		goto out_free;
	}
	/* A glock with this name already exists: return it, free ours. */
	*glp = tmp;

out_free:
	gfs2_glock_dealloc(&gl->gl_rcu);
	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
		wake_up(&sdp->sd_kill_wait);

out:
	return ret;
}

/**
 * __gfs2_holder_init - initialize a struct gfs2_holder in the default way
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 * @ip: value stored in the holder's gh_ip field
 *
 * Takes a reference on @gl (via gfs2_glock_hold()) and on the pid of the
 * current task.
 */
void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
			struct gfs2_holder *gh, unsigned long ip)
{
	INIT_LIST_HEAD(&gh->gh_list);
	gh->gh_gl = gfs2_glock_hold(gl);
	gh->gh_ip = ip;
	gh->gh_owner_pid = get_pid(task_pid(current));
	gh->gh_state = state;
	gh->gh_flags = flags;
	gh->gh_iflags = 0;
}

/**
 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * Don't mess with the glock.
* */ void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh) { gh->gh_state = state; gh->gh_flags = flags; gh->gh_iflags = 0; gh->gh_ip = _RET_IP_; put_pid(gh->gh_owner_pid); gh->gh_owner_pid = get_pid(task_pid(current)); } /** * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference) * @gh: the holder structure * */ void gfs2_holder_uninit(struct gfs2_holder *gh) { put_pid(gh->gh_owner_pid); gfs2_glock_put(gh->gh_gl); gfs2_holder_mark_uninitialized(gh); gh->gh_ip = 0; } static void gfs2_glock_update_hold_time(struct gfs2_glock *gl, unsigned long start_time) { /* Have we waited longer that a second? */ if (time_after(jiffies, start_time + HZ)) { /* Lengthen the minimum hold time. */ gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR, GL_GLOCK_MAX_HOLD); } } /** * gfs2_glock_holder_ready - holder is ready and its error code can be collected * @gh: the glock holder * * Called when a glock holder no longer needs to be waited for because it is * now either held (HIF_HOLDER set; gh_error == 0), or acquiring the lock has * failed (gh_error != 0). 
*/ int gfs2_glock_holder_ready(struct gfs2_holder *gh) { if (gh->gh_error || (gh->gh_flags & GL_SKIP)) return gh->gh_error; gh->gh_error = gfs2_instantiate(gh); if (gh->gh_error) gfs2_glock_dq(gh); return gh->gh_error; } /** * gfs2_glock_wait - wait on a glock acquisition * @gh: the glock holder * * Returns: 0 on success */ int gfs2_glock_wait(struct gfs2_holder *gh) { unsigned long start_time = jiffies; might_sleep(); wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE); gfs2_glock_update_hold_time(gh->gh_gl, start_time); return gfs2_glock_holder_ready(gh); } static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs) { int i; for (i = 0; i < num_gh; i++) if (test_bit(HIF_WAIT, &ghs[i].gh_iflags)) return 1; return 0; } /** * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions * @num_gh: the number of holders in the array * @ghs: the glock holder array * * Returns: 0 on success, meaning all glocks have been granted and are held. * -ESTALE if the request timed out, meaning all glocks were released, * and the caller should retry the operation. */ int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs) { struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd; int i, ret = 0, timeout = 0; unsigned long start_time = jiffies; might_sleep(); /* * Total up the (minimum hold time * 2) of all glocks and use that to * determine the max amount of time we should wait. */ for (i = 0; i < num_gh; i++) timeout += ghs[i].gh_gl->gl_hold_time << 1; if (!wait_event_timeout(sdp->sd_async_glock_wait, !glocks_pending(num_gh, ghs), timeout)) { ret = -ESTALE; /* request timed out. 
*/ goto out; } for (i = 0; i < num_gh; i++) { struct gfs2_holder *gh = &ghs[i]; int ret2; if (test_bit(HIF_HOLDER, &gh->gh_iflags)) { gfs2_glock_update_hold_time(gh->gh_gl, start_time); } ret2 = gfs2_glock_holder_ready(gh); if (!ret) ret = ret2; } out: if (ret) { for (i = 0; i < num_gh; i++) { struct gfs2_holder *gh = &ghs[i]; gfs2_glock_dq(gh); } } return ret; } /** * handle_callback - process a demote request * @gl: the glock * @state: the state the caller wants us to change to * @delay: zero to demote immediately; otherwise pending demote * @remote: true if this came from a different cluster node * * There are only two requests that we are going to see in actual * practise: LM_ST_SHARED and LM_ST_UNLOCKED */ static void handle_callback(struct gfs2_glock *gl, unsigned int state, unsigned long delay, bool remote) { if (delay) set_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); else gfs2_set_demote(gl); if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { gl->gl_demote_state = state; gl->gl_demote_time = jiffies; } else if (gl->gl_demote_state != LM_ST_UNLOCKED && gl->gl_demote_state != state) { gl->gl_demote_state = LM_ST_UNLOCKED; } if (gl->gl_ops->go_callback) gl->gl_ops->go_callback(gl, remote); trace_gfs2_demote_rq(gl, remote); } void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); if (seq) { seq_vprintf(seq, fmt, args); } else { vaf.fmt = fmt; vaf.va = &args; pr_err("%pV", &vaf); } va_end(args); } static inline bool pid_is_meaningful(const struct gfs2_holder *gh) { if (!(gh->gh_flags & GL_NOPID)) return true; if (gh->gh_state == LM_ST_UNLOCKED) return true; return false; } /** * add_to_queue - Add a holder to the wait queue (but look for recursion) * @gh: the holder structure to add * * Eventually we should move the recursive locking trap to a * debugging option or something like that. This is the fast * path and needs to have the minimum number of distractions. 
*
 */
static inline void add_to_queue(struct gfs2_holder *gh)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	/*
	 * NOTE(review): insert_pt is never assigned after this initialisation,
	 * so the likely(insert_pt == NULL) branch below always returns and the
	 * statements after it look unreachable - confirm and consider removing.
	 */
	struct list_head *insert_pt = NULL;
	struct gfs2_holder *gh2;
	int try_futile = 0;

	GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL);
	/* A holder must not be queued while it is already waiting. */
	if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
		GLOCK_BUG_ON(gl, true);

	if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
		if (test_bit(GLF_LOCK, &gl->gl_flags)) {
			struct gfs2_holder *current_gh;

			current_gh = find_first_holder(gl);
			try_futile = !may_grant(gl, current_gh, gh);
		}
		/* Try locks fail right away while an invalidate is running. */
		if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
			goto fail;
	}

	/* Trap a second request from the same owner pid on this glock. */
	list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
		if (likely(gh2->gh_owner_pid != gh->gh_owner_pid))
			continue;
		if (gh->gh_gl->gl_ops->go_type == LM_TYPE_FLOCK)
			continue;
		if (!pid_is_meaningful(gh2))
			continue;
		goto trap_recursive;
	}
	list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
		if (try_futile &&
		    !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
fail:
			gh->gh_error = GLR_TRYFAILED;
			gfs2_holder_wake(gh);
			return;
		}
		if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
			continue;
	}
	trace_gfs2_glock_queue(gh, 1);
	gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
	if (likely(insert_pt == NULL)) {
		list_add_tail(&gh->gh_list, &gl->gl_holders);
		return;
	}
	list_add_tail(&gh->gh_list, insert_pt);
	spin_unlock(&gl->gl_lockref.lock);
	if (sdp->sd_lockstruct.ls_ops->lm_cancel)
		sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
	spin_lock(&gl->gl_lockref.lock);
	return;

trap_recursive:
	/* Report both the original and the new request, then BUG(). */
	fs_err(sdp, "original: %pSR\n", (void *)gh2->gh_ip);
	fs_err(sdp, "pid: %d\n", pid_nr(gh2->gh_owner_pid));
	fs_err(sdp, "lock type: %d req lock state : %d\n",
	       gh2->gh_gl->gl_name.ln_type, gh2->gh_state);
	fs_err(sdp, "new: %pSR\n", (void *)gh->gh_ip);
	fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid));
	fs_err(sdp, "lock type: %d req lock state : %d\n",
	       gh->gh_gl->gl_name.ln_type, gh->gh_state);
	gfs2_dump_glock(NULL, gl, true);
	BUG();
}

/**
 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
 * @gh: the holder structure
 *
 * if (gh->gh_flags & GL_ASYNC), this never returns an error
 *
 * Returns: 0, GLR_TRYFAILED, or errno on failure
 */
int gfs2_glock_nq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	int error = 0;

	if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP))
		return -EIO;

	if (test_bit(GLF_LRU, &gl->gl_flags))
		gfs2_glock_remove_from_lru(gl);

	gh->gh_error = 0;
	spin_lock(&gl->gl_lockref.lock);
	add_to_queue(gh);
	if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) &&
		     test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) {
		set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
		/* Extra count for the work item queued just below. */
		gl->gl_lockref.count++;
		__gfs2_glock_queue_work(gl, 0);
	}
	run_queue(gl, 1);
	spin_unlock(&gl->gl_lockref.lock);

	/* GL_ASYNC requests are not waited for here. */
	if (!(gh->gh_flags & GL_ASYNC))
		error = gfs2_glock_wait(gh);

	return error;
}

/**
 * gfs2_glock_poll - poll to see if an async request has been completed
 * @gh: the holder
 *
 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
 */
int gfs2_glock_poll(struct gfs2_holder *gh)
{
	return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
}

/*
 * needs_demote - check whether a demote is requested or pending
 * @gl: the glock
 *
 * Returns true if GLF_DEMOTE or GLF_PENDING_DEMOTE is set.
 */
static inline bool needs_demote(struct gfs2_glock *gl)
{
	return (test_bit(GLF_DEMOTE, &gl->gl_flags) ||
		test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags));
}

/*
 * __gfs2_glock_dq - remove a holder from its glock's queue
 * @gh: the glock holder
 *
 * The only caller visible here, gfs2_glock_dq(), holds gl_lockref.lock
 * around this call.
 */
static void __gfs2_glock_dq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	unsigned delay = 0;
	int fast_path = 0;

	/*
	 * This holder should not be cached, so mark it for demote.
	 * Note: this should be done before the check for needs_demote
	 * below.
	 */
	if (gh->gh_flags & GL_NOCACHE)
		handle_callback(gl, LM_ST_UNLOCKED, 0, false);

	list_del_init(&gh->gh_list);
	clear_bit(HIF_HOLDER, &gh->gh_iflags);
	trace_gfs2_glock_queue(gh, 0);

	/*
	 * If there hasn't been a demote request we are done.
	 * (Let the remaining holders, if any, keep holding it.)
	 */
	if (!needs_demote(gl)) {
		if (list_empty(&gl->gl_holders))
			fast_path = 1;
	}

	if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
		gfs2_glock_add_to_lru(gl);

	if (unlikely(!fast_path)) {
		/* Extra count for the work item queued just below. */
		gl->gl_lockref.count++;
		if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
		    !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
		    gl->gl_name.ln_type == LM_TYPE_INODE)
			delay = gl->gl_hold_time;
		__gfs2_glock_queue_work(gl, delay);
	}
}

/**
 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
 * @gh: the glock holder
 *
 */
void gfs2_glock_dq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	spin_lock(&gl->gl_lockref.lock);
	if (!gfs2_holder_queued(gh)) {
		/*
		 * May have already been dequeued because the locking request
		 * was GL_ASYNC and it has failed in the meantime.
		 */
		goto out;
	}

	/*
	 * A waiter at the head of the queue: ask the lock module to cancel
	 * and wait for HIF_WAIT to clear.
	 */
	if (list_is_first(&gh->gh_list, &gl->gl_holders) &&
	    !test_bit(HIF_HOLDER, &gh->gh_iflags)) {
		spin_unlock(&gl->gl_lockref.lock);
		gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl);
		wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
		spin_lock(&gl->gl_lockref.lock);
	}

	/*
	 * If we're in the process of file system withdraw, we cannot just
	 * dequeue any glocks until our journal is recovered, lest we introduce
	 * file system corruption. We need two exceptions to this rule: We need
	 * to allow unlocking of nondisk glocks and the glock for our own
	 * journal that needs recovery.
	 */
	if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
	    glock_blocked_by_withdraw(gl) &&
	    gh->gh_gl != sdp->sd_jinode_gl) {
		sdp->sd_glock_dqs_held++;
		spin_unlock(&gl->gl_lockref.lock);
		might_sleep();
		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
			    TASK_UNINTERRUPTIBLE);
		spin_lock(&gl->gl_lockref.lock);
	}

	__gfs2_glock_dq(gh);
out:
	spin_unlock(&gl->gl_lockref.lock);
}

/*
 * gfs2_glock_dq_wait - dequeue a holder and wait for GLF_DEMOTE to clear
 * @gh: the glock holder
 */
void gfs2_glock_dq_wait(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	gfs2_glock_dq(gh);
	might_sleep();
	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
}

/**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and uninitialize it
 * @gh: the holder structure
 *
 */
void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
{
	gfs2_glock_dq(gh);
	gfs2_holder_uninit(gh);
}

/**
 * gfs2_glock_nq_num - acquire a glock based on lock number
 * @sdp: the filesystem
 * @number: the lock number
 * @glops: the glock operations for the type of glock
 * @state: the state to acquire the glock in
 * @flags: modifier flags for the acquisition
 * @gh: the struct gfs2_holder
 *
 * Returns: errno
 */
int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
		      const struct gfs2_glock_operations *glops,
		      unsigned int state, u16 flags, struct gfs2_holder *gh)
{
	struct gfs2_glock *gl;
	int error;

	error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
	if (!error) {
		error = gfs2_glock_nq_init(gl, state, flags, gh);
		/* Drop the reference obtained from gfs2_glock_get(). */
		gfs2_glock_put(gl);
	}

	return error;
}

/**
 * glock_compare - Compare two struct gfs2_glock structures for sorting
 * @arg_a: the first structure
 * @arg_b: the second structure
 *
 * Both arguments point at holder pointers; ordering is by ln_number.  Two
 * entries with equal lock numbers and equal go_type trigger BUG_ON().
 */
static int glock_compare(const void *arg_a, const void *arg_b)
{
	const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
	const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
	const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
	const struct lm_lockname *b = &gh_b->gh_gl->gl_name;

	if (a->ln_number > b->ln_number)
		return 1;
	if (a->ln_number < b->ln_number)
		return -1;
	BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
	return 0;
}

/**
 * nq_m_sync - synchronously acquire more than one glock in deadlock free order
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 * @p: placeholder for the holder structure to pass back
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */
static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
		     struct gfs2_holder **p)
{
	unsigned int x;
	int error = 0;

	for (x = 0; x < num_gh; x++)
		p[x] = &ghs[x];

	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);

	for (x = 0; x < num_gh; x++) {
		error = gfs2_glock_nq(p[x]);
		if (error) {
			/* Undo the acquisitions made so far, newest first. */
			while (x--)
				gfs2_glock_dq(p[x]);
			break;
		}
	}

	return error;
}

/**
 * gfs2_glock_nq_m - acquire multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */
int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
	struct gfs2_holder *tmp[4];
	struct gfs2_holder **pph = tmp;
	int error = 0;

	switch(num_gh) {
	case 0:
		return 0;
	case 1:
		return gfs2_glock_nq(ghs);
	default:
		/* Up to four holders fit in the on-stack pointer array. */
		if (num_gh <= 4)
			break;
		pph = kmalloc_array(num_gh, sizeof(struct gfs2_holder *),
				    GFP_NOFS);
		if (!pph)
			return -ENOMEM;
	}

	error = nq_m_sync(num_gh, ghs, pph);

	if (pph != tmp)
		kfree(pph);

	return error;
}

/**
 * gfs2_glock_dq_m - release multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 * The holders are dequeued from the last array element to the first.
 */
void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
	while (num_gh--)
		gfs2_glock_dq(&ghs[num_gh]);
}

void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{
	unsigned long delay = 0;
	unsigned long holdtime;
	unsigned long now = jiffies;

	gfs2_glock_hold(gl);
	spin_lock(&gl->gl_lockref.lock);
	holdtime = gl->gl_tchange + gl->gl_hold_time;
	if (!list_empty(&gl->gl_holders) &&
	    gl->gl_name.ln_type == LM_TYPE_INODE) {
		if (time_before(now, holdtime))
			delay =
holdtime - now; if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) delay = gl->gl_hold_time; } handle_callback(gl, state, delay, true); __gfs2_glock_queue_work(gl, delay); spin_unlock(&gl->gl_lockref.lock); } /** * gfs2_should_freeze - Figure out if glock should be frozen * @gl: The glock in question * * Glocks are not frozen if (a) the result of the dlm operation is * an error, (b) the locking operation was an unlock operation or * (c) if there is a "noexp" flagged request anywhere in the queue * * Returns: 1 if freezing should occur, 0 otherwise */ static int gfs2_should_freeze(const struct gfs2_glock *gl) { const struct gfs2_holder *gh; if (gl->gl_reply & ~LM_OUT_ST_MASK) return 0; if (gl->gl_target == LM_ST_UNLOCKED) return 0; list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (test_bit(HIF_HOLDER, &gh->gh_iflags)) continue; if (LM_FLAG_NOEXP & gh->gh_flags) return 0; } return 1; } /** * gfs2_glock_complete - Callback used by locking * @gl: Pointer to the glock * @ret: The return value from the dlm * * The gl_reply field is under the gl_lockref.lock lock so that it is ok * to use a bitfield shared with other glock state fields. 
*/ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) { struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; spin_lock(&gl->gl_lockref.lock); gl->gl_reply = ret; if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { if (gfs2_should_freeze(gl)) { set_bit(GLF_FROZEN, &gl->gl_flags); spin_unlock(&gl->gl_lockref.lock); return; } } gl->gl_lockref.count++; set_bit(GLF_REPLY_PENDING, &gl->gl_flags); __gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); } static int glock_cmp(void *priv, const struct list_head *a, const struct list_head *b) { struct gfs2_glock *gla, *glb; gla = list_entry(a, struct gfs2_glock, gl_lru); glb = list_entry(b, struct gfs2_glock, gl_lru); if (gla->gl_name.ln_number > glb->gl_name.ln_number) return 1; if (gla->gl_name.ln_number < glb->gl_name.ln_number) return -1; return 0; } /** * gfs2_dispose_glock_lru - Demote a list of glocks * @list: The list to dispose of * * Disposing of glocks may involve disk accesses, so that here we sort * the glocks by number (i.e. disk location of the inodes) so that if * there are any such accesses, they'll be sent in order (mostly). * * Must be called under the lru_lock, but may drop and retake this * lock. 
While the lru_lock is dropped, entries may vanish from the * list, but no new entries will appear on the list (since it is * private) */ static void gfs2_dispose_glock_lru(struct list_head *list) __releases(&lru_lock) __acquires(&lru_lock) { struct gfs2_glock *gl; list_sort(NULL, list, glock_cmp); while(!list_empty(list)) { gl = list_first_entry(list, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); clear_bit(GLF_LRU, &gl->gl_flags); if (!spin_trylock(&gl->gl_lockref.lock)) { add_back_to_lru: list_add(&gl->gl_lru, &lru_list); set_bit(GLF_LRU, &gl->gl_flags); atomic_inc(&lru_count); continue; } if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { spin_unlock(&gl->gl_lockref.lock); goto add_back_to_lru; } gl->gl_lockref.count++; if (demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED, 0, false); WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); __gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); cond_resched_lock(&lru_lock); } } /** * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote * @nr: The number of entries to scan * * This function selects the entries on the LRU which are able to * be demoted, and then kicks off the process by calling * gfs2_dispose_glock_lru() above. 
*/ static long gfs2_scan_glock_lru(int nr) { struct gfs2_glock *gl, *next; LIST_HEAD(dispose); long freed = 0; spin_lock(&lru_lock); list_for_each_entry_safe(gl, next, &lru_list, gl_lru) { if (nr-- <= 0) break; /* Test for being demotable */ if (!test_bit(GLF_LOCK, &gl->gl_flags)) { if (!spin_trylock(&gl->gl_lockref.lock)) continue; if (gl->gl_lockref.count <= 1 && (gl->gl_state == LM_ST_UNLOCKED || demote_ok(gl))) { list_move(&gl->gl_lru, &dispose); atomic_dec(&lru_count); freed++; } spin_unlock(&gl->gl_lockref.lock); } } if (!list_empty(&dispose)) gfs2_dispose_glock_lru(&dispose); spin_unlock(&lru_lock); return freed; } static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { if (!(sc->gfp_mask & __GFP_FS)) return SHRINK_STOP; return gfs2_scan_glock_lru(sc->nr_to_scan); } static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { return vfs_pressure_ratio(atomic_read(&lru_count)); } static struct shrinker *glock_shrinker; /** * glock_hash_walk - Call a function for glock in a hash bucket * @examiner: the function * @sdp: the filesystem * * Note that the function can be called multiple times on the same * object. So the user must ensure that the function can cope with * that. 
*/ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) { struct gfs2_glock *gl; struct rhashtable_iter iter; rhashtable_walk_enter(&gl_hash_table, &iter); do { rhashtable_walk_start(&iter); while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) { if (gl->gl_name.ln_sbd == sdp) examiner(gl); } rhashtable_walk_stop(&iter); } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); rhashtable_walk_exit(&iter); } void gfs2_cancel_delete_work(struct gfs2_glock *gl) { clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags); clear_bit(GLF_VERIFY_EVICT, &gl->gl_flags); if (cancel_delayed_work(&gl->gl_delete)) gfs2_glock_put(gl); } static void flush_delete_work(struct gfs2_glock *gl) { if (gl->gl_name.ln_type == LM_TYPE_IOPEN) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; if (cancel_delayed_work(&gl->gl_delete)) { queue_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, 0); } } } void gfs2_flush_delete_work(struct gfs2_sbd *sdp) { glock_hash_walk(flush_delete_work, sdp); flush_workqueue(sdp->sd_delete_wq); } /** * thaw_glock - thaw out a glock which has an unprocessed reply waiting * @gl: The glock to thaw * */ static void thaw_glock(struct gfs2_glock *gl) { if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) return; if (!lockref_get_not_dead(&gl->gl_lockref)) return; set_bit(GLF_REPLY_PENDING, &gl->gl_flags); gfs2_glock_queue_work(gl, 0); } /** * clear_glock - look at a glock and see if we can free it from glock cache * @gl: the glock to look at * */ static void clear_glock(struct gfs2_glock *gl) { gfs2_glock_remove_from_lru(gl); spin_lock(&gl->gl_lockref.lock); if (!__lockref_is_dead(&gl->gl_lockref)) { gl->gl_lockref.count++; if (gl->gl_state != LM_ST_UNLOCKED) handle_callback(gl, LM_ST_UNLOCKED, 0, false); __gfs2_glock_queue_work(gl, 0); } spin_unlock(&gl->gl_lockref.lock); } /** * gfs2_glock_thaw - Thaw any frozen glocks * @sdp: The super block * */ void gfs2_glock_thaw(struct gfs2_sbd *sdp) { glock_hash_walk(thaw_glock, sdp); } static void dump_glock(struct 
seq_file *seq, struct gfs2_glock *gl, bool fsid) { spin_lock(&gl->gl_lockref.lock); gfs2_dump_glock(seq, gl, fsid); spin_unlock(&gl->gl_lockref.lock); } static void dump_glock_func(struct gfs2_glock *gl) { dump_glock(NULL, gl, true); } static void withdraw_dq(struct gfs2_glock *gl) { spin_lock(&gl->gl_lockref.lock); if (!__lockref_is_dead(&gl->gl_lockref) && glock_blocked_by_withdraw(gl)) do_error(gl, LM_OUT_ERROR); /* remove pending waiters */ spin_unlock(&gl->gl_lockref.lock); } void gfs2_gl_dq_holders(struct gfs2_sbd *sdp) { glock_hash_walk(withdraw_dq, sdp); } /** * gfs2_gl_hash_clear - Empty out the glock hash table * @sdp: the filesystem * * Called when unmounting the filesystem. */ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) { set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); flush_workqueue(glock_workqueue); glock_hash_walk(clear_glock, sdp); flush_workqueue(glock_workqueue); wait_event_timeout(sdp->sd_kill_wait, atomic_read(&sdp->sd_glock_disposal) == 0, HZ * 600); glock_hash_walk(dump_glock_func, sdp); } static const char *state2str(unsigned state) { switch(state) { case LM_ST_UNLOCKED: return "UN"; case LM_ST_SHARED: return "SH"; case LM_ST_DEFERRED: return "DF"; case LM_ST_EXCLUSIVE: return "EX"; } return "??"; } static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) { char *p = buf; if (flags & LM_FLAG_TRY) *p++ = 't'; if (flags & LM_FLAG_TRY_1CB) *p++ = 'T'; if (flags & LM_FLAG_NOEXP) *p++ = 'e'; if (flags & LM_FLAG_ANY) *p++ = 'A'; if (flags & LM_FLAG_NODE_SCOPE) *p++ = 'n'; if (flags & GL_ASYNC) *p++ = 'a'; if (flags & GL_EXACT) *p++ = 'E'; if (flags & GL_NOCACHE) *p++ = 'c'; if (test_bit(HIF_HOLDER, &iflags)) *p++ = 'H'; if (test_bit(HIF_WAIT, &iflags)) *p++ = 'W'; if (flags & GL_SKIP) *p++ = 's'; *p = 0; return buf; } /** * dump_holder - print information about a glock holder * @seq: the seq_file struct * @gh: the glock holder * @fs_id_buf: pointer to file system id (if requested) * */ static void dump_holder(struct seq_file *seq, 
const struct gfs2_holder *gh, const char *fs_id_buf) { const char *comm = "(none)"; pid_t owner_pid = 0; char flags_buf[32]; rcu_read_lock(); if (pid_is_meaningful(gh)) { struct task_struct *gh_owner; comm = "(ended)"; owner_pid = pid_nr(gh->gh_owner_pid); gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); if (gh_owner) comm = gh_owner->comm; } gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n", fs_id_buf, state2str(gh->gh_state), hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), gh->gh_error, (long)owner_pid, comm, (void *)gh->gh_ip); rcu_read_unlock(); } static const char *gflags2str(char *buf, const struct gfs2_glock *gl) { const unsigned long *gflags = &gl->gl_flags; char *p = buf; if (test_bit(GLF_LOCK, gflags)) *p++ = 'l'; if (test_bit(GLF_DEMOTE, gflags)) *p++ = 'D'; if (test_bit(GLF_PENDING_DEMOTE, gflags)) *p++ = 'd'; if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags)) *p++ = 'p'; if (test_bit(GLF_DIRTY, gflags)) *p++ = 'y'; if (test_bit(GLF_LFLUSH, gflags)) *p++ = 'f'; if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags)) *p++ = 'i'; if (test_bit(GLF_REPLY_PENDING, gflags)) *p++ = 'r'; if (test_bit(GLF_INITIAL, gflags)) *p++ = 'I'; if (test_bit(GLF_FROZEN, gflags)) *p++ = 'F'; if (!list_empty(&gl->gl_holders)) *p++ = 'q'; if (test_bit(GLF_LRU, gflags)) *p++ = 'L'; if (gl->gl_object) *p++ = 'o'; if (test_bit(GLF_BLOCKING, gflags)) *p++ = 'b'; if (test_bit(GLF_FREEING, gflags)) *p++ = 'x'; if (test_bit(GLF_INSTANTIATE_NEEDED, gflags)) *p++ = 'n'; if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags)) *p++ = 'N'; if (test_bit(GLF_TRY_TO_EVICT, gflags)) *p++ = 'e'; if (test_bit(GLF_VERIFY_EVICT, gflags)) *p++ = 'E'; *p = 0; return buf; } /** * gfs2_dump_glock - print information about a glock * @seq: The seq_file struct * @gl: the glock * @fsid: If true, also dump the file system id * * The file format is as follows: * One line per object, capital letters are used to indicate objects * G = glock, I = Inode, R = rgrp, H = holder. 
Glocks are not indented, * other objects are indented by a single space and follow the glock to * which they are related. Fields are indicated by lower case letters * followed by a colon and the field value, except for strings which are in * [] so that its possible to see if they are composed of spaces for * example. The field's are n = number (id of the object), f = flags, * t = type, s = state, r = refcount, e = error, p = pid. * */ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) { const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned long long dtime; const struct gfs2_holder *gh; char gflags_buf[32]; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; char fs_id_buf[sizeof(sdp->sd_fsname) + 7]; unsigned long nrpages = 0; if (gl->gl_ops->go_flags & GLOF_ASPACE) { struct address_space *mapping = gfs2_glock2aspace(gl); nrpages = mapping->nrpages; } memset(fs_id_buf, 0, sizeof(fs_id_buf)); if (fsid && sdp) /* safety precaution */ sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); dtime = jiffies - gl->gl_demote_time; dtime *= 1000000/HZ; /* demote time in uSec */ if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) dtime = 0; gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d " "v:%d r:%d m:%ld p:%lu\n", fs_id_buf, state2str(gl->gl_state), gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, gflags2str(gflags_buf, gl), state2str(gl->gl_target), state2str(gl->gl_demote_state), dtime, atomic_read(&gl->gl_ail_count), atomic_read(&gl->gl_revokes), (int)gl->gl_lockref.count, gl->gl_hold_time, nrpages); list_for_each_entry(gh, &gl->gl_holders, gh_list) dump_holder(seq, gh, fs_id_buf); if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) glops->go_dump(seq, gl, fs_id_buf); } static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) { struct gfs2_glock *gl = iter_ptr; seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n", gl->gl_name.ln_type, (unsigned long 
long)gl->gl_name.ln_number, (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT], (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR], (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB], (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB], (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT], (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR], (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT], (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]); return 0; } static const char *gfs2_gltype[] = { "type", "reserved", "nondisk", "inode", "rgrp", "meta", "iopen", "flock", "plock", "quota", "journal", }; static const char *gfs2_stype[] = { [GFS2_LKS_SRTT] = "srtt", [GFS2_LKS_SRTTVAR] = "srttvar", [GFS2_LKS_SRTTB] = "srttb", [GFS2_LKS_SRTTVARB] = "srttvarb", [GFS2_LKS_SIRT] = "sirt", [GFS2_LKS_SIRTVAR] = "sirtvar", [GFS2_LKS_DCOUNT] = "dlm", [GFS2_LKS_QCOUNT] = "queue", }; #define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype)) static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) { struct gfs2_sbd *sdp = seq->private; loff_t pos = *(loff_t *)iter_ptr; unsigned index = pos >> 3; unsigned subindex = pos & 0x07; int i; if (index == 0 && subindex != 0) return 0; seq_printf(seq, "%-10s %8s:", gfs2_gltype[index], (index == 0) ? 
"cpu": gfs2_stype[subindex]); for_each_possible_cpu(i) { const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i); if (index == 0) seq_printf(seq, " %15u", i); else seq_printf(seq, " %15llu", (unsigned long long)lkstats-> lkstats[index - 1].stats[subindex]); } seq_putc(seq, '\n'); return 0; } int __init gfs2_glock_init(void) { int i, ret; ret = rhashtable_init(&gl_hash_table, &ht_parms); if (ret < 0) return ret; glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0); if (!glock_workqueue) { rhashtable_destroy(&gl_hash_table); return -ENOMEM; } glock_shrinker = shrinker_alloc(0, "gfs2-glock"); if (!glock_shrinker) { destroy_workqueue(glock_workqueue); rhashtable_destroy(&gl_hash_table); return -ENOMEM; } glock_shrinker->count_objects = gfs2_glock_shrink_count; glock_shrinker->scan_objects = gfs2_glock_shrink_scan; shrinker_register(glock_shrinker); for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++) init_waitqueue_head(glock_wait_table + i); return 0; } void gfs2_glock_exit(void) { shrinker_free(glock_shrinker); rhashtable_destroy(&gl_hash_table); destroy_workqueue(glock_workqueue); } static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n) { struct gfs2_glock *gl = gi->gl; if (gl) { if (n == 0) return; if (!lockref_put_not_zero(&gl->gl_lockref)) gfs2_glock_queue_put(gl); } for (;;) { gl = rhashtable_walk_next(&gi->hti); if (IS_ERR_OR_NULL(gl)) { if (gl == ERR_PTR(-EAGAIN)) { n = 1; continue; } gl = NULL; break; } if (gl->gl_name.ln_sbd != gi->sdp) continue; if (n <= 1) { if (!lockref_get_not_dead(&gl->gl_lockref)) continue; break; } else { if (__lockref_is_dead(&gl->gl_lockref)) continue; n--; } } gi->gl = gl; } static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct gfs2_glock_iter *gi = seq->private; loff_t n; /* * We can either stay where we are, skip to the next hash table * entry, or start from the beginning. 
*/ if (*pos < gi->last_pos) { rhashtable_walk_exit(&gi->hti); rhashtable_walk_enter(&gl_hash_table, &gi->hti); n = *pos + 1; } else { n = *pos - gi->last_pos; } rhashtable_walk_start(&gi->hti); gfs2_glock_iter_next(gi, n); gi->last_pos = *pos; return gi->gl; } static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) { struct gfs2_glock_iter *gi = seq->private; (*pos)++; gi->last_pos = *pos; gfs2_glock_iter_next(gi, 1); return gi->gl; } static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) __releases(RCU) { struct gfs2_glock_iter *gi = seq->private; rhashtable_walk_stop(&gi->hti); } static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) { dump_glock(seq, iter_ptr, false); return 0; } static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) { preempt_disable(); if (*pos >= GFS2_NR_SBSTATS) return NULL; return pos; } static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) { (*pos)++; if (*pos >= GFS2_NR_SBSTATS) return NULL; return pos; } static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr) { preempt_enable(); } static const struct seq_operations gfs2_glock_seq_ops = { .start = gfs2_glock_seq_start, .next = gfs2_glock_seq_next, .stop = gfs2_glock_seq_stop, .show = gfs2_glock_seq_show, }; static const struct seq_operations gfs2_glstats_seq_ops = { .start = gfs2_glock_seq_start, .next = gfs2_glock_seq_next, .stop = gfs2_glock_seq_stop, .show = gfs2_glstats_seq_show, }; static const struct seq_operations gfs2_sbstats_sops = { .start = gfs2_sbstats_seq_start, .next = gfs2_sbstats_seq_next, .stop = gfs2_sbstats_seq_stop, .show = gfs2_sbstats_seq_show, }; #define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL) static int __gfs2_glocks_open(struct inode *inode, struct file *file, const struct seq_operations *ops) { int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter)); if (ret == 0) { struct seq_file *seq = 
file->private_data; struct gfs2_glock_iter *gi = seq->private; gi->sdp = inode->i_private; seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); if (seq->buf) seq->size = GFS2_SEQ_GOODSIZE; /* * Initially, we are "before" the first hash table entry; the * first call to rhashtable_walk_next gets us the first entry. */ gi->last_pos = -1; gi->gl = NULL; rhashtable_walk_enter(&gl_hash_table, &gi->hti); } return ret; } static int gfs2_glocks_open(struct inode *inode, struct file *file) { return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops); } static int gfs2_glocks_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; if (gi->gl) gfs2_glock_put(gi->gl); rhashtable_walk_exit(&gi->hti); return seq_release_private(inode, file); } static int gfs2_glstats_open(struct inode *inode, struct file *file) { return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops); } static const struct file_operations gfs2_glocks_fops = { .owner = THIS_MODULE, .open = gfs2_glocks_open, .read = seq_read, .llseek = seq_lseek, .release = gfs2_glocks_release, }; static const struct file_operations gfs2_glstats_fops = { .owner = THIS_MODULE, .open = gfs2_glstats_open, .read = seq_read, .llseek = seq_lseek, .release = gfs2_glocks_release, }; struct gfs2_glockfd_iter { struct super_block *sb; unsigned int tgid; struct task_struct *task; unsigned int fd; struct file *file; }; static struct task_struct *gfs2_glockfd_next_task(struct gfs2_glockfd_iter *i) { struct pid_namespace *ns = task_active_pid_ns(current); struct pid *pid; if (i->task) put_task_struct(i->task); rcu_read_lock(); retry: i->task = NULL; pid = find_ge_pid(i->tgid, ns); if (pid) { i->tgid = pid_nr_ns(pid, ns); i->task = pid_task(pid, PIDTYPE_TGID); if (!i->task) { i->tgid++; goto retry; } get_task_struct(i->task); } rcu_read_unlock(); return i->task; } static struct file *gfs2_glockfd_next_file(struct gfs2_glockfd_iter *i) { if 
(i->file) { fput(i->file); i->file = NULL; } rcu_read_lock(); for(;; i->fd++) { struct inode *inode; i->file = task_lookup_next_fdget_rcu(i->task, &i->fd); if (!i->file) { i->fd = 0; break; } inode = file_inode(i->file); if (inode->i_sb == i->sb) break; rcu_read_unlock(); fput(i->file); rcu_read_lock(); } rcu_read_unlock(); return i->file; } static void *gfs2_glockfd_seq_start(struct seq_file *seq, loff_t *pos) { struct gfs2_glockfd_iter *i = seq->private; if (*pos) return NULL; while (gfs2_glockfd_next_task(i)) { if (gfs2_glockfd_next_file(i)) return i; i->tgid++; } return NULL; } static void *gfs2_glockfd_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) { struct gfs2_glockfd_iter *i = seq->private; (*pos)++; i->fd++; do { if (gfs2_glockfd_next_file(i)) return i; i->tgid++; } while (gfs2_glockfd_next_task(i)); return NULL; } static void gfs2_glockfd_seq_stop(struct seq_file *seq, void *iter_ptr) { struct gfs2_glockfd_iter *i = seq->private; if (i->file) fput(i->file); if (i->task) put_task_struct(i->task); } static void gfs2_glockfd_seq_show_flock(struct seq_file *seq, struct gfs2_glockfd_iter *i) { struct gfs2_file *fp = i->file->private_data; struct gfs2_holder *fl_gh = &fp->f_fl_gh; struct lm_lockname gl_name = { .ln_type = LM_TYPE_RESERVED }; if (!READ_ONCE(fl_gh->gh_gl)) return; spin_lock(&i->file->f_lock); if (gfs2_holder_initialized(fl_gh)) gl_name = fl_gh->gh_gl->gl_name; spin_unlock(&i->file->f_lock); if (gl_name.ln_type != LM_TYPE_RESERVED) { seq_printf(seq, "%d %u %u/%llx\n", i->tgid, i->fd, gl_name.ln_type, (unsigned long long)gl_name.ln_number); } } static int gfs2_glockfd_seq_show(struct seq_file *seq, void *iter_ptr) { struct gfs2_glockfd_iter *i = seq->private; struct inode *inode = file_inode(i->file); struct gfs2_glock *gl; inode_lock_shared(inode); gl = GFS2_I(inode)->i_iopen_gh.gh_gl; if (gl) { seq_printf(seq, "%d %u %u/%llx\n", i->tgid, i->fd, gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number); } 
gfs2_glockfd_seq_show_flock(seq, i); inode_unlock_shared(inode); return 0; } static const struct seq_operations gfs2_glockfd_seq_ops = { .start = gfs2_glockfd_seq_start, .next = gfs2_glockfd_seq_next, .stop = gfs2_glockfd_seq_stop, .show = gfs2_glockfd_seq_show, }; static int gfs2_glockfd_open(struct inode *inode, struct file *file) { struct gfs2_glockfd_iter *i; struct gfs2_sbd *sdp = inode->i_private; i = __seq_open_private(file, &gfs2_glockfd_seq_ops, sizeof(struct gfs2_glockfd_iter)); if (!i) return -ENOMEM; i->sb = sdp->sd_vfs; return 0; } static const struct file_operations gfs2_glockfd_fops = { .owner = THIS_MODULE, .open = gfs2_glockfd_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, }; DEFINE_SEQ_ATTRIBUTE(gfs2_sbstats); void gfs2_create_debugfs_file(struct gfs2_sbd *sdp) { sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, &gfs2_glocks_fops); debugfs_create_file("glockfd", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, &gfs2_glockfd_fops); debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, &gfs2_glstats_fops); debugfs_create_file("sbstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, &gfs2_sbstats_fops); } void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) { debugfs_remove_recursive(sdp->debugfs_dir); sdp->debugfs_dir = NULL; } void gfs2_register_debugfs(void) { gfs2_root = debugfs_create_dir("gfs2", NULL); } void gfs2_unregister_debugfs(void) { debugfs_remove(gfs2_root); gfs2_root = NULL; } |
1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 | /* * net/tipc/diag.c: TIPC socket diag * * Copyright (c) 2018, Ericsson AB * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "ASIS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "core.h" #include "socket.h" #include <linux/sock_diag.h> #include <linux/tipc_sockets_diag.h> static u64 __tipc_diag_gen_cookie(struct sock *sk) { u32 res[2]; sock_diag_save_cookie(sk, res); return *((u64 *)res); } static int __tipc_add_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, struct tipc_sock *tsk) { struct tipc_sock_diag_req *req = nlmsg_data(cb->nlh); struct nlmsghdr *nlh; int err; nlh = nlmsg_put_answer(skb, cb, SOCK_DIAG_BY_FAMILY, 0, NLM_F_MULTI); if (!nlh) return -EMSGSIZE; err = tipc_sk_fill_sock_diag(skb, cb, tsk, req->tidiag_states, __tipc_diag_gen_cookie); if (err) return err; nlmsg_end(skb, nlh); return 0; } static int tipc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { return tipc_nl_sk_walk(skb, cb, __tipc_add_sock_diag); } static int tipc_sock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct tipc_sock_diag_req); struct net *net = sock_net(skb->sk); if (nlmsg_len(h) < hdrlen) return -EINVAL; if (h->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = tipc_dump_start, .dump = tipc_diag_dump, .done = tipc_dump_done, }; netlink_dump_start(net->diag_nlsk, skb, h, &c); return 0; } return -EOPNOTSUPP; } static const struct sock_diag_handler tipc_sock_diag_handler = { .family = AF_TIPC, .dump = tipc_sock_diag_handler_dump, }; static int __init tipc_diag_init(void) { return sock_diag_register(&tipc_sock_diag_handler); } static void __exit 
tipc_diag_exit(void) { sock_diag_unregister(&tipc_sock_diag_handler); } module_init(tipc_diag_init); module_exit(tipc_diag_exit); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("TIPC socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC); |
259 94 121 64 25 49 37 37 164 189 69 29 44 70 138 38 8 1 29 2 27 1 50 50 1 3 51 6 45 12 3 65 62 3 3 63 105 93 26 3 25 105 93 93 25 214 215 215 198 4 1 27 27 22 5 23 6 6 558 434 22 92 348 55 47 16 31 184 118 334 101 27 302 110 18 94 352 199 199 199 199 199 196 105 92 || // SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic INET transport hashtables
 *
 * Authors:	Lotsa people, from code originally in tcp
 */

#include <linux/module.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>
#include <linux/memblock.h>

#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/inet6_hashtables.h>
#endif
#include <net/secure_seq.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>

/*
 * Hash an IPv4 (laddr, lport, faddr, fport) tuple for the established table.
 * A function-local secret is filled in once by net_get_random_once() and is
 * added to net_hash_mix(net) to form the seed handed to __inet_ehashfn().
 */
u32 inet_ehashfn(const struct net *net, const __be32 laddr,
		 const __u16 lport, const __be32 faddr,
		 const __be16 fport)
{
	static u32 inet_ehash_secret __read_mostly;

	net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));

	return __inet_ehashfn(laddr, lport, faddr, fport,
			      inet_ehash_secret + net_hash_mix(net));
}
EXPORT_SYMBOL_GPL(inet_ehashfn);

/* This function handles inet_sock, but also timewait and request sockets
 * for IPv4/IPv6.
 *
 * With CONFIG_IPV6, an AF_INET6 socket whose destination is not a v4-mapped
 * address is hashed with inet6_ehashfn(); every other socket is hashed with
 * inet_ehashfn() on its IPv4 address fields.
 */
static u32 sk_ehashfn(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6 &&
	    !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		return inet6_ehashfn(sock_net(sk),
				     &sk->sk_v6_rcv_saddr, sk->sk_num,
				     &sk->sk_v6_daddr, sk->sk_dport);
#endif
	return inet_ehashfn(sock_net(sk),
			    sk->sk_rcv_saddr, sk->sk_num,
			    sk->sk_daddr, sk->sk_dport);
}

/*
 * Allocate and initialize a new local port bind bucket.
 * The bindhash mutex for snum's hash chain must be held here.
 *
 * Returns the new bucket, already linked at the front of head->chain with
 * fastreuse and fastreuseport cleared, or NULL if the GFP_ATOMIC allocation
 * fails.
 */
struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
						 struct net *net,
						 struct inet_bind_hashbucket *head,
						 const unsigned short snum,
						 int l3mdev)
{
	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);

	if (tb) {
		write_pnet(&tb->ib_net, net);
		tb->l3mdev    = l3mdev;
		tb->port      = snum;
		tb->fastreuse = 0;
		tb->fastreuseport = 0;
		INIT_HLIST_HEAD(&tb->owners);
		hlist_add_head(&tb->node, &head->chain);
	}
	return tb;
}

/*
 * Caller must hold hashbucket lock for this tb with local BH disabled
 *
 * The bucket is unlinked and freed only when it has no owners left;
 * otherwise this is a no-op.
 */
void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
{
	if (hlist_empty(&tb->owners)) {
		__hlist_del(&tb->node);
		kmem_cache_free(cachep, tb);
	}
}

/* True when @tb belongs to @net and carries the given @port and @l3mdev. */
bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, const struct net *net,
			    unsigned short port, int l3mdev)
{
	return net_eq(ib_net(tb), net) && tb->port == port &&
		tb->l3mdev == l3mdev;
}

/*
 * Fill in a bhash2 bucket: netns, l3mdev, port and the receive address taken
 * from @sk (the IPv6 address for AF_INET6 sockets when CONFIG_IPV6 is
 * enabled, the IPv4 address otherwise), then link it at the front of
 * head->chain with empty owners and deathrow lists.
 */
static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb,
				   struct net *net,
				   struct inet_bind_hashbucket *head,
				   unsigned short port, int l3mdev,
				   const struct sock *sk)
{
	write_pnet(&tb->ib_net, net);
	tb->l3mdev    = l3mdev;
	tb->port      = port;
#if IS_ENABLED(CONFIG_IPV6)
	tb->family    = sk->sk_family;
	if (sk->sk_family == AF_INET6)
		tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
	else
#endif
		tb->rcv_saddr = sk->sk_rcv_saddr;
	INIT_HLIST_HEAD(&tb->owners);
	INIT_HLIST_HEAD(&tb->deathrow);
	hlist_add_head(&tb->node, &head->chain);
}

/*
 * Allocate (GFP_ATOMIC) and initialise a bhash2 bucket for @sk.
 * Returns NULL if the allocation fails.
 */
struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
						   struct net *net,
						   struct inet_bind_hashbucket *head,
						   unsigned short port,
						   int l3mdev,
						   const struct sock *sk)
{
	struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);

	if (tb)
		inet_bind2_bucket_init(tb, net, head, port, l3mdev, sk);

	return tb;
}

/* Caller must hold hashbucket lock for this tb with local BH disabled
 *
 * The bucket is freed only when both its owners and deathrow lists are empty.
 */
void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
{
	if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) {
		__hlist_del(&tb->node);
		kmem_cache_free(cachep, tb);
	}
}

/*
 * Compare the address stored in @tb2 with the receive address of @sk.
 *
 * With CONFIG_IPV6, when the families differ the IPv6 side must be a
 * v4-mapped address whose last 32-bit word equals the IPv4 side. When both
 * are AF_INET6 the full IPv6 addresses are compared. Otherwise the IPv4
 * receive addresses are compared.
 */
static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2,
					 const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family != tb2->family) {
		if (sk->sk_family == AF_INET)
			return ipv6_addr_v4mapped(&tb2->v6_rcv_saddr) &&
				tb2->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;

		return ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr) &&
			sk->sk_v6_rcv_saddr.s6_addr32[3] == tb2->rcv_saddr;
	}

	if (sk->sk_family == AF_INET6)
		return ipv6_addr_equal(&tb2->v6_rcv_saddr,
				       &sk->sk_v6_rcv_saddr);
#endif
	return tb2->rcv_saddr == sk->sk_rcv_saddr;
}

/*
 * Record @port as the socket's local port and attach @sk to both the bhash
 * bucket @tb and the bhash2 bucket @tb2, remembering each in the socket.
 */
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
		    struct inet_bind2_bucket *tb2, unsigned short port)
{
	inet_sk(sk)->inet_num = port;
	sk_add_bind_node(sk, &tb->owners);
	inet_csk(sk)->icsk_bind_hash = tb;
	sk_add_bind2_node(sk, &tb2->owners);
	inet_csk(sk)->icsk_bind2_hash = tb2;
}

/*
 * Get rid of any references to a local port held by the given sock.
 */
static void __inet_put_port(struct sock *sk)
{
	struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
	struct inet_bind_hashbucket *head, *head2;
	struct net *net = sock_net(sk);
	struct inet_bind_bucket *tb;
	int bhash;

	/* Both buckets are located before inet_num is cleared below. */
	bhash = inet_bhashfn(net, inet_sk(sk)->inet_num, hashinfo->bhash_size);
	head = &hashinfo->bhash[bhash];
	head2 = inet_bhashfn_portaddr(hashinfo, sk, net, inet_sk(sk)->inet_num);

	spin_lock(&head->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	__sk_del_bind_node(sk);
	inet_csk(sk)->icsk_bind_hash = NULL;
	inet_sk(sk)->inet_num = 0;
	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);

	/* The bhash2 lock nests inside the bhash lock. */
	spin_lock(&head2->lock);
	if (inet_csk(sk)->icsk_bind2_hash) {
		struct inet_bind2_bucket *tb2 = inet_csk(sk)->icsk_bind2_hash;

		__sk_del_bind2_node(sk);
		inet_csk(sk)->icsk_bind2_hash = NULL;
		inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
	}
	spin_unlock(&head2->lock);

	spin_unlock(&head->lock);
}

/* Run __inet_put_port() with bottom halves disabled. */
void inet_put_port(struct sock *sk)
{
	local_bh_disable();
	__inet_put_port(sk);
	local_bh_enable();
}
EXPORT_SYMBOL(inet_put_port);

/*
 * Attach @child to the bind buckets for its local port, reusing the
 * listener @sk's buckets when they fit.
 *
 * Returns 0 on success, -ENOENT if @sk has no bhash or bhash2 bucket, and
 * -ENOMEM if a required bucket cannot be allocated.
 */
int __inet_inherit_port(const struct sock *sk, struct sock *child)
{
	struct inet_hashinfo *table = tcp_or_dccp_get_hashinfo(sk);
	unsigned short port = inet_sk(child)->inet_num;
	struct inet_bind_hashbucket *head, *head2;
	bool created_inet_bind_bucket = false;
	struct net *net = sock_net(sk);
	bool update_fastreuse = false;
	struct inet_bind2_bucket *tb2;
	struct inet_bind_bucket *tb;
	int bhash, l3mdev;

	bhash = inet_bhashfn(net, port, table->bhash_size);
	head = &table->bhash[bhash];
	head2 = inet_bhashfn_portaddr(table, child, net, port);

	spin_lock(&head->lock);
	spin_lock(&head2->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	tb2 = inet_csk(sk)->icsk_bind2_hash;
	if (unlikely(!tb || !tb2)) {
		spin_unlock(&head2->lock);
		spin_unlock(&head->lock);
		return -ENOENT;
	}
	if (tb->port != port) {
		l3mdev = inet_sk_bound_l3mdev(sk);

		/* NOTE: using tproxy and redirecting skbs to a proxy
		 * on a different listener port breaks the assumption
		 * that the listener socket's icsk_bind_hash is the same
		 * as that of the child socket. We have to look up or
		 * create a new bind bucket for the child here.
		 */
		inet_bind_bucket_for_each(tb, &head->chain) {
			if (inet_bind_bucket_match(tb, net, port, l3mdev))
				break;
		}
		if (!tb) {
			tb = inet_bind_bucket_create(table->bind_bucket_cachep,
						     net, head, port, l3mdev);
			if (!tb) {
				spin_unlock(&head2->lock);
				spin_unlock(&head->lock);
				return -ENOMEM;
			}
			/* Remembered so the error path can undo the creation. */
			created_inet_bind_bucket = true;
		}
		update_fastreuse = true;

		/* The listener's tb2 is not used on this path; look one up. */
		goto bhash2_find;
	} else if (!inet_bind2_bucket_addr_match(tb2, child)) {
		l3mdev = inet_sk_bound_l3mdev(sk);

bhash2_find:
		tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, child);
		if (!tb2) {
			tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
						       net, head2, port,
						       l3mdev, child);
			if (!tb2)
				goto error;
		}
	}
	if (update_fastreuse)
		inet_csk_update_fastreuse(tb, child);
	inet_bind_hash(child, tb, tb2, port);
	spin_unlock(&head2->lock);
	spin_unlock(&head->lock);

	return 0;

error:
	if (created_inet_bind_bucket)
		inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
	spin_unlock(&head2->lock);
	spin_unlock(&head->lock);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);

/*
 * Return the lhash2 bucket for @sk, hashing its local port together with
 * its IPv6 receive address (AF_INET6 with CONFIG_IPV6) or its IPv4 receive
 * address otherwise.
 */
static struct inet_listen_hashbucket *
inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
{
	u32 hash;

#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		hash = ipv6_portaddr_hash(sock_net(sk),
					  &sk->sk_v6_rcv_saddr,
					  inet_sk(sk)->inet_num);
	else
#endif
		hash = ipv4_portaddr_hash(sock_net(sk),
					  inet_sk(sk)->inet_rcv_saddr,
					  inet_sk(sk)->inet_num);
	return inet_lhash2_bucket(h, hash);
}

/*
 * Score listener @sk against an incoming (daddr, hnum) on device dif/sdif.
 *
 * Returns -1 when @sk does not match (wrong netns or port, IPv6-only
 * socket, different receive address, or bound-device mismatch). Otherwise
 * the base score is 2 for a device-bound socket and 1 for an unbound one,
 * plus one for a PF_INET socket and plus one when sk_incoming_cpu equals
 * the current CPU.
 */
static inline int compute_score(struct sock *sk, struct net *net,
				const unsigned short hnum, const __be32 daddr,
				const int dif, const int sdif)
{
	int score = -1;

	if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
			!ipv6_only_sock(sk)) {
		if (sk->sk_rcv_saddr != daddr)
			return -1;

		if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
			return -1;
		score =  sk->sk_bound_dev_if ? 2 : 1;

		if (sk->sk_family == PF_INET)
			score++;
		if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
			score++;
	}
	return score;
}

/**
 * inet_lookup_reuseport() - execute reuseport logic on AF_INET socket if necessary.
 * @net: network namespace.
 * @sk: AF_INET socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP.
 * @skb: context for a potential SK_REUSEPORT program.
 * @doff: header offset.
 * @saddr: source address.
 * @sport: source port.
 * @daddr: destination address.
 * @hnum: destination port in host byte order.
 * @ehashfn: hash function used to generate the fallback hash.
 *
 * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
 *         the selected sock or an error.
 */
struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk,
				   struct sk_buff *skb, int doff,
				   __be32 saddr, __be16 sport,
				   __be32 daddr, unsigned short hnum,
				   inet_ehashfn_t *ehashfn)
{
	struct sock *reuse_sk = NULL;
	u32 phash;

	if (sk->sk_reuseport) {
		/* The hash is computed with the local side (daddr, hnum) first. */
		phash = INDIRECT_CALL_2(ehashfn, udp_ehashfn, inet_ehashfn,
					net, daddr, hnum, saddr, sport);
		reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
	}
	return reuse_sk;
}
EXPORT_SYMBOL_GPL(inet_lookup_reuseport);

/*
 * Here are some nice properties to exploit here. The BSD API
 * does not allow a listening sock to specify the remote port nor the
 * remote address for the connection. So always assume those are both
 * wildcarded during the search since they can never be otherwise.
 */

/* called with rcu_read_lock() : No refcount taken on the socket
 *
 * Walks one lhash2 bucket and keeps the socket with the highest
 * compute_score(). For each new best candidate, inet_lookup_reuseport() is
 * consulted first and a non-NULL result from it is returned immediately.
 */
static struct sock *inet_lhash2_lookup(struct net *net,
				struct inet_listen_hashbucket *ilb2,
				struct sk_buff *skb, int doff,
				const __be32 saddr, __be16 sport,
				const __be32 daddr, const unsigned short hnum,
				const int dif, const int sdif)
{
	struct sock *sk, *result = NULL;
	struct hlist_nulls_node *node;
	int score, hiscore = 0;

	sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
		score = compute_score(sk, net, hnum, daddr, dif, sdif);
		if (score > hiscore) {
			result = inet_lookup_reuseport(net, sk, skb, doff,
						       saddr, sport, daddr, hnum, inet_ehashfn);
			if (result)
				return result;

			result = sk;
			hiscore = score;
		}
	}

	return result;
}

/*
 * Run the sk_lookup BPF hook for an IPv4 packet and return the socket it
 * selected. Unless the hook reports that reuseport must be skipped, or the
 * selected socket is NULL or an error pointer, inet_lookup_reuseport() is
 * given the chance to replace the selection.
 */
struct sock *inet_lookup_run_sk_lookup(struct net *net,
				       int protocol,
				       struct sk_buff *skb, int doff,
				       __be32 saddr, __be16 sport,
				       __be32 daddr, u16 hnum, const int dif,
				       inet_ehashfn_t *ehashfn)
{
	struct sock *sk, *reuse_sk;
	bool no_reuseport;

	no_reuseport = bpf_sk_lookup_run_v4(net, protocol, saddr, sport,
					    daddr, hnum, dif, &sk);
	if (no_reuseport || IS_ERR_OR_NULL(sk))
		return sk;

	reuse_sk = inet_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum,
					 ehashfn);
	if (reuse_sk)
		sk = reuse_sk;
	return sk;
}

/*
 * Find a listening socket for (daddr, hnum).
 *
 * Order of attempts: the sk_lookup BPF redirect (only when the static branch
 * is enabled and @hashinfo is this netns' TCP hashinfo), then the lhash2
 * bucket for the exact destination address, then the bucket for INADDR_ANY.
 * An error-pointer result is reported to the caller as NULL.
 */
struct sock *__inet_lookup_listener(struct net *net,
				    struct inet_hashinfo *hashinfo,
				    struct sk_buff *skb, int doff,
				    const __be32 saddr, __be16 sport,
				    const __be32 daddr, const unsigned short hnum,
				    const int dif, const int sdif)
{
	struct inet_listen_hashbucket *ilb2;
	struct sock *result = NULL;
	unsigned int hash2;

	/* Lookup redirect from BPF */
	if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
	    hashinfo == net->ipv4.tcp_death_row.hashinfo) {
		result = inet_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff,
						   saddr, sport, daddr, hnum, dif,
						   inet_ehashfn);
		if (result)
			goto done;
	}

	hash2 = ipv4_portaddr_hash(net, daddr, hnum);
	ilb2 = inet_lhash2_bucket(hashinfo, hash2);

	result = inet_lhash2_lookup(net, ilb2, skb, doff,
				    saddr, sport, daddr, hnum,
				    dif, sdif);
	if (result)
		goto done;

	/* Lookup lhash2 with INADDR_ANY */
	hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
	ilb2 = inet_lhash2_bucket(hashinfo, hash2);

	result = inet_lhash2_lookup(net, ilb2, skb, doff,
				    saddr, sport, htonl(INADDR_ANY), hnum,
				    dif, sdif);
done:
	if (IS_ERR(result))
		return NULL;
	return result;
}
EXPORT_SYMBOL_GPL(__inet_lookup_listener);

/* All sockets share common refcount, but have different destructors
 *
 * Drops one reference; when it was the last one, the destructor is chosen
 * from sk_state: timewait, request socket, or full socket.
 */
void sock_gen_put(struct sock *sk)
{
	if (!refcount_dec_and_test(&sk->sk_refcnt))
		return;

	if (sk->sk_state == TCP_TIME_WAIT)
		inet_twsk_free(inet_twsk(sk));
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		reqsk_free(inet_reqsk(sk));
	else
		sk_free(sk);
}
EXPORT_SYMBOL_GPL(sock_gen_put);

/* Release the reference on skb->sk through sock_gen_put(). */
void sock_edemux(struct sk_buff *skb)
{
	sock_gen_put(skb->sk);
}
EXPORT_SYMBOL(sock_edemux);

/*
 * Look up a socket in the established hash for the given 4-tuple.
 *
 * Returns the socket with a reference taken, or NULL. NULL is also returned
 * when a matching socket is found but its refcount has already dropped to
 * zero.
 */
struct sock *__inet_lookup_established(struct net *net,
				  struct inet_hashinfo *hashinfo,
				  const __be32 saddr, const __be16 sport,
				  const __be32 daddr, const u16 hnum,
				  const int dif, const int sdif)
{
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
	struct sock *sk;
	const struct hlist_nulls_node *node;
	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
	unsigned int slot = hash & hashinfo->ehash_mask;
	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];

begin:
	sk_nulls_for_each_rcu(sk, node, &head->chain) {
		if (sk->sk_hash != hash)
			continue;
		if (likely(inet_match(net, sk, acookie, ports, dif, sdif))) {
			if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
				goto out;
			/* Re-check the match now that a reference is held;
			 * on mismatch drop it and rescan the chain.
			 */
			if (unlikely(!inet_match(net, sk, acookie,
						 ports, dif, sdif))) {
				sock_gen_put(sk);
				goto begin;
			}
			goto found;
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != slot)
		goto begin;
out:
	sk = NULL;
found:
	return sk;
}
EXPORT_SYMBOL_GPL(__inet_lookup_established);

/* called with local bh disabled
 *
 * Check that no established socket already uses sk's 4-tuple with local
 * port @lport and, if so, hash @sk into the established table.
 *
 * A matching TIME_WAIT socket does not count as a conflict when
 * twsk_unique() accepts it; it is then removed from the chain and either
 * handed back through @twp or, when @twp is NULL, descheduled here.
 *
 * Returns 0 on success or -EADDRNOTAVAIL when the tuple is already in use.
 */
static int __inet_check_established(struct inet_timewait_death_row *death_row,
				    struct sock *sk, __u16 lport,
				    struct inet_timewait_sock **twp)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_sock *inet = inet_sk(sk);
	/* Note: daddr holds the local receive address, saddr the peer. */
	__be32 daddr = inet->inet_rcv_saddr;
	__be32 saddr = inet->inet_daddr;
	int dif = sk->sk_bound_dev_if;
	struct net *net = sock_net(sk);
	int sdif = l3mdev_master_ifindex_by_index(net, dif);
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
	unsigned int hash = inet_ehashfn(net, daddr, lport,
					 saddr, inet->inet_dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
	struct sock *sk2;
	const struct hlist_nulls_node *node;
	struct inet_timewait_sock *tw = NULL;

	spin_lock(lock);

	sk_nulls_for_each(sk2, node, &head->chain) {
		if (sk2->sk_hash != hash)
			continue;

		if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) {
			if (sk2->sk_state == TCP_TIME_WAIT) {
				tw = inet_twsk(sk2);
				if (twsk_unique(sk, sk2, twp))
					break;
			}
			goto not_unique;
		}
	}

	/* Must record num and sport now. Otherwise we will see
	 * in hash table socket with a funny identity.
	 */
	inet->inet_num = lport;
	inet->inet_sport = htons(lport);
	sk->sk_hash = hash;
	WARN_ON(!sk_unhashed(sk));
	__sk_nulls_add_node_rcu(sk, &head->chain);
	if (tw) {
		sk_nulls_del_node_init_rcu((struct sock *)tw);
		__NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
	}
	spin_unlock(lock);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

	if (twp) {
		*twp = tw;
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule_put(tw);
	}
	return 0;

not_unique:
	spin_unlock(lock);
	return -EADDRNOTAVAIL;
}

/*
 * Port-selection offset for @sk, computed by secure_ipv4_port_ephemeral()
 * from its local receive address, remote address and remote port.
 */
static u64 inet_sk_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);

	return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
					  inet->inet_daddr,
					  inet->inet_dport);
}

/* Searches for an existing socket in the ehash bucket list.
 * Returns true if found, false otherwise.
 *
 * Only entries whose sk_hash equals sk's are compared; the comparison uses
 * inet_match() for AF_INET and, with CONFIG_IPV6, inet6_match() for
 * AF_INET6. Both dif and sdif are taken from sk->sk_bound_dev_if.
 */
static bool inet_ehash_lookup_by_sk(struct sock *sk,
				    struct hlist_nulls_head *list)
{
	const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num);
	const int sdif = sk->sk_bound_dev_if;
	const int dif = sk->sk_bound_dev_if;
	const struct hlist_nulls_node *node;
	struct net *net = sock_net(sk);
	struct sock *esk;

	INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);

	sk_nulls_for_each_rcu(esk, node, list) {
		if (esk->sk_hash != sk->sk_hash)
			continue;
		if (sk->sk_family == AF_INET) {
			if (unlikely(inet_match(net, esk, acookie,
						ports, dif, sdif))) {
				return true;
			}
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (sk->sk_family == AF_INET6) {
			if (unlikely(inet6_match(net, esk,
						 &sk->sk_v6_daddr,
						 &sk->sk_v6_rcv_saddr,
						 ports, dif, sdif))) {
				return true;
			}
		}
#endif
	}
	return false;
}

/* Insert a socket into ehash, and eventually remove another one
 * (The another one can be a SYN_RECV or TIMEWAIT)
 * If an existing socket already exists, socket sk is not inserted,
 * and sets found_dup_sk parameter to true.
 */
bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
{
	struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
	struct inet_ehash_bucket *head;
	struct hlist_nulls_head *list;
	spinlock_t *lock;
	bool ret = true;

	WARN_ON_ONCE(!sk_unhashed(sk));

	sk->sk_hash = sk_ehashfn(sk);
	head = inet_ehash_bucket(hashinfo, sk->sk_hash);
	list = &head->chain;
	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);

	spin_lock(lock);
	if (osk) {
		WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
		/* sk is inserted only if osk was actually removed here. */
		ret = sk_nulls_del_node_init_rcu(osk);
	} else if (found_dup_sk) {
		*found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
		if (*found_dup_sk)
			ret = false;
	}

	if (ret)
		__sk_nulls_add_node_rcu(sk, list);

	spin_unlock(lock);

	return ret;
}

/*
 * Insert @sk into ehash via inet_ehash_insert(). On success the protocol's
 * in-use counter is incremented; on failure the socket is accounted as an
 * orphan, moved to TCP_CLOSE, flagged SOCK_DEAD and destroyed.
 * Returns whether the insertion happened.
 */
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
{
	bool ok = inet_ehash_insert(sk, osk, found_dup_sk);

	if (ok) {
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	} else {
		this_cpu_inc(*sk->sk_prot->orphan_count);
		inet_sk_set_state(sk, TCP_CLOSE);
		sock_set_flag(sk, SOCK_DEAD);
		inet_csk_destroy_sock(sk);
	}
	return ok;
}
EXPORT_SYMBOL_GPL(inet_ehash_nolisten);

/*
 * Join @sk to the reuseport group of a compatible listener already in @ilb
 * (same family, ipv6-only setting, bound device, bind bucket, owner uid and
 * receive address, with SO_REUSEPORT set); if none is found, allocate a new
 * reuseport group for @sk. Returns the result of reuseport_add_sock() or
 * reuseport_alloc().
 */
static int inet_reuseport_add_sock(struct sock *sk,
				   struct inet_listen_hashbucket *ilb)
{
	struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
	const struct hlist_nulls_node *node;
	struct sock *sk2;
	kuid_t uid = sock_i_uid(sk);

	sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) {
		if (sk2 != sk &&
		    sk2->sk_family == sk->sk_family &&
		    ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
		    inet_csk(sk2)->icsk_bind_hash == tb &&
		    sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
		    inet_rcv_saddr_equal(sk, sk2, false))
			return reuseport_add_sock(sk, sk2,
						  inet_rcv_saddr_any(sk));
	}

	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
}

/*
 * Hash @sk. A socket that is not in TCP_LISTEN goes into the established
 * table (with BH disabled) and 0 is returned regardless of the outcome.
 * A listener is added to its lhash2 bucket, after joining or creating a
 * reuseport group when SO_REUSEPORT is set; an error from that step is
 * returned and the socket is left unhashed.
 */
int __inet_hash(struct sock *sk, struct sock *osk)
{
	struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
	struct inet_listen_hashbucket *ilb2;
	int err = 0;

	if (sk->sk_state != TCP_LISTEN) {
		local_bh_disable();
		inet_ehash_nolisten(sk, osk, NULL);
		local_bh_enable();
		return 0;
	}
	WARN_ON(!sk_unhashed(sk));
	ilb2 = inet_lhash2_bucket_sk(hashinfo, sk);

	spin_lock(&ilb2->lock);
	if (sk->sk_reuseport) {
		err = inet_reuseport_add_sock(sk, ilb2);
		if (err)
			goto unlock;
	}
	sock_set_flag(sk, SOCK_RCU_FREE);
	/* IPv6 reuseport listeners go to the tail, all others to the head. */
	if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
		sk->sk_family == AF_INET6)
		__sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head);
	else
		__sk_nulls_add_node_rcu(sk, &ilb2->nulls_head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
unlock:
	spin_unlock(&ilb2->lock);

	return err;
}
EXPORT_SYMBOL(__inet_hash);

/* Hash @sk unless it is in TCP_CLOSE, in which case 0 is returned. */
int inet_hash(struct sock *sk)
{
	int err = 0;

	if (sk->sk_state != TCP_CLOSE)
		err = __inet_hash(sk, NULL);

	return err;
}
EXPORT_SYMBOL_GPL(inet_hash);

/*
 * Remove @sk from the listening or established hash, whichever its state
 * selects, and decrement the protocol's in-use counter. Does nothing when
 * the socket is already unhashed; that is re-checked under the bucket lock.
 */
void inet_unhash(struct sock *sk)
{
	struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);

	if (sk_unhashed(sk))
		return;

	if (sk->sk_state == TCP_LISTEN) {
		struct inet_listen_hashbucket *ilb2;

		ilb2 = inet_lhash2_bucket_sk(hashinfo, sk);
		/* Don't disable bottom halves while acquiring the lock to
		 * avoid circular locking dependency on PREEMPT_RT.
		 */
		spin_lock(&ilb2->lock);
		if (sk_unhashed(sk)) {
			spin_unlock(&ilb2->lock);
			return;
		}

		if (rcu_access_pointer(sk->sk_reuseport_cb))
			reuseport_stop_listen_sock(sk);

		__sk_nulls_del_node_init_rcu(sk);
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
		spin_unlock(&ilb2->lock);
	} else {
		spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);

		spin_lock_bh(lock);
		if (sk_unhashed(sk)) {
			spin_unlock_bh(lock);
			return;
		}
		__sk_nulls_del_node_init_rcu(sk);
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
		spin_unlock_bh(lock);
	}
}
EXPORT_SYMBOL_GPL(inet_unhash);

/*
 * True when @tb matches @net, @port and @l3mdev and its address matches
 * sk's receive address (see inet_bind2_bucket_addr_match()).
 */
static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
				    const struct net *net, unsigned short port,
				    int l3mdev, const struct sock *sk)
{
	if (!net_eq(ib2_net(tb), net) || tb->port != port ||
	    tb->l3mdev != l3mdev)
		return false;

	return inet_bind2_bucket_addr_match(tb, sk);
}

/*
 * True when @tb matches @net, @port and @l3mdev and holds a wildcard
 * address. With CONFIG_IPV6, an AF_INET socket also matches an IPv6 bucket
 * holding the any address or the v4-mapped any address, while an AF_INET6
 * socket never matches an IPv4 bucket.
 */
bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net,
				      unsigned short port, int l3mdev, const struct sock *sk)
{
	if (!net_eq(ib2_net(tb), net) || tb->port != port ||
	    tb->l3mdev != l3mdev)
		return false;

#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family != tb->family) {
		if (sk->sk_family == AF_INET)
			return ipv6_addr_any(&tb->v6_rcv_saddr) ||
				ipv6_addr_v4mapped_any(&tb->v6_rcv_saddr);

		return false;
	}

	if (sk->sk_family == AF_INET6)
		return ipv6_addr_any(&tb->v6_rcv_saddr);
#endif
	return tb->rcv_saddr == 0;
}

/* The socket's bhash2 hashbucket spinlock must be held when this is called
 *
 * Returns the first bucket in head->chain accepted by
 * inet_bind2_bucket_match(), or NULL when there is none.
 */
struct inet_bind2_bucket *
inet_bind2_bucket_find(const struct inet_bind_hashbucket *head, const struct net *net,
		       unsigned short port, int l3mdev, const struct sock *sk)
{
	struct inet_bind2_bucket *bhash2 = NULL;

	inet_bind_bucket_for_each(bhash2, &head->chain)
		if (inet_bind2_bucket_match(bhash2, net, port, l3mdev, sk))
			break;

	return bhash2;
}

/*
 * Return the bhash2 hash bucket for (@port, wildcard address) in sk's
 * address family; the hash is masked with bhash_size - 1.
 */
struct inet_bind_hashbucket *
inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port)
{
	struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
	u32 hash;

#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		hash = ipv6_portaddr_hash(net, &in6addr_any, port);
	else
#endif
		hash = ipv4_portaddr_hash(net, 0, port);

	return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
}

/*
 * Store a new source address in @sk. For AF_INET, @saddr is read as a
 * __be32 and written to both inet_saddr and the receive address; for any
 * other family (CONFIG_IPV6 only) it is read as a struct in6_addr.
 */
static void inet_update_saddr(struct sock *sk, void *saddr, int family)
{
	if (family == AF_INET) {
		inet_sk(sk)->inet_saddr = *(__be32 *)saddr;
		sk_rcv_saddr_set(sk, inet_sk(sk)->inet_saddr);
	}
#if IS_ENABLED(CONFIG_IPV6)
	else {
		sk->sk_v6_rcv_saddr = *(struct in6_addr *)saddr;
	}
#endif
}

/*
 * Change (or, when @reset is true, clear) sk's source address and move the
 * socket to the bhash2 bucket that matches the new address.
 *
 * Returns 0 on success or -ENOMEM when the spare bhash2 bucket cannot be
 * allocated; in the @reset case the socket is then also unbound from its
 * port.
 */
static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, bool reset)
{
	struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
	struct inet_bind_hashbucket *head, *head2;
	struct inet_bind2_bucket *tb2, *new_tb2;
	int l3mdev = inet_sk_bound_l3mdev(sk);
	int port = inet_sk(sk)->inet_num;
	struct net *net = sock_net(sk);
	int bhash;

	if (!inet_csk(sk)->icsk_bind2_hash) {
		/* Not bind()ed before. */
		if (reset)
			inet_reset_saddr(sk);
		else
			inet_update_saddr(sk, saddr, family);

		return 0;
	}

	/* Allocate a bind2 bucket ahead of time to avoid permanently putting
	 * the bhash2 table in an inconsistent state if a new tb2 bucket
	 * allocation fails.
	 */
	new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC);
	if (!new_tb2) {
		if (reset) {
			/* The (INADDR_ANY, port) bucket might have already
			 * been freed, then we cannot fixup icsk_bind2_hash,
			 * so we give up and unlink sk from bhash/bhash2 not
			 * to leave inconsistency in bhash2.
			 */
			inet_put_port(sk);
			inet_reset_saddr(sk);
		}

		return -ENOMEM;
	}

	bhash = inet_bhashfn(net, port, hinfo->bhash_size);
	head = &hinfo->bhash[bhash];
	head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);

	/* If we change saddr locklessly, another thread
	 * iterating over bhash might see corrupted address.
	 */
	spin_lock_bh(&head->lock);

	spin_lock(&head2->lock);
	__sk_del_bind2_node(sk);
	inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash);
	spin_unlock(&head2->lock);

	if (reset)
		inet_reset_saddr(sk);
	else
		inet_update_saddr(sk, saddr, family);

	/* Recompute the bhash2 bucket now that the address has changed. */
	head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);

	spin_lock(&head2->lock);
	tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
	if (!tb2) {
		tb2 = new_tb2;
		inet_bind2_bucket_init(tb2, net, head2, port, l3mdev, sk);
	}
	sk_add_bind2_node(sk, &tb2->owners);
	inet_csk(sk)->icsk_bind2_hash = tb2;
	spin_unlock(&head2->lock);

	spin_unlock_bh(&head->lock);

	/* An existing bucket was reused, so the spare one is not needed. */
	if (tb2 != new_tb2)
		kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2);

	return 0;
}

/* Set sk's source address to @saddr and rehash it in bhash2. */
int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family)
{
	return __inet_bhash2_update_saddr(sk, saddr, family, false);
}
EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr);

/*
 * Reset sk's source address and rehash it in bhash2, unless the socket has
 * SOCK_BINDADDR_LOCK set in sk_userlocks.
 */
void inet_bhash2_reset_saddr(struct sock *sk)
{
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		__inet_bhash2_update_saddr(sk, NULL, 0, true);
}
EXPORT_SYMBOL_GPL(inet_bhash2_reset_saddr);

/* RFC 6056 3.3.4.  Algorithm 4: Double-Hash Port Selection Algorithm
 * Note that we use 32bit integers (vs RFC 'short integers')
 * because 2^16 is not a multiple of num_ephemeral and this
 * property might be used by clever attacker.
 *
 * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though
 * attacks were since demonstrated, thus we use 65536 by default instead
 * to really give more isolation and privacy, at the expense of 256kB
 * of kernel memory.
 */
#define INET_TABLE_PERTURB_SIZE (1 << CONFIG_INET_TABLE_PERTURB_ORDER)
static u32 *table_perturb;

/*
 * Pick a local port for an outgoing connection (unless @sk already has one)
 * and hash the socket.
 *
 * @death_row:         supplies the hashinfo to operate on.
 * @sk:                the connecting socket.
 * @port_offset:       low bits select the table_perturb slot, the bits
 *                     above 32 are added to the starting offset.
 * @check_established: callback that returns 0 when the 4-tuple with the
 *                     candidate port is unique.
 *
 * If @sk already has a local port, only @check_established is run on it and
 * its result returned. Otherwise the local port range is scanned in two
 * passes, one per port parity.
 *
 * Returns 0 on success, -EADDRNOTAVAIL when no port in the range is usable,
 * or -ENOMEM when a bind bucket cannot be allocated.
 */
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
		struct sock *sk, u64 port_offset,
		int (*check_established)(struct inet_timewait_death_row *,
			struct sock *, __u16, struct inet_timewait_sock **))
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_bind_hashbucket *head, *head2;
	struct inet_timewait_sock *tw = NULL;
	int port = inet_sk(sk)->inet_num;
	struct net *net = sock_net(sk);
	struct inet_bind2_bucket *tb2;
	struct inet_bind_bucket *tb;
	bool tb_created = false;
	u32 remaining, offset;
	int ret, i, low, high;
	int l3mdev;
	u32 index;

	if (port) {
		local_bh_disable();
		ret = check_established(death_row, sk, port, NULL);
		local_bh_enable();
		return ret;
	}

	l3mdev = inet_sk_bound_l3mdev(sk);

	inet_sk_get_local_port_range(sk, &low, &high);
	high++; /* [32768, 60999] -> [32768, 61000[ */
	remaining = high - low;
	/* Round the range down to an even size so each parity pass covers
	 * exactly half of it.
	 */
	if (likely(remaining > 1))
		remaining &= ~1U;

	get_random_sleepable_once(table_perturb,
				  INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
	index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);

	offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
	offset %= remaining;

	/* In first pass we try ports of @low parity.
	 * inet_csk_get_port() does the opposite choice.
	 */
	offset &= ~1U;
other_parity_scan:
	port = low + offset;
	for (i = 0; i < remaining; i += 2, port += 2) {
		if (unlikely(port >= high))
			port -= remaining;
		if (inet_is_local_reserved_port(net, port))
			continue;
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);

		/* Does not bother with rcv_saddr checks, because
		 * the established check is already unique enough.
		 */
		inet_bind_bucket_for_each(tb, &head->chain) {
			if (inet_bind_bucket_match(tb, net, port, l3mdev)) {
				/* Only buckets created by this function carry
				 * fastreuse == fastreuseport == -1; any other
				 * bucket makes the port unusable here.
				 */
				if (tb->fastreuse >= 0 ||
				    tb->fastreuseport >= 0)
					goto next_port;
				WARN_ON(hlist_empty(&tb->owners));
				if (!check_established(death_row, sk,
						       port, &tw))
					goto ok;
				goto next_port;
			}
		}

		tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
					     net, head, port, l3mdev);
		if (!tb) {
			spin_unlock_bh(&head->lock);
			return -ENOMEM;
		}
		tb_created = true;
		tb->fastreuse = -1;
		tb->fastreuseport = -1;
		goto ok;
next_port:
		spin_unlock_bh(&head->lock);
		cond_resched();
	}

	/* Switch to the other parity once; the second increment makes
	 * offset even again and ends the search.
	 */
	offset++;
	if ((offset & 1) && remaining > 1)
		goto other_parity_scan;

	return -EADDRNOTAVAIL;

ok:
	/* Find the corresponding tb2 bucket since we need to
	 * add the socket to the bhash2 table as well
	 */
	head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
	spin_lock(&head2->lock);

	tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
	if (!tb2) {
		tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
					       head2, port, l3mdev, sk);
		if (!tb2)
			goto error;
	}

	/* Here we want to add a little bit of randomness to the next source
	 * port that will be chosen. We use a max() with a random here so that
	 * on low contention the randomness is maximal and on high contention
	 * it may be inexistent.
	 */
	i = max_t(int, i, get_random_u32_below(8) * 2);
	WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);

	/* Head lock still held and bh's disabled */
	inet_bind_hash(sk, tb, tb2, port);

	if (sk_unhashed(sk)) {
		inet_sk(sk)->inet_sport = htons(port);
		inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
	}
	if (tw)
		inet_twsk_bind_unhash(tw, hinfo);

	spin_unlock(&head2->lock);
	spin_unlock(&head->lock);

	if (tw)
		inet_twsk_deschedule_put(tw);
	local_bh_enable();
	return 0;

error:
	spin_unlock(&head2->lock);
	/* Undo the bhash bucket only if this call created it. */
	if (tb_created)
		inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
	spin_unlock_bh(&head->lock);
	return -ENOMEM;
}

/*
 * Bind a port for a connect operation and hash it.
*/ int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); return __inet_hash_connect(death_row, sk, port_offset, __inet_check_established); } EXPORT_SYMBOL_GPL(inet_hash_connect); static void init_hashinfo_lhash2(struct inet_hashinfo *h) { int i; for (i = 0; i <= h->lhash2_mask; i++) { spin_lock_init(&h->lhash2[i].lock); INIT_HLIST_NULLS_HEAD(&h->lhash2[i].nulls_head, i + LISTENING_NULLS_BASE); } } void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, unsigned long high_limit) { h->lhash2 = alloc_large_system_hash(name, sizeof(*h->lhash2), numentries, scale, 0, NULL, &h->lhash2_mask, low_limit, high_limit); init_hashinfo_lhash2(h); /* this one is used for source ports of outgoing connections */ table_perturb = alloc_large_system_hash("Table-perturb", sizeof(*table_perturb), INET_TABLE_PERTURB_SIZE, 0, 0, NULL, NULL, INET_TABLE_PERTURB_SIZE, INET_TABLE_PERTURB_SIZE); } int inet_hashinfo2_init_mod(struct inet_hashinfo *h) { h->lhash2 = kmalloc_array(INET_LHTABLE_SIZE, sizeof(*h->lhash2), GFP_KERNEL); if (!h->lhash2) return -ENOMEM; h->lhash2_mask = INET_LHTABLE_SIZE - 1; /* INET_LHTABLE_SIZE must be a power of 2 */ BUG_ON(INET_LHTABLE_SIZE & h->lhash2_mask); init_hashinfo_lhash2(h); return 0; } EXPORT_SYMBOL_GPL(inet_hashinfo2_init_mod); int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) { unsigned int locksz = sizeof(spinlock_t); unsigned int i, nblocks = 1; if (locksz != 0) { /* allocate 2 cache lines or at least one spinlock per cpu */ nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U); nblocks = roundup_pow_of_two(nblocks * num_possible_cpus()); /* no more locks than number of hash buckets */ nblocks = min(nblocks, hashinfo->ehash_mask + 1); hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL); if (!hashinfo->ehash_locks) return -ENOMEM; for (i = 
0; i < nblocks; i++) spin_lock_init(&hashinfo->ehash_locks[i]); } hashinfo->ehash_locks_mask = nblocks - 1; return 0; } EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc); struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo, unsigned int ehash_entries) { struct inet_hashinfo *new_hashinfo; int i; new_hashinfo = kmemdup(hashinfo, sizeof(*hashinfo), GFP_KERNEL); if (!new_hashinfo) goto err; new_hashinfo->ehash = vmalloc_huge(ehash_entries * sizeof(struct inet_ehash_bucket), GFP_KERNEL_ACCOUNT); if (!new_hashinfo->ehash) goto free_hashinfo; new_hashinfo->ehash_mask = ehash_entries - 1; if (inet_ehash_locks_alloc(new_hashinfo)) goto free_ehash; for (i = 0; i < ehash_entries; i++) INIT_HLIST_NULLS_HEAD(&new_hashinfo->ehash[i].chain, i); new_hashinfo->pernet = true; return new_hashinfo; free_ehash: vfree(new_hashinfo->ehash); free_hashinfo: kfree(new_hashinfo); err: return NULL; } EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_alloc); void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo) { if (!hashinfo->pernet) return; inet_ehash_locks_free(hashinfo); vfree(hashinfo->ehash); kfree(hashinfo); } EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_free); |
10 || /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_IP6_TUNNEL_H #define _NET_IP6_TUNNEL_H #include <linux/ipv6.h> #include <linux/netdevice.h> #include <linux/if_tunnel.h> #include <linux/ip6_tunnel.h> #include <net/ip_tunnels.h> #include <net/dst_cache.h> #define IP6TUNNEL_ERR_TIMEO (30*HZ) /* capable of sending packets */ #define IP6_TNL_F_CAP_XMIT 0x10000 /* capable of receiving packets */ #define IP6_TNL_F_CAP_RCV 0x20000 /* determine capability on a per-packet basis */ #define IP6_TNL_F_CAP_PER_PACKET 0x40000 struct __ip6_tnl_parm { char name[IFNAMSIZ]; /* name of tunnel device */ int link; /* ifindex of underlying L2 interface */ __u8 proto; /* tunnel protocol */ __u8 encap_limit; /* encapsulation limit for tunnel */ __u8 hop_limit; /* hop limit for tunnel */ bool collect_md; __be32 flowinfo; /* traffic class and flowlabel for tunnel */ __u32 flags; /* tunnel flags */ struct in6_addr laddr; /* local tunnel end-point address */ struct in6_addr raddr; /* remote tunnel end-point address */ __be16 i_flags; __be16 o_flags; __be32 i_key; __be32 o_key; __u32 fwmark; __u32 index; /* ERSPAN type II index */ __u8 erspan_ver; /* ERSPAN version */ __u8 dir; /* direction */ __u16 hwid; /* hwid */ }; /* IPv6 tunnel */ struct ip6_tnl { struct ip6_tnl __rcu *next; /* next tunnel in list */ struct net_device *dev; /* virtual device associated with tunnel */ netdevice_tracker dev_tracker; struct net *net; /* netns for packet i/o */ struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ struct dst_cache dst_cache; /* cached dst */ struct gro_cells gro_cells; int err_count; unsigned long err_time; /* These fields used only by GRE */ __u32 i_seqno; /* The last seen seqno */ atomic_t o_seqno; /* The last output seqno */ int hlen; /* tun_hlen + encap_hlen */ int tun_hlen; /* Precalculated header length */ int encap_hlen; /* Encap header length (FOU,GUE) */ struct ip_tunnel_encap encap; int mlink; }; struct 
ip6_tnl_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi6 *fl6); int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); }; #ifdef CONFIG_INET extern const struct ip6_tnl_encap_ops __rcu * ip6tun_encaps[MAX_IPTUN_ENCAP_OPS]; int ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops, unsigned int num); int ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops, unsigned int num); int ip6_tnl_encap_setup(struct ip6_tnl *t, struct ip_tunnel_encap *ipencap); static inline int ip6_encap_hlen(struct ip_tunnel_encap *e) { const struct ip6_tnl_encap_ops *ops; int hlen = -EINVAL; if (e->type == TUNNEL_ENCAP_NONE) return 0; if (e->type >= MAX_IPTUN_ENCAP_OPS) return -EINVAL; rcu_read_lock(); ops = rcu_dereference(ip6tun_encaps[e->type]); if (likely(ops && ops->encap_hlen)) hlen = ops->encap_hlen(e); rcu_read_unlock(); return hlen; } static inline int ip6_tnl_encap(struct sk_buff *skb, struct ip6_tnl *t, u8 *protocol, struct flowi6 *fl6) { const struct ip6_tnl_encap_ops *ops; int ret = -EINVAL; if (t->encap.type == TUNNEL_ENCAP_NONE) return 0; if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) return -EINVAL; rcu_read_lock(); ops = rcu_dereference(ip6tun_encaps[t->encap.type]); if (likely(ops && ops->build_header)) ret = ops->build_header(skb, &t->encap, protocol, fl6); rcu_read_unlock(); return ret; } /* Tunnel encapsulation limit destination sub-option */ struct ipv6_tlv_tnl_enc_lim { __u8 type; /* type-code for option */ __u8 length; /* option length */ __u8 encap_limit; /* tunnel encapsulation limit */ } __packed; int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); int ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_error); int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr 
*laddr, const struct in6_addr *raddr); int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct flowi6 *fl6, int encap_limit, __u32 *pmtu, __u8 proto); __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw); __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); struct net *ip6_tnl_get_link_net(const struct net_device *dev); int ip6_tnl_get_iflink(const struct net_device *dev); int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu); static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, struct net_device *dev) { int pkt_len, err; memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); if (dev) { if (unlikely(net_xmit_eval(err))) pkt_len = -1; iptunnel_xmit_stats(dev, pkt_len); } } #endif #endif |
2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 3 1 2 2 1 3 4 3 1 1 8 8 8 6 1 1 1 1 1 1 1 1 3 3 3 3 3 2 2 2 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 || // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ #include <net/genetlink.h> #include <net/sock.h> #include "devl_internal.h" struct devlink_info_req { struct sk_buff *msg; void (*version_cb)(const char *version_name, enum devlink_info_version_type version_type, void *version_cb_priv); void *version_cb_priv; }; struct devlink_reload_combination { enum devlink_reload_action action; enum devlink_reload_limit limit; }; static const struct devlink_reload_combination devlink_reload_invalid_combinations[] = { { /* can't reinitialize driver with no down time */ .action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT, .limit = DEVLINK_RELOAD_LIMIT_NO_RESET, }, }; static bool devlink_reload_combination_is_invalid(enum devlink_reload_action action, enum devlink_reload_limit limit) { int i; for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) if (devlink_reload_invalid_combinations[i].action == action && devlink_reload_invalid_combinations[i].limit == limit) return true; return false; } static bool devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action) { return test_bit(action, &devlink->ops->reload_actions); } static bool devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_limit limit) { return test_bit(limit, &devlink->ops->reload_limits); } static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_limit limit, u32 value) { struct nlattr *reload_stats_entry; reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY); if (!reload_stats_entry) return -EMSGSIZE; if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) || nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value)) goto nla_put_failure; nla_nest_end(msg, 
reload_stats_entry); return 0; nla_put_failure: nla_nest_cancel(msg, reload_stats_entry); return -EMSGSIZE; } static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote) { struct nlattr *reload_stats_attr, *act_info, *act_stats; int i, j, stat_idx; u32 value; if (!is_remote) reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS); else reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_REMOTE_RELOAD_STATS); if (!reload_stats_attr) return -EMSGSIZE; for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) { if ((!is_remote && !devlink_reload_action_is_supported(devlink, i)) || i == DEVLINK_RELOAD_ACTION_UNSPEC) continue; act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO); if (!act_info) goto nla_put_failure; if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i)) goto action_info_nest_cancel; act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS); if (!act_stats) goto action_info_nest_cancel; for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) { /* Remote stats are shown even if not locally supported. * Stats of actions with unspecified limit are shown * though drivers don't need to register unspecified * limit. 
*/ if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC && !devlink_reload_limit_is_supported(devlink, j)) || devlink_reload_combination_is_invalid(i, j)) continue; stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i; if (!is_remote) value = devlink->stats.reload_stats[stat_idx]; else value = devlink->stats.remote_reload_stats[stat_idx]; if (devlink_reload_stat_put(msg, j, value)) goto action_stats_nest_cancel; } nla_nest_end(msg, act_stats); nla_nest_end(msg, act_info); } nla_nest_end(msg, reload_stats_attr); return 0; action_stats_nest_cancel: nla_nest_cancel(msg, act_stats); action_info_nest_cancel: nla_nest_cancel(msg, act_info); nla_put_failure: nla_nest_cancel(msg, reload_stats_attr); return -EMSGSIZE; } static int devlink_nl_nested_fill(struct sk_buff *msg, struct devlink *devlink) { unsigned long rel_index; void *unused; int err; xa_for_each(&devlink->nested_rels, rel_index, unused) { err = devlink_rel_devlink_handle_put(msg, devlink, rel_index, DEVLINK_ATTR_NESTED_DEVLINK, NULL); if (err) return err; } return 0; } static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags) { struct nlattr *dev_stats; void *hdr; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed)) goto nla_put_failure; dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS); if (!dev_stats) goto nla_put_failure; if (devlink_reload_stats_put(msg, devlink, false)) goto dev_stats_nest_cancel; if (devlink_reload_stats_put(msg, devlink, true)) goto dev_stats_nest_cancel; nla_nest_end(msg, dev_stats); if (devlink_nl_nested_fill(msg, devlink)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; dev_stats_nest_cancel: nla_nest_cancel(msg, dev_stats); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static void devlink_notify(struct devlink 
*devlink, enum devlink_command cmd) { struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL); WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0); if (err) { nlmsg_free(msg); return; } genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW, info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int devlink_nl_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { return devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); } int devlink_nl_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { return devlink_nl_dumpit(msg, cb, devlink_nl_get_dump_one); } static void devlink_rel_notify_cb(struct devlink *devlink, u32 obj_index) { devlink_notify(devlink, DEVLINK_CMD_NEW); } static void devlink_rel_cleanup_cb(struct devlink *devlink, u32 obj_index, u32 rel_index) { xa_erase(&devlink->nested_rels, rel_index); } int devl_nested_devlink_set(struct devlink *devlink, struct devlink *nested_devlink) { u32 rel_index; int err; err = devlink_rel_nested_in_add(&rel_index, devlink->index, 0, devlink_rel_notify_cb, devlink_rel_cleanup_cb, nested_devlink); if (err) return err; return xa_insert(&devlink->nested_rels, rel_index, xa_mk_value(0), GFP_KERNEL); } EXPORT_SYMBOL_GPL(devl_nested_devlink_set); void devlink_notify_register(struct devlink *devlink) { devlink_notify(devlink, DEVLINK_CMD_NEW); 
devlink_linecards_notify_register(devlink); devlink_ports_notify_register(devlink); devlink_trap_policers_notify_register(devlink); devlink_trap_groups_notify_register(devlink); devlink_traps_notify_register(devlink); devlink_rates_notify_register(devlink); devlink_regions_notify_register(devlink); devlink_params_notify_register(devlink); } void devlink_notify_unregister(struct devlink *devlink) { devlink_params_notify_unregister(devlink); devlink_regions_notify_unregister(devlink); devlink_rates_notify_unregister(devlink); devlink_traps_notify_unregister(devlink); devlink_trap_groups_notify_unregister(devlink); devlink_trap_policers_notify_unregister(devlink); devlink_ports_notify_unregister(devlink); devlink_linecards_notify_unregister(devlink); devlink_notify(devlink, DEVLINK_CMD_DEL); } static void devlink_reload_failed_set(struct devlink *devlink, bool reload_failed) { if (devlink->reload_failed == reload_failed) return; devlink->reload_failed = reload_failed; devlink_notify(devlink, DEVLINK_CMD_NEW); } bool devlink_is_reload_failed(const struct devlink *devlink) { return devlink->reload_failed; } EXPORT_SYMBOL_GPL(devlink_is_reload_failed); static void __devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats, enum devlink_reload_limit limit, u32 actions_performed) { unsigned long actions = actions_performed; int stat_idx; int action; for_each_set_bit(action, &actions, __DEVLINK_RELOAD_ACTION_MAX) { stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action; reload_stats[stat_idx]++; } devlink_notify(devlink, DEVLINK_CMD_NEW); } static void devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit, u32 actions_performed) { __devlink_reload_stats_update(devlink, devlink->stats.reload_stats, limit, actions_performed); } /** * devlink_remote_reload_actions_performed - Update devlink on reload actions * performed which are not a direct result of devlink reload call. 
* * This should be called by a driver after performing reload actions in case it was not * a result of devlink reload call. For example fw_activate was performed as a result * of devlink reload triggered fw_activate on another host. * The motivation for this function is to keep data on reload actions performed on this * function whether it was done due to direct devlink reload call or not. * * @devlink: devlink * @limit: reload limit * @actions_performed: bitmask of actions performed */ void devlink_remote_reload_actions_performed(struct devlink *devlink, enum devlink_reload_limit limit, u32 actions_performed) { if (WARN_ON(!actions_performed || actions_performed & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) || actions_performed >= BIT(__DEVLINK_RELOAD_ACTION_MAX) || limit > DEVLINK_RELOAD_LIMIT_MAX)) return; __devlink_reload_stats_update(devlink, devlink->stats.remote_reload_stats, limit, actions_performed); } EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed); static struct net *devlink_netns_get(struct sk_buff *skb, struct genl_info *info) { struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID]; struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD]; struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID]; struct net *net; if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) { NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified"); return ERR_PTR(-EINVAL); } if (netns_pid_attr) { net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr)); } else if (netns_fd_attr) { net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr)); } else if (netns_id_attr) { net = get_net_ns_by_id(sock_net(skb->sk), nla_get_u32(netns_id_attr)); if (!net) net = ERR_PTR(-EINVAL); } else { WARN_ON(1); net = ERR_PTR(-EINVAL); } if (IS_ERR(net)) { NL_SET_ERR_MSG(info->extack, "Unknown network namespace"); return ERR_PTR(-EINVAL); } if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { put_net(net); return ERR_PTR(-EPERM); } return 
net; } static void devlink_reload_netns_change(struct devlink *devlink, struct net *curr_net, struct net *dest_net) { /* Userspace needs to be notified about devlink objects * removed from original and entering new network namespace. * The rest of the devlink objects are re-created during * reload process so the notifications are generated separatelly. */ devlink_notify_unregister(devlink); write_pnet(&devlink->_net, dest_net); devlink_notify_register(devlink); devlink_rel_nested_in_notify(devlink); } int devlink_reload(struct devlink *devlink, struct net *dest_net, enum devlink_reload_action action, enum devlink_reload_limit limit, u32 *actions_performed, struct netlink_ext_ack *extack) { u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; struct net *curr_net; int err; memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats)); err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack); if (err) return err; curr_net = devlink_net(devlink); if (dest_net && !net_eq(dest_net, curr_net)) devlink_reload_netns_change(devlink, curr_net, dest_net); if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT) devlink_params_driverinit_load_new(devlink); err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack); devlink_reload_failed_set(devlink, !!err); if (err) return err; WARN_ON(!(*actions_performed & BIT(action))); /* Catch driver on updating the remote action within devlink reload */ WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats))); devlink_reload_stats_update(devlink, limit, *actions_performed); return 0; } static int devlink_nl_reload_actions_performed_snd(struct devlink *devlink, u32 actions_performed, enum devlink_command cmd, struct genl_info *info) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, cmd); 
if (!hdr) goto free_msg; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (nla_put_bitfield32(msg, DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, actions_performed, actions_performed)) goto nla_put_failure; genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: genlmsg_cancel(msg, hdr); free_msg: nlmsg_free(msg); return -EMSGSIZE; } int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; enum devlink_reload_action action; enum devlink_reload_limit limit; struct net *dest_net = NULL; u32 actions_performed; int err; err = devlink_resources_validate(devlink, NULL, info); if (err) { NL_SET_ERR_MSG(info->extack, "resources size validation failed"); return err; } if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION]) action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]); else action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT; if (!devlink_reload_action_is_supported(devlink, action)) { NL_SET_ERR_MSG(info->extack, "Requested reload action is not supported by the driver"); return -EOPNOTSUPP; } limit = DEVLINK_RELOAD_LIMIT_UNSPEC; if (info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) { struct nla_bitfield32 limits; u32 limits_selected; limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]); limits_selected = limits.value & limits.selector; if (!limits_selected) { NL_SET_ERR_MSG(info->extack, "Invalid limit selected"); return -EINVAL; } for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++) if (limits_selected & BIT(limit)) break; /* UAPI enables multiselection, but currently it is not used */ if (limits_selected != BIT(limit)) { NL_SET_ERR_MSG(info->extack, "Multiselection of limit is not supported"); return -EOPNOTSUPP; } if (!devlink_reload_limit_is_supported(devlink, limit)) { NL_SET_ERR_MSG(info->extack, "Requested limit is not supported by the driver"); return -EOPNOTSUPP; } if (devlink_reload_combination_is_invalid(action, limit)) { NL_SET_ERR_MSG(info->extack, "Requested 
limit is invalid for this action"); return -EINVAL; } } if (info->attrs[DEVLINK_ATTR_NETNS_PID] || info->attrs[DEVLINK_ATTR_NETNS_FD] || info->attrs[DEVLINK_ATTR_NETNS_ID]) { dest_net = devlink_netns_get(skb, info); if (IS_ERR(dest_net)) return PTR_ERR(dest_net); if (!net_eq(dest_net, devlink_net(devlink)) && action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) { NL_SET_ERR_MSG_MOD(info->extack, "Changing namespace is only supported for reinit action"); return -EOPNOTSUPP; } } err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack); if (dest_net) put_net(dest_net); if (err) return err; /* For backward compatibility generate reply only if attributes used by user */ if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION] && !info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) return 0; return devlink_nl_reload_actions_performed_snd(devlink, actions_performed, DEVLINK_CMD_RELOAD, info); } bool devlink_reload_actions_valid(const struct devlink_ops *ops) { const struct devlink_reload_combination *comb; int i; if (!devlink_reload_supported(ops)) { if (WARN_ON(ops->reload_actions)) return false; return true; } if (WARN_ON(!ops->reload_actions || ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) || ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX))) return false; if (WARN_ON(ops->reload_limits & BIT(DEVLINK_RELOAD_LIMIT_UNSPEC) || ops->reload_limits >= BIT(__DEVLINK_RELOAD_LIMIT_MAX))) return false; for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) { comb = &devlink_reload_invalid_combinations[i]; if (ops->reload_actions == BIT(comb->action) && ops->reload_limits == BIT(comb->limit)) return false; } return true; } static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags) { const struct devlink_ops *ops = devlink->ops; enum devlink_eswitch_encap_mode encap_mode; u8 inline_mode; void *hdr; int err = 0; u16 mode; hdr = genlmsg_put(msg, portid, seq, 
&devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; err = devlink_nl_put_handle(msg, devlink); if (err) goto nla_put_failure; if (ops->eswitch_mode_get) { err = ops->eswitch_mode_get(devlink, &mode); if (err) goto nla_put_failure; err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode); if (err) goto nla_put_failure; } if (ops->eswitch_inline_mode_get) { err = ops->eswitch_inline_mode_get(devlink, &inline_mode); if (err) goto nla_put_failure; err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE, inline_mode); if (err) goto nla_put_failure; } if (ops->eswitch_encap_mode_get) { err = ops->eswitch_encap_mode_get(devlink, &encap_mode); if (err) goto nla_put_failure; err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode); if (err) goto nla_put_failure; } genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return err; } int devlink_nl_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_GET, info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; enum devlink_eswitch_encap_mode encap_mode; u8 inline_mode; int err = 0; u16 mode; if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) { if (!ops->eswitch_mode_set) return -EOPNOTSUPP; mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); err = devlink_rate_nodes_check(devlink, mode, info->extack); if (err) return err; err = ops->eswitch_mode_set(devlink, mode, info->extack); if (err) return err; } if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) { if (!ops->eswitch_inline_mode_set) return -EOPNOTSUPP; inline_mode = 
nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]); err = ops->eswitch_inline_mode_set(devlink, inline_mode, info->extack); if (err) return err; } if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) { if (!ops->eswitch_encap_mode_set) return -EOPNOTSUPP; encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]); err = ops->eswitch_encap_mode_set(devlink, encap_mode, info->extack); if (err) return err; } return 0; } int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) { if (!req->msg) return 0; return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn); } EXPORT_SYMBOL_GPL(devlink_info_serial_number_put); int devlink_info_board_serial_number_put(struct devlink_info_req *req, const char *bsn) { if (!req->msg) return 0; return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, bsn); } EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put); static int devlink_info_version_put(struct devlink_info_req *req, int attr, const char *version_name, const char *version_value, enum devlink_info_version_type version_type) { struct nlattr *nest; int err; if (req->version_cb) req->version_cb(version_name, version_type, req->version_cb_priv); if (!req->msg) return 0; nest = nla_nest_start_noflag(req->msg, attr); if (!nest) return -EMSGSIZE; err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME, version_name); if (err) goto nla_put_failure; err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE, version_value); if (err) goto nla_put_failure; nla_nest_end(req->msg, nest); return 0; nla_put_failure: nla_nest_cancel(req->msg, nest); return err; } int devlink_info_version_fixed_put(struct devlink_info_req *req, const char *version_name, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED, version_name, version_value, DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put); int devlink_info_version_stored_put(struct 
devlink_info_req *req, const char *version_name, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED, version_name, version_value, DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_stored_put); int devlink_info_version_stored_put_ext(struct devlink_info_req *req, const char *version_name, const char *version_value, enum devlink_info_version_type version_type) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED, version_name, version_value, version_type); } EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext); int devlink_info_version_running_put(struct devlink_info_req *req, const char *version_name, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING, version_name, version_value, DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_running_put); int devlink_info_version_running_put_ext(struct devlink_info_req *req, const char *version_name, const char *version_value, enum devlink_info_version_type version_type) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING, version_name, version_value, version_type); } EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext); static int devlink_nl_driver_info_get(struct device_driver *drv, struct devlink_info_req *req) { if (!drv) return 0; if (drv->name[0]) return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, drv->name); return 0; } static int devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { struct device *dev = devlink_to_dev(devlink); struct devlink_info_req req = {}; void *hdr; int err; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; err = -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto err_cancel_msg; req.msg = msg; if (devlink->ops->info_get) { err = 
devlink->ops->info_get(devlink, &req, extack); if (err) goto err_cancel_msg; } err = devlink_nl_driver_info_get(dev->driver, &req); if (err) goto err_cancel_msg; genlmsg_end(msg, hdr); return 0; err_cancel_msg: genlmsg_cancel(msg, hdr); return err; } int devlink_nl_info_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, info->snd_portid, info->snd_seq, 0, info->extack); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int devlink_nl_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { int err; err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, cb->extack); if (err == -EOPNOTSUPP) err = 0; return err; } int devlink_nl_info_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { return devlink_nl_dumpit(msg, cb, devlink_nl_info_get_dump_one); } static int devlink_nl_flash_update_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, struct devlink_flash_notify *params) { void *hdr; hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd); if (!hdr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS) goto out; if (params->status_msg && nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG, params->status_msg)) goto nla_put_failure; if (params->component && nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT, params->component)) goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE, params->done, DEVLINK_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL, params->total, DEVLINK_ATTR_PAD)) goto nla_put_failure; if 
(nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT, params->timeout, DEVLINK_ATTR_PAD)) goto nla_put_failure; out: genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static void __devlink_flash_update_notify(struct devlink *devlink, enum devlink_command cmd, struct devlink_flash_notify *params) { struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE && cmd != DEVLINK_CMD_FLASH_UPDATE_END && cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS); if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; err = devlink_nl_flash_update_fill(msg, devlink, cmd, params); if (err) goto out_free_msg; genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); return; out_free_msg: nlmsg_free(msg); } static void devlink_flash_update_begin_notify(struct devlink *devlink) { struct devlink_flash_notify params = {}; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE, ¶ms); } static void devlink_flash_update_end_notify(struct devlink *devlink) { struct devlink_flash_notify params = {}; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE_END, ¶ms); } void devlink_flash_update_status_notify(struct devlink *devlink, const char *status_msg, const char *component, unsigned long done, unsigned long total) { struct devlink_flash_notify params = { .status_msg = status_msg, .component = component, .done = done, .total = total, }; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE_STATUS, ¶ms); } EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify); void devlink_flash_update_timeout_notify(struct devlink *devlink, const char *status_msg, const char *component, unsigned long timeout) { struct devlink_flash_notify params = { .status_msg = status_msg, .component = component, .timeout = timeout, }; __devlink_flash_update_notify(devlink, 
DEVLINK_CMD_FLASH_UPDATE_STATUS, ¶ms); } EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify); struct devlink_flash_component_lookup_ctx { const char *lookup_name; bool lookup_name_found; }; static void devlink_flash_component_lookup_cb(const char *version_name, enum devlink_info_version_type version_type, void *version_cb_priv) { struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv; if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT || lookup_ctx->lookup_name_found) return; lookup_ctx->lookup_name_found = !strcmp(lookup_ctx->lookup_name, version_name); } static int devlink_flash_component_get(struct devlink *devlink, struct nlattr *nla_component, const char **p_component, struct netlink_ext_ack *extack) { struct devlink_flash_component_lookup_ctx lookup_ctx = {}; struct devlink_info_req req = {}; const char *component; int ret; if (!nla_component) return 0; component = nla_data(nla_component); if (!devlink->ops->info_get) { NL_SET_ERR_MSG_ATTR(extack, nla_component, "component update is not supported by this device"); return -EOPNOTSUPP; } lookup_ctx.lookup_name = component; req.version_cb = devlink_flash_component_lookup_cb; req.version_cb_priv = &lookup_ctx; ret = devlink->ops->info_get(devlink, &req, NULL); if (ret) return ret; if (!lookup_ctx.lookup_name_found) { NL_SET_ERR_MSG_ATTR(extack, nla_component, "selected component is not supported by this device"); return -EINVAL; } *p_component = component; return 0; } int devlink_nl_flash_update_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *nla_overwrite_mask, *nla_file_name; struct devlink_flash_update_params params = {}; struct devlink *devlink = info->user_ptr[0]; const char *file_name; u32 supported_params; int ret; if (!devlink->ops->flash_update) return -EOPNOTSUPP; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME)) return -EINVAL; ret = devlink_flash_component_get(devlink, info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT], ¶ms.component, 
info->extack); if (ret) return ret; supported_params = devlink->ops->supported_flash_update_params; nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK]; if (nla_overwrite_mask) { struct nla_bitfield32 sections; if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK)) { NL_SET_ERR_MSG_ATTR(info->extack, nla_overwrite_mask, "overwrite settings are not supported by this device"); return -EOPNOTSUPP; } sections = nla_get_bitfield32(nla_overwrite_mask); params.overwrite_mask = sections.value & sections.selector; } nla_file_name = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]; file_name = nla_data(nla_file_name); ret = request_firmware(¶ms.fw, file_name, devlink->dev); if (ret) { NL_SET_ERR_MSG_ATTR(info->extack, nla_file_name, "failed to locate the requested firmware file"); return ret; } devlink_flash_update_begin_notify(devlink); ret = devlink->ops->flash_update(devlink, ¶ms, info->extack); devlink_flash_update_end_notify(devlink); release_firmware(params.fw); return ret; } static void __devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) { struct devlink_info_req req = {}; const struct nlattr *nlattr; struct sk_buff *msg; int rem, err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; req.msg = msg; err = devlink->ops->info_get(devlink, &req, NULL); if (err) goto free_msg; nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) { const struct nlattr *kv; int rem_kv; if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING) continue; nla_for_each_nested(kv, nlattr, rem_kv) { if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE) continue; strlcat(buf, nla_data(kv), len); strlcat(buf, " ", len); } } free_msg: nlmsg_free(msg); } void devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) { if (!devlink->ops->info_get) return; devl_lock(devlink); if (devl_is_registered(devlink)) __devlink_compat_running_version(devlink, buf, len); devl_unlock(devlink); } int 
devlink_compat_flash_update(struct devlink *devlink, const char *file_name) { struct devlink_flash_update_params params = {}; int ret; devl_lock(devlink); if (!devl_is_registered(devlink)) { ret = -ENODEV; goto out_unlock; } if (!devlink->ops->flash_update) { ret = -EOPNOTSUPP; goto out_unlock; } ret = request_firmware(¶ms.fw, file_name, devlink->dev); if (ret) goto out_unlock; devlink_flash_update_begin_notify(devlink); ret = devlink->ops->flash_update(devlink, ¶ms, NULL); devlink_flash_update_end_notify(devlink); release_firmware(params.fw); out_unlock: devl_unlock(devlink); return ret; } static int devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { struct nlattr *selftests; void *hdr; int err; int i; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, DEVLINK_CMD_SELFTESTS_GET); if (!hdr) return -EMSGSIZE; err = -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto err_cancel_msg; selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS); if (!selftests) goto err_cancel_msg; for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1; i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) { if (devlink->ops->selftest_check(devlink, i, extack)) { err = nla_put_flag(msg, i); if (err) goto err_cancel_msg; } } nla_nest_end(msg, selftests); genlmsg_end(msg, hdr); return 0; err_cancel_msg: genlmsg_cancel(msg, hdr); return err; } int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; int err; if (!devlink->ops->selftest_check) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid, info->snd_seq, 0, info->extack); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int devlink_nl_selftests_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, 
int flags) { if (!devlink->ops->selftest_check) return 0; return devlink_nl_selftests_fill(msg, devlink, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, cb->extack); } int devlink_nl_selftests_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return devlink_nl_dumpit(skb, cb, devlink_nl_selftests_get_dump_one); } static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id, enum devlink_selftest_status test_status) { struct nlattr *result_attr; result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT); if (!result_attr) return -EMSGSIZE; if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) || nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS, test_status)) goto nla_put_failure; nla_nest_end(skb, result_attr); return 0; nla_put_failure: nla_nest_cancel(skb, result_attr); return -EMSGSIZE; } static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = { [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG }, }; int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1]; struct devlink *devlink = info->user_ptr[0]; struct nlattr *attrs, *selftests; struct sk_buff *msg; void *hdr; int err; int i; if (!devlink->ops->selftest_run || !devlink->ops->selftest_check) return -EOPNOTSUPP; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SELFTESTS)) return -EINVAL; attrs = info->attrs[DEVLINK_ATTR_SELFTESTS]; err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs, devlink_selftest_nl_policy, info->extack); if (err < 0) return err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = -EMSGSIZE; hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN); if (!hdr) goto free_msg; if (devlink_nl_put_handle(msg, devlink)) goto genlmsg_cancel; selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS); if (!selftests) goto genlmsg_cancel; for (i = 
DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1; i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) { enum devlink_selftest_status test_status; if (nla_get_flag(tb[i])) { if (!devlink->ops->selftest_check(devlink, i, info->extack)) { if (devlink_selftest_result_put(msg, i, DEVLINK_SELFTEST_STATUS_SKIP)) goto selftests_nest_cancel; continue; } test_status = devlink->ops->selftest_run(devlink, i, info->extack); if (devlink_selftest_result_put(msg, i, test_status)) goto selftests_nest_cancel; } } nla_nest_end(msg, selftests); genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); selftests_nest_cancel: nla_nest_cancel(msg, selftests); genlmsg_cancel: genlmsg_cancel(msg, hdr); free_msg: nlmsg_free(msg); return err; } |
2 2 2 1 1 1 1 2 1 3 1 2 2 2 1 2 2 2 2 4 2 2 4 || // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see https://www.wand.net.nz/ * * This code also uses code from Lulea University, rereleased as GPL by its * authors: * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon * * Changes to meet Linux coding standards, to make it meet latest ccid3 draft * and to make it work as a loadable module in the DCCP stack written by * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. * * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #include "../dccp.h" #include "ccid3.h" #include <asm/unaligned.h> #ifdef CONFIG_IP_DCCP_CCID3_DEBUG static bool ccid3_debug; #define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a) #else #define ccid3_pr_debug(format, a...) 
#endif

/*
 *	Transmitter Half-Connection Routines
 */
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
/*
 * Map a TX state to its printable name (debug builds only).
 * The table is indexed directly by @state, with no range check.
 */
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
	static const char *const ccid3_state_names[] = {
	[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
	[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
	[TFRC_SSTATE_FBACK]    = "FBACK",
	};

	return ccid3_state_names[state];
}
#endif

/*
 * Switch the sender half-connection of @sk to @state, logging the
 * transition. Setting the state it already has triggers WARN_ON().
 */
static void ccid3_hc_tx_set_state(struct sock *sk,
				  enum ccid3_hc_tx_states state)
{
	struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
	enum ccid3_hc_tx_states oldstate = hc->tx_state;

	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
		       dccp_role(sk), sk, ccid3_tx_state_name(oldstate),
		       ccid3_tx_state_name(state));
	WARN_ON(state == oldstate);
	hc->tx_state = state;
}

/*
 * Compute the initial sending rate X_init in the manner of RFC 3390:
 *
 *	X_init  =  min(4 * s, max(2 * s, 4380 bytes)) / RTT
 *
 * Note that RFC 3390 uses MSS, RFC 4342 refers to RFC 3390, and rfc3448bis
 * (rev-02) clarifies the use of RFC 3390 with regard to the above formula.
 * For consistency with other parts of the code, X_init is scaled by 2^6.
 */
static inline u64 rfc3390_initial_rate(struct sock *sk)
{
	const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
	/* clamp(4380, 2s, 4s) is the min/max expression from the formula above */
	const __u32 w_init = clamp_t(__u32, 4380U, 2 * hc->tx_s, 4 * hc->tx_s);

	return scaled_div(w_init << 6, hc->tx_rtt);
}

/**
 * ccid3_update_send_interval  -  Calculate new t_ipi = s / X_inst
 * @hc: socket to have the send interval updated
 *
 * This respects the granularity of X_inst (64 * bytes/second).
 */
static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc)
{
	/* tx_x is scaled by 2^6, so s is scaled the same way before dividing */
	hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x);
	DCCP_BUG_ON(hc->tx_t_ipi == 0);
	ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi,
		       hc->tx_s, (unsigned int)(hc->tx_x >> 6));
}

/*
 * Return the number of whole RTTs elapsed between tx_t_last_win_count
 * and @now.
 * NOTE(review): the s64 microsecond delta is narrowed to u32 here, so very
 * long idle periods would wrap - confirm whether that can matter to callers.
 */
static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now)
{
	u32 delta = ktime_us_delta(now, hc->tx_t_last_win_count);

	return delta / hc->tx_rtt;
}

/**
 * ccid3_hc_tx_update_x  -  Update allowed sending rate X
 * @sk: socket to be updated
 * @stamp: most recent time if available - can be left NULL.
 *
 * This function tracks draft rfc3448bis, check there for latest details.
 *
 * Note: X and X_recv are both stored in units of 64 * bytes/second, to support
 *       fine-grained resolution of sending rates. This requires scaling by 2^6
 *       throughout the code. Only X_calc is unscaled (in bytes/second).
 *
 */
static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
{
	struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
	__u64 min_rate = 2 * hc->tx_x_recv;
	const __u64 old_x = hc->tx_x;
	ktime_t now = stamp ? *stamp : ktime_get_real();

	/*
	 * Handle IDLE periods: do not reduce below RFC3390 initial sending rate
	 * when idling [RFC 4342, 5.1]. Definition of idling is from rfc3448bis:
	 * a sender is idle if it has not sent anything over a 2-RTT-period.
	 * For consistency with X and X_recv, min_rate is also scaled by 2^6.
	 */
	if (ccid3_hc_tx_idle_rtt(hc, now) >= 2) {
		min_rate = rfc3390_initial_rate(sk);
		min_rate = max(min_rate, 2 * hc->tx_x_recv);
	}

	if (hc->tx_p > 0) {
		/* loss seen: X = max(min(X_calc, min_rate), s / t_mbi) */
		hc->tx_x = min(((__u64)hc->tx_x_calc) << 6, min_rate);
		hc->tx_x = max(hc->tx_x, (((__u64)hc->tx_s) << 6) / TFRC_T_MBI);

	} else if (ktime_us_delta(now, hc->tx_t_ld) - (s64)hc->tx_rtt >= 0) {
		/* no loss and at least one RTT since tx_t_ld: double X, bounded */
		hc->tx_x = min(2 * hc->tx_x, min_rate);
		hc->tx_x = max(hc->tx_x,
			       scaled_div(((__u64)hc->tx_s) << 6, hc->tx_rtt));
		hc->tx_t_ld = now;
	}

	/* t_ipi depends on X, so recompute it only when X actually changed */
	if (hc->tx_x != old_x) {
		ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, "
			       "X_recv=%u\n", (unsigned int)(old_x >> 6),
			       (unsigned int)(hc->tx_x >> 6), hc->tx_x_calc,
			       (unsigned int)(hc->tx_x_recv >> 6));

		ccid3_update_send_interval(hc);
	}
}

/**
 * ccid3_hc_tx_update_s - Track the mean packet size `s'
 * @hc: socket to be updated
 * @len: DCCP packet payload size in bytes
 *
 * cf. RFC 4342, 5.3 and  RFC 3448, 4.1
 */
static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hc, int len)
{
	const u16 old_s = hc->tx_s;

	hc->tx_s = tfrc_ewma(hc->tx_s, len, 9);

	/* t_ipi = s / X, so a changed s requires a new send interval */
	if (hc->tx_s != old_s)
		ccid3_update_send_interval(hc);
}

/*
 *	Update Window Counter using the algorithm from [RFC 4342, 8.1].
 *	As elsewhere, RTT > 0 is assumed by using dccp_sample_rtt().
 */
static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hc,
						ktime_t now)
{
	u32 delta = ktime_us_delta(now, hc->tx_t_last_win_count),
	    quarter_rtts = (4 * delta) / hc->tx_rtt;

	if (quarter_rtts > 0) {
		hc->tx_t_last_win_count = now;
		/* advance by at most 5 per update */
		hc->tx_last_win_count  += min(quarter_rtts, 5U);
		hc->tx_last_win_count  &= 0xF;		/* mod 16 */
	}
}

/*
 * Nofeedback timer callback: reduces the allowed sending rate when no
 * feedback arrived in time and re-arms the timer. If the socket is owned
 * by user context the work is postponed by re-arming with the default
 * t_nfb. After leaving the OPEN/PARTOPEN states the timer is not restarted.
 * The socket reference is dropped with sock_put() on every path.
 */
static void ccid3_hc_tx_no_feedback_timer(struct timer_list *t)
{
	struct ccid3_hc_tx_sock *hc = from_timer(hc, t, tx_no_feedback_timer);
	struct sock *sk = hc->sk;
	unsigned long t_nfb = USEC_PER_SEC / 5;

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		/* XXX: set some sensible MIB */
		goto restart_timer;
	}

	ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk,
		       ccid3_tx_state_name(hc->tx_state));

	/* Ignore and do not restart after leaving the established state */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		goto out;

	/* Reset feedback state to "no feedback received" */
	if (hc->tx_state == TFRC_SSTATE_FBACK)
		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);

	/*
	 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
	 * RTO is 0 if and only if no feedback has been received yet.
	 */
	if (hc->tx_t_rto == 0 || hc->tx_p == 0) {

		/* halve send rate directly */
		hc->tx_x = max(hc->tx_x / 2,
			       (((__u64)hc->tx_s) << 6) / TFRC_T_MBI);
		ccid3_update_send_interval(hc);
	} else {
		/*
		 *  Modify the cached value of X_recv
		 *
		 *  If (X_calc > 2 * X_recv)
		 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
		 *  Else
		 *    X_recv = X_calc / 4;
		 *
		 *  Note that X_recv is scaled by 2^6 while X_calc is not
		 */
		if (hc->tx_x_calc > (hc->tx_x_recv >> 5))
			hc->tx_x_recv =
				max(hc->tx_x_recv / 2,
				    (((__u64)hc->tx_s) << 6) / (2*TFRC_T_MBI));
		else {
			/* X_calc / 4 in 2^6-scaled units is X_calc << 4 */
			hc->tx_x_recv = hc->tx_x_calc;
			hc->tx_x_recv <<= 4;
		}
		ccid3_hc_tx_update_x(sk, NULL);
	}
	ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n",
			(unsigned long long)hc->tx_x);

	/*
	 * Set new timeout for the nofeedback timer.
	 * See comments in packet_recv() regarding the value of t_RTO.
	 */
	if (unlikely(hc->tx_t_rto == 0))	/* no feedback received yet */
		t_nfb = TFRC_INITIAL_TIMEOUT;
	else
		t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi);

restart_timer:
	sk_reset_timer(sk, &hc->tx_no_feedback_timer,
			   jiffies + usecs_to_jiffies(t_nfb));
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

/**
 * ccid3_hc_tx_send_packet  -  Delay-based dequeueing of TX packets
 * @sk: socket to send packet from
 * @skb: next packet candidate to send on @sk
 *
 * This function uses the convention of ccid_packet_dequeue_eval() and
 * returns a millisecond-delay value between 0 and t_mbi = 64000 msec.
 */
static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
	ktime_t now = ktime_get_real();
	s64 delay;

	/*
	 * This function is called only for Data and DataAck packets. Sending
	 * zero-sized Data(Ack)s is theoretically possible, but for congestion
	 * control this case is pathological - ignore it.
	 */
	if (unlikely(skb->len == 0))
		return -EBADMSG;

	if (hc->tx_state == TFRC_SSTATE_NO_SENT) {
		/* first packet on this half-connection: initialise sender state */
		sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies +
			       usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
		hc->tx_last_win_count	= 0;
		hc->tx_t_last_win_count = now;

		/* Set t_0 for initial packet */
		hc->tx_t_nom = now;

		hc->tx_s = skb->len;

		/*
		 * Use initial RTT sample when available: recommended by erratum
		 * to RFC 4342. This implements the initialisation procedure of
		 * draft rfc3448bis, section 4.2. Remember, X is scaled by 2^6.
		 */
		if (dp->dccps_syn_rtt) {
			ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
			hc->tx_rtt  = dp->dccps_syn_rtt;
			hc->tx_x    = rfc3390_initial_rate(sk);
			hc->tx_t_ld = now;
		} else {
			/*
			 * Sender does not have RTT sample:
			 * - set fallback RTT (RFC 4340, 3.4) since a RTT value
			 *   is needed in several parts (e.g.  window counter);
			 * - set sending rate X_pps = 1pps as per RFC 3448, 4.2.
			 */
			hc->tx_rtt = DCCP_FALLBACK_RTT;
			hc->tx_x   = hc->tx_s;
			hc->tx_x <<= 6;
		}
		ccid3_update_send_interval(hc);

		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);

	} else {
		delay = ktime_us_delta(hc->tx_t_nom, now);
		ccid3_pr_debug("delay=%ld\n", (long)delay);
		/*
		 *	Scheduling of packet transmissions (RFC 5348, 8.3)
		 *
		 * if (t_now > t_nom - delta)
		 *       // send the packet now
		 * else
		 *       // send the packet in (t_nom - t_now) milliseconds.
		 */
		if (delay >= TFRC_T_DELTA)
			return (u32)delay / USEC_PER_MSEC;

		ccid3_hc_tx_update_win_count(hc, now);
	}

	/* prepare to send now (add options etc.) */
	dp->dccps_hc_tx_insert_options = 1;
	DCCP_SKB_CB(skb)->dccpd_ccval  = hc->tx_last_win_count;

	/* set the nominal send time for the next following packet */
	hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi);
	return CCID_PACKET_SEND_AT_ONCE;
}

/*
 * Called after a packet of @len bytes was sent: folds @len into the mean
 * packet size and records the sent sequence number (dccps_gss) in the TX
 * history. A failed history insertion is only logged.
 */
static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
{
	struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);

	ccid3_hc_tx_update_s(hc, len);

	if (tfrc_tx_hist_add(&hc->tx_hist, dccp_sk(sk)->dccps_gss))
		DCCP_CRIT("packet history - out of memory!");
}

/*
 * Process a received Ack/DataAck on the sender side: take an RTT sample
 * from the acknowledged history entry, update the allowed sending rate X,
 * wake up a blocked writer, and re-arm the nofeedback timer. Other packet
 * types, and Acks that match no TX history entry, are ignored.
 */
static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
	struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
	struct tfrc_tx_hist_entry *acked;
	ktime_t now;
	unsigned long t_nfb;
	u32 r_sample;

	/* we are only interested in ACKs */
	if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
	      DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
		return;
	/*
	 * Locate the acknowledged packet in the TX history.
	 *
	 * Returning "entry not found" here can for instance happen when
	 *  - the host has not sent out anything (e.g. a passive server),
	 *  - the Ack is outdated (packet with higher Ack number was received),
	 *  - it is a bogus Ack (for a packet not sent on this connection).
	 */
	acked = tfrc_tx_hist_find_entry(hc->tx_hist, dccp_hdr_ack_seq(skb));
	if (acked == NULL)
		return;
	/* For the sake of RTT sampling, ignore/remove all older entries */
	tfrc_tx_hist_purge(&acked->next);

	/* Update the moving average for the RTT estimate (RFC 3448, 4.3) */
	now	  = ktime_get_real();
	r_sample  = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp));
	hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9);

	/*
	 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
	 */
	if (hc->tx_state == TFRC_SSTATE_NO_FBACK) {
		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);

		if (hc->tx_t_rto == 0) {
			/*
			 * Initial feedback packet: Larger Initial Windows (4.2)
			 */
			hc->tx_x    = rfc3390_initial_rate(sk);
			hc->tx_t_ld = now;

			ccid3_update_send_interval(hc);

			goto done_computing_x;
		} else if (hc->tx_p == 0) {
			/*
			 * First feedback after nofeedback timer expiry (4.3)
			 */
			goto done_computing_x;
		}
	}

	/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
	if (hc->tx_p > 0)
		hc->tx_x_calc = tfrc_calc_x(hc->tx_s, hc->tx_rtt, hc->tx_p);
	ccid3_hc_tx_update_x(sk, &now);

done_computing_x:
	ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
			       "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
			       dccp_role(sk), sk, hc->tx_rtt, r_sample,
			       hc->tx_s, hc->tx_p, hc->tx_x_calc,
			       (unsigned int)(hc->tx_x_recv >> 6),
			       (unsigned int)(hc->tx_x >> 6));

	/* unschedule no feedback timer */
	sk_stop_timer(sk, &hc->tx_no_feedback_timer);

	/*
	 * As we have calculated new ipi, delta, t_nom it is possible
	 * that we now can send a packet, so wake up dccp_wait_for_ccid
	 */
	sk->sk_write_space(sk);

	/*
	 * Update timeout interval for the nofeedback timer. In order to control
	 * rate halving on networks with very low RTTs (<= 1 ms), use per-route
	 * tunable RTAX_RTO_MIN value as the lower bound.
	 */
	hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt,
				  USEC_PER_SEC/HZ * tcp_rto_min(sk));
	/*
	 * Schedule no feedback timer to expire in
	 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
	 */
	t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi);

	ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
		       "expire in %lu jiffies (%luus)\n",
		       dccp_role(sk), sk, usecs_to_jiffies(t_nfb), t_nfb);

	sk_reset_timer(sk, &hc->tx_no_feedback_timer,
			   jiffies + usecs_to_jiffies(t_nfb));
}

/*
 * Parse the TFRC options carried by feedback packets. Receive Rate is
 * stored in tx_x_recv (scaled by 2^6); Loss Event Rate is converted with
 * tfrc_invert_loss_event_rate() and stored in tx_p. Both options are
 * ignored on Data packets; any other option is ignored entirely.
 *
 * Returns 0, or -EINVAL if one of the two options is not 4 bytes long.
 */
static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type,
				     u8 option, u8 *optval, u8 optlen)
{
	struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
	__be32 opt_val;

	switch (option) {
	case TFRC_OPT_RECEIVE_RATE:
	case TFRC_OPT_LOSS_EVENT_RATE:
		/* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */
		if (packet_type == DCCP_PKT_DATA)
			break;
		if (unlikely(optlen != 4)) {
			DCCP_WARN("%s(%p), invalid len %d for %u\n",
				  dccp_role(sk), sk, optlen, option);
			return -EINVAL;
		}
		/* optval may be unaligned within the options area */
		opt_val = ntohl(get_unaligned((__be32 *)optval));

		if (option == TFRC_OPT_RECEIVE_RATE) {
			/* Receive Rate is kept in units of 64 bytes/second */
			hc->tx_x_recv = opt_val;
			hc->tx_x_recv <<= 6;

			ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
				       dccp_role(sk), sk, opt_val);
		} else {
			/* Update the fixpoint Loss Event Rate fraction */
			hc->tx_p = tfrc_invert_loss_event_rate(opt_val);

			ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
				       dccp_role(sk), sk, opt_val);
		}
	}
	return 0;
}

/*
 * Initialise the sender half-connection: NO_SENT state, empty TX history,
 * back-pointer to @sk, and the nofeedback timer set up but not armed here.
 * Always returns 0.
 */
static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
{
	struct ccid3_hc_tx_sock *hc = ccid_priv(ccid);

	hc->tx_state = TFRC_SSTATE_NO_SENT;
	hc->tx_hist  = NULL;
	hc->sk	     = sk;
	timer_setup(&hc->tx_no_feedback_timer,
		    ccid3_hc_tx_no_feedback_timer, 0);
	return 0;
}

/* Tear down the sender half-connection: stop the timer, purge TX history. */
static void ccid3_hc_tx_exit(struct sock *sk)
{
	struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);

	sk_stop_timer(sk, &hc->tx_no_feedback_timer);
	tfrc_tx_hist_purge(&hc->tx_hist);
}

/* Export the sender's RTO and RTT through the tcp_info fields of @info. */
static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
{
	info->tcpi_rto = ccid3_hc_tx_sk(sk)->tx_t_rto;
	info->tcpi_rtt = ccid3_hc_tx_sk(sk)->tx_rtt;
}

/*
 * getsockopt() handler for the sender half-connection. Only
 * DCCP_SOCKOPT_CCID_TX_INFO is supported; it copies a zero-initialised
 * struct tfrc_tx_info snapshot to user space.
 *
 * Returns 0, -EINVAL if @len is smaller than the structure, -ENOPROTOOPT
 * for any other @optname, or -EFAULT if copying to user space fails.
 */
static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
				  u32 __user *optval, int __user *optlen)
{
	const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
	struct tfrc_tx_info tfrc;
	const void *val;

	switch (optname) {
	case DCCP_SOCKOPT_CCID_TX_INFO:
		if (len < sizeof(tfrc))
			return -EINVAL;
		/* zero first so no uninitialised stack bytes reach user space */
		memset(&tfrc, 0, sizeof(tfrc));
		tfrc.tfrctx_x	   = hc->tx_x;
		tfrc.tfrctx_x_recv = hc->tx_x_recv;
		tfrc.tfrctx_x_calc = hc->tx_x_calc;
		tfrc.tfrctx_rtt	   = hc->tx_rtt;
		tfrc.tfrctx_p	   = hc->tx_p;
		tfrc.tfrctx_rto	   = hc->tx_t_rto;
		tfrc.tfrctx_ipi	   = hc->tx_t_ipi;
		len = sizeof(tfrc);
		val = &tfrc;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, val, len))
		return -EFAULT;

	return 0;
}

/*
 *	Receiver Half-Connection Routines
 */

/* CCID3 feedback types */
enum ccid3_fback_type {
	CCID3_FBACK_NONE = 0,
	CCID3_FBACK_INITIAL,
	CCID3_FBACK_PERIODIC,
	CCID3_FBACK_PARAM_CHANGE
};

#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
/*
 * Map an RX state to its printable name (debug builds only).
 * The table is indexed directly by @state, with no range check.
 */
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
	static const char *const ccid3_rx_state_names[] = {
	[TFRC_RSTATE_NO_DATA] = "NO_DATA",
	[TFRC_RSTATE_DATA]    = "DATA",
	};

	return ccid3_rx_state_names[state];
}
#endif

/*
 * Switch the receiver half-connection of @sk to @state, logging the
 * transition. Setting the state it already has triggers WARN_ON().
 */
static void ccid3_hc_rx_set_state(struct sock *sk,
				  enum ccid3_hc_rx_states state)
{
	struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
	enum ccid3_hc_rx_states oldstate = hc->rx_state;

	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
		       dccp_role(sk), sk, ccid3_rx_state_name(oldstate),
		       ccid3_rx_state_name(state));
	WARN_ON(state == oldstate);
	hc->rx_state = state;
}

/*
 * Update X_recv according to @fbtype and send a feedback Ack. On every
 * feedback the timestamp of the last feedback, the last window counter and
 * the received-bytes counter are reset. CCID3_FBACK_NONE (or any unknown
 * type) sends nothing.
 */
static void ccid3_hc_rx_send_feedback(struct sock *sk,
				      const struct sk_buff *skb,
				      enum ccid3_fback_type fbtype)
{
	struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	ktime_t now = ktime_get();
	s64 delta = 0;

	switch (fbtype) {
	case CCID3_FBACK_INITIAL:
		hc->rx_x_recv = 0;
		hc->rx_pinv   = ~0U;   /* see RFC 4342, 8.5 */
		break;
	case CCID3_FBACK_PARAM_CHANGE:
		/*
		 * When parameters change (new loss or p > p_prev), we do not
		 * have a reliable estimate for R_m of [RFC 3448, 6.2] and so
		 * need to  reuse the previous value of X_recv. However, when
		 * X_recv was 0 (due to early loss), this would kill X down to
		 * s/t_mbi (i.e. one packet in 64 seconds).
		 * To avoid such drastic reduction, we approximate X_recv as
		 * the number of bytes since last feedback.
		 * This is a safe fallback, since X is bounded above by X_calc.
		 */
		if (hc->rx_x_recv > 0)
			break;
		fallthrough;
	case CCID3_FBACK_PERIODIC:
		delta = ktime_us_delta(now, hc->rx_tstamp_last_feedback);
		if (delta <= 0)
			delta = 1;	/* avoid a zero or negative divisor */
		hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta);
		break;
	default:
		return;
	}

	ccid3_pr_debug("Interval %lldusec, X_recv=%u, 1/p=%u\n", delta,
		       hc->rx_x_recv, hc->rx_pinv);

	hc->rx_tstamp_last_feedback = now;
	hc->rx_last_counter	    = dccp_hdr(skb)->dccph_ccval;
	hc->rx_bytes_recv	    = 0;

	dp->dccps_hc_rx_insert_options = 1;
	dccp_send_ack(sk);
}

/*
 * Insert the Loss Event Rate and Receive Rate options into @skb. Nothing
 * is inserted (return 0) unless the socket is OPEN or PARTOPEN and the
 * packet carries an Ack. Returns -1 if either option cannot be inserted.
 */
static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
{
	const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
	__be32 x_recv, pinv;

	if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
		return 0;

	if (dccp_packet_without_ack(skb))
		return 0;

	x_recv = htonl(hc->rx_x_recv);
	pinv   = htonl(hc->rx_pinv);

	if (dccp_insert_option(skb, TFRC_OPT_LOSS_EVENT_RATE,
			       &pinv, sizeof(pinv)) ||
	    dccp_insert_option(skb, TFRC_OPT_RECEIVE_RATE,
			       &x_recv, sizeof(x_recv)))
		return -1;

	return 0;
}

/**
 * ccid3_first_li  -  Implements [RFC 5348, 6.3.1]
 * @sk: socket to calculate loss interval for
 *
 * Determine the length of the first loss interval via inverse lookup.
 * Assume that X_recv can be computed by the throughput equation
 *		    s
 *	X_recv = --------
 *		 R * fval
 * Find some p such that f(p) = fval; return 1/p (scaled).
*/ static u32 ccid3_first_li(struct sock *sk) { struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); u32 x_recv, p; s64 delta; u64 fval; if (hc->rx_rtt == 0) { DCCP_WARN("No RTT estimate available, using fallback RTT\n"); hc->rx_rtt = DCCP_FALLBACK_RTT; } delta = ktime_us_delta(ktime_get(), hc->rx_tstamp_last_feedback); if (delta <= 0) delta = 1; x_recv = scaled_div32(hc->rx_bytes_recv, delta); if (x_recv == 0) { /* would also trigger divide-by-zero */ DCCP_WARN("X_recv==0\n"); if (hc->rx_x_recv == 0) { DCCP_BUG("stored value of X_recv is zero"); return ~0U; } x_recv = hc->rx_x_recv; } fval = scaled_div(hc->rx_s, hc->rx_rtt); fval = scaled_div32(fval, x_recv); p = tfrc_calc_x_reverse_lookup(fval); ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); return p == 0 ? ~0U : scaled_div(1, p); } static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE; const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; const bool is_data_packet = dccp_data_packet(skb); if (unlikely(hc->rx_state == TFRC_RSTATE_NO_DATA)) { if (is_data_packet) { const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; do_feedback = CCID3_FBACK_INITIAL; ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); hc->rx_s = payload; /* * Not necessary to update rx_bytes_recv here, * since X_recv = 0 for the first feedback packet (cf. 
* RFC 3448, 6.3) -- gerrit */ } goto update_records; } if (tfrc_rx_hist_duplicate(&hc->rx_hist, skb)) return; /* done receiving */ if (is_data_packet) { const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; /* * Update moving-average of s and the sum of received payload bytes */ hc->rx_s = tfrc_ewma(hc->rx_s, payload, 9); hc->rx_bytes_recv += payload; } /* * Perform loss detection and handle pending losses */ if (tfrc_rx_handle_loss(&hc->rx_hist, &hc->rx_li_hist, skb, ndp, ccid3_first_li, sk)) { do_feedback = CCID3_FBACK_PARAM_CHANGE; goto done_receiving; } if (tfrc_rx_hist_loss_pending(&hc->rx_hist)) return; /* done receiving */ /* * Handle data packets: RTT sampling and monitoring p */ if (unlikely(!is_data_packet)) goto update_records; if (!tfrc_lh_is_initialised(&hc->rx_li_hist)) { const u32 sample = tfrc_rx_hist_sample_rtt(&hc->rx_hist, skb); /* * Empty loss history: no loss so far, hence p stays 0. * Sample RTT values, since an RTT estimate is required for the * computation of p when the first loss occurs; RFC 3448, 6.3.1. */ if (sample != 0) hc->rx_rtt = tfrc_ewma(hc->rx_rtt, sample, 9); } else if (tfrc_lh_update_i_mean(&hc->rx_li_hist, skb)) { /* * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean * has decreased (resp. p has increased), send feedback now. 
*/ do_feedback = CCID3_FBACK_PARAM_CHANGE; } /* * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3 */ if (SUB16(dccp_hdr(skb)->dccph_ccval, hc->rx_last_counter) > 3) do_feedback = CCID3_FBACK_PERIODIC; update_records: tfrc_rx_hist_add_packet(&hc->rx_hist, skb, ndp); done_receiving: if (do_feedback) ccid3_hc_rx_send_feedback(sk, skb, do_feedback); } static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) { struct ccid3_hc_rx_sock *hc = ccid_priv(ccid); hc->rx_state = TFRC_RSTATE_NO_DATA; tfrc_lh_init(&hc->rx_li_hist); return tfrc_rx_hist_alloc(&hc->rx_hist); } static void ccid3_hc_rx_exit(struct sock *sk) { struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); tfrc_rx_hist_purge(&hc->rx_hist); tfrc_lh_cleanup(&hc->rx_li_hist); } static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) { info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->rx_state; info->tcpi_options |= TCPI_OPT_TIMESTAMPS; info->tcpi_rcv_rtt = ccid3_hc_rx_sk(sk)->rx_rtt; } static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); struct tfrc_rx_info rx_info; const void *val; switch (optname) { case DCCP_SOCKOPT_CCID_RX_INFO: if (len < sizeof(rx_info)) return -EINVAL; rx_info.tfrcrx_x_recv = hc->rx_x_recv; rx_info.tfrcrx_rtt = hc->rx_rtt; rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hc->rx_pinv); len = sizeof(rx_info); val = &rx_info; break; default: return -ENOPROTOOPT; } if (put_user(len, optlen) || copy_to_user(optval, val, len)) return -EFAULT; return 0; } struct ccid_operations ccid3_ops = { .ccid_id = DCCPC_CCID3, .ccid_name = "TCP-Friendly Rate Control", .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), .ccid_hc_tx_init = ccid3_hc_tx_init, .ccid_hc_tx_exit = ccid3_hc_tx_exit, .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet, .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent, .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv, 
.ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options, .ccid_hc_rx_obj_size = sizeof(struct ccid3_hc_rx_sock), .ccid_hc_rx_init = ccid3_hc_rx_init, .ccid_hc_rx_exit = ccid3_hc_rx_exit, .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options, .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt, .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, }; #ifdef CONFIG_IP_DCCP_CCID3_DEBUG module_param(ccid3_debug, bool, 0644); MODULE_PARM_DESC(ccid3_debug, "Enable CCID-3 debug messages"); #endif |
10 77 7 78 58 78 46 47 46 46 9 2 15 7 9 2 9 112 109 96 19 78 46 1 19 46 6 1 3 2 1 108 1 15 15 1 14 1 1 || // SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2008-2014 Mathieu Desnoyers
 */
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/tracepoint.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/static_key.h>

/* Number of probes attached to a tracepoint: 0, 1, 2, or 3-and-more. */
enum tp_func_state {
	TP_FUNC_0,
	TP_FUNC_1,
	TP_FUNC_2,
	TP_FUNC_N,
};

extern tracepoint_ptr_t __start___tracepoints_ptrs[];
extern tracepoint_ptr_t __stop___tracepoints_ptrs[];

DEFINE_SRCU(tracepoint_srcu);
EXPORT_SYMBOL_GPL(tracepoint_srcu);

/* Transition sequences for which a grace-period snapshot is kept. */
enum tp_transition_sync {
	TP_TRANSITION_SYNC_1_0_1,
	TP_TRANSITION_SYNC_N_2_1,

	_NR_TP_TRANSITION_SYNC,
};

/* Saved RCU and SRCU grace-period cookies for one transition kind. */
struct tp_transition_snapshot {
	unsigned long rcu;
	unsigned long srcu;
	bool ongoing;	/* true while the snapshot has not been waited for */
};

/* Protected by tracepoints_mutex */
static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC];

/*
 * Record the current RCU and SRCU grace-period state for @sync and mark
 * the snapshot as ongoing, overwriting any earlier snapshot.
 */
static void tp_rcu_get_state(enum tp_transition_sync sync)
{
	struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];

	/* Keep the latest get_state snapshot. */
	snapshot->rcu = get_state_synchronize_rcu();
	snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu);
	snapshot->ongoing = true;
}

/*
 * If a snapshot for @sync is ongoing, wait until both the RCU and the SRCU
 * grace periods recorded in it have elapsed, then clear the ongoing flag.
 * Does nothing when no snapshot is ongoing.
 */
static void tp_rcu_cond_sync(enum tp_transition_sync sync)
{
	struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];

	if (!snapshot->ongoing)
		return;
	cond_synchronize_rcu(snapshot->rcu);
	/* only block on SRCU when the recorded grace period has not completed */
	if (!poll_state_synchronize_srcu(&tracepoint_srcu, snapshot->srcu))
		synchronize_srcu(&tracepoint_srcu);
	snapshot->ongoing = false;
}

/* Set to 1 to enable tracepoint debug output */
static const int tracepoint_debug;

#ifdef CONFIG_MODULES
/*
 * Tracepoint module list mutex protects the local module list.
 */
static DEFINE_MUTEX(tracepoint_module_list_mutex);

/* Local list of struct tp_module */
static LIST_HEAD(tracepoint_module_list);
#endif /* CONFIG_MODULES */

/*
 * tracepoints_mutex protects the builtin and module tracepoints.
 * tracepoints_mutex nests inside tracepoint_module_list_mutex.
 */
static DEFINE_MUTEX(tracepoints_mutex);

/* Singly linked list of probe arrays whose freeing was postponed at boot. */
static struct rcu_head *early_probes;
/* Set by release_early_probes(); until then release_probes() only queues. */
static bool ok_to_free_tracepoints;

/*
 * Note about RCU :
 * It is used to delay the free of multiple probes array until a quiescent
 * state is reached.
 */
struct tp_probes {
	struct rcu_head rcu;
	struct tracepoint_func probes[];
};

/* Called in removal of a func but failed to allocate a new tp_funcs */
static void tp_stub_func(void)
{
	return;
}

/*
 * Allocate a tp_probes with room for @count probe entries and return a
 * pointer to its probes[] array (not to the container), or NULL on
 * allocation failure.
 */
static inline void *allocate_probes(int count)
{
	struct tp_probes *p  = kmalloc(struct_size(p, probes, count),
				       GFP_KERNEL);
	return p == NULL ? NULL : p->probes;
}

/* SRCU callback: final free of the tp_probes containing @head. */
static void srcu_free_old_probes(struct rcu_head *head)
{
	kfree(container_of(head, struct tp_probes, rcu));
}

/* RCU callback: chains into an SRCU callback so both grace periods elapse. */
static void rcu_free_old_probes(struct rcu_head *head)
{
	call_srcu(&tracepoint_srcu, head, srcu_free_old_probes);
}

/*
 * Initcall: allow probe freeing from now on and hand every probe array
 * queued on early_probes to the RCU/SRCU free chain. Always returns 0.
 */
static __init int release_early_probes(void)
{
	struct rcu_head *tmp;

	ok_to_free_tracepoints = true;

	while (early_probes) {
		tmp = early_probes;
		early_probes = tmp->next;
		call_rcu(tmp, rcu_free_old_probes);
	}

	return 0;
}

/* SRCU is initialized at core_initcall */
postcore_initcall(release_early_probes);

/*
 * Schedule the probe array @old (as returned by func_add()/func_remove())
 * for freeing after the required grace periods. A NULL @old is ignored.
 */
static inline void release_probes(struct tracepoint_func *old)
{
	if (old) {
		struct tp_probes *tp_probes = container_of(old,
			struct tp_probes, probes[0]);

		/*
		 * We can't free probes if SRCU is not initialized yet.
		 * Postpone the freeing till after SRCU is initialized.
		 */
		if (unlikely(!ok_to_free_tracepoints)) {
			tp_probes->rcu.next = early_probes;
			early_probes = &tp_probes->rcu;
			return;
		}

		/*
		 * Tracepoint probes are protected by both sched RCU and SRCU,
		 * by calling the SRCU callback in the sched RCU callback we
		 * cover both cases. So let us chain the SRCU and sched RCU
		 * callbacks to wait for both grace periods.
		 */
		call_rcu(&tp_probes->rcu, rcu_free_old_probes);
	}
}

/* Print every entry of @funcs when tracepoint_debug is enabled. */
static void debug_print_probes(struct tracepoint_func *funcs)
{
	int i;

	if (!tracepoint_debug || !funcs)
		return;

	for (i = 0; funcs[i].func; i++)
		printk(KERN_DEBUG "Probe %d : %p\n", i, funcs[i].func);
}

/*
 * Build a new probe array containing the probes of *@funcs (stub entries
 * dropped) plus @tp_func, inserted before the first probe of lower
 * priority than @prio, and store it in *@funcs.
 *
 * Returns the old array (possibly NULL) for the caller to release, or
 * ERR_PTR(-EINVAL) if @tp_func has no function, ERR_PTR(-EEXIST) if the
 * same func/data pair is already present, ERR_PTR(-ENOMEM) on allocation
 * failure. On error *@funcs is left unchanged.
 */
static struct tracepoint_func *
func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func,
	 int prio)
{
	struct tracepoint_func *old, *new;
	int iter_probes;	/* Iterate over old probe array. */
	int nr_probes = 0;	/* Counter for probes */
	int pos = -1;		/* Insertion position into new array */

	if (WARN_ON(!tp_func->func))
		return ERR_PTR(-EINVAL);

	debug_print_probes(*funcs);
	old = *funcs;
	if (old) {
		/* (N -> N+1), (N != 0, 1) probes */
		for (iter_probes = 0; old[iter_probes].func; iter_probes++) {
			if (old[iter_probes].func == tp_stub_func)
				continue;	/* Skip stub functions. */
			if (old[iter_probes].func == tp_func->func &&
			    old[iter_probes].data == tp_func->data)
				return ERR_PTR(-EEXIST);
			nr_probes++;
		}
	}
	/* + 2 : one for new probe, one for NULL func */
	new = allocate_probes(nr_probes + 2);
	if (new == NULL)
		return ERR_PTR(-ENOMEM);
	if (old) {
		nr_probes = 0;
		for (iter_probes = 0; old[iter_probes].func; iter_probes++) {
			if (old[iter_probes].func == tp_stub_func)
				continue;
			/* Insert before probes of lower priority */
			if (pos < 0 && old[iter_probes].prio < prio)
				pos = nr_probes++;
			new[nr_probes++] = old[iter_probes];
		}
		if (pos < 0)
			pos = nr_probes++;
		/* nr_probes now points to the end of the new array */
	} else {
		pos = 0;
		nr_probes = 1; /* must point at end of array */
	}
	new[pos] = *tp_func;
	new[nr_probes].func = NULL;
	*funcs = new;
	debug_print_probes(*funcs);
	return old;
}

/*
 * Remove the probe matching @tp_func (same func and data) from *@funcs,
 * dropping stub entries at the same time. If @tp_func->func is NULL, or no
 * probe would remain, *@funcs becomes NULL. If the smaller array cannot be
 * allocated, the matching entries are instead overwritten in place with
 * tp_stub_func and *@funcs keeps pointing at the old array.
 *
 * Returns the old array, or ERR_PTR(-ENOENT) if *@funcs was NULL.
 */
static void *func_remove(struct tracepoint_func **funcs,
		struct tracepoint_func *tp_func)
{
	int nr_probes = 0, nr_del = 0, i;
	struct tracepoint_func *old, *new;

	old = *funcs;

	if (!old)
		return ERR_PTR(-ENOENT);

	debug_print_probes(*funcs);
	/* (N -> M), (N > 1, M >= 0) probes */
	if (tp_func->func) {
		for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
			if ((old[nr_probes].func == tp_func->func &&
			     old[nr_probes].data == tp_func->data) ||
			    old[nr_probes].func == tp_stub_func)
				nr_del++;
		}
	}

	/*
	 * If probe is NULL, then nr_probes = nr_del = 0, and then the
	 * entire entry will be removed.
	 */
	if (nr_probes - nr_del == 0) {
		/* N -> 0, (N > 1) */
		*funcs = NULL;
		debug_print_probes(*funcs);
		return old;
	} else {
		int j = 0;
		/* N -> M, (N > 1, M > 0) */
		/* + 1 for NULL */
		new = allocate_probes(nr_probes - nr_del + 1);
		if (new) {
			for (i = 0; old[i].func; i++) {
				if ((old[i].func != tp_func->func ||
				     old[i].data != tp_func->data) &&
				    old[i].func != tp_stub_func)
					new[j++] = old[i];
			}
			new[nr_probes - nr_del].func = NULL;
			*funcs = new;
		} else {
			/*
			 * Failed to allocate, replace the old function
			 * with calls to tp_stub_func.
			 */
			for (i = 0; old[i].func; i++) {
				if (old[i].func == tp_func->func &&
				    old[i].data == tp_func->data)
					WRITE_ONCE(old[i].func, tp_stub_func);
			}
			*funcs = old;
		}
	}
	debug_print_probes(*funcs);
	return old;
}

/*
 * Count the number of functions (enum tp_func_state) in a tp_funcs array.
 */
static enum tp_func_state nr_func_state(const struct tracepoint_func *tp_funcs)
{
	if (!tp_funcs)
		return TP_FUNC_0;
	if (!tp_funcs[1].func)
		return TP_FUNC_1;
	if (!tp_funcs[2].func)
		return TP_FUNC_2;
	return TP_FUNC_N;	/* 3 or more */
}

/*
 * Point the tracepoint's static call at the single probe when exactly one
 * probe is registered, otherwise at the tracepoint's iterator.
 */
static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs)
{
	void *func = tp->iterator;

	/* Synthetic events do not have static call sites */
	if (!tp->static_call_key)
		return;
	if (nr_func_state(tp_funcs) == TP_FUNC_1)
		func = tp_funcs[0].func;
	__static_call_update(tp->static_call_key, tp->static_call_tramp, func);
}

/*
 * Add the probe function to a tracepoint.
*/ static int tracepoint_add_func(struct tracepoint *tp, struct tracepoint_func *func, int prio, bool warn) { struct tracepoint_func *old, *tp_funcs; int ret; if (tp->regfunc && !static_key_enabled(&tp->key)) { ret = tp->regfunc(); if (ret < 0) return ret; } tp_funcs = rcu_dereference_protected(tp->funcs, lockdep_is_held(&tracepoints_mutex)); old = func_add(&tp_funcs, func, prio); if (IS_ERR(old)) { WARN_ON_ONCE(warn && PTR_ERR(old) != -ENOMEM); return PTR_ERR(old); } /* * rcu_assign_pointer has as smp_store_release() which makes sure * that the new probe callbacks array is consistent before setting * a pointer to it. This array is referenced by __DO_TRACE from * include/linux/tracepoint.h using rcu_dereference_sched(). */ switch (nr_func_state(tp_funcs)) { case TP_FUNC_1: /* 0->1 */ /* * Make sure new static func never uses old data after a * 1->0->1 transition sequence. */ tp_rcu_cond_sync(TP_TRANSITION_SYNC_1_0_1); /* Set static call to first function */ tracepoint_update_call(tp, tp_funcs); /* Both iterator and static call handle NULL tp->funcs */ rcu_assign_pointer(tp->funcs, tp_funcs); static_key_enable(&tp->key); break; case TP_FUNC_2: /* 1->2 */ /* Set iterator static call */ tracepoint_update_call(tp, tp_funcs); /* * Iterator callback installed before updating tp->funcs. * Requires ordering between RCU assign/dereference and * static call update/call. */ fallthrough; case TP_FUNC_N: /* N->N+1 (N>1) */ rcu_assign_pointer(tp->funcs, tp_funcs); /* * Make sure static func never uses incorrect data after a * N->...->2->1 (N>1) transition sequence. */ if (tp_funcs[0].data != old[0].data) tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); break; default: WARN_ON_ONCE(1); break; } release_probes(old); return 0; } /* * Remove a probe function from a tracepoint. * Note: only waiting an RCU period after setting elem->call to the empty * function insures that the original callback is not used anymore. This insured * by preempt_disable around the call site. 
*/ static int tracepoint_remove_func(struct tracepoint *tp, struct tracepoint_func *func) { struct tracepoint_func *old, *tp_funcs; tp_funcs = rcu_dereference_protected(tp->funcs, lockdep_is_held(&tracepoints_mutex)); old = func_remove(&tp_funcs, func); if (WARN_ON_ONCE(IS_ERR(old))) return PTR_ERR(old); if (tp_funcs == old) /* Failed allocating new tp_funcs, replaced func with stub */ return 0; switch (nr_func_state(tp_funcs)) { case TP_FUNC_0: /* 1->0 */ /* Removed last function */ if (tp->unregfunc && static_key_enabled(&tp->key)) tp->unregfunc(); static_key_disable(&tp->key); /* Set iterator static call */ tracepoint_update_call(tp, tp_funcs); /* Both iterator and static call handle NULL tp->funcs */ rcu_assign_pointer(tp->funcs, NULL); /* * Make sure new static func never uses old data after a * 1->0->1 transition sequence. */ tp_rcu_get_state(TP_TRANSITION_SYNC_1_0_1); break; case TP_FUNC_1: /* 2->1 */ rcu_assign_pointer(tp->funcs, tp_funcs); /* * Make sure static func never uses incorrect data after a * N->...->2->1 (N>2) transition sequence. If the first * element's data has changed, then force the synchronization * to prevent current readers that have loaded the old data * from calling the new function. */ if (tp_funcs[0].data != old[0].data) tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); tp_rcu_cond_sync(TP_TRANSITION_SYNC_N_2_1); /* Set static call to first function */ tracepoint_update_call(tp, tp_funcs); break; case TP_FUNC_2: /* N->N-1 (N>2) */ fallthrough; case TP_FUNC_N: rcu_assign_pointer(tp->funcs, tp_funcs); /* * Make sure static func never uses incorrect data after a * N->...->2->1 (N>2) transition sequence. 
*/ if (tp_funcs[0].data != old[0].data) tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); break; default: WARN_ON_ONCE(1); break; } release_probes(old); return 0; } /** * tracepoint_probe_register_prio_may_exist - Connect a probe to a tracepoint with priority * @tp: tracepoint * @probe: probe handler * @data: tracepoint data * @prio: priority of this function over other registered functions * * Same as tracepoint_probe_register_prio() except that it will not warn * if the tracepoint is already registered. */ int tracepoint_probe_register_prio_may_exist(struct tracepoint *tp, void *probe, void *data, int prio) { struct tracepoint_func tp_func; int ret; mutex_lock(&tracepoints_mutex); tp_func.func = probe; tp_func.data = data; tp_func.prio = prio; ret = tracepoint_add_func(tp, &tp_func, prio, false); mutex_unlock(&tracepoints_mutex); return ret; } EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio_may_exist); /** * tracepoint_probe_register_prio - Connect a probe to a tracepoint with priority * @tp: tracepoint * @probe: probe handler * @data: tracepoint data * @prio: priority of this function over other registered functions * * Returns 0 if ok, error value on error. * Note: if @tp is within a module, the caller is responsible for * unregistering the probe before the module is gone. This can be * performed either with a tracepoint module going notifier, or from * within module exit functions. */ int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data, int prio) { struct tracepoint_func tp_func; int ret; mutex_lock(&tracepoints_mutex); tp_func.func = probe; tp_func.data = data; tp_func.prio = prio; ret = tracepoint_add_func(tp, &tp_func, prio, true); mutex_unlock(&tracepoints_mutex); return ret; } EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio); /** * tracepoint_probe_register - Connect a probe to a tracepoint * @tp: tracepoint * @probe: probe handler * @data: tracepoint data * * Returns 0 if ok, error value on error. 
* Note: if @tp is within a module, the caller is responsible for * unregistering the probe before the module is gone. This can be * performed either with a tracepoint module going notifier, or from * within module exit functions. */ int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data) { return tracepoint_probe_register_prio(tp, probe, data, TRACEPOINT_DEFAULT_PRIO); } EXPORT_SYMBOL_GPL(tracepoint_probe_register); /** * tracepoint_probe_unregister - Disconnect a probe from a tracepoint * @tp: tracepoint * @probe: probe function pointer * @data: tracepoint data * * Returns 0 if ok, error value on error. */ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data) { struct tracepoint_func tp_func; int ret; mutex_lock(&tracepoints_mutex); tp_func.func = probe; tp_func.data = data; ret = tracepoint_remove_func(tp, &tp_func); mutex_unlock(&tracepoints_mutex); return ret; } EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); static void for_each_tracepoint_range( tracepoint_ptr_t *begin, tracepoint_ptr_t *end, void (*fct)(struct tracepoint *tp, void *priv), void *priv) { tracepoint_ptr_t *iter; if (!begin) return; for (iter = begin; iter < end; iter++) fct(tracepoint_ptr_deref(iter), priv); } #ifdef CONFIG_MODULES bool trace_module_has_bad_taint(struct module *mod) { return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP) | (1 << TAINT_UNSIGNED_MODULE) | (1 << TAINT_TEST) | (1 << TAINT_LIVEPATCH)); } static BLOCKING_NOTIFIER_HEAD(tracepoint_notify_list); /** * register_tracepoint_module_notifier - register tracepoint coming/going notifier * @nb: notifier block * * Notifiers registered with this function are called on module * coming/going with the tracepoint_module_list_mutex held. * The notifier block callback should expect a "struct tp_module" data * pointer. 
*/ int register_tracepoint_module_notifier(struct notifier_block *nb) { struct tp_module *tp_mod; int ret; mutex_lock(&tracepoint_module_list_mutex); ret = blocking_notifier_chain_register(&tracepoint_notify_list, nb); if (ret) goto end; list_for_each_entry(tp_mod, &tracepoint_module_list, list) (void) nb->notifier_call(nb, MODULE_STATE_COMING, tp_mod); end: mutex_unlock(&tracepoint_module_list_mutex); return ret; } EXPORT_SYMBOL_GPL(register_tracepoint_module_notifier); /** * unregister_tracepoint_module_notifier - unregister tracepoint coming/going notifier * @nb: notifier block * * The notifier block callback should expect a "struct tp_module" data * pointer. */ int unregister_tracepoint_module_notifier(struct notifier_block *nb) { struct tp_module *tp_mod; int ret; mutex_lock(&tracepoint_module_list_mutex); ret = blocking_notifier_chain_unregister(&tracepoint_notify_list, nb); if (ret) goto end; list_for_each_entry(tp_mod, &tracepoint_module_list, list) (void) nb->notifier_call(nb, MODULE_STATE_GOING, tp_mod); end: mutex_unlock(&tracepoint_module_list_mutex); return ret; } EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier); /* * Ensure the tracer unregistered the module's probes before the module * teardown is performed. Prevents leaks of probe and data pointers. */ static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv) { WARN_ON_ONCE(tp->funcs); } static int tracepoint_module_coming(struct module *mod) { struct tp_module *tp_mod; if (!mod->num_tracepoints) return 0; /* * We skip modules that taint the kernel, especially those with different * module headers (for forced load), to make sure we don't cause a crash. * Staging, out-of-tree, unsigned GPL, and test modules are fine. 
*/ if (trace_module_has_bad_taint(mod)) return 0; tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL); if (!tp_mod) return -ENOMEM; tp_mod->mod = mod; mutex_lock(&tracepoint_module_list_mutex); list_add_tail(&tp_mod->list, &tracepoint_module_list); blocking_notifier_call_chain(&tracepoint_notify_list, MODULE_STATE_COMING, tp_mod); mutex_unlock(&tracepoint_module_list_mutex); return 0; } static void tracepoint_module_going(struct module *mod) { struct tp_module *tp_mod; if (!mod->num_tracepoints) return; mutex_lock(&tracepoint_module_list_mutex); list_for_each_entry(tp_mod, &tracepoint_module_list, list) { if (tp_mod->mod == mod) { blocking_notifier_call_chain(&tracepoint_notify_list, MODULE_STATE_GOING, tp_mod); list_del(&tp_mod->list); kfree(tp_mod); /* * Called the going notifier before checking for * quiescence. */ for_each_tracepoint_range(mod->tracepoints_ptrs, mod->tracepoints_ptrs + mod->num_tracepoints, tp_module_going_check_quiescent, NULL); break; } } /* * In the case of modules that were tainted at "coming", we'll simply * walk through the list without finding it. We cannot use the "tainted" * flag on "going", in case a module taints the kernel only after being * loaded. 
*/ mutex_unlock(&tracepoint_module_list_mutex); } static int tracepoint_module_notify(struct notifier_block *self, unsigned long val, void *data) { struct module *mod = data; int ret = 0; switch (val) { case MODULE_STATE_COMING: ret = tracepoint_module_coming(mod); break; case MODULE_STATE_LIVE: break; case MODULE_STATE_GOING: tracepoint_module_going(mod); break; case MODULE_STATE_UNFORMED: break; } return notifier_from_errno(ret); } static struct notifier_block tracepoint_module_nb = { .notifier_call = tracepoint_module_notify, .priority = 0, }; static __init int init_tracepoints(void) { int ret; ret = register_module_notifier(&tracepoint_module_nb); if (ret) pr_warn("Failed to register tracepoint module enter notifier\n"); return ret; } __initcall(init_tracepoints); #endif /* CONFIG_MODULES */ /** * for_each_kernel_tracepoint - iteration on all kernel tracepoints * @fct: callback * @priv: private data */ void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), void *priv) { for_each_tracepoint_range(__start___tracepoints_ptrs, __stop___tracepoints_ptrs, fct, priv); } EXPORT_SYMBOL_GPL(for_each_kernel_tracepoint); #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ static int sys_tracepoint_refcount; int syscall_regfunc(void) { struct task_struct *p, *t; if (!sys_tracepoint_refcount) { read_lock(&tasklist_lock); for_each_process_thread(p, t) { set_task_syscall_work(t, SYSCALL_TRACEPOINT); } read_unlock(&tasklist_lock); } sys_tracepoint_refcount++; return 0; } void syscall_unregfunc(void) { struct task_struct *p, *t; sys_tracepoint_refcount--; if (!sys_tracepoint_refcount) { read_lock(&tasklist_lock); for_each_process_thread(p, t) { clear_task_syscall_work(t, SYSCALL_TRACEPOINT); } read_unlock(&tasklist_lock); } } #endif |
1 1 2 2 || // SPDX-License-Identifier: GPL-2.0-or-later /* * authencesn.c - AEAD wrapper for IPsec with extended sequence numbers, * derived from authenc.c * * Copyright (C) 2010 secunet Security Networks AG * Copyright (C) 2010 Steffen Klassert <steffen.klassert@secunet.com> * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/internal/aead.h> #include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> #include <crypto/authenc.h> #include <crypto/null.h> #include <crypto/scatterwalk.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/spinlock.h> struct authenc_esn_instance_ctx { struct crypto_ahash_spawn auth; struct crypto_skcipher_spawn enc; }; struct crypto_authenc_esn_ctx { unsigned int reqoff; struct crypto_ahash *auth; struct crypto_skcipher *enc; struct crypto_sync_skcipher *null; }; struct authenc_esn_request_ctx { struct scatterlist src[2]; struct scatterlist dst[2]; char tail[]; }; static void authenc_esn_request_complete(struct aead_request *req, int err) { if (err != -EINPROGRESS) aead_request_complete(req, err); } static int crypto_authenc_esn_setauthsize(struct crypto_aead *authenc_esn, unsigned int authsize) { if (authsize > 0 && authsize < 4) return -EINVAL; return 0; } static int crypto_authenc_esn_setkey(struct crypto_aead *authenc_esn, const u8 *key, unsigned int keylen) { struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); struct crypto_ahash *auth = ctx->auth; struct crypto_skcipher *enc = ctx->enc; struct crypto_authenc_keys keys; int err = -EINVAL; if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) goto out; crypto_ahash_clear_flags(auth, CRYPTO_TFM_REQ_MASK); crypto_ahash_set_flags(auth, crypto_aead_get_flags(authenc_esn) & CRYPTO_TFM_REQ_MASK); err = crypto_ahash_setkey(auth, keys.authkey, keys.authkeylen); if (err) goto out; crypto_skcipher_clear_flags(enc, 
CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(enc, crypto_aead_get_flags(authenc_esn) & CRYPTO_TFM_REQ_MASK); err = crypto_skcipher_setkey(enc, keys.enckey, keys.enckeylen); out: memzero_explicit(&keys, sizeof(keys)); return err; } static int crypto_authenc_esn_genicv_tail(struct aead_request *req, unsigned int flags) { struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req); struct authenc_esn_request_ctx *areq_ctx = aead_request_ctx(req); u8 *hash = areq_ctx->tail; unsigned int authsize = crypto_aead_authsize(authenc_esn); unsigned int assoclen = req->assoclen; unsigned int cryptlen = req->cryptlen; struct scatterlist *dst = req->dst; u32 tmp[2]; /* Move high-order bits of sequence number back. */ scatterwalk_map_and_copy(tmp, dst, 4, 4, 0); scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 0); scatterwalk_map_and_copy(tmp, dst, 0, 8, 1); scatterwalk_map_and_copy(hash, dst, assoclen + cryptlen, authsize, 1); return 0; } static void authenc_esn_geniv_ahash_done(void *data, int err) { struct aead_request *req = data; err = err ?: crypto_authenc_esn_genicv_tail(req, 0); aead_request_complete(req, err); } static int crypto_authenc_esn_genicv(struct aead_request *req, unsigned int flags) { struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req); struct authenc_esn_request_ctx *areq_ctx = aead_request_ctx(req); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); struct crypto_ahash *auth = ctx->auth; u8 *hash = areq_ctx->tail; struct ahash_request *ahreq = (void *)(areq_ctx->tail + ctx->reqoff); unsigned int authsize = crypto_aead_authsize(authenc_esn); unsigned int assoclen = req->assoclen; unsigned int cryptlen = req->cryptlen; struct scatterlist *dst = req->dst; u32 tmp[2]; if (!authsize) return 0; /* Move high-order bits of sequence number to the end. 
*/ scatterwalk_map_and_copy(tmp, dst, 0, 8, 0); scatterwalk_map_and_copy(tmp, dst, 4, 4, 1); scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 1); sg_init_table(areq_ctx->dst, 2); dst = scatterwalk_ffwd(areq_ctx->dst, dst, 4); ahash_request_set_tfm(ahreq, auth); ahash_request_set_crypt(ahreq, dst, hash, assoclen + cryptlen); ahash_request_set_callback(ahreq, flags, authenc_esn_geniv_ahash_done, req); return crypto_ahash_digest(ahreq) ?: crypto_authenc_esn_genicv_tail(req, aead_request_flags(req)); } static void crypto_authenc_esn_encrypt_done(void *data, int err) { struct aead_request *areq = data; if (!err) err = crypto_authenc_esn_genicv(areq, 0); authenc_esn_request_complete(areq, err); } static int crypto_authenc_esn_copy(struct aead_request *req, unsigned int len) { struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null); skcipher_request_set_sync_tfm(skreq, ctx->null); skcipher_request_set_callback(skreq, aead_request_flags(req), NULL, NULL); skcipher_request_set_crypt(skreq, req->src, req->dst, len, NULL); return crypto_skcipher_encrypt(skreq); } static int crypto_authenc_esn_encrypt(struct aead_request *req) { struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req); struct authenc_esn_request_ctx *areq_ctx = aead_request_ctx(req); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); struct skcipher_request *skreq = (void *)(areq_ctx->tail + ctx->reqoff); struct crypto_skcipher *enc = ctx->enc; unsigned int assoclen = req->assoclen; unsigned int cryptlen = req->cryptlen; struct scatterlist *src, *dst; int err; sg_init_table(areq_ctx->src, 2); src = scatterwalk_ffwd(areq_ctx->src, req->src, assoclen); dst = src; if (req->src != req->dst) { err = crypto_authenc_esn_copy(req, assoclen); if (err) return err; sg_init_table(areq_ctx->dst, 2); dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, assoclen); } 
skcipher_request_set_tfm(skreq, enc); skcipher_request_set_callback(skreq, aead_request_flags(req), crypto_authenc_esn_encrypt_done, req); skcipher_request_set_crypt(skreq, src, dst, cryptlen, req->iv); err = crypto_skcipher_encrypt(skreq); if (err) return err; return crypto_authenc_esn_genicv(req, aead_request_flags(req)); } static int crypto_authenc_esn_decrypt_tail(struct aead_request *req, unsigned int flags) { struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req); unsigned int authsize = crypto_aead_authsize(authenc_esn); struct authenc_esn_request_ctx *areq_ctx = aead_request_ctx(req); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); struct skcipher_request *skreq = (void *)(areq_ctx->tail + ctx->reqoff); struct crypto_ahash *auth = ctx->auth; u8 *ohash = areq_ctx->tail; unsigned int cryptlen = req->cryptlen - authsize; unsigned int assoclen = req->assoclen; struct scatterlist *dst = req->dst; u8 *ihash = ohash + crypto_ahash_digestsize(auth); u32 tmp[2]; if (!authsize) goto decrypt; /* Move high-order bits of sequence number back. 
*/ scatterwalk_map_and_copy(tmp, dst, 4, 4, 0); scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 0); scatterwalk_map_and_copy(tmp, dst, 0, 8, 1); if (crypto_memneq(ihash, ohash, authsize)) return -EBADMSG; decrypt: sg_init_table(areq_ctx->dst, 2); dst = scatterwalk_ffwd(areq_ctx->dst, dst, assoclen); skcipher_request_set_tfm(skreq, ctx->enc); skcipher_request_set_callback(skreq, flags, req->base.complete, req->base.data); skcipher_request_set_crypt(skreq, dst, dst, cryptlen, req->iv); return crypto_skcipher_decrypt(skreq); } static void authenc_esn_verify_ahash_done(void *data, int err) { struct aead_request *req = data; err = err ?: crypto_authenc_esn_decrypt_tail(req, 0); authenc_esn_request_complete(req, err); } static int crypto_authenc_esn_decrypt(struct aead_request *req) { struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req); struct authenc_esn_request_ctx *areq_ctx = aead_request_ctx(req); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ctx->reqoff); unsigned int authsize = crypto_aead_authsize(authenc_esn); struct crypto_ahash *auth = ctx->auth; u8 *ohash = areq_ctx->tail; unsigned int assoclen = req->assoclen; unsigned int cryptlen = req->cryptlen; u8 *ihash = ohash + crypto_ahash_digestsize(auth); struct scatterlist *dst = req->dst; u32 tmp[2]; int err; cryptlen -= authsize; if (req->src != dst) { err = crypto_authenc_esn_copy(req, assoclen + cryptlen); if (err) return err; } scatterwalk_map_and_copy(ihash, req->src, assoclen + cryptlen, authsize, 0); if (!authsize) goto tail; /* Move high-order bits of sequence number to the end. 
*/ scatterwalk_map_and_copy(tmp, dst, 0, 8, 0); scatterwalk_map_and_copy(tmp, dst, 4, 4, 1); scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 1); sg_init_table(areq_ctx->dst, 2); dst = scatterwalk_ffwd(areq_ctx->dst, dst, 4); ahash_request_set_tfm(ahreq, auth); ahash_request_set_crypt(ahreq, dst, ohash, assoclen + cryptlen); ahash_request_set_callback(ahreq, aead_request_flags(req), authenc_esn_verify_ahash_done, req); err = crypto_ahash_digest(ahreq); if (err) return err; tail: return crypto_authenc_esn_decrypt_tail(req, aead_request_flags(req)); } static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm) { struct aead_instance *inst = aead_alg_instance(tfm); struct authenc_esn_instance_ctx *ictx = aead_instance_ctx(inst); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_ahash *auth; struct crypto_skcipher *enc; struct crypto_sync_skcipher *null; int err; auth = crypto_spawn_ahash(&ictx->auth); if (IS_ERR(auth)) return PTR_ERR(auth); enc = crypto_spawn_skcipher(&ictx->enc); err = PTR_ERR(enc); if (IS_ERR(enc)) goto err_free_ahash; null = crypto_get_default_null_skcipher(); err = PTR_ERR(null); if (IS_ERR(null)) goto err_free_skcipher; ctx->auth = auth; ctx->enc = enc; ctx->null = null; ctx->reqoff = 2 * crypto_ahash_digestsize(auth); crypto_aead_set_reqsize( tfm, sizeof(struct authenc_esn_request_ctx) + ctx->reqoff + max_t(unsigned int, crypto_ahash_reqsize(auth) + sizeof(struct ahash_request), sizeof(struct skcipher_request) + crypto_skcipher_reqsize(enc))); return 0; err_free_skcipher: crypto_free_skcipher(enc); err_free_ahash: crypto_free_ahash(auth); return err; } static void crypto_authenc_esn_exit_tfm(struct crypto_aead *tfm) { struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_ahash(ctx->auth); crypto_free_skcipher(ctx->enc); crypto_put_default_null_skcipher(); } static void crypto_authenc_esn_free(struct aead_instance *inst) { struct authenc_esn_instance_ctx *ctx = aead_instance_ctx(inst); 
crypto_drop_skcipher(&ctx->enc); crypto_drop_ahash(&ctx->auth); kfree(inst); } static int crypto_authenc_esn_create(struct crypto_template *tmpl, struct rtattr **tb) { u32 mask; struct aead_instance *inst; struct authenc_esn_instance_ctx *ctx; struct skcipher_alg_common *enc; struct hash_alg_common *auth; struct crypto_alg *auth_base; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask); if (err) return err; inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; ctx = aead_instance_ctx(inst); err = crypto_grab_ahash(&ctx->auth, aead_crypto_instance(inst), crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; auth = crypto_spawn_ahash_alg(&ctx->auth); auth_base = &auth->base; err = crypto_grab_skcipher(&ctx->enc, aead_crypto_instance(inst), crypto_attr_alg_name(tb[2]), 0, mask); if (err) goto err_free_inst; enc = crypto_spawn_skcipher_alg_common(&ctx->enc); err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "authencesn(%s,%s)", auth_base->cra_name, enc->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "authencesn(%s,%s)", auth_base->cra_driver_name, enc->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; inst->alg.base.cra_priority = enc->base.cra_priority * 10 + auth_base->cra_priority; inst->alg.base.cra_blocksize = enc->base.cra_blocksize; inst->alg.base.cra_alignmask = enc->base.cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct crypto_authenc_esn_ctx); inst->alg.ivsize = enc->ivsize; inst->alg.chunksize = enc->chunksize; inst->alg.maxauthsize = auth->digestsize; inst->alg.init = crypto_authenc_esn_init_tfm; inst->alg.exit = crypto_authenc_esn_exit_tfm; inst->alg.setkey = crypto_authenc_esn_setkey; inst->alg.setauthsize = crypto_authenc_esn_setauthsize; inst->alg.encrypt = crypto_authenc_esn_encrypt; inst->alg.decrypt = crypto_authenc_esn_decrypt; inst->free = 
crypto_authenc_esn_free; err = aead_register_instance(tmpl, inst); if (err) { err_free_inst: crypto_authenc_esn_free(inst); } return err; } static struct crypto_template crypto_authenc_esn_tmpl = { .name = "authencesn", .create = crypto_authenc_esn_create, .module = THIS_MODULE, }; static int __init crypto_authenc_esn_module_init(void) { return crypto_register_template(&crypto_authenc_esn_tmpl); } static void __exit crypto_authenc_esn_module_exit(void) { crypto_unregister_template(&crypto_authenc_esn_tmpl); } subsys_initcall(crypto_authenc_esn_module_init); module_exit(crypto_authenc_esn_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); MODULE_DESCRIPTION("AEAD wrapper for IPsec with extended sequence numbers"); MODULE_ALIAS_CRYPTO("authencesn"); |
1 1 1 3 1 4 2 2 4 4 2 2 1 1 1 1 1 1 3 1 2 3 2 1 1 1 2 7 3 2 1 2 2 4 4 2 1 2 1 1 2 1 1 3 2 1 || // SPDX-License-Identifier: GPL-2.0 #include <linux/err.h> #include <linux/igmp.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/rculist.h> #include <linux/skbuff.h> #include <linux/if_ether.h> #include <net/ip.h> #include <net/netlink.h> #include <net/switchdev.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/addrconf.h> #endif #include "br_private.h" static bool br_ip4_rports_get_timer(struct net_bridge_mcast_port *pmctx, unsigned long *timer) { *timer = br_timer_value(&pmctx->ip4_mc_router_timer); return !hlist_unhashed(&pmctx->ip4_rlist); } static bool br_ip6_rports_get_timer(struct net_bridge_mcast_port *pmctx, unsigned long *timer) { #if IS_ENABLED(CONFIG_IPV6) *timer = br_timer_value(&pmctx->ip6_mc_router_timer); return !hlist_unhashed(&pmctx->ip6_rlist); #else *timer = 0; return false; #endif } static size_t __br_rports_one_size(void) { return nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PORT */ nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_TIMER */ nla_total_size(sizeof(u8)) + /* MDBA_ROUTER_PATTR_TYPE */ nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_INET_TIMER */ nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_INET6_TIMER */ nla_total_size(sizeof(u32)); /* MDBA_ROUTER_PATTR_VID */ } size_t br_rports_size(const struct net_bridge_mcast *brmctx) { struct net_bridge_mcast_port *pmctx; size_t size = nla_total_size(0); /* MDBA_ROUTER */ rcu_read_lock(); hlist_for_each_entry_rcu(pmctx, &brmctx->ip4_mc_router_list, ip4_rlist) size += __br_rports_one_size(); #if IS_ENABLED(CONFIG_IPV6) hlist_for_each_entry_rcu(pmctx, &brmctx->ip6_mc_router_list, ip6_rlist) size += __br_rports_one_size(); #endif rcu_read_unlock(); return size; } int br_rports_fill_info(struct sk_buff *skb, const struct net_bridge_mcast *brmctx) { u16 vid = brmctx->vlan ? 
brmctx->vlan->vid : 0; bool have_ip4_mc_rtr, have_ip6_mc_rtr; unsigned long ip4_timer, ip6_timer; struct nlattr *nest, *port_nest; struct net_bridge_port *p; if (!brmctx->multicast_router || !br_rports_have_mc_router(brmctx)) return 0; nest = nla_nest_start_noflag(skb, MDBA_ROUTER); if (nest == NULL) return -EMSGSIZE; list_for_each_entry_rcu(p, &brmctx->br->port_list, list) { struct net_bridge_mcast_port *pmctx; if (vid) { struct net_bridge_vlan *v; v = br_vlan_find(nbp_vlan_group(p), vid); if (!v) continue; pmctx = &v->port_mcast_ctx; } else { pmctx = &p->multicast_ctx; } have_ip4_mc_rtr = br_ip4_rports_get_timer(pmctx, &ip4_timer); have_ip6_mc_rtr = br_ip6_rports_get_timer(pmctx, &ip6_timer); if (!have_ip4_mc_rtr && !have_ip6_mc_rtr) continue; port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT); if (!port_nest) goto fail; if (nla_put_nohdr(skb, sizeof(u32), &p->dev->ifindex) || nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER, max(ip4_timer, ip6_timer)) || nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE, p->multicast_ctx.multicast_router) || (have_ip4_mc_rtr && nla_put_u32(skb, MDBA_ROUTER_PATTR_INET_TIMER, ip4_timer)) || (have_ip6_mc_rtr && nla_put_u32(skb, MDBA_ROUTER_PATTR_INET6_TIMER, ip6_timer)) || (vid && nla_put_u16(skb, MDBA_ROUTER_PATTR_VID, vid))) { nla_nest_cancel(skb, port_nest); goto fail; } nla_nest_end(skb, port_nest); } nla_nest_end(skb, nest); return 0; fail: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags) { e->state = flags & MDB_PG_FLAGS_PERMANENT; e->flags = 0; if (flags & MDB_PG_FLAGS_OFFLOAD) e->flags |= MDB_FLAGS_OFFLOAD; if (flags & MDB_PG_FLAGS_FAST_LEAVE) e->flags |= MDB_FLAGS_FAST_LEAVE; if (flags & MDB_PG_FLAGS_STAR_EXCL) e->flags |= MDB_FLAGS_STAR_EXCL; if (flags & MDB_PG_FLAGS_BLOCKED) e->flags |= MDB_FLAGS_BLOCKED; } static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip, struct nlattr **mdb_attrs) { memset(ip, 0, sizeof(struct br_ip)); 
ip->vid = entry->vid; ip->proto = entry->addr.proto; switch (ip->proto) { case htons(ETH_P_IP): ip->dst.ip4 = entry->addr.u.ip4; if (mdb_attrs && mdb_attrs[MDBE_ATTR_SOURCE]) ip->src.ip4 = nla_get_in_addr(mdb_attrs[MDBE_ATTR_SOURCE]); break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): ip->dst.ip6 = entry->addr.u.ip6; if (mdb_attrs && mdb_attrs[MDBE_ATTR_SOURCE]) ip->src.ip6 = nla_get_in6_addr(mdb_attrs[MDBE_ATTR_SOURCE]); break; #endif default: ether_addr_copy(ip->dst.mac_addr, entry->addr.u.mac_addr); } } static int __mdb_fill_srcs(struct sk_buff *skb, struct net_bridge_port_group *p) { struct net_bridge_group_src *ent; struct nlattr *nest, *nest_ent; if (hlist_empty(&p->src_list)) return 0; nest = nla_nest_start(skb, MDBA_MDB_EATTR_SRC_LIST); if (!nest) return -EMSGSIZE; hlist_for_each_entry_rcu(ent, &p->src_list, node, lockdep_is_held(&p->key.port->br->multicast_lock)) { nest_ent = nla_nest_start(skb, MDBA_MDB_SRCLIST_ENTRY); if (!nest_ent) goto out_cancel_err; switch (ent->addr.proto) { case htons(ETH_P_IP): if (nla_put_in_addr(skb, MDBA_MDB_SRCATTR_ADDRESS, ent->addr.src.ip4)) { nla_nest_cancel(skb, nest_ent); goto out_cancel_err; } break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): if (nla_put_in6_addr(skb, MDBA_MDB_SRCATTR_ADDRESS, &ent->addr.src.ip6)) { nla_nest_cancel(skb, nest_ent); goto out_cancel_err; } break; #endif default: nla_nest_cancel(skb, nest_ent); continue; } if (nla_put_u32(skb, MDBA_MDB_SRCATTR_TIMER, br_timer_value(&ent->timer))) { nla_nest_cancel(skb, nest_ent); goto out_cancel_err; } nla_nest_end(skb, nest_ent); } nla_nest_end(skb, nest); return 0; out_cancel_err: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int __mdb_fill_info(struct sk_buff *skb, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *p) { bool dump_srcs_mode = false; struct timer_list *mtimer; struct nlattr *nest_ent; struct br_mdb_entry e; u8 flags = 0; int ifindex; memset(&e, 0, sizeof(e)); if (p) { ifindex = 
p->key.port->dev->ifindex; mtimer = &p->timer; flags = p->flags; } else { ifindex = mp->br->dev->ifindex; mtimer = &mp->timer; } __mdb_entry_fill_flags(&e, flags); e.ifindex = ifindex; e.vid = mp->addr.vid; if (mp->addr.proto == htons(ETH_P_IP)) { e.addr.u.ip4 = mp->addr.dst.ip4; #if IS_ENABLED(CONFIG_IPV6) } else if (mp->addr.proto == htons(ETH_P_IPV6)) { e.addr.u.ip6 = mp->addr.dst.ip6; #endif } else { ether_addr_copy(e.addr.u.mac_addr, mp->addr.dst.mac_addr); e.state = MDB_PERMANENT; } e.addr.proto = mp->addr.proto; nest_ent = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY_INFO); if (!nest_ent) return -EMSGSIZE; if (nla_put_nohdr(skb, sizeof(e), &e) || nla_put_u32(skb, MDBA_MDB_EATTR_TIMER, br_timer_value(mtimer))) goto nest_err; switch (mp->addr.proto) { case htons(ETH_P_IP): dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_igmp_version == 3); if (mp->addr.src.ip4) { if (nla_put_in_addr(skb, MDBA_MDB_EATTR_SOURCE, mp->addr.src.ip4)) goto nest_err; break; } break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_mld_version == 2); if (!ipv6_addr_any(&mp->addr.src.ip6)) { if (nla_put_in6_addr(skb, MDBA_MDB_EATTR_SOURCE, &mp->addr.src.ip6)) goto nest_err; break; } break; #endif default: ether_addr_copy(e.addr.u.mac_addr, mp->addr.dst.mac_addr); } if (p) { if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, p->rt_protocol)) goto nest_err; if (dump_srcs_mode && (__mdb_fill_srcs(skb, p) || nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, p->filter_mode))) goto nest_err; } nla_nest_end(skb, nest_ent); return 0; nest_err: nla_nest_cancel(skb, nest_ent); return -EMSGSIZE; } static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev) { int idx = 0, s_idx = cb->args[1], err = 0, pidx = 0, s_pidx = cb->args[2]; struct net_bridge *br = netdev_priv(dev); struct net_bridge_mdb_entry *mp; struct nlattr *nest, *nest2; nest = nla_nest_start_noflag(skb, MDBA_MDB); if (nest == NULL) return -EMSGSIZE; 
hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; if (idx < s_idx) goto skip; nest2 = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); if (!nest2) { err = -EMSGSIZE; break; } if (!s_pidx && mp->host_joined) { err = __mdb_fill_info(skb, mp, NULL); if (err) { nla_nest_cancel(skb, nest2); break; } } for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; pp = &p->next) { if (!p->key.port) continue; if (pidx < s_pidx) goto skip_pg; err = __mdb_fill_info(skb, mp, p); if (err) { nla_nest_end(skb, nest2); goto out; } skip_pg: pidx++; } pidx = 0; s_pidx = 0; nla_nest_end(skb, nest2); skip: idx++; } out: cb->args[1] = idx; cb->args[2] = pidx; nla_nest_end(skb, nest); return err; } int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb) { struct net_bridge *br = netdev_priv(dev); struct br_port_msg *bpm; struct nlmsghdr *nlh; int err; nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_GETMDB, sizeof(*bpm), NLM_F_MULTI); if (!nlh) return -EMSGSIZE; bpm = nlmsg_data(nlh); memset(bpm, 0, sizeof(*bpm)); bpm->ifindex = dev->ifindex; rcu_read_lock(); err = br_mdb_fill_info(skb, cb, dev); if (err) goto out; err = br_rports_fill_info(skb, &br->multicast_ctx); if (err) goto out; out: rcu_read_unlock(); nlmsg_end(skb, nlh); return err; } static int nlmsg_populate_mdb_fill(struct sk_buff *skb, struct net_device *dev, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, int type) { struct nlmsghdr *nlh; struct br_port_msg *bpm; struct nlattr *nest, *nest2; nlh = nlmsg_put(skb, 0, 0, type, sizeof(*bpm), 0); if (!nlh) return -EMSGSIZE; bpm = nlmsg_data(nlh); memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = dev->ifindex; nest = nla_nest_start_noflag(skb, MDBA_MDB); if (nest == NULL) goto cancel; nest2 = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); if (nest2 == NULL) goto end; if (__mdb_fill_info(skb, mp, pg)) goto end; 
nla_nest_end(skb, nest2); nla_nest_end(skb, nest); nlmsg_end(skb, nlh); return 0; end: nla_nest_end(skb, nest); cancel: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static size_t rtnl_mdb_nlmsg_pg_size(const struct net_bridge_port_group *pg) { struct net_bridge_group_src *ent; size_t nlmsg_size, addr_size = 0; /* MDBA_MDB_ENTRY_INFO */ nlmsg_size = nla_total_size(sizeof(struct br_mdb_entry)) + /* MDBA_MDB_EATTR_TIMER */ nla_total_size(sizeof(u32)); if (!pg) goto out; /* MDBA_MDB_EATTR_RTPROT */ nlmsg_size += nla_total_size(sizeof(u8)); switch (pg->key.addr.proto) { case htons(ETH_P_IP): /* MDBA_MDB_EATTR_SOURCE */ if (pg->key.addr.src.ip4) nlmsg_size += nla_total_size(sizeof(__be32)); if (pg->key.port->br->multicast_ctx.multicast_igmp_version == 2) goto out; addr_size = sizeof(__be32); break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): /* MDBA_MDB_EATTR_SOURCE */ if (!ipv6_addr_any(&pg->key.addr.src.ip6)) nlmsg_size += nla_total_size(sizeof(struct in6_addr)); if (pg->key.port->br->multicast_ctx.multicast_mld_version == 1) goto out; addr_size = sizeof(struct in6_addr); break; #endif } /* MDBA_MDB_EATTR_GROUP_MODE */ nlmsg_size += nla_total_size(sizeof(u8)); /* MDBA_MDB_EATTR_SRC_LIST nested attr */ if (!hlist_empty(&pg->src_list)) nlmsg_size += nla_total_size(0); hlist_for_each_entry(ent, &pg->src_list, node) { /* MDBA_MDB_SRCLIST_ENTRY nested attr + * MDBA_MDB_SRCATTR_ADDRESS + MDBA_MDB_SRCATTR_TIMER */ nlmsg_size += nla_total_size(0) + nla_total_size(addr_size) + nla_total_size(sizeof(u32)); } out: return nlmsg_size; } static size_t rtnl_mdb_nlmsg_size(const struct net_bridge_port_group *pg) { return NLMSG_ALIGN(sizeof(struct br_port_msg)) + /* MDBA_MDB */ nla_total_size(0) + /* MDBA_MDB_ENTRY */ nla_total_size(0) + /* Port group entry */ rtnl_mdb_nlmsg_pg_size(pg); } void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, int type) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = 
-ENOBUFS; br_switchdev_mdb_notify(dev, mp, pg, type); skb = nlmsg_new(rtnl_mdb_nlmsg_size(pg), GFP_ATOMIC); if (!skb) goto errout; err = nlmsg_populate_mdb_fill(skb, dev, mp, pg, type); if (err < 0) { kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC); return; errout: rtnl_set_sk_err(net, RTNLGRP_MDB, err); } static int nlmsg_populate_rtr_fill(struct sk_buff *skb, struct net_device *dev, int ifindex, u16 vid, u32 pid, u32 seq, int type, unsigned int flags) { struct nlattr *nest, *port_nest; struct br_port_msg *bpm; struct nlmsghdr *nlh; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0); if (!nlh) return -EMSGSIZE; bpm = nlmsg_data(nlh); memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = dev->ifindex; nest = nla_nest_start_noflag(skb, MDBA_ROUTER); if (!nest) goto cancel; port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT); if (!port_nest) goto end; if (nla_put_nohdr(skb, sizeof(u32), &ifindex)) { nla_nest_cancel(skb, port_nest); goto end; } if (vid && nla_put_u16(skb, MDBA_ROUTER_PATTR_VID, vid)) { nla_nest_cancel(skb, port_nest); goto end; } nla_nest_end(skb, port_nest); nla_nest_end(skb, nest); nlmsg_end(skb, nlh); return 0; end: nla_nest_end(skb, nest); cancel: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static inline size_t rtnl_rtr_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct br_port_msg)) + nla_total_size(sizeof(__u32)) + nla_total_size(sizeof(u16)); } void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx, int type) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; int ifindex; u16 vid; ifindex = pmctx ? pmctx->port->dev->ifindex : 0; vid = pmctx && br_multicast_port_ctx_is_vlan(pmctx) ? 
pmctx->vlan->vid : 0; skb = nlmsg_new(rtnl_rtr_nlmsg_size(), GFP_ATOMIC); if (!skb) goto errout; err = nlmsg_populate_rtr_fill(skb, dev, ifindex, vid, 0, 0, type, NTF_SELF); if (err < 0) { kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC); return; errout: rtnl_set_sk_err(net, RTNLGRP_MDB, err); } static const struct nla_policy br_mdbe_src_list_entry_pol[MDBE_SRCATTR_MAX + 1] = { [MDBE_SRCATTR_ADDRESS] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), sizeof(struct in6_addr)), }; static const struct nla_policy br_mdbe_src_list_pol[MDBE_SRC_LIST_MAX + 1] = { [MDBE_SRC_LIST_ENTRY] = NLA_POLICY_NESTED(br_mdbe_src_list_entry_pol), }; static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), sizeof(struct in6_addr)), [MDBE_ATTR_GROUP_MODE] = NLA_POLICY_RANGE(NLA_U8, MCAST_EXCLUDE, MCAST_INCLUDE), [MDBE_ATTR_SRC_LIST] = NLA_POLICY_NESTED(br_mdbe_src_list_pol), [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC), }; static bool is_valid_mdb_source(struct nlattr *attr, __be16 proto, struct netlink_ext_ack *extack) { switch (proto) { case htons(ETH_P_IP): if (nla_len(attr) != sizeof(struct in_addr)) { NL_SET_ERR_MSG_MOD(extack, "IPv4 invalid source address length"); return false; } if (ipv4_is_multicast(nla_get_in_addr(attr))) { NL_SET_ERR_MSG_MOD(extack, "IPv4 multicast source address is not allowed"); return false; } break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): { struct in6_addr src; if (nla_len(attr) != sizeof(struct in6_addr)) { NL_SET_ERR_MSG_MOD(extack, "IPv6 invalid source address length"); return false; } src = nla_get_in6_addr(attr); if (ipv6_addr_is_multicast(&src)) { NL_SET_ERR_MSG_MOD(extack, "IPv6 multicast source address is not allowed"); return false; } break; } #endif default: NL_SET_ERR_MSG_MOD(extack, "Invalid protocol used with source address"); return false; } return true; } static struct net_bridge_mcast * 
__br_mdb_choose_context(struct net_bridge *br, const struct br_mdb_entry *entry, struct netlink_ext_ack *extack) { struct net_bridge_mcast *brmctx = NULL; struct net_bridge_vlan *v; if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { brmctx = &br->multicast_ctx; goto out; } if (!entry->vid) { NL_SET_ERR_MSG_MOD(extack, "Cannot add an entry without a vlan when vlan snooping is enabled"); goto out; } v = br_vlan_find(br_vlan_group(br), entry->vid); if (!v) { NL_SET_ERR_MSG_MOD(extack, "Vlan is not configured"); goto out; } if (br_multicast_ctx_vlan_global_disabled(&v->br_mcast_ctx)) { NL_SET_ERR_MSG_MOD(extack, "Vlan's multicast processing is disabled"); goto out; } brmctx = &v->br_mcast_ctx; out: return brmctx; } static int br_mdb_replace_group_sg(const struct br_mdb_config *cfg, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, struct net_bridge_mcast *brmctx, unsigned char flags) { unsigned long now = jiffies; pg->flags = flags; pg->rt_protocol = cfg->rt_protocol; if (!(flags & MDB_PG_FLAGS_PERMANENT) && !cfg->src_entry) mod_timer(&pg->timer, now + brmctx->multicast_membership_interval); else del_timer(&pg->timer); br_mdb_notify(cfg->br->dev, mp, pg, RTM_NEWMDB); return 0; } static int br_mdb_add_group_sg(const struct br_mdb_config *cfg, struct net_bridge_mdb_entry *mp, struct net_bridge_mcast *brmctx, unsigned char flags, struct netlink_ext_ack *extack) { struct net_bridge_port_group __rcu **pp; struct net_bridge_port_group *p; unsigned long now = jiffies; for (pp = &mp->ports; (p = mlock_dereference(*pp, cfg->br)) != NULL; pp = &p->next) { if (p->key.port == cfg->p) { if (!(cfg->nlflags & NLM_F_REPLACE)) { NL_SET_ERR_MSG_MOD(extack, "(S, G) group is already joined by port"); return -EEXIST; } return br_mdb_replace_group_sg(cfg, mp, p, brmctx, flags); } if ((unsigned long)p->key.port < (unsigned long)cfg->p) break; } p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL, MCAST_INCLUDE, cfg->rt_protocol, extack); if 
(unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); if (!(flags & MDB_PG_FLAGS_PERMANENT) && !cfg->src_entry) mod_timer(&p->timer, now + brmctx->multicast_membership_interval); br_mdb_notify(cfg->br->dev, mp, p, RTM_NEWMDB); /* All of (*, G) EXCLUDE ports need to be added to the new (S, G) for * proper replication. */ if (br_multicast_should_handle_mode(brmctx, cfg->group.proto)) { struct net_bridge_mdb_entry *star_mp; struct br_ip star_group; star_group = p->key.addr; memset(&star_group.src, 0, sizeof(star_group.src)); star_mp = br_mdb_ip_get(cfg->br, &star_group); if (star_mp) br_multicast_sg_add_exclude_ports(star_mp, p); } return 0; } static int br_mdb_add_group_src_fwd(const struct br_mdb_config *cfg, struct br_ip *src_ip, struct net_bridge_mcast *brmctx, struct netlink_ext_ack *extack) { struct net_bridge_mdb_entry *sgmp; struct br_mdb_config sg_cfg; struct br_ip sg_ip; u8 flags = 0; sg_ip = cfg->group; sg_ip.src = src_ip->src; sgmp = br_multicast_new_group(cfg->br, &sg_ip); if (IS_ERR(sgmp)) { NL_SET_ERR_MSG_MOD(extack, "Failed to add (S, G) MDB entry"); return PTR_ERR(sgmp); } if (cfg->entry->state == MDB_PERMANENT) flags |= MDB_PG_FLAGS_PERMANENT; if (cfg->filter_mode == MCAST_EXCLUDE) flags |= MDB_PG_FLAGS_BLOCKED; memset(&sg_cfg, 0, sizeof(sg_cfg)); sg_cfg.br = cfg->br; sg_cfg.p = cfg->p; sg_cfg.entry = cfg->entry; sg_cfg.group = sg_ip; sg_cfg.src_entry = true; sg_cfg.filter_mode = MCAST_INCLUDE; sg_cfg.rt_protocol = cfg->rt_protocol; sg_cfg.nlflags = cfg->nlflags; return br_mdb_add_group_sg(&sg_cfg, sgmp, brmctx, flags, extack); } static int br_mdb_add_group_src(const struct br_mdb_config *cfg, struct net_bridge_port_group *pg, struct net_bridge_mcast *brmctx, struct br_mdb_src_entry *src, struct netlink_ext_ack *extack) { struct net_bridge_group_src *ent; unsigned long now = jiffies; int err; ent = br_multicast_find_group_src(pg, &src->addr); if (!ent) { ent = br_multicast_new_group_src(pg, &src->addr); if (!ent) { NL_SET_ERR_MSG_MOD(extack, 
"Failed to add new source entry"); return -ENOSPC; } } else if (!(cfg->nlflags & NLM_F_REPLACE)) { NL_SET_ERR_MSG_MOD(extack, "Source entry already exists"); return -EEXIST; } if (cfg->filter_mode == MCAST_INCLUDE && cfg->entry->state == MDB_TEMPORARY) mod_timer(&ent->timer, now + br_multicast_gmi(brmctx)); else del_timer(&ent->timer); /* Install a (S, G) forwarding entry for the source. */ err = br_mdb_add_group_src_fwd(cfg, &src->addr, brmctx, extack); if (err) goto err_del_sg; ent->flags = BR_SGRP_F_INSTALLED | BR_SGRP_F_USER_ADDED; return 0; err_del_sg: __br_multicast_del_group_src(ent); return err; } static void br_mdb_del_group_src(struct net_bridge_port_group *pg, struct br_mdb_src_entry *src) { struct net_bridge_group_src *ent; ent = br_multicast_find_group_src(pg, &src->addr); if (WARN_ON_ONCE(!ent)) return; br_multicast_del_group_src(ent, false); } static int br_mdb_add_group_srcs(const struct br_mdb_config *cfg, struct net_bridge_port_group *pg, struct net_bridge_mcast *brmctx, struct netlink_ext_ack *extack) { int i, err; for (i = 0; i < cfg->num_src_entries; i++) { err = br_mdb_add_group_src(cfg, pg, brmctx, &cfg->src_entries[i], extack); if (err) goto err_del_group_srcs; } return 0; err_del_group_srcs: for (i--; i >= 0; i--) br_mdb_del_group_src(pg, &cfg->src_entries[i]); return err; } static int br_mdb_replace_group_srcs(const struct br_mdb_config *cfg, struct net_bridge_port_group *pg, struct net_bridge_mcast *brmctx, struct netlink_ext_ack *extack) { struct net_bridge_group_src *ent; struct hlist_node *tmp; int err; hlist_for_each_entry(ent, &pg->src_list, node) ent->flags |= BR_SGRP_F_DELETE; err = br_mdb_add_group_srcs(cfg, pg, brmctx, extack); if (err) goto err_clear_delete; hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) { if (ent->flags & BR_SGRP_F_DELETE) br_multicast_del_group_src(ent, false); } return 0; err_clear_delete: hlist_for_each_entry(ent, &pg->src_list, node) ent->flags &= ~BR_SGRP_F_DELETE; return err; } static int 
br_mdb_replace_group_star_g(const struct br_mdb_config *cfg, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, struct net_bridge_mcast *brmctx, unsigned char flags, struct netlink_ext_ack *extack) { unsigned long now = jiffies; int err; err = br_mdb_replace_group_srcs(cfg, pg, brmctx, extack); if (err) return err; pg->flags = flags; pg->filter_mode = cfg->filter_mode; pg->rt_protocol = cfg->rt_protocol; if (!(flags & MDB_PG_FLAGS_PERMANENT) && cfg->filter_mode == MCAST_EXCLUDE) mod_timer(&pg->timer, now + brmctx->multicast_membership_interval); else del_timer(&pg->timer); br_mdb_notify(cfg->br->dev, mp, pg, RTM_NEWMDB); if (br_multicast_should_handle_mode(brmctx, cfg->group.proto)) br_multicast_star_g_handle_mode(pg, cfg->filter_mode); return 0; } static int br_mdb_add_group_star_g(const struct br_mdb_config *cfg, struct net_bridge_mdb_entry *mp, struct net_bridge_mcast *brmctx, unsigned char flags, struct netlink_ext_ack *extack) { struct net_bridge_port_group __rcu **pp; struct net_bridge_port_group *p; unsigned long now = jiffies; int err; for (pp = &mp->ports; (p = mlock_dereference(*pp, cfg->br)) != NULL; pp = &p->next) { if (p->key.port == cfg->p) { if (!(cfg->nlflags & NLM_F_REPLACE)) { NL_SET_ERR_MSG_MOD(extack, "(*, G) group is already joined by port"); return -EEXIST; } return br_mdb_replace_group_star_g(cfg, mp, p, brmctx, flags, extack); } if ((unsigned long)p->key.port < (unsigned long)cfg->p) break; } p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL, cfg->filter_mode, cfg->rt_protocol, extack); if (unlikely(!p)) return -ENOMEM; err = br_mdb_add_group_srcs(cfg, p, brmctx, extack); if (err) goto err_del_port_group; rcu_assign_pointer(*pp, p); if (!(flags & MDB_PG_FLAGS_PERMANENT) && cfg->filter_mode == MCAST_EXCLUDE) mod_timer(&p->timer, now + brmctx->multicast_membership_interval); br_mdb_notify(cfg->br->dev, mp, p, RTM_NEWMDB); /* If we are adding a new EXCLUDE port group (*, G), it needs to be * also added to all 
(S, G) entries for proper replication. */ if (br_multicast_should_handle_mode(brmctx, cfg->group.proto) && cfg->filter_mode == MCAST_EXCLUDE) br_multicast_star_g_handle_mode(p, MCAST_EXCLUDE); return 0; err_del_port_group: br_multicast_del_port_group(p); return err; } static int br_mdb_add_group(const struct br_mdb_config *cfg, struct netlink_ext_ack *extack) { struct br_mdb_entry *entry = cfg->entry; struct net_bridge_port *port = cfg->p; struct net_bridge_mdb_entry *mp; struct net_bridge *br = cfg->br; struct net_bridge_mcast *brmctx; struct br_ip group = cfg->group; unsigned char flags = 0; brmctx = __br_mdb_choose_context(br, entry, extack); if (!brmctx) return -EINVAL; mp = br_multicast_new_group(br, &group); if (IS_ERR(mp)) return PTR_ERR(mp); /* host join */ if (!port) { if (mp->host_joined) { NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host"); return -EEXIST; } br_multicast_host_join(brmctx, mp, false); br_mdb_notify(br->dev, mp, NULL, RTM_NEWMDB); return 0; } if (entry->state == MDB_PERMANENT) flags |= MDB_PG_FLAGS_PERMANENT; if (br_multicast_is_star_g(&group)) return br_mdb_add_group_star_g(cfg, mp, brmctx, flags, extack); else return br_mdb_add_group_sg(cfg, mp, brmctx, flags, extack); } static int __br_mdb_add(const struct br_mdb_config *cfg, struct netlink_ext_ack *extack) { int ret; spin_lock_bh(&cfg->br->multicast_lock); ret = br_mdb_add_group(cfg, extack); spin_unlock_bh(&cfg->br->multicast_lock); return ret; } static int br_mdb_config_src_entry_init(struct nlattr *src_entry, struct br_mdb_src_entry *src, __be16 proto, struct netlink_ext_ack *extack) { struct nlattr *tb[MDBE_SRCATTR_MAX + 1]; int err; err = nla_parse_nested(tb, MDBE_SRCATTR_MAX, src_entry, br_mdbe_src_list_entry_pol, extack); if (err) return err; if (NL_REQ_ATTR_CHECK(extack, src_entry, tb, MDBE_SRCATTR_ADDRESS)) return -EINVAL; if (!is_valid_mdb_source(tb[MDBE_SRCATTR_ADDRESS], proto, extack)) return -EINVAL; src->addr.proto = proto; nla_memcpy(&src->addr.src, 
tb[MDBE_SRCATTR_ADDRESS], nla_len(tb[MDBE_SRCATTR_ADDRESS])); return 0; } static int br_mdb_config_src_list_init(struct nlattr *src_list, struct br_mdb_config *cfg, struct netlink_ext_ack *extack) { struct nlattr *src_entry; int rem, err; int i = 0; nla_for_each_nested(src_entry, src_list, rem) cfg->num_src_entries++; if (cfg->num_src_entries >= PG_SRC_ENT_LIMIT) { NL_SET_ERR_MSG_FMT_MOD(extack, "Exceeded maximum number of source entries (%u)", PG_SRC_ENT_LIMIT - 1); return -EINVAL; } cfg->src_entries = kcalloc(cfg->num_src_entries, sizeof(struct br_mdb_src_entry), GFP_KERNEL); if (!cfg->src_entries) return -ENOMEM; nla_for_each_nested(src_entry, src_list, rem) { err = br_mdb_config_src_entry_init(src_entry, &cfg->src_entries[i], cfg->entry->addr.proto, extack); if (err) goto err_src_entry_init; i++; } return 0; err_src_entry_init: kfree(cfg->src_entries); return err; } static void br_mdb_config_src_list_fini(struct br_mdb_config *cfg) { kfree(cfg->src_entries); } static int br_mdb_config_attrs_init(struct nlattr *set_attrs, struct br_mdb_config *cfg, struct netlink_ext_ack *extack) { struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1]; int err; err = nla_parse_nested(mdb_attrs, MDBE_ATTR_MAX, set_attrs, br_mdbe_attrs_pol, extack); if (err) return err; if (mdb_attrs[MDBE_ATTR_SOURCE] && !is_valid_mdb_source(mdb_attrs[MDBE_ATTR_SOURCE], cfg->entry->addr.proto, extack)) return -EINVAL; __mdb_entry_to_br_ip(cfg->entry, &cfg->group, mdb_attrs); if (mdb_attrs[MDBE_ATTR_GROUP_MODE]) { if (!cfg->p) { NL_SET_ERR_MSG_MOD(extack, "Filter mode cannot be set for host groups"); return -EINVAL; } if (!br_multicast_is_star_g(&cfg->group)) { NL_SET_ERR_MSG_MOD(extack, "Filter mode can only be set for (*, G) entries"); return -EINVAL; } cfg->filter_mode = nla_get_u8(mdb_attrs[MDBE_ATTR_GROUP_MODE]); } else { cfg->filter_mode = MCAST_EXCLUDE; } if (mdb_attrs[MDBE_ATTR_SRC_LIST]) { if (!cfg->p) { NL_SET_ERR_MSG_MOD(extack, "Source list cannot be set for host groups"); return -EINVAL; } if 
(!br_multicast_is_star_g(&cfg->group)) { NL_SET_ERR_MSG_MOD(extack, "Source list can only be set for (*, G) entries"); return -EINVAL; } if (!mdb_attrs[MDBE_ATTR_GROUP_MODE]) { NL_SET_ERR_MSG_MOD(extack, "Source list cannot be set without filter mode"); return -EINVAL; } err = br_mdb_config_src_list_init(mdb_attrs[MDBE_ATTR_SRC_LIST], cfg, extack); if (err) return err; } if (!cfg->num_src_entries && cfg->filter_mode == MCAST_INCLUDE) { NL_SET_ERR_MSG_MOD(extack, "Cannot add (*, G) INCLUDE with an empty source list"); return -EINVAL; } if (mdb_attrs[MDBE_ATTR_RTPROT]) { if (!cfg->p) { NL_SET_ERR_MSG_MOD(extack, "Protocol cannot be set for host groups"); return -EINVAL; } cfg->rt_protocol = nla_get_u8(mdb_attrs[MDBE_ATTR_RTPROT]); } return 0; } static int br_mdb_config_init(struct br_mdb_config *cfg, struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); memset(cfg, 0, sizeof(*cfg)); cfg->filter_mode = MCAST_EXCLUDE; cfg->rt_protocol = RTPROT_STATIC; cfg->nlflags = nlmsg_flags; cfg->br = netdev_priv(dev); if (!netif_running(cfg->br->dev)) { NL_SET_ERR_MSG_MOD(extack, "Bridge device is not running"); return -EINVAL; } if (!br_opt_get(cfg->br, BROPT_MULTICAST_ENABLED)) { NL_SET_ERR_MSG_MOD(extack, "Bridge's multicast processing is disabled"); return -EINVAL; } cfg->entry = nla_data(tb[MDBA_SET_ENTRY]); if (cfg->entry->ifindex != cfg->br->dev->ifindex) { struct net_device *pdev; pdev = __dev_get_by_index(net, cfg->entry->ifindex); if (!pdev) { NL_SET_ERR_MSG_MOD(extack, "Port net device doesn't exist"); return -ENODEV; } cfg->p = br_port_get_rtnl(pdev); if (!cfg->p) { NL_SET_ERR_MSG_MOD(extack, "Net device is not a bridge port"); return -EINVAL; } if (cfg->p->br != cfg->br) { NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device"); return -EINVAL; } } if (cfg->entry->addr.proto == htons(ETH_P_IP) && ipv4_is_zeronet(cfg->entry->addr.u.ip4)) { NL_SET_ERR_MSG_MOD(extack, "IPv4 entry 
group address 0.0.0.0 is not allowed"); return -EINVAL; } if (tb[MDBA_SET_ENTRY_ATTRS]) return br_mdb_config_attrs_init(tb[MDBA_SET_ENTRY_ATTRS], cfg, extack); else __mdb_entry_to_br_ip(cfg->entry, &cfg->group, NULL); return 0; } static void br_mdb_config_fini(struct br_mdb_config *cfg) { br_mdb_config_src_list_fini(cfg); } int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; struct br_mdb_config cfg; int err; err = br_mdb_config_init(&cfg, dev, tb, nlmsg_flags, extack); if (err) return err; err = -EINVAL; /* host join errors which can happen before creating the group */ if (!cfg.p && !br_group_is_l2(&cfg.group)) { /* don't allow any flags for host-joined IP groups */ if (cfg.entry->state) { NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups"); goto out; } if (!br_multicast_is_star_g(&cfg.group)) { NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined"); goto out; } } if (br_group_is_l2(&cfg.group) && cfg.entry->state != MDB_PERMANENT) { NL_SET_ERR_MSG_MOD(extack, "Only permanent L2 entries allowed"); goto out; } if (cfg.p) { if (cfg.p->state == BR_STATE_DISABLED && cfg.entry->state != MDB_PERMANENT) { NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state and entry is not permanent"); goto out; } vg = nbp_vlan_group(cfg.p); } else { vg = br_vlan_group(cfg.br); } /* If vlan filtering is enabled and VLAN is not specified * install mdb entry on all vlans configured on the port. 
*/ if (br_vlan_enabled(cfg.br->dev) && vg && cfg.entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { cfg.entry->vid = v->vid; cfg.group.vid = v->vid; err = __br_mdb_add(&cfg, extack); if (err) break; } } else { err = __br_mdb_add(&cfg, extack); } out: br_mdb_config_fini(&cfg); return err; } static int __br_mdb_del(const struct br_mdb_config *cfg) { struct br_mdb_entry *entry = cfg->entry; struct net_bridge *br = cfg->br; struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; struct br_ip ip = cfg->group; int err = -EINVAL; spin_lock_bh(&br->multicast_lock); mp = br_mdb_ip_get(br, &ip); if (!mp) goto unlock; /* host leave */ if (entry->ifindex == mp->br->dev->ifindex && mp->host_joined) { br_multicast_host_leave(mp, false); err = 0; br_mdb_notify(br->dev, mp, NULL, RTM_DELMDB); if (!mp->ports && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); goto unlock; } for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (!p->key.port || p->key.port->dev->ifindex != entry->ifindex) continue; br_multicast_del_pg(mp, p, pp); err = 0; break; } unlock: spin_unlock_bh(&br->multicast_lock); return err; } int br_mdb_del(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; struct br_mdb_config cfg; int err; err = br_mdb_config_init(&cfg, dev, tb, 0, extack); if (err) return err; if (cfg.p) vg = nbp_vlan_group(cfg.p); else vg = br_vlan_group(cfg.br); /* If vlan filtering is enabled and VLAN is not specified * delete mdb entry on all vlans configured on the port. 
*/ if (br_vlan_enabled(cfg.br->dev) && vg && cfg.entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { cfg.entry->vid = v->vid; cfg.group.vid = v->vid; err = __br_mdb_del(&cfg); } } else { err = __br_mdb_del(&cfg); } br_mdb_config_fini(&cfg); return err; } static const struct nla_policy br_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), sizeof(struct in6_addr)), }; static int br_mdb_get_parse(struct net_device *dev, struct nlattr *tb[], struct br_ip *group, struct netlink_ext_ack *extack) { struct br_mdb_entry *entry = nla_data(tb[MDBA_GET_ENTRY]); struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; int err; if (!tb[MDBA_GET_ENTRY_ATTRS]) { __mdb_entry_to_br_ip(entry, group, NULL); return 0; } err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, tb[MDBA_GET_ENTRY_ATTRS], br_mdbe_attrs_get_pol, extack); if (err) return err; if (mdbe_attrs[MDBE_ATTR_SOURCE] && !is_valid_mdb_source(mdbe_attrs[MDBE_ATTR_SOURCE], entry->addr.proto, extack)) return -EINVAL; __mdb_entry_to_br_ip(entry, group, mdbe_attrs); return 0; } static struct sk_buff * br_mdb_get_reply_alloc(const struct net_bridge_mdb_entry *mp) { struct net_bridge_port_group *pg; size_t nlmsg_size; nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) + /* MDBA_MDB */ nla_total_size(0) + /* MDBA_MDB_ENTRY */ nla_total_size(0); if (mp->host_joined) nlmsg_size += rtnl_mdb_nlmsg_pg_size(NULL); for (pg = mlock_dereference(mp->ports, mp->br); pg; pg = mlock_dereference(pg->next, mp->br)) nlmsg_size += rtnl_mdb_nlmsg_pg_size(pg); return nlmsg_new(nlmsg_size, GFP_ATOMIC); } static int br_mdb_get_reply_fill(struct sk_buff *skb, struct net_bridge_mdb_entry *mp, u32 portid, u32 seq) { struct nlattr *mdb_nest, *mdb_entry_nest; struct net_bridge_port_group *pg; struct br_port_msg *bpm; struct nlmsghdr *nlh; int err; nlh = nlmsg_put(skb, portid, seq, RTM_NEWMDB, sizeof(*bpm), 0); if (!nlh) return -EMSGSIZE; bpm = nlmsg_data(nlh); memset(bpm, 0, 
sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = mp->br->dev->ifindex; mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB); if (!mdb_nest) { err = -EMSGSIZE; goto cancel; } mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); if (!mdb_entry_nest) { err = -EMSGSIZE; goto cancel; } if (mp->host_joined) { err = __mdb_fill_info(skb, mp, NULL); if (err) goto cancel; } for (pg = mlock_dereference(mp->ports, mp->br); pg; pg = mlock_dereference(pg->next, mp->br)) { err = __mdb_fill_info(skb, mp, pg); if (err) goto cancel; } nla_nest_end(skb, mdb_entry_nest); nla_nest_end(skb, mdb_nest); nlmsg_end(skb, nlh); return 0; cancel: nlmsg_cancel(skb, nlh); return err; } int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq, struct netlink_ext_ack *extack) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_mdb_entry *mp; struct sk_buff *skb; struct br_ip group; int err; err = br_mdb_get_parse(dev, tb, &group, extack); if (err) return err; /* Hold the multicast lock to ensure that the MDB entry does not change * between the time the reply size is determined and when the reply is * filled in. */ spin_lock_bh(&br->multicast_lock); mp = br_mdb_ip_get(br, &group); if (!mp) { NL_SET_ERR_MSG_MOD(extack, "MDB entry not found"); err = -ENOENT; goto unlock; } skb = br_mdb_get_reply_alloc(mp); if (!skb) { err = -ENOMEM; goto unlock; } err = br_mdb_get_reply_fill(skb, mp, portid, seq); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to fill MDB get reply"); goto free; } spin_unlock_bh(&br->multicast_lock); return rtnl_unicast(skb, dev_net(dev), portid); free: kfree_skb(skb); unlock: spin_unlock_bh(&br->multicast_lock); return err; } |
1 11 11 10 10 10 4 || // SPDX-License-Identifier: GPL-2.0 /* * security/tomoyo/memory.c * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include <linux/hash.h> #include <linux/slab.h> #include "common.h" /** * tomoyo_warn_oom - Print out of memory warning message. * * @function: Function's name. */ void tomoyo_warn_oom(const char *function) { /* Reduce error messages. */ static pid_t tomoyo_last_pid; const pid_t pid = current->pid; if (tomoyo_last_pid != pid) { pr_warn("ERROR: Out of memory at %s.\n", function); tomoyo_last_pid = pid; } if (!tomoyo_policy_loaded) panic("MAC Initialization failed.\n"); } /* Memoy currently used by policy/audit log/query. */ unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT]; /* Memory quota for "policy"/"audit log"/"query". */ unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT]; /** * tomoyo_memory_ok - Check memory quota. * * @ptr: Pointer to allocated memory. * * Returns true on success, false otherwise. * * Returns true if @ptr is not NULL and quota not exceeded, false otherwise. * * Caller holds tomoyo_policy_lock mutex. */ bool tomoyo_memory_ok(void *ptr) { if (ptr) { const size_t s = ksize(ptr); tomoyo_memory_used[TOMOYO_MEMORY_POLICY] += s; if (!tomoyo_memory_quota[TOMOYO_MEMORY_POLICY] || tomoyo_memory_used[TOMOYO_MEMORY_POLICY] <= tomoyo_memory_quota[TOMOYO_MEMORY_POLICY]) return true; tomoyo_memory_used[TOMOYO_MEMORY_POLICY] -= s; } tomoyo_warn_oom(__func__); return false; } /** * tomoyo_commit_ok - Check memory quota. * * @data: Data to copy from. * @size: Size in byte. * * Returns pointer to allocated memory on success, NULL otherwise. * @data is zero-cleared on success. * * Caller holds tomoyo_policy_lock mutex. 
*/ void *tomoyo_commit_ok(void *data, const unsigned int size) { void *ptr = kzalloc(size, GFP_NOFS | __GFP_NOWARN); if (tomoyo_memory_ok(ptr)) { memmove(ptr, data, size); memset(data, 0, size); return ptr; } kfree(ptr); return NULL; } /** * tomoyo_get_group - Allocate memory for "struct tomoyo_path_group"/"struct tomoyo_number_group". * * @param: Pointer to "struct tomoyo_acl_param". * @idx: Index number. * * Returns pointer to "struct tomoyo_group" on success, NULL otherwise. */ struct tomoyo_group *tomoyo_get_group(struct tomoyo_acl_param *param, const u8 idx) { struct tomoyo_group e = { }; struct tomoyo_group *group = NULL; struct list_head *list; const char *group_name = tomoyo_read_token(param); bool found = false; if (!tomoyo_correct_word(group_name) || idx >= TOMOYO_MAX_GROUP) return NULL; e.group_name = tomoyo_get_name(group_name); if (!e.group_name) return NULL; if (mutex_lock_interruptible(&tomoyo_policy_lock)) goto out; list = ¶m->ns->group_list[idx]; list_for_each_entry(group, list, head.list) { if (e.group_name != group->group_name || atomic_read(&group->head.users) == TOMOYO_GC_IN_PROGRESS) continue; atomic_inc(&group->head.users); found = true; break; } if (!found) { struct tomoyo_group *entry = tomoyo_commit_ok(&e, sizeof(e)); if (entry) { INIT_LIST_HEAD(&entry->member_list); atomic_set(&entry->head.users, 1); list_add_tail_rcu(&entry->head.list, list); group = entry; found = true; } } mutex_unlock(&tomoyo_policy_lock); out: tomoyo_put_name(e.group_name); return found ? group : NULL; } /* * tomoyo_name_list is used for holding string data used by TOMOYO. * Since same string data is likely used for multiple times (e.g. * "/lib/libc-2.5.so"), TOMOYO shares string data in the form of * "const struct tomoyo_path_info *". */ struct list_head tomoyo_name_list[TOMOYO_MAX_HASH]; /** * tomoyo_get_name - Allocate permanent memory for string data. * * @name: The string to store into the permernent memory. 
* * Returns pointer to "struct tomoyo_path_info" on success, NULL otherwise. */ const struct tomoyo_path_info *tomoyo_get_name(const char *name) { struct tomoyo_name *ptr; unsigned int hash; int len; struct list_head *head; if (!name) return NULL; len = strlen(name) + 1; hash = full_name_hash(NULL, (const unsigned char *) name, len - 1); head = &tomoyo_name_list[hash_long(hash, TOMOYO_HASH_BITS)]; if (mutex_lock_interruptible(&tomoyo_policy_lock)) return NULL; list_for_each_entry(ptr, head, head.list) { if (hash != ptr->entry.hash || strcmp(name, ptr->entry.name) || atomic_read(&ptr->head.users) == TOMOYO_GC_IN_PROGRESS) continue; atomic_inc(&ptr->head.users); goto out; } ptr = kzalloc(sizeof(*ptr) + len, GFP_NOFS | __GFP_NOWARN); if (tomoyo_memory_ok(ptr)) { ptr->entry.name = ((char *) ptr) + sizeof(*ptr); memmove((char *) ptr->entry.name, name, len); atomic_set(&ptr->head.users, 1); tomoyo_fill_path_info(&ptr->entry); list_add_tail(&ptr->head.list, head); } else { kfree(ptr); ptr = NULL; } out: mutex_unlock(&tomoyo_policy_lock); return ptr ? &ptr->entry : NULL; } /* Initial namespace.*/ struct tomoyo_policy_namespace tomoyo_kernel_namespace; /** * tomoyo_mm_init - Initialize mm related code. */ void __init tomoyo_mm_init(void) { int idx; for (idx = 0; idx < TOMOYO_MAX_HASH; idx++) INIT_LIST_HEAD(&tomoyo_name_list[idx]); tomoyo_kernel_namespace.name = "<kernel>"; tomoyo_init_policy_namespace(&tomoyo_kernel_namespace); tomoyo_kernel_domain.ns = &tomoyo_kernel_namespace; INIT_LIST_HEAD(&tomoyo_kernel_domain.acl_info_list); tomoyo_kernel_domain.domainname = tomoyo_get_name("<kernel>"); list_add_tail_rcu(&tomoyo_kernel_domain.list, &tomoyo_domain_list); } |
2 1 8 40 21 3 2 2 40 41 34 2 1 22 3 2 1 1 1 4 4 || // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_fifo.c The simplest FIFO queue. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> /* 1 band FIFO pseudo-"scheduler" */ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_drop(skb, sch, to_free); } static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { if (likely(sch->q.qlen < sch->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_drop(skb, sch, to_free); } static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { unsigned int prev_backlog; if (likely(sch->q.qlen < sch->limit)) return qdisc_enqueue_tail(skb, sch); prev_backlog = sch->qstats.backlog; /* queue full, remove one skb to fulfill the limit */ __qdisc_queue_drop_head(sch, &sch->q, to_free); qdisc_qstats_drop(sch); qdisc_enqueue_tail(skb, sch); qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog); return NET_XMIT_CN; } static void fifo_offload_init(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct tc_fifo_qopt_offload qopt; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; qopt.command = TC_FIFO_REPLACE; qopt.handle = sch->handle; qopt.parent = sch->parent; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_FIFO, &qopt); } static void fifo_offload_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct tc_fifo_qopt_offload qopt; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; qopt.command = TC_FIFO_DESTROY; qopt.handle = sch->handle; qopt.parent = 
sch->parent; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_FIFO, &qopt); } static int fifo_offload_dump(struct Qdisc *sch) { struct tc_fifo_qopt_offload qopt; qopt.command = TC_FIFO_STATS; qopt.handle = sch->handle; qopt.parent = sch->parent; qopt.stats.bstats = &sch->bstats; qopt.stats.qstats = &sch->qstats; return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_FIFO, &qopt); } static int __fifo_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { bool bypass; bool is_bfifo = sch->ops == &bfifo_qdisc_ops; if (opt == NULL) { u32 limit = qdisc_dev(sch)->tx_queue_len; if (is_bfifo) limit *= psched_mtu(qdisc_dev(sch)); sch->limit = limit; } else { struct tc_fifo_qopt *ctl = nla_data(opt); if (nla_len(opt) < sizeof(*ctl)) return -EINVAL; sch->limit = ctl->limit; } if (is_bfifo) bypass = sch->limit >= psched_mtu(qdisc_dev(sch)); else bypass = sch->limit >= 1; if (bypass) sch->flags |= TCQ_F_CAN_BYPASS; else sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; } static int fifo_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { int err; err = __fifo_init(sch, opt, extack); if (err) return err; fifo_offload_init(sch); return 0; } static int fifo_hd_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { return __fifo_init(sch, opt, extack); } static void fifo_destroy(struct Qdisc *sch) { fifo_offload_destroy(sch); } static int __fifo_dump(struct Qdisc *sch, struct sk_buff *skb) { struct tc_fifo_qopt opt = { .limit = sch->limit }; if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure; return skb->len; nla_put_failure: return -1; } static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) { int err; err = fifo_offload_dump(sch); if (err) return err; return __fifo_dump(sch, skb); } static int fifo_hd_dump(struct Qdisc *sch, struct sk_buff *skb) { return __fifo_dump(sch, skb); } struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .id = "pfifo", .priv_size = 0, .enqueue = pfifo_enqueue, 
.dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, .init = fifo_init, .destroy = fifo_destroy, .reset = qdisc_reset_queue, .change = fifo_init, .dump = fifo_dump, .owner = THIS_MODULE, }; EXPORT_SYMBOL(pfifo_qdisc_ops); struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .id = "bfifo", .priv_size = 0, .enqueue = bfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, .init = fifo_init, .destroy = fifo_destroy, .reset = qdisc_reset_queue, .change = fifo_init, .dump = fifo_dump, .owner = THIS_MODULE, }; EXPORT_SYMBOL(bfifo_qdisc_ops); struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { .id = "pfifo_head_drop", .priv_size = 0, .enqueue = pfifo_tail_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, .init = fifo_hd_init, .reset = qdisc_reset_queue, .change = fifo_hd_init, .dump = fifo_hd_dump, .owner = THIS_MODULE, }; /* Pass size change message down to embedded FIFO */ int fifo_set_limit(struct Qdisc *q, unsigned int limit) { struct nlattr *nla; int ret = -ENOMEM; /* Hack to avoid sending change message to non-FIFO */ if (strncmp(q->ops->id + 1, "fifo", 4) != 0) return 0; if (!q->ops->change) return 0; nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); if (nla) { nla->nla_type = RTM_NEWQDISC; nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; ret = q->ops->change(q, nla, NULL); kfree(nla); } return ret; } EXPORT_SYMBOL(fifo_set_limit); struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, unsigned int limit, struct netlink_ext_ack *extack) { struct Qdisc *q; int err = -ENOMEM; q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1), extack); if (q) { err = fifo_set_limit(q, limit); if (err < 0) { qdisc_put(q); q = NULL; } } return q ? : ERR_PTR(err); } EXPORT_SYMBOL(fifo_create_dflt); MODULE_DESCRIPTION("Single queue packet and byte based First In First Out(P/BFIFO) scheduler"); |
1 2 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 */ #include <linux/fs.h> #include <linux/slab.h> #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_unicode.h" #include "jfs_debug.h" /* * NAME: jfs_strfromUCS() * * FUNCTION: Convert little-endian unicode string to character string * */ int jfs_strfromUCS_le(char *to, const __le16 * from, int len, struct nls_table *codepage) { int i; int outlen = 0; static int warn_again = 5; /* Only warn up to 5 times total */ int warn = !!warn_again; /* once per string */ if (codepage) { for (i = 0; (i < len) && from[i]; i++) { int charlen; charlen = codepage->uni2char(le16_to_cpu(from[i]), &to[outlen], NLS_MAX_CHARSET_SIZE); if (charlen > 0) outlen += charlen; else to[outlen++] = '?'; } } else { for (i = 0; (i < len) && from[i]; i++) { if (unlikely(le16_to_cpu(from[i]) & 0xff00)) { to[i] = '?'; if (unlikely(warn)) { warn--; warn_again--; printk(KERN_ERR "non-latin1 character 0x%x found in JFS file name\n", le16_to_cpu(from[i])); printk(KERN_ERR "mount with iocharset=utf8 to access\n"); } } else to[i] = (char) (le16_to_cpu(from[i])); } outlen = i; } to[outlen] = 0; return outlen; } /* * NAME: jfs_strtoUCS() * * FUNCTION: Convert character string to unicode string * */ static int jfs_strtoUCS(wchar_t * to, const unsigned char *from, int len, struct nls_table *codepage) { int charlen; int i; if (codepage) { for (i = 0; len && *from; i++, from += charlen, len -= charlen) { charlen = codepage->char2uni(from, len, &to[i]); if (charlen < 1) { jfs_err("jfs_strtoUCS: 
char2uni returned %d.", charlen); jfs_err("charset = %s, char = 0x%x", codepage->charset, *from); return charlen; } } } else { for (i = 0; (i < len) && from[i]; i++) to[i] = (wchar_t) from[i]; } to[i] = 0; return i; } /* * NAME: get_UCSname() * * FUNCTION: Allocate and translate to unicode string * */ int get_UCSname(struct component_name * uniName, struct dentry *dentry) { struct nls_table *nls_tab = JFS_SBI(dentry->d_sb)->nls_tab; int length = dentry->d_name.len; if (length > JFS_NAME_MAX) return -ENAMETOOLONG; uniName->name = kmalloc_array(length + 1, sizeof(wchar_t), GFP_NOFS); if (uniName->name == NULL) return -ENOMEM; uniName->namlen = jfs_strtoUCS(uniName->name, dentry->d_name.name, length, nls_tab); if (uniName->namlen < 0) { kfree(uniName->name); return uniName->namlen; } return 0; } |
1 3 1 3 3 3 3 6 6 2 1 14 5 4 9 10 11 11 11 2 1 3 1 || // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Linus Lüssing */ #include "multicast.h" #include "main.h" #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/icmpv6.h> #include <linux/if_bridge.h> #include <linux/if_ether.h> #include <linux/igmp.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/inetdevice.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> #include <linux/workqueue.h> #include <net/addrconf.h> #include <net/genetlink.h> #include <net/if_inet6.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/netlink.h> #include <net/sock.h> #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" #include "netlink.h" #include "send.h" #include "soft-interface.h" #include "translation-table.h" #include "tvlv.h" static void batadv_mcast_mla_update(struct work_struct *work); /** * batadv_mcast_start_timer() - schedule the multicast periodic worker * @bat_priv: the bat priv with all the soft interface information */ static void batadv_mcast_start_timer(struct batadv_priv *bat_priv) { queue_delayed_work(batadv_event_workqueue, &bat_priv->mcast.work, msecs_to_jiffies(BATADV_MCAST_WORK_PERIOD)); } /** * batadv_mcast_get_bridge() - get the bridge on top of the softif if it exists * 
@soft_iface: netdev struct of the mesh interface * * If the given soft interface has a bridge on top then the refcount * of the according net device is increased. * * Return: NULL if no such bridge exists. Otherwise the net device of the * bridge. */ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) { struct net_device *upper = soft_iface; rcu_read_lock(); do { upper = netdev_master_upper_dev_get_rcu(upper); } while (upper && !netif_is_bridge_master(upper)); dev_hold(upper); rcu_read_unlock(); return upper; } /** * batadv_mcast_mla_rtr_flags_softif_get_ipv4() - get mcast router flags from * node for IPv4 * @dev: the interface to check * * Checks the presence of an IPv4 multicast router on this node. * * Caller needs to hold rcu read lock. * * Return: BATADV_NO_FLAGS if present, BATADV_MCAST_WANT_NO_RTR4 otherwise. */ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv4(struct net_device *dev) { struct in_device *in_dev = __in_dev_get_rcu(dev); if (in_dev && IN_DEV_MFORWARD(in_dev)) return BATADV_NO_FLAGS; else return BATADV_MCAST_WANT_NO_RTR4; } /** * batadv_mcast_mla_rtr_flags_softif_get_ipv6() - get mcast router flags from * node for IPv6 * @dev: the interface to check * * Checks the presence of an IPv6 multicast router on this node. * * Caller needs to hold rcu read lock. * * Return: BATADV_NO_FLAGS if present, BATADV_MCAST_WANT_NO_RTR6 otherwise. 
*/ #if IS_ENABLED(CONFIG_IPV6_MROUTE) static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) { struct inet6_dev *in6_dev = __in6_dev_get(dev); if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding)) return BATADV_NO_FLAGS; else return BATADV_MCAST_WANT_NO_RTR6; } #else static inline u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) { return BATADV_MCAST_WANT_NO_RTR6; } #endif /** * batadv_mcast_mla_rtr_flags_softif_get() - get mcast router flags from node * @bat_priv: the bat priv with all the soft interface information * @bridge: bridge interface on top of the soft_iface if present, * otherwise pass NULL * * Checks the presence of IPv4 and IPv6 multicast routers on this * node. * * Return: * BATADV_NO_FLAGS: Both an IPv4 and IPv6 multicast router is present * BATADV_MCAST_WANT_NO_RTR4: No IPv4 multicast router is present * BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present * The former two OR'd: no multicast router is present */ static u8 batadv_mcast_mla_rtr_flags_softif_get(struct batadv_priv *bat_priv, struct net_device *bridge) { struct net_device *dev = bridge ? bridge : bat_priv->soft_iface; u8 flags = BATADV_NO_FLAGS; rcu_read_lock(); flags |= batadv_mcast_mla_rtr_flags_softif_get_ipv4(dev); flags |= batadv_mcast_mla_rtr_flags_softif_get_ipv6(dev); rcu_read_unlock(); return flags; } /** * batadv_mcast_mla_rtr_flags_bridge_get() - get mcast router flags from bridge * @bat_priv: the bat priv with all the soft interface information * @bridge: bridge interface on top of the soft_iface if present, * otherwise pass NULL * * Checks the presence of IPv4 and IPv6 multicast routers behind a bridge. 
* * Return: * BATADV_NO_FLAGS: Both an IPv4 and IPv6 multicast router is present * BATADV_MCAST_WANT_NO_RTR4: No IPv4 multicast router is present * BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present * The former two OR'd: no multicast router is present */ static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv, struct net_device *bridge) { struct net_device *dev = bat_priv->soft_iface; u8 flags = BATADV_NO_FLAGS; if (!bridge) return BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6; if (!br_multicast_has_router_adjacent(dev, ETH_P_IP)) flags |= BATADV_MCAST_WANT_NO_RTR4; if (!br_multicast_has_router_adjacent(dev, ETH_P_IPV6)) flags |= BATADV_MCAST_WANT_NO_RTR6; return flags; } /** * batadv_mcast_mla_rtr_flags_get() - get multicast router flags * @bat_priv: the bat priv with all the soft interface information * @bridge: bridge interface on top of the soft_iface if present, * otherwise pass NULL * * Checks the presence of IPv4 and IPv6 multicast routers on this * node or behind its bridge. * * Return: * BATADV_NO_FLAGS: Both an IPv4 and IPv6 multicast router is present * BATADV_MCAST_WANT_NO_RTR4: No IPv4 multicast router is present * BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present * The former two OR'd: no multicast router is present */ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv, struct net_device *bridge) { u8 flags = BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6; flags &= batadv_mcast_mla_rtr_flags_softif_get(bat_priv, bridge); flags &= batadv_mcast_mla_rtr_flags_bridge_get(bat_priv, bridge); return flags; } /** * batadv_mcast_mla_flags_get() - get the new multicast flags * @bat_priv: the bat priv with all the soft interface information * * Return: A set of flags for the current/next TVLV, querier and * bridge state. 
*/ static struct batadv_mcast_mla_flags batadv_mcast_mla_flags_get(struct batadv_priv *bat_priv) { struct net_device *dev = bat_priv->soft_iface; struct batadv_mcast_querier_state *qr4, *qr6; struct batadv_mcast_mla_flags mla_flags; struct net_device *bridge; bridge = batadv_mcast_get_bridge(dev); memset(&mla_flags, 0, sizeof(mla_flags)); mla_flags.enabled = 1; mla_flags.tvlv_flags |= batadv_mcast_mla_rtr_flags_get(bat_priv, bridge); if (!bridge) return mla_flags; dev_put(bridge); mla_flags.bridged = 1; qr4 = &mla_flags.querier_ipv4; qr6 = &mla_flags.querier_ipv6; if (!IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)) pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n"); qr4->exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP); qr4->shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP); qr6->exists = br_multicast_has_querier_anywhere(dev, ETH_P_IPV6); qr6->shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IPV6); mla_flags.tvlv_flags |= BATADV_MCAST_WANT_ALL_UNSNOOPABLES; /* 1) If no querier exists at all, then multicast listeners on * our local TT clients behind the bridge will keep silent. * 2) If the selected querier is on one of our local TT clients, * behind the bridge, then this querier might shadow multicast * listeners on our local TT clients, behind this bridge. * * In both cases, we will signalize other batman nodes that * we need all multicast traffic of the according protocol. 
*/ if (!qr4->exists || qr4->shadowing) { mla_flags.tvlv_flags |= BATADV_MCAST_WANT_ALL_IPV4; mla_flags.tvlv_flags &= ~BATADV_MCAST_WANT_NO_RTR4; } if (!qr6->exists || qr6->shadowing) { mla_flags.tvlv_flags |= BATADV_MCAST_WANT_ALL_IPV6; mla_flags.tvlv_flags &= ~BATADV_MCAST_WANT_NO_RTR6; } return mla_flags; } /** * batadv_mcast_mla_is_duplicate() - check whether an address is in a list * @mcast_addr: the multicast address to check * @mcast_list: the list with multicast addresses to search in * * Return: true if the given address is already in the given list. * Otherwise returns false. */ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr, struct hlist_head *mcast_list) { struct batadv_hw_addr *mcast_entry; hlist_for_each_entry(mcast_entry, mcast_list, list) if (batadv_compare_eth(mcast_entry->addr, mcast_addr)) return true; return false; } /** * batadv_mcast_mla_softif_get_ipv4() - get softif IPv4 multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of IPv4 multicast listeners residing * on this kernel on the given soft interface, dev, in * the given mcast_list. In general, multicast listeners provided by * your multicast receiving applications run directly on this node. * * Return: -ENOMEM on memory allocation error or the number of * items added to the mcast_list otherwise. 
*/ static int batadv_mcast_mla_softif_get_ipv4(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { struct batadv_hw_addr *new; struct in_device *in_dev; u8 mcast_addr[ETH_ALEN]; struct ip_mc_list *pmc; int ret = 0; if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_IPV4) return 0; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (!in_dev) { rcu_read_unlock(); return 0; } for (pmc = rcu_dereference(in_dev->mc_list); pmc; pmc = rcu_dereference(pmc->next_rcu)) { if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES && ipv4_is_local_multicast(pmc->multiaddr)) continue; if (!(flags->tvlv_flags & BATADV_MCAST_WANT_NO_RTR4) && !ipv4_is_local_multicast(pmc->multiaddr)) continue; ip_eth_mc_map(pmc->multiaddr, mcast_addr); if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list)) continue; new = kmalloc(sizeof(*new), GFP_ATOMIC); if (!new) { ret = -ENOMEM; break; } ether_addr_copy(new->addr, mcast_addr); hlist_add_head(&new->list, mcast_list); ret++; } rcu_read_unlock(); return ret; } /** * batadv_mcast_mla_softif_get_ipv6() - get softif IPv6 multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of IPv6 multicast listeners residing * on this kernel on the given soft interface, dev, in * the given mcast_list. In general, multicast listeners provided by * your multicast receiving applications run directly on this node. * * Return: -ENOMEM on memory allocation error or the number of * items added to the mcast_list otherwise. 
*/ #if IS_ENABLED(CONFIG_IPV6) static int batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { struct batadv_hw_addr *new; struct inet6_dev *in6_dev; u8 mcast_addr[ETH_ALEN]; struct ifmcaddr6 *pmc6; int ret = 0; if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_IPV6) return 0; rcu_read_lock(); in6_dev = __in6_dev_get(dev); if (!in6_dev) { rcu_read_unlock(); return 0; } for (pmc6 = rcu_dereference(in6_dev->mc_list); pmc6; pmc6 = rcu_dereference(pmc6->next)) { if (IPV6_ADDR_MC_SCOPE(&pmc6->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) continue; if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES && ipv6_addr_is_ll_all_nodes(&pmc6->mca_addr)) continue; if (!(flags->tvlv_flags & BATADV_MCAST_WANT_NO_RTR6) && IPV6_ADDR_MC_SCOPE(&pmc6->mca_addr) > IPV6_ADDR_SCOPE_LINKLOCAL) continue; ipv6_eth_mc_map(&pmc6->mca_addr, mcast_addr); if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list)) continue; new = kmalloc(sizeof(*new), GFP_ATOMIC); if (!new) { ret = -ENOMEM; break; } ether_addr_copy(new->addr, mcast_addr); hlist_add_head(&new->list, mcast_list); ret++; } rcu_read_unlock(); return ret; } #else static inline int batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { return 0; } #endif /** * batadv_mcast_mla_softif_get() - get softif multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of multicast listeners residing * on this kernel on the given soft interface, dev, in * the given mcast_list. In general, multicast listeners provided by * your multicast receiving applications run directly on this node. * * If there is a bridge interface on top of dev, collect from that one * instead. 
Just like with IP addresses and routes, multicast listeners * will(/should) register to the bridge interface instead of an * enslaved bat0. * * Return: -ENOMEM on memory allocation error or the number of * items added to the mcast_list otherwise. */ static int batadv_mcast_mla_softif_get(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { struct net_device *bridge = batadv_mcast_get_bridge(dev); int ret4, ret6 = 0; if (bridge) dev = bridge; ret4 = batadv_mcast_mla_softif_get_ipv4(dev, mcast_list, flags); if (ret4 < 0) goto out; ret6 = batadv_mcast_mla_softif_get_ipv6(dev, mcast_list, flags); if (ret6 < 0) { ret4 = 0; goto out; } out: dev_put(bridge); return ret4 + ret6; } /** * batadv_mcast_mla_br_addr_cpy() - copy a bridge multicast address * @dst: destination to write to - a multicast MAC address * @src: source to read from - a multicast IP address * * Converts a given multicast IPv4/IPv6 address from a bridge * to its matching multicast MAC address and copies it into the given * destination buffer. * * Caller needs to make sure the destination buffer can hold * at least ETH_ALEN bytes. */ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src) { if (src->proto == htons(ETH_P_IP)) ip_eth_mc_map(src->dst.ip4, dst); #if IS_ENABLED(CONFIG_IPV6) else if (src->proto == htons(ETH_P_IPV6)) ipv6_eth_mc_map(&src->dst.ip6, dst); #endif else eth_zero_addr(dst); } /** * batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners * @dev: a bridge slave whose bridge to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of multicast listeners residing * on foreign, non-mesh devices which we gave access to our mesh via * a bridge on top of the given soft interface, dev, in the given * mcast_list. 
* * Return: -ENOMEM on memory allocation error or the number of * items added to the mcast_list otherwise. */ static int batadv_mcast_mla_bridge_get(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list); struct br_ip_list *br_ip_entry, *tmp; u8 tvlv_flags = flags->tvlv_flags; struct batadv_hw_addr *new; u8 mcast_addr[ETH_ALEN]; int ret; /* we don't need to detect these devices/listeners, the IGMP/MLD * snooping code of the Linux bridge already does that for us */ ret = br_multicast_list_adjacent(dev, &bridge_mcast_list); if (ret < 0) goto out; list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) { if (br_ip_entry->addr.proto == htons(ETH_P_IP)) { if (tvlv_flags & BATADV_MCAST_WANT_ALL_IPV4) continue; if (tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES && ipv4_is_local_multicast(br_ip_entry->addr.dst.ip4)) continue; if (!(tvlv_flags & BATADV_MCAST_WANT_NO_RTR4) && !ipv4_is_local_multicast(br_ip_entry->addr.dst.ip4)) continue; } #if IS_ENABLED(CONFIG_IPV6) if (br_ip_entry->addr.proto == htons(ETH_P_IPV6)) { if (tvlv_flags & BATADV_MCAST_WANT_ALL_IPV6) continue; if (tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES && ipv6_addr_is_ll_all_nodes(&br_ip_entry->addr.dst.ip6)) continue; if (!(tvlv_flags & BATADV_MCAST_WANT_NO_RTR6) && IPV6_ADDR_MC_SCOPE(&br_ip_entry->addr.dst.ip6) > IPV6_ADDR_SCOPE_LINKLOCAL) continue; } #endif batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr); if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list)) continue; new = kmalloc(sizeof(*new), GFP_ATOMIC); if (!new) { ret = -ENOMEM; break; } ether_addr_copy(new->addr, mcast_addr); hlist_add_head(&new->list, mcast_list); } out: list_for_each_entry_safe(br_ip_entry, tmp, &bridge_mcast_list, list) { list_del(&br_ip_entry->list); kfree(br_ip_entry); } return ret; } /** * batadv_mcast_mla_list_free() - free a list of multicast addresses * @mcast_list: the list to free * * 
Removes and frees all items in the given mcast_list. */ static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list) { struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) { hlist_del(&mcast_entry->list); kfree(mcast_entry); } } /** * batadv_mcast_mla_tt_retract() - clean up multicast listener announcements * @bat_priv: the bat priv with all the soft interface information * @mcast_list: a list of addresses which should _not_ be removed * * Retracts the announcement of any multicast listener from the * translation table except the ones listed in the given mcast_list. * * If mcast_list is NULL then all are retracted. */ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, struct hlist_head *mcast_list) { struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list, list) { if (mcast_list && batadv_mcast_mla_is_duplicate(mcast_entry->addr, mcast_list)) continue; batadv_tt_local_remove(bat_priv, mcast_entry->addr, BATADV_NO_FLAGS, "mcast TT outdated", false); hlist_del(&mcast_entry->list); kfree(mcast_entry); } } /** * batadv_mcast_mla_tt_add() - add multicast listener announcements * @bat_priv: the bat priv with all the soft interface information * @mcast_list: a list of addresses which are going to get added * * Adds multicast listener announcements from the given mcast_list to the * translation table if they have not been added yet. 
*/ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, struct hlist_head *mcast_list) { struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; if (!mcast_list) return; hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) { if (batadv_mcast_mla_is_duplicate(mcast_entry->addr, &bat_priv->mcast.mla_list)) continue; if (!batadv_tt_local_add(bat_priv->soft_iface, mcast_entry->addr, BATADV_NO_FLAGS, BATADV_NULL_IFINDEX, BATADV_NO_MARK)) continue; hlist_del(&mcast_entry->list); hlist_add_head(&mcast_entry->list, &bat_priv->mcast.mla_list); } } /** * batadv_mcast_querier_log() - debug output regarding the querier status on * link * @bat_priv: the bat priv with all the soft interface information * @str_proto: a string for the querier protocol (e.g. "IGMP" or "MLD") * @old_state: the previous querier state on our link * @new_state: the new querier state on our link * * Outputs debug messages to the logging facility with log level 'mcast' * regarding changes to the querier status on the link which are relevant * to our multicast optimizations. * * Usually this is about whether a querier appeared or vanished in * our mesh or whether the querier is in the suboptimal position of being * behind our local bridge segment: Snooping switches will directly * forward listener reports to the querier, therefore batman-adv and * the bridge will potentially not see these listeners - the querier is * potentially shadowing listeners from us then. * * This is only interesting for nodes with a bridge on top of their * soft interface. 
*/
static void
batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto,
			 struct batadv_mcast_querier_state *old_state,
			 struct batadv_mcast_querier_state *new_state)
{
	struct net_device *soft_iface = bat_priv->soft_iface;
	bool was_present = old_state->exists;
	bool is_present = new_state->exists;

	if (is_present && !was_present)
		batadv_info(soft_iface, "%s Querier appeared\n", str_proto);
	else if (was_present && !is_present)
		batadv_info(soft_iface,
			    "%s Querier disappeared - multicast optimizations disabled\n",
			    str_proto);
	else if (!is_present && !bat_priv->mcast.mla_flags.bridged)
		batadv_info(soft_iface,
			    "No %s Querier present - multicast optimizations disabled\n",
			    str_proto);

	/* shadowing is only reported while a querier exists */
	if (!is_present)
		return;

	if (new_state->shadowing && (!old_state->shadowing || !was_present))
		batadv_dbg(BATADV_DBG_MCAST, bat_priv,
			   "%s Querier is behind our bridged segment: Might shadow listeners\n",
			   str_proto);
	else if (old_state->shadowing && !new_state->shadowing)
		batadv_dbg(BATADV_DBG_MCAST, bat_priv,
			   "%s Querier is not behind our bridged segment\n",
			   str_proto);
}

/**
 * batadv_mcast_bridge_log() - debug output for topology changes in bridged
 *  setups
 * @bat_priv: the bat priv with all the soft interface information
 * @new_flags: flags indicating the new multicast state
 *
 * If no bridges are ever used on this node, then this function does nothing.
 *
 * Otherwise this function outputs debug information to the 'mcast' log level
 * which might be relevant to our multicast optimizations.
 *
 * More precisely, it outputs information when a bridge interface is added or
 * removed from a soft interface. And when a bridge is present, it further
 * outputs information about the querier state which is relevant for the
 * multicast flags this node is going to set.
*/ static void batadv_mcast_bridge_log(struct batadv_priv *bat_priv, struct batadv_mcast_mla_flags *new_flags) { struct batadv_mcast_mla_flags *old_flags = &bat_priv->mcast.mla_flags; if (!old_flags->bridged && new_flags->bridged) batadv_dbg(BATADV_DBG_MCAST, bat_priv, "Bridge added: Setting Unsnoopables(U)-flag\n"); else if (old_flags->bridged && !new_flags->bridged) batadv_dbg(BATADV_DBG_MCAST, bat_priv, "Bridge removed: Unsetting Unsnoopables(U)-flag\n"); if (new_flags->bridged) { batadv_mcast_querier_log(bat_priv, "IGMP", &old_flags->querier_ipv4, &new_flags->querier_ipv4); batadv_mcast_querier_log(bat_priv, "MLD", &old_flags->querier_ipv6, &new_flags->querier_ipv6); } } /** * batadv_mcast_flags_log() - output debug information about mcast flag changes * @bat_priv: the bat priv with all the soft interface information * @flags: TVLV flags indicating the new multicast state * * Whenever the multicast TVLV flags this node announces change, this function * should be used to notify userspace about the change. */ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags) { bool old_enabled = bat_priv->mcast.mla_flags.enabled; u8 old_flags = bat_priv->mcast.mla_flags.tvlv_flags; char str_old_flags[] = "[.... . ]"; sprintf(str_old_flags, "[%c%c%c%s%s]", (old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', (old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', (old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.', !(old_flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ", !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". "); batadv_dbg(BATADV_DBG_MCAST, bat_priv, "Changing multicast flags from '%s' to '[%c%c%c%s%s]'\n", old_enabled ? str_old_flags : "<undefined>", (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.', !(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ", !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". 
"); } /** * batadv_mcast_mla_flags_update() - update multicast flags * @bat_priv: the bat priv with all the soft interface information * @flags: flags indicating the new multicast state * * Updates the own multicast tvlv with our current multicast related settings, * capabilities and inabilities. */ static void batadv_mcast_mla_flags_update(struct batadv_priv *bat_priv, struct batadv_mcast_mla_flags *flags) { struct batadv_tvlv_mcast_data mcast_data; if (!memcmp(flags, &bat_priv->mcast.mla_flags, sizeof(*flags))) return; batadv_mcast_bridge_log(bat_priv, flags); batadv_mcast_flags_log(bat_priv, flags->tvlv_flags); mcast_data.flags = flags->tvlv_flags; memset(mcast_data.reserved, 0, sizeof(mcast_data.reserved)); batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 2, &mcast_data, sizeof(mcast_data)); bat_priv->mcast.mla_flags = *flags; } /** * __batadv_mcast_mla_update() - update the own MLAs * @bat_priv: the bat priv with all the soft interface information * * Updates the own multicast listener announcements in the translation * table as well as the own, announced multicast tvlv container. * * Note that non-conflicting reads and writes to bat_priv->mcast.mla_list * in batadv_mcast_mla_tt_retract() and batadv_mcast_mla_tt_add() are * ensured by the non-parallel execution of the worker this function * belongs to. 
*/ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv) { struct net_device *soft_iface = bat_priv->soft_iface; struct hlist_head mcast_list = HLIST_HEAD_INIT; struct batadv_mcast_mla_flags flags; int ret; flags = batadv_mcast_mla_flags_get(bat_priv); ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list, &flags); if (ret < 0) goto out; ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list, &flags); if (ret < 0) goto out; spin_lock(&bat_priv->mcast.mla_lock); batadv_mcast_mla_tt_retract(bat_priv, &mcast_list); batadv_mcast_mla_tt_add(bat_priv, &mcast_list); batadv_mcast_mla_flags_update(bat_priv, &flags); spin_unlock(&bat_priv->mcast.mla_lock); out: batadv_mcast_mla_list_free(&mcast_list); } /** * batadv_mcast_mla_update() - update the own MLAs * @work: kernel work struct * * Updates the own multicast listener announcements in the translation * table as well as the own, announced multicast tvlv container. * * In the end, reschedules the work timer. */ static void batadv_mcast_mla_update(struct work_struct *work) { struct delayed_work *delayed_work; struct batadv_priv_mcast *priv_mcast; struct batadv_priv *bat_priv; delayed_work = to_delayed_work(work); priv_mcast = container_of(delayed_work, struct batadv_priv_mcast, work); bat_priv = container_of(priv_mcast, struct batadv_priv, mcast); __batadv_mcast_mla_update(bat_priv); batadv_mcast_start_timer(bat_priv); } /** * batadv_mcast_is_report_ipv4() - check for IGMP reports * @skb: the ethernet frame destined for the mesh * * This call might reallocate skb data. * * Checks whether the given frame is a valid IGMP report. * * Return: If so then true, otherwise false. 
*/
static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb)
{
	if (ip_mc_check_igmp(skb) < 0)
		return false;

	switch (igmp_hdr(skb)->type) {
	case IGMP_HOST_MEMBERSHIP_REPORT:
	case IGMPV2_HOST_MEMBERSHIP_REPORT:
	case IGMPV3_HOST_MEMBERSHIP_REPORT:
		return true;
	default:
		return false;
	}
}

/**
 * batadv_mcast_forw_mode_check_ipv4() - check for optimized forwarding
 *  potential
 * @bat_priv: the bat priv with all the soft interface information
 * @skb: the IPv4 packet to check
 * @is_unsnoopable: set to true if the destination is link-local multicast
 * @is_routable: set to ETH_P_IP if the destination is not link-local
 *  multicast
 *
 * Checks whether the given IPv4 packet has the potential to be forwarded with a
 * mode more optimal than classic flooding.
 *
 * Return: If so then 0. Otherwise -EINVAL for IGMP reports, or -ENOMEM if
 * the headers could not be pulled.
 */
static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
					     struct sk_buff *skb,
					     bool *is_unsnoopable,
					     int *is_routable)
{
	unsigned int min_len = sizeof(struct ethhdr) + sizeof(struct iphdr);
	__be32 daddr;

	/* We might fail due to out-of-memory -> drop it */
	if (!pskb_may_pull(skb, min_len))
		return -ENOMEM;

	if (batadv_mcast_is_report_ipv4(skb))
		return -EINVAL;

	/* read the header only now: the report check may reallocate skb data */
	daddr = ip_hdr(skb)->daddr;

	/* link-local multicast listeners behind a bridge are
	 * not snoopable (see RFC4541, section 2.1.2.2)
	 */
	if (ipv4_is_local_multicast(daddr)) {
		*is_unsnoopable = true;
		return 0;
	}

	*is_routable = ETH_P_IP;
	return 0;
}

/**
 * batadv_mcast_is_report_ipv6() - check for MLD reports
 * @skb: the ethernet frame destined for the mesh
 *
 * This call might reallocate skb data.
 *
 * Checks whether the given frame is a valid MLD report.
 *
 * Return: If so then true, otherwise false.
*/
static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb)
{
	if (ipv6_mc_check_mld(skb) < 0)
		return false;

	switch (icmp6_hdr(skb)->icmp6_type) {
	case ICMPV6_MGM_REPORT:
	case ICMPV6_MLD2_REPORT:
		return true;
	default:
		return false;
	}
}

/**
 * batadv_mcast_forw_mode_check_ipv6() - check for optimized forwarding
 *  potential
 * @bat_priv: the bat priv with all the soft interface information
 * @skb: the IPv6 packet to check
 * @is_unsnoopable: set to true if the destination is the link-local
 *  all-nodes address
 * @is_routable: set to ETH_P_IPV6 if the destination scope is larger than
 *  link-local
 *
 * Checks whether the given IPv6 packet has the potential to be forwarded with a
 * mode more optimal than classic flooding.
 *
 * Return: If so then 0. Otherwise -EINVAL (MLD report or a destination scope
 * smaller than link-local), or -ENOMEM if the headers could not be pulled.
 */
static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
					     struct sk_buff *skb,
					     bool *is_unsnoopable,
					     int *is_routable)
{
	unsigned int min_len = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
	struct in6_addr *daddr;
	int scope;

	/* We might fail due to out-of-memory -> drop it */
	if (!pskb_may_pull(skb, min_len))
		return -ENOMEM;

	if (batadv_mcast_is_report_ipv6(skb))
		return -EINVAL;

	/* read the header only now: the report check may reallocate skb data */
	daddr = &ipv6_hdr(skb)->daddr;
	scope = IPV6_ADDR_MC_SCOPE(daddr);

	if (scope < IPV6_ADDR_SCOPE_LINKLOCAL)
		return -EINVAL;

	/* link-local-all-nodes multicast listeners behind a bridge are
	 * not snoopable (see RFC4541, section 3, paragraph 3)
	 */
	if (ipv6_addr_is_ll_all_nodes(daddr))
		*is_unsnoopable = true;
	else if (scope > IPV6_ADDR_SCOPE_LINKLOCAL)
		*is_routable = ETH_P_IPV6;

	return 0;
}

/**
 * batadv_mcast_forw_mode_check() - check for optimized forwarding potential
 * @bat_priv: the bat priv with all the soft interface information
 * @skb: the multicast frame to check
 * @is_unsnoopable: stores whether the destination is snoopable
 * @is_routable: stores whether the destination is routable
 *
 * Checks whether the given multicast ethernet frame has the potential to be
 * forwarded with a mode more optimal than classic flooding.
 *
 * Return: If so then 0.
Otherwise -EINVAL is or -ENOMEM if we are out of memory */ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, struct sk_buff *skb, bool *is_unsnoopable, int *is_routable) { struct ethhdr *ethhdr = eth_hdr(skb); if (!atomic_read(&bat_priv->multicast_mode)) return -EINVAL; switch (ntohs(ethhdr->h_proto)) { case ETH_P_IP: return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb, is_unsnoopable, is_routable); case ETH_P_IPV6: if (!IS_ENABLED(CONFIG_IPV6)) return -EINVAL; return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb, is_unsnoopable, is_routable); default: return -EINVAL; } } /** * batadv_mcast_forw_want_all_ip_count() - count nodes with unspecific mcast * interest * @bat_priv: the bat priv with all the soft interface information * @ethhdr: ethernet header of a packet * * Return: the number of nodes which want all IPv4 multicast traffic if the * given ethhdr is from an IPv4 packet or the number of nodes which want all * IPv6 traffic if it matches an IPv6 packet. */ static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv, struct ethhdr *ethhdr) { switch (ntohs(ethhdr->h_proto)) { case ETH_P_IP: return atomic_read(&bat_priv->mcast.num_want_all_ipv4); case ETH_P_IPV6: return atomic_read(&bat_priv->mcast.num_want_all_ipv6); default: /* we shouldn't be here... */ return 0; } } /** * batadv_mcast_forw_rtr_count() - count nodes with a multicast router * @bat_priv: the bat priv with all the soft interface information * @protocol: the ethernet protocol type to count multicast routers for * * Return: the number of nodes which want all routable IPv4 multicast traffic * if the protocol is ETH_P_IP or the number of nodes which want all routable * IPv6 traffic if the protocol is ETH_P_IPV6. Otherwise returns 0. 
*/ static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv, int protocol) { switch (protocol) { case ETH_P_IP: return atomic_read(&bat_priv->mcast.num_want_all_rtr4); case ETH_P_IPV6: return atomic_read(&bat_priv->mcast.num_want_all_rtr6); default: return 0; } } /** * batadv_mcast_forw_mode() - check on how to forward a multicast packet * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to check * @is_routable: stores whether the destination is routable * * Return: The forwarding mode as enum batadv_forw_mode. */ enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, int *is_routable) { int ret, tt_count, ip_count, unsnoop_count, total_count; bool is_unsnoopable = false; struct ethhdr *ethhdr; int rtr_count = 0; ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable, is_routable); if (ret == -ENOMEM) return BATADV_FORW_NONE; else if (ret < 0) return BATADV_FORW_BCAST; ethhdr = eth_hdr(skb); tt_count = batadv_tt_global_hash_count(bat_priv, ethhdr->h_dest, BATADV_NO_FLAGS); ip_count = batadv_mcast_forw_want_all_ip_count(bat_priv, ethhdr); unsnoop_count = !is_unsnoopable ? 0 : atomic_read(&bat_priv->mcast.num_want_all_unsnoopables); rtr_count = batadv_mcast_forw_rtr_count(bat_priv, *is_routable); total_count = tt_count + ip_count + unsnoop_count + rtr_count; if (!total_count) return BATADV_FORW_NONE; else if (unsnoop_count) return BATADV_FORW_BCAST; if (total_count <= atomic_read(&bat_priv->multicast_fanout)) return BATADV_FORW_UCASTS; return BATADV_FORW_BCAST; } /** * batadv_mcast_forw_send_orig() - send a multicast packet to an originator * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to send * @vid: the vlan identifier * @orig_node: the originator to send the packet to * * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. 
*/
static int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
				       struct sk_buff *skb,
				       unsigned short vid,
				       struct batadv_orig_node *orig_node)
{
	if (!batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid))
		return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST,
					       0, orig_node, vid);

	/* Avoid sending multicast-in-unicast packets to other BLA
	 * gateways - they already got the frame from the LAN side
	 * we share with them.
	 * TODO: Refactor to take BLA into account earlier, to avoid
	 * reducing the mcast_fanout count.
	 */
	dev_kfree_skb(skb);
	return NET_XMIT_SUCCESS;
}

/**
 * batadv_mcast_forw_tt() - forwards a packet to multicast listeners
 * @bat_priv: the bat priv with all the soft interface information
 * @skb: the multicast packet to transmit
 * @vid: the vlan identifier
 *
 * Sends copies of a frame with multicast destination to any multicast
 * listener registered in the translation table. A transmission is performed
 * via a batman-adv unicast packet for each such destination node.
 *
 * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS
 * otherwise.
*/
static int
batadv_mcast_forw_tt(struct batadv_priv *bat_priv, struct sk_buff *skb,
		     unsigned short vid)
{
	struct batadv_tt_orig_list_entry *listener;
	struct batadv_tt_global_entry *tt_entry;
	int status = NET_XMIT_SUCCESS;
	struct sk_buff *copy;

	tt_entry = batadv_tt_global_hash_find(bat_priv, eth_hdr(skb)->h_dest,
					      vid);
	if (!tt_entry)
		return status;

	rcu_read_lock();
	hlist_for_each_entry_rcu(listener, &tt_entry->orig_list, list) {
		/* every listener gets its own copy of the frame */
		copy = skb_copy(skb, GFP_ATOMIC);
		if (!copy) {
			status = NET_XMIT_DROP;
			break;
		}

		batadv_mcast_forw_send_orig(bat_priv, copy, vid,
					    listener->orig_node);
	}
	rcu_read_unlock();

	batadv_tt_global_entry_put(tt_entry);

	return status;
}

/**
 * batadv_mcast_forw_want_all_ipv4() - forward to nodes with want-all-ipv4
 * @bat_priv: the bat priv with all the soft interface information
 * @skb: the multicast packet to transmit
 * @vid: the vlan identifier
 *
 * Sends copies of a frame with multicast destination to any node with a
 * BATADV_MCAST_WANT_ALL_IPV4 flag set. A transmission is performed via a
 * batman-adv unicast packet for each such destination node.
 *
 * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS
 * otherwise.
*/ static int batadv_mcast_forw_want_all_ipv4(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; int ret = NET_XMIT_SUCCESS; struct sk_buff *newskb; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, &bat_priv->mcast.want_all_ipv4_list, mcast_want_all_ipv4_node) { newskb = skb_copy(skb, GFP_ATOMIC); if (!newskb) { ret = NET_XMIT_DROP; break; } batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; } /** * batadv_mcast_forw_want_all_ipv6() - forward to nodes with want-all-ipv6 * @bat_priv: the bat priv with all the soft interface information * @skb: The multicast packet to transmit * @vid: the vlan identifier * * Sends copies of a frame with multicast destination to any node with a * BATADV_MCAST_WANT_ALL_IPV6 flag set. A transmission is performed via a * batman-adv unicast packet for each such destination node. * * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS * otherwise. */ static int batadv_mcast_forw_want_all_ipv6(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; int ret = NET_XMIT_SUCCESS; struct sk_buff *newskb; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, &bat_priv->mcast.want_all_ipv6_list, mcast_want_all_ipv6_node) { newskb = skb_copy(skb, GFP_ATOMIC); if (!newskb) { ret = NET_XMIT_DROP; break; } batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; } /** * batadv_mcast_forw_want_all() - forward packet to nodes in a want-all list * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * * Sends copies of a frame with multicast destination to any node with a * BATADV_MCAST_WANT_ALL_IPV4 or BATADV_MCAST_WANT_ALL_IPV6 flag set. A * transmission is performed via a batman-adv unicast packet for each such * destination node. 
* * Return: NET_XMIT_DROP on memory allocation failure or if the protocol family * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise. */ static int batadv_mcast_forw_want_all(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { switch (ntohs(eth_hdr(skb)->h_proto)) { case ETH_P_IP: return batadv_mcast_forw_want_all_ipv4(bat_priv, skb, vid); case ETH_P_IPV6: return batadv_mcast_forw_want_all_ipv6(bat_priv, skb, vid); default: /* we shouldn't be here... */ return NET_XMIT_DROP; } } /** * batadv_mcast_forw_want_all_rtr4() - forward to nodes with want-all-rtr4 * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * * Sends copies of a frame with multicast destination to any node with a * BATADV_MCAST_WANT_NO_RTR4 flag unset. A transmission is performed via a * batman-adv unicast packet for each such destination node. * * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS * otherwise. */ static int batadv_mcast_forw_want_all_rtr4(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; int ret = NET_XMIT_SUCCESS; struct sk_buff *newskb; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, &bat_priv->mcast.want_all_rtr4_list, mcast_want_all_rtr4_node) { newskb = skb_copy(skb, GFP_ATOMIC); if (!newskb) { ret = NET_XMIT_DROP; break; } batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; } /** * batadv_mcast_forw_want_all_rtr6() - forward to nodes with want-all-rtr6 * @bat_priv: the bat priv with all the soft interface information * @skb: The multicast packet to transmit * @vid: the vlan identifier * * Sends copies of a frame with multicast destination to any node with a * BATADV_MCAST_WANT_NO_RTR6 flag unset. A transmission is performed via a * batman-adv unicast packet for each such destination node. 
* * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS * otherwise. */ static int batadv_mcast_forw_want_all_rtr6(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; int ret = NET_XMIT_SUCCESS; struct sk_buff *newskb; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, &bat_priv->mcast.want_all_rtr6_list, mcast_want_all_rtr6_node) { newskb = skb_copy(skb, GFP_ATOMIC); if (!newskb) { ret = NET_XMIT_DROP; break; } batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; } /** * batadv_mcast_forw_want_rtr() - forward packet to nodes in a want-all-rtr list * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * * Sends copies of a frame with multicast destination to any node with a * BATADV_MCAST_WANT_NO_RTR4 or BATADV_MCAST_WANT_NO_RTR6 flag unset. A * transmission is performed via a batman-adv unicast packet for each such * destination node. * * Return: NET_XMIT_DROP on memory allocation failure or if the protocol family * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise. */ static int batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { switch (ntohs(eth_hdr(skb)->h_proto)) { case ETH_P_IP: return batadv_mcast_forw_want_all_rtr4(bat_priv, skb, vid); case ETH_P_IPV6: return batadv_mcast_forw_want_all_rtr6(bat_priv, skb, vid); default: /* we shouldn't be here... 
*/ return NET_XMIT_DROP; } } /** * batadv_mcast_forw_send() - send packet to any detected multicast recipient * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @is_routable: stores whether the destination is routable * * Sends copies of a frame with multicast destination to any node that signaled * interest in it, that is either via the translation table or the according * want-all flags. A transmission is performed via a batman-adv unicast packet * for each such destination node. * * The given skb is consumed/freed. * * Return: NET_XMIT_DROP on memory allocation failure or if the protocol family * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise. */ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid, int is_routable) { int ret; ret = batadv_mcast_forw_tt(bat_priv, skb, vid); if (ret != NET_XMIT_SUCCESS) { kfree_skb(skb); return ret; } ret = batadv_mcast_forw_want_all(bat_priv, skb, vid); if (ret != NET_XMIT_SUCCESS) { kfree_skb(skb); return ret; } if (!is_routable) goto skip_mc_router; ret = batadv_mcast_forw_want_rtr(bat_priv, skb, vid); if (ret != NET_XMIT_SUCCESS) { kfree_skb(skb); return ret; } skip_mc_router: consume_skb(skb); return ret; } /** * batadv_mcast_want_unsnoop_update() - update unsnoop counter and list * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * * If the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag of this originator, * orig, has toggled then this method updates the counter and the list * accordingly. * * Caller needs to hold orig->mcast_handler_lock. 
*/
static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv,
					     struct batadv_orig_node *orig,
					     u8 mcast_flags)
{
	struct hlist_head *list = &bat_priv->mcast.want_all_unsnoopables_list;
	struct hlist_node *entry = &orig->mcast_want_all_unsnoopables_node;
	bool wanted, wants;

	lockdep_assert_held(&orig->mcast_handler_lock);

	wanted = orig->mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES;
	wants = mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES;

	/* flag did not toggle: counter and list stay as they are */
	if (wanted == wants)
		return;

	if (wants) {
		/* switched from flag unset to set */
		atomic_inc(&bat_priv->mcast.num_want_all_unsnoopables);

		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
		/* flag checks above + mcast_handler_lock prevents this */
		WARN_ON(!hlist_unhashed(entry));

		hlist_add_head_rcu(entry, list);
		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
	} else {
		/* switched from flag set to unset */
		atomic_dec(&bat_priv->mcast.num_want_all_unsnoopables);

		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
		/* flag checks above + mcast_handler_lock prevents this */
		WARN_ON(hlist_unhashed(entry));

		hlist_del_init_rcu(entry);
		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
	}
}

/**
 * batadv_mcast_want_ipv4_update() - update want-all-ipv4 counter and list
 * @bat_priv: the bat priv with all the soft interface information
 * @orig: the orig_node which multicast state might have changed of
 * @mcast_flags: flags indicating the new multicast state
 *
 * If the BATADV_MCAST_WANT_ALL_IPV4 flag of this originator, orig, has
 * toggled then this method updates the counter and the list accordingly.
 *
 * Caller needs to hold orig->mcast_handler_lock.
*/
static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv,
					  struct batadv_orig_node *orig,
					  u8 mcast_flags)
{
	struct hlist_head *list = &bat_priv->mcast.want_all_ipv4_list;
	struct hlist_node *entry = &orig->mcast_want_all_ipv4_node;
	bool wanted, wants;

	lockdep_assert_held(&orig->mcast_handler_lock);

	wanted = orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV4;
	wants = mcast_flags & BATADV_MCAST_WANT_ALL_IPV4;

	/* flag did not toggle: counter and list stay as they are */
	if (wanted == wants)
		return;

	if (wants) {
		/* switched from flag unset to set */
		atomic_inc(&bat_priv->mcast.num_want_all_ipv4);

		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
		/* flag checks above + mcast_handler_lock prevents this */
		WARN_ON(!hlist_unhashed(entry));

		hlist_add_head_rcu(entry, list);
		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
	} else {
		/* switched from flag set to unset */
		atomic_dec(&bat_priv->mcast.num_want_all_ipv4);

		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
		/* flag checks above + mcast_handler_lock prevents this */
		WARN_ON(hlist_unhashed(entry));

		hlist_del_init_rcu(entry);
		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
	}
}

/**
 * batadv_mcast_want_ipv6_update() - update want-all-ipv6 counter and list
 * @bat_priv: the bat priv with all the soft interface information
 * @orig: the orig_node which multicast state might have changed of
 * @mcast_flags: flags indicating the new multicast state
 *
 * If the BATADV_MCAST_WANT_ALL_IPV6 flag of this originator, orig, has
 * toggled then this method updates the counter and the list accordingly.
 *
 * Caller needs to hold orig->mcast_handler_lock.
*/
static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv,
					  struct batadv_orig_node *orig,
					  u8 mcast_flags)
{
	struct hlist_head *list = &bat_priv->mcast.want_all_ipv6_list;
	struct hlist_node *entry = &orig->mcast_want_all_ipv6_node;
	bool wanted, wants;

	lockdep_assert_held(&orig->mcast_handler_lock);

	wanted = orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV6;
	wants = mcast_flags & BATADV_MCAST_WANT_ALL_IPV6;

	/* flag did not toggle: counter and list stay as they are */
	if (wanted == wants)
		return;

	if (wants) {
		/* switched from flag unset to set */
		atomic_inc(&bat_priv->mcast.num_want_all_ipv6);

		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
		/* flag checks above + mcast_handler_lock prevents this */
		WARN_ON(!hlist_unhashed(entry));

		hlist_add_head_rcu(entry, list);
		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
	} else {
		/* switched from flag set to unset */
		atomic_dec(&bat_priv->mcast.num_want_all_ipv6);

		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
		/* flag checks above + mcast_handler_lock prevents this */
		WARN_ON(hlist_unhashed(entry));

		hlist_del_init_rcu(entry);
		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
	}
}

/**
 * batadv_mcast_want_rtr4_update() - update want-all-rtr4 counter and list
 * @bat_priv: the bat priv with all the soft interface information
 * @orig: the orig_node which multicast state might have changed of
 * @mcast_flags: flags indicating the new multicast state
 *
 * If the BATADV_MCAST_WANT_NO_RTR4 flag of this originator, orig, has
 * toggled then this method updates the counter and the list accordingly.
 *
 * Caller needs to hold orig->mcast_handler_lock.
*/
static void batadv_mcast_want_rtr4_update(struct batadv_priv *bat_priv,
					  struct batadv_orig_node *orig,
					  u8 mcast_flags)
{
	struct hlist_head *list = &bat_priv->mcast.want_all_rtr4_list;
	struct hlist_node *entry = &orig->mcast_want_all_rtr4_node;
	bool wanted_rtr, wants_rtr;

	lockdep_assert_held(&orig->mcast_handler_lock);

	/* the flag is a "no router" flag: interest means the flag is unset */
	wanted_rtr = !(orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR4);
	wants_rtr = !(mcast_flags & BATADV_MCAST_WANT_NO_RTR4);

	/* flag did not toggle: counter and list stay as they are */
	if (wanted_rtr == wants_rtr)
		return;

	if (wants_rtr) {
		/* switched from flag set to unset */
		atomic_inc(&bat_priv->mcast.num_want_all_rtr4);

		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
		/* flag checks above + mcast_handler_lock prevents this */
		WARN_ON(!hlist_unhashed(entry));

		hlist_add_head_rcu(entry, list);
		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
	} else {
		/* switched from flag unset to set */
		atomic_dec(&bat_priv->mcast.num_want_all_rtr4);

		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
		/* flag checks above + mcast_handler_lock prevents this */
		WARN_ON(hlist_unhashed(entry));

		hlist_del_init_rcu(entry);
		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
	}
}

/**
 * batadv_mcast_want_rtr6_update() - update want-all-rtr6 counter and list
 * @bat_priv: the bat priv with all the soft interface information
 * @orig: the orig_node which multicast state might have changed of
 * @mcast_flags: flags indicating the new multicast state
 *
 * If the BATADV_MCAST_WANT_NO_RTR6 flag of this originator, orig, has
 * toggled then this method updates the counter and the list accordingly.
 *
 * Caller needs to hold orig->mcast_handler_lock.
*/ static void batadv_mcast_want_rtr6_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, u8 mcast_flags) { struct hlist_node *node = &orig->mcast_want_all_rtr6_node; struct hlist_head *head = &bat_priv->mcast.want_all_rtr6_list; lockdep_assert_held(&orig->mcast_handler_lock); /* switched from flag set to unset */ if (!(mcast_flags & BATADV_MCAST_WANT_NO_RTR6) && orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR6) { atomic_inc(&bat_priv->mcast.num_want_all_rtr6); spin_lock_bh(&bat_priv->mcast.want_lists_lock); /* flag checks above + mcast_handler_lock prevents this */ WARN_ON(!hlist_unhashed(node)); hlist_add_head_rcu(node, head); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); /* switched from flag unset to set */ } else if (mcast_flags & BATADV_MCAST_WANT_NO_RTR6 && !(orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR6)) { atomic_dec(&bat_priv->mcast.num_want_all_rtr6); spin_lock_bh(&bat_priv->mcast.want_lists_lock); /* flag checks above + mcast_handler_lock prevents this */ WARN_ON(hlist_unhashed(node)); hlist_del_init_rcu(node); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); } } /** * batadv_mcast_tvlv_flags_get() - get multicast flags from an OGM TVLV * @enabled: whether the originator has multicast TVLV support enabled * @tvlv_value: tvlv buffer containing the multicast flags * @tvlv_value_len: tvlv buffer length * * Return: multicast flags for the given tvlv buffer */ static u8 batadv_mcast_tvlv_flags_get(bool enabled, void *tvlv_value, u16 tvlv_value_len) { u8 mcast_flags = BATADV_NO_FLAGS; if (enabled && tvlv_value && tvlv_value_len >= sizeof(mcast_flags)) mcast_flags = *(u8 *)tvlv_value; if (!enabled) { mcast_flags |= BATADV_MCAST_WANT_ALL_IPV4; mcast_flags |= BATADV_MCAST_WANT_ALL_IPV6; } /* remove redundant flags to avoid sending duplicate packets later */ if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV4) mcast_flags |= BATADV_MCAST_WANT_NO_RTR4; if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV6) mcast_flags |= BATADV_MCAST_WANT_NO_RTR6; return 
mcast_flags; } /** * batadv_mcast_tvlv_ogm_handler() - process incoming multicast tvlv container * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the multicast data * @tvlv_value_len: tvlv buffer length */ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, u8 flags, void *tvlv_value, u16 tvlv_value_len) { bool orig_mcast_enabled = !(flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND); u8 mcast_flags; mcast_flags = batadv_mcast_tvlv_flags_get(orig_mcast_enabled, tvlv_value, tvlv_value_len); spin_lock_bh(&orig->mcast_handler_lock); if (orig_mcast_enabled && !test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities)) { set_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities); } else if (!orig_mcast_enabled && test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities)) { clear_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities); } set_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capa_initialized); batadv_mcast_want_unsnoop_update(bat_priv, orig, mcast_flags); batadv_mcast_want_ipv4_update(bat_priv, orig, mcast_flags); batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags); batadv_mcast_want_rtr4_update(bat_priv, orig, mcast_flags); batadv_mcast_want_rtr6_update(bat_priv, orig, mcast_flags); orig->mcast_flags = mcast_flags; spin_unlock_bh(&orig->mcast_handler_lock); } /** * batadv_mcast_init() - initialize the multicast optimizations structures * @bat_priv: the bat priv with all the soft interface information */ void batadv_mcast_init(struct batadv_priv *bat_priv) { batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler, NULL, NULL, BATADV_TVLV_MCAST, 2, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update); batadv_mcast_start_timer(bat_priv); } /** * batadv_mcast_mesh_info_put() - put multicast info into a 
netlink message
 * @msg: buffer for the message
 * @bat_priv: the bat priv with all the soft interface information
 *
 * Adds the current TVLV multicast flags and the private (bridge/querier)
 * flags as two u32 attributes.
 *
 * Return: 0 on success, -EMSGSIZE if an attribute could not be added.
 */
int batadv_mcast_mesh_info_put(struct sk_buff *msg,
			       struct batadv_priv *bat_priv)
{
	u32 tvlv_flags = bat_priv->mcast.mla_flags.tvlv_flags;
	u32 priv_flags = BATADV_NO_FLAGS;

	/* querier state is only reported when a bridge is present */
	if (bat_priv->mcast.mla_flags.bridged) {
		priv_flags |= BATADV_MCAST_FLAGS_BRIDGED;

		if (bat_priv->mcast.mla_flags.querier_ipv4.exists)
			priv_flags |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS;
		if (bat_priv->mcast.mla_flags.querier_ipv6.exists)
			priv_flags |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS;
		if (bat_priv->mcast.mla_flags.querier_ipv4.shadowing)
			priv_flags |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING;
		if (bat_priv->mcast.mla_flags.querier_ipv6.shadowing)
			priv_flags |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING;
	}

	if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, tvlv_flags))
		return -EMSGSIZE;

	if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS_PRIV, priv_flags))
		return -EMSGSIZE;

	return 0;
}

/**
 * batadv_mcast_flags_dump_entry() - dump one entry of the multicast flags table
 *  to a netlink socket
 * @msg: buffer for the message
 * @portid: netlink port
 * @cb: Control block containing additional options
 * @orig_node: originator to dump the multicast flags of
 *
 * Return: 0 or error code.
*/
static int
batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid,
			      struct netlink_callback *cb,
			      struct batadv_orig_node *orig_node)
{
	void *hdr;

	hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
			  &batadv_netlink_family, NLM_F_MULTI,
			  BATADV_CMD_GET_MCAST_FLAGS);
	if (!hdr)
		return -ENOBUFS;

	genl_dump_check_consistent(cb, hdr);

	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
		    orig_node->orig))
		goto nla_put_failure;

	/* the flags attribute is only emitted for multicast capable nodes */
	if (test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig_node->capabilities) &&
	    nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, orig_node->mcast_flags))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);
	return 0;

nla_put_failure:
	/* drop the partially built message again */
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

/**
 * batadv_mcast_flags_dump_bucket() - dump one bucket of the multicast flags
 *  table to a netlink socket
 * @msg: buffer for the message
 * @portid: netlink port
 * @cb: Control block containing additional options
 * @hash: hash to dump
 * @bucket: bucket index to dump
 * @idx_skip: How many entries to skip
 *
 * Return: 0 or error code.
*/
static int
batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid,
			       struct netlink_callback *cb,
			       struct batadv_hashtable *hash,
			       unsigned int bucket, long *idx_skip)
{
	struct batadv_orig_node *orig_node;
	long pos = 0;
	int ret = 0;

	spin_lock_bh(&hash->list_locks[bucket]);
	cb->seq = (atomic_read(&hash->generation) << 1) | 1;

	hlist_for_each_entry(orig_node, &hash->table[bucket], hash_entry) {
		/* originators without initialized mcast state are not counted */
		if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
			      &orig_node->capa_initialized))
			continue;

		/* entries before *idx_skip are counted but not dumped again */
		if (pos >= *idx_skip &&
		    batadv_mcast_flags_dump_entry(msg, portid, cb,
						  orig_node)) {
			/* remember where to resume */
			*idx_skip = pos;
			ret = -EMSGSIZE;
			break;
		}

		pos++;
	}

	spin_unlock_bh(&hash->list_locks[bucket]);

	return ret;
}

/**
 * __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
 * @msg: buffer for the message
 * @portid: netlink port
 * @cb: Control block containing additional options
 * @bat_priv: the bat priv with all the soft interface information
 * @bucket: current bucket to dump
 * @idx: index in current bucket to the next entry to dump
 *
 * Walks the originator hash starting at *@bucket / *@idx and stops at the
 * first bucket that could not be dumped completely. The resume position is
 * written back through @bucket and @idx.
 *
 * Return: msg->len after dumping.
 */
static int
__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid,
			  struct netlink_callback *cb,
			  struct batadv_priv *bat_priv, long *bucket, long *idx)
{
	struct batadv_hashtable *hash = bat_priv->orig_hash;
	long cur_bucket = *bucket;
	long cur_idx = *idx;

	/* the in-bucket index restarts at 0 for every following bucket */
	for (; cur_bucket < hash->size; cur_bucket++, cur_idx = 0) {
		if (batadv_mcast_flags_dump_bucket(msg, portid, cb, hash,
						   cur_bucket, &cur_idx))
			break;
	}

	*bucket = cur_bucket;
	*idx = cur_idx;

	return msg->len;
}

/**
 * batadv_mcast_netlink_get_primary() - get primary interface from netlink
 *  callback
 * @cb: netlink callback structure
 * @primary_if: the primary interface pointer to return the result in
 *
 * Return: 0 or error code.
*/
static int
batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
				 struct batadv_hard_iface **primary_if)
{
	struct batadv_hard_iface *hard_iface = NULL;
	struct net *net = sock_net(cb->skb->sk);
	struct net_device *soft_iface;
	struct batadv_priv *bat_priv;
	int ifindex;
	int ret = 0;

	ifindex = batadv_netlink_get_ifindex(cb->nlh,
					     BATADV_ATTR_MESH_IFINDEX);
	if (!ifindex)
		return -EINVAL;

	soft_iface = dev_get_by_index(net, ifindex);
	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
		ret = -ENODEV;
	} else {
		bat_priv = netdev_priv(soft_iface);

		hard_iface = batadv_primary_if_get_selected(bat_priv);
		if (!hard_iface ||
		    hard_iface->if_status != BATADV_IF_ACTIVE)
			ret = -ENOENT;
	}

	dev_put(soft_iface);

	/* the hard_iface reference is handed to the caller only on success
	 * and only if the caller supplied a place to store it
	 */
	if (ret || !primary_if)
		batadv_hardif_put(hard_iface);
	else
		*primary_if = hard_iface;

	return ret;
}

/**
 * batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
 * @msg: buffer for the message
 * @cb: callback structure containing arguments
 *
 * Return: message length.
*/ int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if = NULL; int portid = NETLINK_CB(cb->skb).portid; struct batadv_priv *bat_priv; long *bucket = &cb->args[0]; long *idx = &cb->args[1]; int ret; ret = batadv_mcast_netlink_get_primary(cb, &primary_if); if (ret) return ret; bat_priv = netdev_priv(primary_if->soft_iface); ret = __batadv_mcast_flags_dump(msg, portid, cb, bat_priv, bucket, idx); batadv_hardif_put(primary_if); return ret; } /** * batadv_mcast_free() - free the multicast optimizations structures * @bat_priv: the bat priv with all the soft interface information */ void batadv_mcast_free(struct batadv_priv *bat_priv) { cancel_delayed_work_sync(&bat_priv->mcast.work); batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2); batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2); /* safely calling outside of worker, as worker was canceled above */ batadv_mcast_mla_tt_retract(bat_priv, NULL); } /** * batadv_mcast_purge_orig() - reset originator global mcast state modifications * @orig: the originator which is going to get purged */ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) { struct batadv_priv *bat_priv = orig->bat_priv; spin_lock_bh(&orig->mcast_handler_lock); batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_ipv4_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_ipv6_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_rtr4_update(bat_priv, orig, BATADV_MCAST_WANT_NO_RTR4); batadv_mcast_want_rtr6_update(bat_priv, orig, BATADV_MCAST_WANT_NO_RTR6); spin_unlock_bh(&orig->mcast_handler_lock); } |
8 8 8 8 1 1 8 8 8 8 8 7 8 7 1 1 4 4 3 1 4 4 1 1 3 3 4 3 4 2 1 2 2 2 1 1 1 1 1 1 2 2 2 2 2 9 9 9 4 3 1 4 4 4 || // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 */ /* * jfs_imap.c: inode allocation map manager * * Serialization: * Each AG has a simple lock which is used to control the serialization of * the AG level lists. This lock should be taken first whenever an AG * level list will be modified or accessed. * * Each IAG is locked by obtaining the buffer for the IAG page. * * There is also an inode lock for the inode map inode. A read lock needs to * be taken whenever an IAG is read from the map or the global level * information is read. A write lock needs to be taken whenever the global * level information is modified or an atomic operation needs to be used. * * If more than one IAG is read at one time, the read lock may not * be given up until all of the IAGs are read. Otherwise, a deadlock * may occur when trying to obtain the read lock while another thread * holding the read lock is waiting on the IAG already being held. * * The control page of the inode map is read into memory by diMount(). * Thereafter it should only be modified in memory and then it will be * written out when the filesystem is unmounted by diUnmount().
*/

#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/slab.h>

#include "jfs_incore.h"
#include "jfs_inode.h"
#include "jfs_filsys.h"
#include "jfs_dinode.h"
#include "jfs_dmap.h"
#include "jfs_imap.h"
#include "jfs_metapage.h"
#include "jfs_superblock.h"
#include "jfs_debug.h"

/*
 * imap locks
 *
 * Thin wrappers around the mutex API for the two kinds of mutexes kept in
 * struct inomap. 'imap' is expected to be a pointer to that structure.
 */
/* iag free list lock (imap->im_freelock) */
#define IAGFREE_LOCK_INIT(imap)		mutex_init(&imap->im_freelock)
#define IAGFREE_LOCK(imap)		mutex_lock(&imap->im_freelock)
#define IAGFREE_UNLOCK(imap)		mutex_unlock(&imap->im_freelock)

/* per ag iag list locks (imap->im_aglock[], indexed by ag number) */
#define AG_LOCK_INIT(imap,index)	mutex_init(&(imap->im_aglock[index]))
#define AG_LOCK(imap,agno)		mutex_lock(&imap->im_aglock[agno])
#define AG_UNLOCK(imap,agno)		mutex_unlock(&imap->im_aglock[agno])

/*
 * forward references
 *
 * Prototypes for the file-local helpers so they can be used before their
 * definitions.
 */
static int diAllocAG(struct inomap *, int, bool, struct inode *);
static int diAllocAny(struct inomap *, int, bool, struct inode *);
static int diAllocBit(struct inomap *, struct iag *, int);
static int diAllocExt(struct inomap *, int, struct inode *);
static int diAllocIno(struct inomap *, int, struct inode *);
static int diFindFree(u32, int);
static int diNewExt(struct inomap *, struct iag *, int);
static int diNewIAG(struct inomap *, int *, int, struct metapage **);
static void duplicateIXtree(struct super_block *, s64, int, s64 *);

static int diIAGRead(struct inomap * imap, int, struct metapage **);
static int copy_from_dinode(struct dinode *, struct inode *);
static void copy_to_dinode(struct dinode *, struct inode *);

/*
 * NAME:	diMount()
 *
 * FUNCTION:	initialize the incore inode map control structures for
 *		a fileset or aggregate init time.
 *
 *		the inode map's control structure (dinomap) is
 *		brought in from disk and placed in virtual memory.
 *
 * PARAMETERS:
 *	ipimap	- pointer to inode map inode for the aggregate or fileset.
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOMEM	- insufficient free virtual memory.
 *	-EIO	- i/o error.
*/
int diMount(struct inode *ipimap)
{
	struct jfs_sb_info *sbi = JFS_SBI(ipimap->i_sb);
	struct dinomap_disk *disk;
	struct inomap *imap;
	struct metapage *mp;
	int agno;

	/* the in-memory inode map control structure */
	imap = kmalloc(sizeof(*imap), GFP_KERNEL);
	if (!imap)
		return -ENOMEM;

	/* bring in the on-disk inode map control page */
	mp = read_metapage(ipimap, IMAPBLKNO << sbi->l2nbperpage, PSIZE, 0);
	if (!mp) {
		kfree(imap);
		return -EIO;
	}

	/* convert the little-endian on-disk fields into the incore copy */
	disk = (struct dinomap_disk *) mp->data;

	imap->im_freeiag = le32_to_cpu(disk->in_freeiag);
	imap->im_nextiag = le32_to_cpu(disk->in_nextiag);
	atomic_set(&imap->im_numinos, le32_to_cpu(disk->in_numinos));
	atomic_set(&imap->im_numfree, le32_to_cpu(disk->in_numfree));
	imap->im_nbperiext = le32_to_cpu(disk->in_nbperiext);
	imap->im_l2nbperiext = le32_to_cpu(disk->in_l2nbperiext);

	for (agno = 0; agno < MAXAG; agno++) {
		imap->im_agctl[agno].inofree =
		    le32_to_cpu(disk->in_agctl[agno].inofree);
		imap->im_agctl[agno].extfree =
		    le32_to_cpu(disk->in_agctl[agno].extfree);
		imap->im_agctl[agno].numinos =
		    le32_to_cpu(disk->in_agctl[agno].numinos);
		imap->im_agctl[agno].numfree =
		    le32_to_cpu(disk->in_agctl[agno].numfree);
	}

	/* the control page is no longer needed once copied */
	release_metapage(mp);

	/* set up the iag free list lock and one lock per ag */
	IAGFREE_LOCK_INIT(imap);

	for (agno = 0; agno < MAXAG; agno++)
		AG_LOCK_INIT(imap, agno);

	/* tie the inode map inode and its control structure together */
	imap->im_ipimap = ipimap;
	JFS_IP(ipimap)->i_imap = imap;

	return 0;
}

/*
 * NAME:	diUnmount()
 *
 * FUNCTION:	write to disk the incore inode map control structures for
 *		a fileset or aggregate at unmount time.
 *
 * PARAMETERS:
 *	ipimap	- pointer to inode map inode for the aggregate or fileset.
* * RETURN VALUES: * 0 - success * -ENOMEM - insufficient free virtual memory. * -EIO - i/o error. */ int diUnmount(struct inode *ipimap, int mounterror) { struct inomap *imap = JFS_IP(ipimap)->i_imap; /* * update the on-disk inode map control structure */ if (!(mounterror || isReadOnly(ipimap))) diSync(ipimap); /* * Invalidate the page cache buffers */ truncate_inode_pages(ipimap->i_mapping, 0); /* * free in-memory control structure */ kfree(imap); JFS_IP(ipimap)->i_imap = NULL; return (0); } /* * diSync() */ int diSync(struct inode *ipimap) { struct dinomap_disk *dinom_le; struct inomap *imp = JFS_IP(ipimap)->i_imap; struct metapage *mp; int index; /* * write imap global conrol page */ /* read the on-disk inode map control structure */ mp = get_metapage(ipimap, IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, PSIZE, 0); if (mp == NULL) { jfs_err("diSync: get_metapage failed!"); return -EIO; } /* copy the in-memory version to the on-disk version */ dinom_le = (struct dinomap_disk *) mp->data; dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag); dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag); dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos)); dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree)); dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext); dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext); for (index = 0; index < MAXAG; index++) { dinom_le->in_agctl[index].inofree = cpu_to_le32(imp->im_agctl[index].inofree); dinom_le->in_agctl[index].extfree = cpu_to_le32(imp->im_agctl[index].extfree); dinom_le->in_agctl[index].numinos = cpu_to_le32(imp->im_agctl[index].numinos); dinom_le->in_agctl[index].numfree = cpu_to_le32(imp->im_agctl[index].numfree); } /* write out the control structure */ write_metapage(mp); /* * write out dirty pages of imap */ filemap_write_and_wait(ipimap->i_mapping); diWriteSpecial(ipimap, 0); return (0); } /* * NAME: diRead() * * FUNCTION: initialize an incore inode from disk. 
*
 *		on entry, the specified incore inode should itself
 *		specify the disk inode number corresponding to the
 *		incore inode (i.e. i_number should be initialized).
 *
 *		this routine handles incore inode initialization for
 *		both "special" and "regular" inodes.  special inodes
 *		are those required early in the mount process and
 *		require special handling since much of the file system
 *		is not yet initialized.  these "special" inodes are
 *		identified by a NULL inode map inode pointer and are
 *		actually initialized by a call to diReadSpecial().
 *
 *		for regular inodes, the iag describing the disk inode
 *		is read from disk to determine the inode extent address
 *		for the disk inode.  with the inode extent address in
 *		hand, the page of the extent that contains the disk
 *		inode is read and the disk inode is copied to the
 *		incore inode.
 *
 * PARAMETERS:
 *	ip	- pointer to incore inode to be initialized from disk.
 *
 * RETURN VALUES:
 *	0	- success
 *	-EIO	- i/o error.
 *	-ENOMEM	- insufficient memory
 *	-ESTALE	- the inode extent is not backed, or the disk inode
 *		  has a zero link count.
 */
int diRead(struct inode *ip)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	struct inode *ipimap = sbi->ipimap;
	struct inomap *imap;
	struct metapage *mp;
	struct iag *iagp;
	struct dinode *dp;
	pxd_t *ixpxd;
	s64 blkno, agstart;
	unsigned long pageno;
	int iagno, ino, extno, rc;
	int block_offset;
	int inodes_left;
	int rel_inode;

	jfs_info("diRead: ino = %ld", ip->i_ino);

	JFS_IP(ip)->ipimap = ipimap;

	/* iag that describes this inode number */
	iagno = INOTOIAG(ip->i_ino);

	/* the imap read lock is only held while the iag is being read */
	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
	imap = JFS_IP(ipimap)->i_imap;
	rc = diIAGRead(imap, iagno, &mp);
	IREAD_UNLOCK(ipimap);
	if (rc) {
		jfs_err("diRead: diIAGRead returned %d", rc);
		return rc;
	}

	iagp = (struct iag *) mp->data;

	/* locate the inode extent that holds the disk inode */
	ino = ip->i_ino & (INOSPERIAG - 1);
	extno = ino >> L2INOSPEREXT;
	ixpxd = &iagp->inoext[extno];

	/* the extent must have the expected length and a non-zero address */
	if (lengthPXD(ixpxd) != imap->im_nbperiext ||
	    addressPXD(ixpxd) == 0) {
		release_metapage(mp);
		return -ESTALE;
	}

	/* disk block number of the page within the inode extent
	 * that holds the disk inode.
	 */
	blkno = INOPBLK(ixpxd, ino, sbi->l2nbperpage);

	/* remember the ag start recorded in the iag */
	agstart = le64_to_cpu(iagp->agstart);

	release_metapage(mp);

	rel_inode = ino & (INOSPERPAGE - 1);
	pageno = blkno >> sbi->l2nbperpage;

	block_offset = (u32) blkno & (sbi->nbperpage - 1);
	if (block_offset) {
		/*
		 * OS/2 didn't always align inode extents on page boundaries
		 */
		inodes_left =
		    (sbi->nbperpage - block_offset) << sbi->l2niperblk;

		if (rel_inode < inodes_left) {
			rel_inode += block_offset << sbi->l2niperblk;
		} else {
			pageno += 1;
			rel_inode -= inodes_left;
		}
	}

	/* read the page that contains the disk inode */
	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
	if (!mp) {
		jfs_err("diRead: read_metapage failed");
		return -EIO;
	}

	/* step to the requested disk inode within the page */
	dp = (struct dinode *) mp->data + rel_inode;

	if (ip->i_ino != le32_to_cpu(dp->di_number)) {
		jfs_error(ip->i_sb, "i_ino != di_number\n");
		rc = -EIO;
	} else if (le32_to_cpu(dp->di_nlink) == 0) {
		rc = -ESTALE;
	} else {
		/* copy the disk inode to the in-memory inode */
		rc = copy_from_dinode(dp, ip);
	}

	release_metapage(mp);

	/* record the ag of the inode; this is done whatever rc is */
	JFS_IP(ip)->agstart = agstart;
	JFS_IP(ip)->active_ag = -1;

	return rc;
}


/*
 * NAME:	diReadSpecial()
 *
 * FUNCTION:	initialize a 'special' inode from disk.
 *
 *		this routines handles aggregate level inodes.  The
 *		inode cache cannot differentiate between the
 *		aggregate inodes and the filesystem inodes, so we
 *		handle these here.  We don't actually use the aggregate
 *		inode map, since these inodes are at a fixed location
 *		and in some cases the aggregate inode map isn't initialized
 *		yet.
 *
 * PARAMETERS:
 *	sb - filesystem superblock
 *	inum - aggregate inode number
 *	secondary - 1 if secondary aggregate inode table
 *
 * RETURN VALUES:
 *	new inode	- success
 *	NULL		- i/o error.
*/ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) { struct jfs_sb_info *sbi = JFS_SBI(sb); uint address; struct dinode *dp; struct inode *ip; struct metapage *mp; ip = new_inode(sb); if (ip == NULL) { jfs_err("diReadSpecial: new_inode returned NULL!"); return ip; } if (secondary) { address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage; JFS_IP(ip)->ipimap = sbi->ipaimap2; } else { address = AITBL_OFF >> L2PSIZE; JFS_IP(ip)->ipimap = sbi->ipaimap; } ASSERT(inum < INOSPEREXT); ip->i_ino = inum; address += inum >> 3; /* 8 inodes per 4K page */ /* read the page of fixed disk inode (AIT) in raw mode */ mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); if (mp == NULL) { set_nlink(ip, 1); /* Don't want iput() deleting it */ iput(ip); return (NULL); } /* get the pointer to the disk inode of interest */ dp = (struct dinode *) (mp->data); dp += inum % 8; /* 8 inodes per 4K page */ /* copy on-disk inode to in-memory inode */ if ((copy_from_dinode(dp, ip)) != 0) { /* handle bad return by returning NULL for ip */ set_nlink(ip, 1); /* Don't want iput() deleting it */ iput(ip); /* release the page */ release_metapage(mp); return (NULL); } ip->i_mapping->a_ops = &jfs_metapage_aops; mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS); /* Allocations to metadata inodes should not affect quotas */ ip->i_flags |= S_NOQUOTA; if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) { sbi->gengen = le32_to_cpu(dp->di_gengen); sbi->inostamp = le32_to_cpu(dp->di_inostamp); } /* release the page */ release_metapage(mp); inode_fake_hash(ip); return (ip); } /* * NAME: diWriteSpecial() * * FUNCTION: Write the special inode to disk * * PARAMETERS: * ip - special inode * secondary - 1 if secondary aggregate inode table * * RETURN VALUES: none */ void diWriteSpecial(struct inode *ip, int secondary) { struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); uint address; struct dinode *dp; ino_t inum = ip->i_ino; struct metapage *mp; if (secondary) address = 
addressPXD(&sbi->ait2) >> sbi->l2nbperpage; else address = AITBL_OFF >> L2PSIZE; ASSERT(inum < INOSPEREXT); address += inum >> 3; /* 8 inodes per 4K page */ /* read the page of fixed disk inode (AIT) in raw mode */ mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); if (mp == NULL) { jfs_err("diWriteSpecial: failed to read aggregate inode extent!"); return; } /* get the pointer to the disk inode of interest */ dp = (struct dinode *) (mp->data); dp += inum % 8; /* 8 inodes per 4K page */ /* copy on-disk inode to in-memory inode */ copy_to_dinode(dp, ip); memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288); if (inum == FILESYSTEM_I) dp->di_gengen = cpu_to_le32(sbi->gengen); /* write the page */ write_metapage(mp); } /* * NAME: diFreeSpecial() * * FUNCTION: Free allocated space for special inode */ void diFreeSpecial(struct inode *ip) { if (ip == NULL) { jfs_err("diFreeSpecial called with NULL ip!"); return; } filemap_write_and_wait(ip->i_mapping); truncate_inode_pages(ip->i_mapping, 0); iput(ip); } /* * NAME: diWrite() * * FUNCTION: write the on-disk inode portion of the in-memory inode * to its corresponding on-disk inode. * * on entry, the specifed incore inode should itself * specify the disk inode number corresponding to the * incore inode (i.e. i_number should be initialized). * * the inode contains the inode extent address for the disk * inode. with the inode extent address in hand, the * page of the extent that contains the disk inode is * read and the disk inode portion of the incore inode * is copied to the disk inode. * * PARAMETERS: * tid - transacation id * ip - pointer to incore inode to be written to the inode extent. * * RETURN VALUES: * 0 - success * -EIO - i/o error. 
*/
int diWrite(tid_t tid, struct inode *ip)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int rc = 0;
	s32 ino;
	struct dinode *dp;
	s64 blkno;
	int block_offset;
	int inodes_left;
	struct metapage *mp;
	unsigned long pageno;
	int rel_inode;
	int dioffset;
	struct inode *ipimap;
	uint type;
	lid_t lid;
	struct tlock *ditlck, *tlck;
	struct linelock *dilinelock, *ilinelock;
	struct lv *lv;
	int n;

	ipimap = jfs_ip->ipimap;

	/* inode number relative to its iag */
	ino = ip->i_ino & (INOSPERIAG - 1);

	/*
	 * the cached inode extent descriptor must have a non-zero address
	 * and the length recorded in the inode map control structure
	 */
	if (!addressPXD(&(jfs_ip->ixpxd)) ||
	    (lengthPXD(&(jfs_ip->ixpxd)) !=
	     JFS_IP(ipimap)->i_imap->im_nbperiext)) {
		jfs_error(ip->i_sb, "ixpxd invalid\n");
		return -EIO;
	}

	/*
	 * read the page of disk inode containing the specified inode:
	 */
	/* compute the block address of the page */
	blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage);

	rel_inode = (ino & (INOSPERPAGE - 1));
	pageno = blkno >> sbi->l2nbperpage;

	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
		/*
		 * OS/2 didn't always align inode extents on page boundaries
		 */
		inodes_left =
		    (sbi->nbperpage - block_offset) << sbi->l2niperblk;

		if (rel_inode < inodes_left)
			rel_inode += block_offset << sbi->l2niperblk;
		else {
			pageno += 1;
			rel_inode -= inodes_left;
		}
	}
	/* read the page of disk inode */
      retry:
	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
	if (!mp)
		return -EIO;

	/* get the pointer to the disk inode */
	dp = (struct dinode *) mp->data;
	dp += rel_inode;

	/* byte offset of the disk inode within its page */
	dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE;

	/*
	 * acquire transaction lock on the on-disk inode;
	 * N.B. tlock is acquired on ipimap not ip;
	 *
	 * a NULL return restarts from the page read above
	 * NOTE(review): presumably txLock() has given up mp in that
	 * case - confirm against txLock()
	 */
	if ((ditlck =
	     txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
		goto retry;
	dilinelock = (struct linelock *) & ditlck->lock;

	/*
	 * copy btree root from in-memory inode to on-disk inode
	 *
	 * (tlock is taken from inline B+-tree root in in-memory
	 * inode when the B+-tree root is updated, which is pointed
	 * by jfs_ip->blid as well as being on tx tlock list)
	 *
	 * further processing of btree root is based on the copy
	 * in in-memory inode, where txLog() will log from, and,
	 * for xtree root, txUpdateMap() will update map and reset
	 * XAD_NEW bit;
	 */

	if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
		/*
		 * This is the special xtree inside the directory for storing
		 * the directory table
		 */
		xtroot_t *p, *xp;
		xad_t *xad;

		/* the lock id is consumed here */
		jfs_ip->xtlid = 0;
		tlck = lid_to_tlock(lid);
		assert(tlck->type & tlckXTREE);
		tlck->type |= tlckBTROOT;
		tlck->mp = mp;
		ilinelock = (struct linelock *) & tlck->lock;

		/*
		 * copy xtree root from inode to dinode:
		 * only the slot ranges recorded in the linelock are copied
		 */
		p = &jfs_ip->i_xtroot;
		xp = (xtroot_t *) &dp->di_dirtable;
		lv = ilinelock->lv;
		for (n = 0; n < ilinelock->index; n++, lv++) {
			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
			       lv->length << L2XTSLOTSIZE);
		}

		/* reset on-disk (metadata page) xtree XAD_NEW bit */
		xad = &xp->xad[XTENTRYSTART];
		for (n = XTENTRYSTART;
		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
	}

	/* no tlock on the inline btree root: skip the root copy */
	if ((lid = jfs_ip->blid) == 0)
		goto inlineData;
	jfs_ip->blid = 0;

	tlck = lid_to_tlock(lid);
	/* 'type' keeps the tlock type as it was before tlckBTROOT is added */
	type = tlck->type;
	tlck->type |= tlckBTROOT;
	tlck->mp = mp;
	ilinelock = (struct linelock *) & tlck->lock;

	/*
	 *	regular file: 16 byte (XAD slot) granularity
	 */
	if (type & tlckXTREE) {
		xtroot_t *p, *xp;
		xad_t *xad;

		/*
		 * copy xtree root from inode to dinode:
		 */
		p = &jfs_ip->i_xtroot;
		xp = &dp->di_xtroot;
		lv = ilinelock->lv;
		for (n = 0; n < ilinelock->index; n++, lv++) {
			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
			       lv->length << L2XTSLOTSIZE);
		}

		/* reset on-disk (metadata page) xtree XAD_NEW bit */
		xad = &xp->xad[XTENTRYSTART];
		for (n = XTENTRYSTART;
		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
	}
	/*
	 *	directory: 32 byte (directory entry slot) granularity
	 */
	else if (type & tlckDTREE) {
		dtpage_t *p, *xp;

		/*
		 * copy dtree root from inode to dinode:
		 */
		p = (dtpage_t *) &jfs_ip->i_dtroot;
		xp = (dtpage_t *) & dp->di_dtroot;
		lv = ilinelock->lv;
		for (n = 0; n < ilinelock->index; n++, lv++) {
			memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
			       lv->length << L2DTSLOTSIZE);
		}
	} else {
		/* neither an xtree nor a dtree tlock: only logged */
		jfs_err("diWrite: UFO tlock");
	}

      inlineData:
	/*
	 * copy inline symlink from in-memory inode to on-disk inode
	 *
	 * a 2-slot line vector entry is added to the on-disk inode's
	 * linelock for the copied area
	 */
	if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) {
		lv = & dilinelock->lv[dilinelock->index];
		lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE;
		lv->length = 2;
		memcpy(&dp->di_inline_all, jfs_ip->i_inline_all, IDATASIZE);
		dilinelock->index++;
	}
	/*
	 * copy inline data from in-memory inode to on-disk inode:
	 * 128 byte slot granularity
	 */
	if (test_cflag(COMMIT_Inlineea, ip)) {
		lv = & dilinelock->lv[dilinelock->index];
		lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
		lv->length = 1;
		memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE);
		dilinelock->index++;

		clear_cflag(COMMIT_Inlineea, ip);
	}

	/*
	 *	lock/copy inode base: 128 byte slot granularity
	 */
	lv = & dilinelock->lv[dilinelock->index];
	lv->offset = dioffset >> L2INODESLOTSIZE;
	copy_to_dinode(dp, ip);
	/* a pending directory table update extends the entry to two slots */
	if (test_and_clear_cflag(COMMIT_Dirtable, ip)) {
		lv->length = 2;
		memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96);
	} else
		lv->length = 1;
	dilinelock->index++;

	/* release the buffer holding the updated on-disk inode.
	 * the buffer will be later written by commit processing.
	 */
	write_metapage(mp);

	/* rc is never changed after its initialization to 0 */
	return (rc);
}


/*
 * NAME:	diFree(ip)
 *
 * FUNCTION:	free a specified inode from the inode working map
 *		for a fileset or aggregate.
* * if the inode to be freed represents the first (only) * free inode within the iag, the iag will be placed on * the ag free inode list. * * freeing the inode will cause the inode extent to be * freed if the inode is the only allocated inode within * the extent. in this case all the disk resource backing * up the inode extent will be freed. in addition, the iag * will be placed on the ag extent free list if the extent * is the first free extent in the iag. if freeing the * extent also means that no free inodes will exist for * the iag, the iag will also be removed from the ag free * inode list. * * the iag describing the inode will be freed if the extent * is to be freed and it is the only backed extent within * the iag. in this case, the iag will be removed from the * ag free extent list and ag free inode list and placed on * the inode map's free iag list. * * a careful update approach is used to provide consistency * in the face of updates to multiple buffers. under this * approach, all required buffers are obtained before making * any updates and are held until all updates are complete. * * PARAMETERS: * ip - inode to be freed. * * RETURN VALUES: * 0 - success * -EIO - i/o error. */ int diFree(struct inode *ip) { int rc; ino_t inum = ip->i_ino; struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp; struct metapage *mp, *amp, *bmp, *cmp, *dmp; int iagno, ino, extno, bitno, sword, agno; int back, fwd; u32 bitmap, mask; struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap; struct inomap *imap = JFS_IP(ipimap)->i_imap; pxd_t freepxd; tid_t tid; struct inode *iplist[3]; struct tlock *tlck; struct pxd_lock *pxdlock; /* * This is just to suppress compiler warnings. The same logic that * references these variables is used to initialize them. */ aiagp = biagp = ciagp = diagp = NULL; /* get the iag number containing the inode. */ iagno = INOTOIAG(inum); /* make sure that the iag is contained within * the map. 
*/ if (iagno >= imap->im_nextiag) { print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, imap, 32, 0); jfs_error(ip->i_sb, "inum = %d, iagno = %d, nextiag = %d\n", (uint) inum, iagno, imap->im_nextiag); return -EIO; } /* get the allocation group for this ino. */ agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb)); /* Lock the AG specific inode map information */ AG_LOCK(imap, agno); /* Obtain read lock in imap inode. Don't release it until we have * read all of the IAG's that we are going to. */ IREAD_LOCK(ipimap, RDWRLOCK_IMAP); /* read the iag. */ if ((rc = diIAGRead(imap, iagno, &mp))) { IREAD_UNLOCK(ipimap); AG_UNLOCK(imap, agno); return (rc); } iagp = (struct iag *) mp->data; /* get the inode number and extent number of the inode within * the iag and the inode number within the extent. */ ino = inum & (INOSPERIAG - 1); extno = ino >> L2INOSPEREXT; bitno = ino & (INOSPEREXT - 1); mask = HIGHORDER >> bitno; if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { jfs_error(ip->i_sb, "wmap shows inode already free\n"); } if (!addressPXD(&iagp->inoext[extno])) { release_metapage(mp); IREAD_UNLOCK(ipimap); AG_UNLOCK(imap, agno); jfs_error(ip->i_sb, "invalid inoext\n"); return -EIO; } /* compute the bitmap for the extent reflecting the freed inode. */ bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask; if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) { release_metapage(mp); IREAD_UNLOCK(ipimap); AG_UNLOCK(imap, agno); jfs_error(ip->i_sb, "numfree > numinos\n"); return -EIO; } /* * inode extent still has some inodes or below low water mark: * keep the inode extent; */ if (bitmap || imap->im_agctl[agno].numfree < 96 || (imap->im_agctl[agno].numfree < 288 && (((imap->im_agctl[agno].numfree * 100) / imap->im_agctl[agno].numinos) <= 25))) { /* if the iag currently has no free inodes (i.e., * the inode being freed is the first free inode of iag), * insert the iag at head of the inode free list for the ag. 
*/ if (iagp->nfreeinos == 0) { /* check if there are any iags on the ag inode * free list. if so, read the first one so that * we can link the current iag onto the list at * the head. */ if ((fwd = imap->im_agctl[agno].inofree) >= 0) { /* read the iag that currently is the head * of the list. */ if ((rc = diIAGRead(imap, fwd, &))) { IREAD_UNLOCK(ipimap); AG_UNLOCK(imap, agno); release_metapage(mp); return (rc); } aiagp = (struct iag *) amp->data; /* make current head point back to the iag. */ aiagp->inofreeback = cpu_to_le32(iagno); write_metapage(amp); } /* iag points forward to current head and iag * becomes the new head of the list. */ iagp->inofreefwd = cpu_to_le32(imap->im_agctl[agno].inofree); iagp->inofreeback = cpu_to_le32(-1); imap->im_agctl[agno].inofree = iagno; } IREAD_UNLOCK(ipimap); /* update the free inode summary map for the extent if * freeing the inode means the extent will now have free * inodes (i.e., the inode being freed is the first free * inode of extent), */ if (iagp->wmap[extno] == cpu_to_le32(ONES)) { sword = extno >> L2EXTSPERSUM; bitno = extno & (EXTSPERSUM - 1); iagp->inosmap[sword] &= cpu_to_le32(~(HIGHORDER >> bitno)); } /* update the bitmap. */ iagp->wmap[extno] = cpu_to_le32(bitmap); /* update the free inode counts at the iag, ag and * map level. */ le32_add_cpu(&iagp->nfreeinos, 1); imap->im_agctl[agno].numfree += 1; atomic_inc(&imap->im_numfree); /* release the AG inode map lock */ AG_UNLOCK(imap, agno); /* write the iag */ write_metapage(mp); return (0); } /* * inode extent has become free and above low water mark: * free the inode extent; */ /* * prepare to update iag list(s) (careful update step 1) */ amp = bmp = cmp = dmp = NULL; fwd = back = -1; /* check if the iag currently has no free extents. if so, * it will be placed on the head of the ag extent free list. */ if (iagp->nfreeexts == 0) { /* check if the ag extent free list has any iags. * if so, read the iag at the head of the list now. 
* this (head) iag will be updated later to reflect * the addition of the current iag at the head of * the list. */ if ((fwd = imap->im_agctl[agno].extfree) >= 0) { if ((rc = diIAGRead(imap, fwd, &))) goto error_out; aiagp = (struct iag *) amp->data; } } else { /* iag has free extents. check if the addition of a free * extent will cause all extents to be free within this * iag. if so, the iag will be removed from the ag extent * free list and placed on the inode map's free iag list. */ if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { /* in preparation for removing the iag from the * ag extent free list, read the iags preceding * and following the iag on the ag extent free * list. */ if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { if ((rc = diIAGRead(imap, fwd, &))) goto error_out; aiagp = (struct iag *) amp->data; } if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { if ((rc = diIAGRead(imap, back, &bmp))) goto error_out; biagp = (struct iag *) bmp->data; } } } /* remove the iag from the ag inode free list if freeing * this extent cause the iag to have no free inodes. */ if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { int inofreeback = le32_to_cpu(iagp->inofreeback); int inofreefwd = le32_to_cpu(iagp->inofreefwd); /* in preparation for removing the iag from the * ag inode free list, read the iags preceding * and following the iag on the ag inode free * list. before reading these iags, we must make * sure that we already don't have them in hand * from up above, since re-reading an iag (buffer) * we are currently holding would cause a deadlock. 
*/ if (inofreefwd >= 0) { if (inofreefwd == fwd) ciagp = (struct iag *) amp->data; else if (inofreefwd == back) ciagp = (struct iag *) bmp->data; else { if ((rc = diIAGRead(imap, inofreefwd, &cmp))) goto error_out; ciagp = (struct iag *) cmp->data; } assert(ciagp != NULL); } if (inofreeback >= 0) { if (inofreeback == fwd) diagp = (struct iag *) amp->data; else if (inofreeback == back) diagp = (struct iag *) bmp->data; else { if ((rc = diIAGRead(imap, inofreeback, &dmp))) goto error_out; diagp = (struct iag *) dmp->data; } assert(diagp != NULL); } } IREAD_UNLOCK(ipimap); /* * invalidate any page of the inode extent freed from buffer cache; */ freepxd = iagp->inoext[extno]; invalidate_pxd_metapages(ip, freepxd); /* * update iag list(s) (careful update step 2) */ /* add the iag to the ag extent free list if this is the * first free extent for the iag. */ if (iagp->nfreeexts == 0) { if (fwd >= 0) aiagp->extfreeback = cpu_to_le32(iagno); iagp->extfreefwd = cpu_to_le32(imap->im_agctl[agno].extfree); iagp->extfreeback = cpu_to_le32(-1); imap->im_agctl[agno].extfree = iagno; } else { /* remove the iag from the ag extent list if all extents * are now free and place it on the inode map iag free list. */ if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { if (fwd >= 0) aiagp->extfreeback = iagp->extfreeback; if (back >= 0) biagp->extfreefwd = iagp->extfreefwd; else imap->im_agctl[agno].extfree = le32_to_cpu(iagp->extfreefwd); iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); IAGFREE_LOCK(imap); iagp->iagfree = cpu_to_le32(imap->im_freeiag); imap->im_freeiag = iagno; IAGFREE_UNLOCK(imap); } } /* remove the iag from the ag inode free list if freeing * this extent causes the iag to have no free inodes. 
*/ if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) ciagp->inofreeback = iagp->inofreeback; if ((int) le32_to_cpu(iagp->inofreeback) >= 0) diagp->inofreefwd = iagp->inofreefwd; else imap->im_agctl[agno].inofree = le32_to_cpu(iagp->inofreefwd); iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); } /* update the inode extent address and working map * to reflect the free extent. * the permanent map should have been updated already * for the inode being freed. */ if (iagp->pmap[extno] != 0) { jfs_error(ip->i_sb, "the pmap does not show inode free\n"); } iagp->wmap[extno] = 0; PXDlength(&iagp->inoext[extno], 0); PXDaddress(&iagp->inoext[extno], 0); /* update the free extent and free inode summary maps * to reflect the freed extent. * the inode summary map is marked to indicate no inodes * available for the freed extent. */ sword = extno >> L2EXTSPERSUM; bitno = extno & (EXTSPERSUM - 1); mask = HIGHORDER >> bitno; iagp->inosmap[sword] |= cpu_to_le32(mask); iagp->extsmap[sword] &= cpu_to_le32(~mask); /* update the number of free inodes and number of free extents * for the iag. */ le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1)); le32_add_cpu(&iagp->nfreeexts, 1); /* update the number of free inodes and backed inodes * at the ag and inode map level. */ imap->im_agctl[agno].numfree -= (INOSPEREXT - 1); imap->im_agctl[agno].numinos -= INOSPEREXT; atomic_sub(INOSPEREXT - 1, &imap->im_numfree); atomic_sub(INOSPEREXT, &imap->im_numinos); if (amp) write_metapage(amp); if (bmp) write_metapage(bmp); if (cmp) write_metapage(cmp); if (dmp) write_metapage(dmp); /* * start transaction to update block allocation map * for the inode extent freed; * * N.B. 
AG_LOCK is released and iag will be released below, and * other thread may allocate inode from/reusing the ixad freed * BUT with new/different backing inode extent from the extent * to be freed by the transaction; */ tid = txBegin(ipimap->i_sb, COMMIT_FORCE); mutex_lock(&JFS_IP(ipimap)->commit_mutex); /* acquire tlock of the iag page of the freed ixad * to force the page NOHOMEOK (even though no data is * logged from the iag page) until NOREDOPAGE|FREEXTENT log * for the free of the extent is committed; * write FREEXTENT|NOREDOPAGE log record * N.B. linelock is overlaid as freed extent descriptor; */ tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE); pxdlock = (struct pxd_lock *) & tlck->lock; pxdlock->flag = mlckFREEPXD; pxdlock->pxd = freepxd; pxdlock->index = 1; write_metapage(mp); iplist[0] = ipimap; /* * logredo needs the IAG number and IAG extent index in order * to ensure that the IMap is consistent. The least disruptive * way to pass these values through to the transaction manager * is in the iplist array. * * It's not pretty, but it works. */ iplist[1] = (struct inode *) (size_t)iagno; iplist[2] = (struct inode *) (size_t)extno; rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); txEnd(tid); mutex_unlock(&JFS_IP(ipimap)->commit_mutex); /* unlock the AG inode map information */ AG_UNLOCK(imap, agno); return (0); error_out: IREAD_UNLOCK(ipimap); if (amp) release_metapage(amp); if (bmp) release_metapage(bmp); if (cmp) release_metapage(cmp); if (dmp) release_metapage(dmp); AG_UNLOCK(imap, agno); release_metapage(mp); return (rc); } /* * There are several places in the diAlloc* routines where we initialize * the inode. 
*/
static inline void
diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);

	/* inode number: iag number shifted by L2INOSPERIAG plus index in the iag */
	ip->i_ino = (iagno << L2INOSPERIAG) + ino;
	/* copy the descriptor of the inode extent that holds this inode */
	jfs_ip->ixpxd = iagp->inoext[extno];
	/* the iag keeps agstart little-endian; store it in cpu byte order */
	jfs_ip->agstart = le64_to_cpu(iagp->agstart);
	jfs_ip->active_ag = -1;
}

/*
 * NAME:	diAlloc(pip,dir,ip)
 *
 * FUNCTION:	allocate a disk inode from the inode working map
 *		for a fileset or aggregate.
 *
 *		for a directory, or when the parent's ag is marked active
 *		(db_active[agno] is non-zero), allocation starts at the ag
 *		returned by dbNextAG().  otherwise the iag that holds the
 *		parent inode is tried first, using parent inode number + 1
 *		as the hint.  if that fails the whole ag is tried
 *		(diAllocAG) and finally any other ag (diAllocAny).
 *
 * PARAMETERS:
 *	pip	- pointer to incore inode for the parent inode.
 *	dir	- 'true' if the new disk inode is for a directory.
 *	ip	- pointer to a new inode; on success its inode number,
 *		  inode extent descriptor and ag start are filled in.
 *
 * RETURN VALUES:
 *	0	- success.
 *	-ENOSPC	- insufficient disk resources.
 *	-EIO	- i/o error, or the parent's ag number is out of range.
 */
int diAlloc(struct inode *pip, bool dir, struct inode *ip)
{
	int rc, ino, iagno, addext, extno, bitno, sword;
	int nwords, rem, i, agno, dn_numag;
	u32 mask, inosmap, extsmap;
	struct inode *ipimap;
	struct metapage *mp;
	ino_t inum;
	struct iag *iagp;
	struct inomap *imap;

	/* get the pointers to the inode map inode and the
	 * corresponding imap control structure.
	 */
	ipimap = JFS_SBI(pip->i_sb)->ipimap;
	imap = JFS_IP(ipimap)->i_imap;
	JFS_IP(ip)->ipimap = ipimap;
	JFS_IP(ip)->fileset = FILESYSTEM_I;

	/* for a directory, the allocation policy is to start
	 * at the ag level using the preferred ag.
	 */
	if (dir) {
		agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
		AG_LOCK(imap, agno);
		goto tryag;
	}

	/* for files, the policy starts off by trying to allocate from
	 * the same iag containing the parent disk inode:
	 * try to allocate the new disk inode close to the parent disk
	 * inode, using parent disk inode number + 1 as the allocation
	 * hint.  (we use a left-to-right policy to attempt to avoid
	 * moving backward on the disk.)  compute the hint within the
	 * file system and the iag.
	 */

	/* get the ag number of this iag */
	agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb));
	dn_numag = JFS_SBI(pip->i_sb)->bmap->db_numag;
	/*
	 * agno is used below as an index into per-ag state (db_active[],
	 * the AG lock, im_agctl[]).  db_numag is a count of ags, so
	 * agno == dn_numag is already one past the last ag and has to be
	 * rejected together with anything larger.
	 */
	if (agno < 0 || agno >= dn_numag)
		return -EIO;

	if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
		/*
		 * There is an open file actively growing.  We want to
		 * allocate new inodes from a different ag to avoid
		 * fragmentation problems.
		 */
		agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
		AG_LOCK(imap, agno);
		goto tryag;
	}

	inum = pip->i_ino + 1;
	ino = inum & (INOSPERIAG - 1);

	/* back off the hint if it is outside of the iag */
	if (ino == 0)
		inum = pip->i_ino;

	/* lock the AG inode map information */
	AG_LOCK(imap, agno);

	/* Get read lock on imap inode */
	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);

	/* get the iag number and read the iag */
	iagno = INOTOIAG(inum);
	if ((rc = diIAGRead(imap, iagno, &mp))) {
		IREAD_UNLOCK(ipimap);
		AG_UNLOCK(imap, agno);
		return (rc);
	}
	iagp = (struct iag *) mp->data;

	/* determine if new inode extent is allowed to be added to the iag.
	 * new inode extent can be added to the iag if the ag
	 * has less than 32 free disk inodes and the iag has free extents.
	 */
	addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);

	/*
	 *	try to allocate from the IAG
	 */
	/* check if the inode may be allocated from the iag
	 * (i.e. the inode has free inodes or new extent can be added).
	 */
	if (iagp->nfreeinos || addext) {
		/* determine the extent number of the hint.
		 */
		extno = ino >> L2INOSPEREXT;

		/* check if the extent containing the hint has backed
		 * inodes.  if so, try to allocate within this extent.
		 */
		if (addressPXD(&iagp->inoext[extno])) {
			bitno = ino & (INOSPEREXT - 1);
			if ((bitno =
			     diFindFree(le32_to_cpu(iagp->wmap[extno]),
					bitno))
			    < INOSPEREXT) {
				ino = (extno << L2INOSPEREXT) + bitno;

				/* a free inode (bit) was found within this
				 * extent, so allocate it.
				 */
				rc = diAllocBit(imap, iagp, ino);
				IREAD_UNLOCK(ipimap);
				if (rc) {
					assert(rc == -EIO);
				} else {
					/* set the results of the allocation
					 * and write the iag.
					 */
					diInitInode(ip, iagno, ino, extno,
						    iagp);
					mark_metapage_dirty(mp);
				}
				release_metapage(mp);

				/* free the AG lock and return.
				 */
				AG_UNLOCK(imap, agno);
				return (rc);
			}

			/* the hint extent is full: unless a new extent may
			 * be added, start the scan at the next extent,
			 * wrapping to extent 0 after the last one.
			 */
			if (!addext)
				extno =
				    (extno ==
				     EXTSPERIAG - 1) ? 0 : extno + 1;
		}

		/*
		 * no free inodes within the extent containing the hint.
		 *
		 * try to allocate from the backed extents following
		 * hint or, if appropriate (i.e. addext is true), allocate
		 * an extent of free inodes at or following the extent
		 * containing the hint.
		 *
		 * the free inode and free extent summary maps are used
		 * here, so determine the starting summary map position
		 * and the number of words we'll have to examine.  again,
		 * the approach is to allocate following the hint, so we
		 * might have to initially ignore prior bits of the summary
		 * map that represent extents prior to the extent containing
		 * the hint and later revisit these bits.
		 */
		bitno = extno & (EXTSPERSUM - 1);
		nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1;
		sword = extno >> L2EXTSPERSUM;

		/* mask any prior bits for the starting words of the
		 * summary map.
		 */
		mask = (bitno == 0) ? 0 : (ONES << (EXTSPERSUM - bitno));
		inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask;
		extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask;

		/* scan the free inode and free extent summary maps for
		 * free resources.
		 */
		for (i = 0; i < nwords; i++) {
			/* check if this word of the free inode summary
			 * map describes an extent with free inodes.
			 */
			if (~inosmap) {
				/* an extent with free inodes has been
				 * found. determine the extent number
				 * and the inode number within the extent.
				 */
				rem = diFindFree(inosmap, 0);
				extno = (sword << L2EXTSPERSUM) + rem;
				rem = diFindFree(le32_to_cpu(iagp->wmap[extno]),
						 0);
				if (rem >= INOSPEREXT) {
					IREAD_UNLOCK(ipimap);
					release_metapage(mp);
					AG_UNLOCK(imap, agno);
					jfs_error(ip->i_sb,
						  "can't find free bit in wmap\n");
					return -EIO;
				}

				/* determine the inode number within the
				 * iag and allocate the inode from the
				 * map.
				 */
				ino = (extno << L2INOSPEREXT) + rem;
				rc = diAllocBit(imap, iagp, ino);
				IREAD_UNLOCK(ipimap);
				if (rc)
					assert(rc == -EIO);
				else {
					/* set the results of the allocation
					 * and write the iag.
					 */
					diInitInode(ip, iagno, ino, extno,
						    iagp);
					mark_metapage_dirty(mp);
				}
				release_metapage(mp);

				/* free the AG lock and return.
				 */
				AG_UNLOCK(imap, agno);
				return (rc);

			}

			/* check if we may allocate an extent of free
			 * inodes and whether this word of the free
			 * extents summary map describes a free extent.
			 */
			if (addext && ~extsmap) {
				/* a free extent has been found.  determine
				 * the extent number.
				 */
				rem = diFindFree(extsmap, 0);
				extno = (sword << L2EXTSPERSUM) + rem;

				/* allocate an extent of free inodes.
				 */
				if ((rc = diNewExt(imap, iagp, extno))) {
					/* if there is no disk space for a
					 * new extent, try to allocate the
					 * disk inode from somewhere else.
					 */
					if (rc == -ENOSPC)
						break;

					assert(rc == -EIO);
				} else {
					/* set the results of the allocation
					 * and write the iag.
					 */
					diInitInode(ip, iagno,
						    extno << L2INOSPEREXT,
						    extno, iagp);
					mark_metapage_dirty(mp);
				}
				release_metapage(mp);
				/* free the imap inode & the AG lock & return.
				 */
				IREAD_UNLOCK(ipimap);
				AG_UNLOCK(imap, agno);
				return (rc);
			}

			/* move on to the next set of summary map words.
			 */
			sword = (sword == SMAPSZ - 1) ? 0 : sword + 1;
			inosmap = le32_to_cpu(iagp->inosmap[sword]);
			extsmap = le32_to_cpu(iagp->extsmap[sword]);
		}
	}
	/* unlock imap inode */
	IREAD_UNLOCK(ipimap);

	/* nothing doing in this iag, so release it. */
	release_metapage(mp);

      tryag:
	/*
	 * try to allocate anywhere within the same AG as the parent inode.
	 * (the AG lock for agno is held on every path that reaches here.)
	 */
	rc = diAllocAG(imap, agno, dir, ip);

	AG_UNLOCK(imap, agno);

	if (rc != -ENOSPC)
		return (rc);

	/*
	 * try to allocate in any AG.
	 */
	return (diAllocAny(imap, agno, dir, ip));
}


/*
 * NAME:	diAllocAG(imap,agno,dir,ip)
 *
 * FUNCTION:	allocate a disk inode from the allocation group.
* * this routine first determines if a new extent of free * inodes should be added for the allocation group, with * the current request satisfied from this extent. if this * is the case, an attempt will be made to do just that. if * this attempt fails or it has been determined that a new * extent should not be added, an attempt is made to satisfy * the request by allocating an existing (backed) free inode * from the allocation group. * * PRE CONDITION: Already have the AG lock for this AG. * * PARAMETERS: * imap - pointer to inode map control structure. * agno - allocation group to allocate from. * dir - 'true' if the new disk inode is for a directory. * ip - pointer to the new inode to be filled in on successful return * with the disk inode number allocated, its extent address * and the start of the ag. * * RETURN VALUES: * 0 - success. * -ENOSPC - insufficient disk resources. * -EIO - i/o error. */ static int diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) { int rc, addext, numfree, numinos; /* get the number of free and the number of backed disk * inodes currently within the ag. */ numfree = imap->im_agctl[agno].numfree; numinos = imap->im_agctl[agno].numinos; if (numfree > numinos) { jfs_error(ip->i_sb, "numfree > numinos\n"); return -EIO; } /* determine if we should allocate a new extent of free inodes * within the ag: for directory inodes, add a new extent * if there are a small number of free inodes or number of free * inodes is a small percentage of the number of backed inodes. */ if (dir) addext = (numfree < 64 || (numfree < 256 && ((numfree * 100) / numinos) <= 20)); else addext = (numfree == 0); /* * try to allocate a new extent of free inodes. */ if (addext) { /* if free space is not available for this new extent, try * below to allocate a free and existing (already backed) * inode from the ag. */ if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC) return (rc); } /* * try to allocate an existing free inode from the ag. 
*/ return (diAllocIno(imap, agno, ip)); } /* * NAME: diAllocAny(imap,agno,dir,iap) * * FUNCTION: allocate a disk inode from any other allocation group. * * this routine is called when an allocation attempt within * the primary allocation group has failed. if attempts to * allocate an inode from any allocation group other than the * specified primary group. * * PARAMETERS: * imap - pointer to inode map control structure. * agno - primary allocation group (to avoid). * dir - 'true' if the new disk inode is for a directory. * ip - pointer to a new inode to be filled in on successful return * with the disk inode number allocated, its extent address * and the start of the ag. * * RETURN VALUES: * 0 - success. * -ENOSPC - insufficient disk resources. * -EIO - i/o error. */ static int diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) { int ag, rc; int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; /* try to allocate from the ags following agno up to * the maximum ag number. */ for (ag = agno + 1; ag <= maxag; ag++) { AG_LOCK(imap, ag); rc = diAllocAG(imap, ag, dir, ip); AG_UNLOCK(imap, ag); if (rc != -ENOSPC) return (rc); } /* try to allocate from the ags in front of agno. */ for (ag = 0; ag < agno; ag++) { AG_LOCK(imap, ag); rc = diAllocAG(imap, ag, dir, ip); AG_UNLOCK(imap, ag); if (rc != -ENOSPC) return (rc); } /* no free disk inodes. */ return -ENOSPC; } /* * NAME: diAllocIno(imap,agno,ip) * * FUNCTION: allocate a disk inode from the allocation group's free * inode list, returning an error if this free list is * empty (i.e. no iags on the list). * * allocation occurs from the first iag on the list using * the iag's free inode summary map to find the leftmost * free inode in the iag. * * PRE CONDITION: Already have AG lock for this AG. * * PARAMETERS: * imap - pointer to inode map control structure. * agno - allocation group. 
* ip - pointer to new inode to be filled in on successful return * with the disk inode number allocated, its extent address * and the start of the ag. * * RETURN VALUES: * 0 - success. * -ENOSPC - insufficient disk resources. * -EIO - i/o error. */ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) { int iagno, ino, rc, rem, extno, sword; struct metapage *mp; struct iag *iagp; /* check if there are iags on the ag's free inode list. */ if ((iagno = imap->im_agctl[agno].inofree) < 0) return -ENOSPC; /* obtain read lock on imap inode */ IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); /* read the iag at the head of the list. */ if ((rc = diIAGRead(imap, iagno, &mp))) { IREAD_UNLOCK(imap->im_ipimap); return (rc); } iagp = (struct iag *) mp->data; /* better be free inodes in this iag if it is on the * list. */ if (!iagp->nfreeinos) { IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); jfs_error(ip->i_sb, "nfreeinos = 0, but iag on freelist\n"); return -EIO; } /* scan the free inode summary map to find an extent * with free inodes. */ for (sword = 0;; sword++) { if (sword >= SMAPSZ) { IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); jfs_error(ip->i_sb, "free inode not found in summary map\n"); return -EIO; } if (~iagp->inosmap[sword]) break; } /* found a extent with free inodes. determine * the extent number. */ rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0); if (rem >= EXTSPERSUM) { IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); jfs_error(ip->i_sb, "no free extent found\n"); return -EIO; } extno = (sword << L2EXTSPERSUM) + rem; /* find the first free inode in the extent. */ rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0); if (rem >= INOSPEREXT) { IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); jfs_error(ip->i_sb, "free inode not found\n"); return -EIO; } /* compute the inode number within the iag. */ ino = (extno << L2INOSPEREXT) + rem; /* allocate the inode. 
*/ rc = diAllocBit(imap, iagp, ino); IREAD_UNLOCK(imap->im_ipimap); if (rc) { release_metapage(mp); return (rc); } /* set the results of the allocation and write the iag. */ diInitInode(ip, iagno, ino, extno, iagp); write_metapage(mp); return (0); } /* * NAME: diAllocExt(imap,agno,ip) * * FUNCTION: add a new extent of free inodes to an iag, allocating * an inode from this extent to satisfy the current allocation * request. * * this routine first tries to find an existing iag with free * extents through the ag free extent list. if list is not * empty, the head of the list will be selected as the home * of the new extent of free inodes. otherwise (the list is * empty), a new iag will be allocated for the ag to contain * the extent. * * once an iag has been selected, the free extent summary map * is used to locate a free extent within the iag and diNewExt() * is called to initialize the extent, with initialization * including the allocation of the first inode of the extent * for the purpose of satisfying this request. * * PARAMETERS: * imap - pointer to inode map control structure. * agno - allocation group number. * ip - pointer to new inode to be filled in on successful return * with the disk inode number allocated, its extent address * and the start of the ag. * * RETURN VALUES: * 0 - success. * -ENOSPC - insufficient disk resources. * -EIO - i/o error. */ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) { int rem, iagno, sword, extno, rc; struct metapage *mp; struct iag *iagp; /* check if the ag has any iags with free extents. if not, * allocate a new iag for the ag. */ if ((iagno = imap->im_agctl[agno].extfree) < 0) { /* If successful, diNewIAG will obtain the read lock on the * imap inode. */ if ((rc = diNewIAG(imap, &iagno, agno, &mp))) { return (rc); } iagp = (struct iag *) mp->data; /* set the ag number if this a brand new iag */ iagp->agstart = cpu_to_le64(AGTOBLK(agno, imap->im_ipimap)); } else { /* read the iag. 
*/ IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); if ((rc = diIAGRead(imap, iagno, &mp))) { IREAD_UNLOCK(imap->im_ipimap); jfs_error(ip->i_sb, "error reading iag\n"); return rc; } iagp = (struct iag *) mp->data; } /* using the free extent summary map, find a free extent. */ for (sword = 0;; sword++) { if (sword >= SMAPSZ) { release_metapage(mp); IREAD_UNLOCK(imap->im_ipimap); jfs_error(ip->i_sb, "free ext summary map not found\n"); return -EIO; } if (~iagp->extsmap[sword]) break; } /* determine the extent number of the free extent. */ rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0); if (rem >= EXTSPERSUM) { release_metapage(mp); IREAD_UNLOCK(imap->im_ipimap); jfs_error(ip->i_sb, "free extent not found\n"); return -EIO; } extno = (sword << L2EXTSPERSUM) + rem; /* initialize the new extent. */ rc = diNewExt(imap, iagp, extno); IREAD_UNLOCK(imap->im_ipimap); if (rc) { /* something bad happened. if a new iag was allocated, * place it back on the inode map's iag free list, and * clear the ag number information. */ if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { IAGFREE_LOCK(imap); iagp->iagfree = cpu_to_le32(imap->im_freeiag); imap->im_freeiag = iagno; IAGFREE_UNLOCK(imap); } write_metapage(mp); return (rc); } /* set the results of the allocation and write the iag. */ diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp); write_metapage(mp); return (0); } /* * NAME: diAllocBit(imap,iagp,ino) * * FUNCTION: allocate a backed inode from an iag. * * this routine performs the mechanics of allocating a * specified inode from a backed extent. * * if the inode to be allocated represents the last free * inode within the iag, the iag will be removed from the * ag free inode list. * * a careful update approach is used to provide consistency * in the face of updates to multiple buffers. under this * approach, all required buffers are obtained before making * any updates and are held all are updates are complete. * * PRE CONDITION: Already have buffer lock on iagp. 
Already have AG lock on
 *	this AG.  Must have read lock on imap inode.
 *
 * PARAMETERS:
 *	imap	- pointer to inode map control structure.
 *	iagp	- pointer to iag.
 *	ino	- inode number to be allocated within the iag.
 *
 * RETURN VALUES:
 *	0	- success.
 *	-ENOSPC	- insufficient disk resources.
 *	-EIO	- i/o error.
 */
static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
{
	int extno, bitno, agno, sword, rc;
	/* amp/aiagp: iag following this one on the ag free inode list;
	 * bmp/biagp: iag preceding it.  both stay NULL unless read below.
	 */
	struct metapage *amp = NULL, *bmp = NULL;
	struct iag *aiagp = NULL, *biagp = NULL;
	u32 mask;

	/* check if this is the last free inode within the iag.
	 * if so, it will have to be removed from the ag free
	 * inode list, so get the iags preceding and following
	 * it on the list.
	 */
	if (iagp->nfreeinos == cpu_to_le32(1)) {
		if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) {
			/* the forward neighbour is read into amp, which is
			 * what aiagp is taken from just below.
			 */
			if ((rc =
			     diIAGRead(imap, le32_to_cpu(iagp->inofreefwd),
				       &amp)))
				return (rc);
			aiagp = (struct iag *) amp->data;
		}

		if ((int) le32_to_cpu(iagp->inofreeback) >= 0) {
			if ((rc =
			     diIAGRead(imap,
				       le32_to_cpu(iagp->inofreeback),
				       &bmp))) {
				/* do not leak the neighbour already held */
				if (amp)
					release_metapage(amp);
				return (rc);
			}
			biagp = (struct iag *) bmp->data;
		}
	}

	/* get the ag number, extent number, inode number within
	 * the extent.
	 */
	agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb));
	extno = ino >> L2INOSPEREXT;
	bitno = ino & (INOSPEREXT - 1);

	/* compute the mask for setting the map.
	 */
	mask = HIGHORDER >> bitno;

	/* the inode should be free and backed: its bit must be clear in
	 * both the persistent and the working map, and the extent must
	 * have a non-zero address.
	 */
	if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) ||
	    ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) ||
	    (addressPXD(&iagp->inoext[extno]) == 0)) {
		if (amp)
			release_metapage(amp);
		if (bmp)
			release_metapage(bmp);

		jfs_error(imap->im_ipimap->i_sb, "iag inconsistent\n");
		return -EIO;
	}

	/* mark the inode as allocated in the working map.
	 */
	iagp->wmap[extno] |= cpu_to_le32(mask);

	/* check if all inodes within the extent are now
	 * allocated.  if so, update the free inode summary
	 * map to reflect this.
	 */
	if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
		sword = extno >> L2EXTSPERSUM;
		bitno = extno & (EXTSPERSUM - 1);
		iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno);
	}

	/* if this was the last free inode in the iag, remove the
	 * iag from the ag free inode list.
	 */
	if (iagp->nfreeinos == cpu_to_le32(1)) {
		if (amp) {
			aiagp->inofreeback = iagp->inofreeback;
			write_metapage(amp);
		}

		if (bmp) {
			biagp->inofreefwd = iagp->inofreefwd;
			write_metapage(bmp);
		} else {
			/* no predecessor: this iag was the list head */
			imap->im_agctl[agno].inofree =
			    le32_to_cpu(iagp->inofreefwd);
		}
		iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
	}

	/* update the free inode count at the iag, ag, inode
	 * map levels.
	 */
	le32_add_cpu(&iagp->nfreeinos, -1);
	imap->im_agctl[agno].numfree -= 1;
	atomic_dec(&imap->im_numfree);

	return (0);
}


/*
 * NAME:	diNewExt(imap,iagp,extno)
 *
 * FUNCTION:	initialize a new extent of inodes for an iag, allocating
 *		the first inode of the extent for use for the current
 *		allocation request.
 *
 *		disk resources are allocated for the new extent of inodes
 *		and the inodes themselves are initialized to reflect their
 *		existence within the extent (i.e. their inode numbers and
 *		inode extent addresses are set) and their initial state
 *		(mode and link count are set to zero).
 *
 *		if the iag is new, it is not yet on an ag extent free list
 *		but will now be placed on this list.
 *
 *		if the allocation of the new extent causes the iag to
 *		have no free extent, the iag will be removed from the
 *		ag extent free list.
 *
 *		if the iag has no free backed inodes, it will be placed
 *		on the ag free inode list, since the addition of the new
 *		extent will now cause it to have free inodes.
 *
 *		a careful update approach is used to provide consistency
 *		(i.e. list consistency) in the face of updates to multiple
 *		buffers.  under this approach, all required buffers are
 *		obtained before making any updates and are held until all
 *		updates are complete.
 *
 * PRE CONDITION: Already have buffer lock on iagp.  Already have AG lock on
 *	this AG.
Must have read lock on imap inode.
 *
 * PARAMETERS:
 *	imap	- pointer to inode map control structure.
 *	iagp	- pointer to iag.
 *	extno	- extent number.
 *
 * RETURN VALUES:
 *	0	- success.
 *	-ENOSPC	- insufficient disk resources.
 *	-EIO	- i/o error.
 */
static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
{
	int agno, iagno, fwd, back, freei = 0, sword, rc;
	/* aiagp/amp: forward neighbour (or current head) on the ag extent
	 * free list; biagp/bmp: backward neighbour; ciagp/cmp: head of the
	 * ag free inode list; dmp: page of disk inodes being initialized.
	 */
	struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL;
	struct metapage *amp, *bmp, *cmp, *dmp;
	struct inode *ipimap;
	s64 blkno, hint;
	int i, j;
	u32 mask;
	ino_t ino;
	struct dinode *dp;
	struct jfs_sb_info *sbi;

	/* better have free extents.
	 */
	if (!iagp->nfreeexts) {
		jfs_error(imap->im_ipimap->i_sb, "no free extents\n");
		return -EIO;
	}

	/* get the inode map inode.
	 */
	ipimap = imap->im_ipimap;
	sbi = JFS_SBI(ipimap->i_sb);

	/* error_out releases whichever of these is non-NULL */
	amp = bmp = cmp = NULL;

	/* get the ag and iag numbers for this iag.
	 */
	agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
	iagno = le32_to_cpu(iagp->iagnum);

	/* check if this is the last free extent within the
	 * iag.  if so, the iag must be removed from the ag
	 * free extent list, so get the iags preceding and
	 * following the iag on this list.
	 */
	if (iagp->nfreeexts == cpu_to_le32(1)) {
		if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
			/* nothing is held yet, so a plain return is enough */
			if ((rc = diIAGRead(imap, fwd, &amp)))
				return (rc);
			aiagp = (struct iag *) amp->data;
		}

		if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
			if ((rc = diIAGRead(imap, back, &bmp)))
				goto error_out;
			biagp = (struct iag *) bmp->data;
		}
	} else {
		/* the iag has free extents.  if all extents are free
		 * (as is the case for a newly allocated iag), the iag
		 * must be added to the ag free extent list, so get
		 * the iag at the head of the list in preparation for
		 * adding this iag to this list.
		 */
		fwd = back = -1;
		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
			if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
				if ((rc = diIAGRead(imap, fwd, &amp)))
					goto error_out;
				aiagp = (struct iag *) amp->data;
			}
		}
	}

	/* check if the iag has no free inodes.  if so, the iag
	 * will have to be added to the ag free inode list, so get
	 * the iag at the head of the list in preparation for
	 * adding this iag to this list.  in doing this, we must
	 * check if we already have the iag at the head of
	 * the list in hand.
	 */
	if (iagp->nfreeinos == 0) {
		freei = imap->im_agctl[agno].inofree;

		if (freei >= 0) {
			if (freei == fwd) {
				ciagp = aiagp;
			} else if (freei == back) {
				ciagp = biagp;
			} else {
				if ((rc = diIAGRead(imap, freei, &cmp)))
					goto error_out;
				ciagp = (struct iag *) cmp->data;
			}
			if (ciagp == NULL) {
				jfs_error(imap->im_ipimap->i_sb,
					  "ciagp == NULL\n");
				rc = -EIO;
				goto error_out;
			}
		}
	}

	/* allocate disk space for the inode extent.  the hint is the last
	 * block of the preceding inode extent when that extent is backed,
	 * otherwise the block just before the start of the ag.
	 */
	if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0))
		hint = ((s64) agno << sbi->bmap->db_agl2size) - 1;
	else
		hint = addressPXD(&iagp->inoext[extno - 1]) +
		    lengthPXD(&iagp->inoext[extno - 1]) - 1;

	if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno)))
		goto error_out;

	/* compute the inode number of the first inode within the
	 * extent.
	 */
	ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT);

	/* initialize the inodes within the newly allocated extent a
	 * page at a time.
	 */
	for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) {
		/* get a buffer for this page of disk inodes.
		 */
		dmp = get_metapage(ipimap, blkno + i, PSIZE, 1);
		if (dmp == NULL) {
			/*
			 * NOTE(review): the blocks obtained from dbAlloc()
			 * above are not given back on this path; error_out
			 * only releases the iag buffers -- confirm intent.
			 */
			rc = -EIO;
			goto error_out;
		}
		dp = (struct dinode *) dmp->data;

		/* initialize the inode number, mode, link count and
		 * inode extent address.
		 */
		for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) {
			dp->di_inostamp = cpu_to_le32(sbi->inostamp);
			dp->di_number = cpu_to_le32(ino);
			dp->di_fileset = cpu_to_le32(FILESYSTEM_I);
			dp->di_mode = 0;
			dp->di_nlink = 0;
			PXDaddress(&(dp->di_ixpxd), blkno);
			PXDlength(&(dp->di_ixpxd), imap->im_nbperiext);
		}
		write_metapage(dmp);
	}

	/* if this is the last free extent within the iag, remove the
	 * iag from the ag free extent list.
	 */
	if (iagp->nfreeexts == cpu_to_le32(1)) {
		if (fwd >= 0)
			aiagp->extfreeback = iagp->extfreeback;

		if (back >= 0)
			biagp->extfreefwd = iagp->extfreefwd;
		else
			imap->im_agctl[agno].extfree =
			    le32_to_cpu(iagp->extfreefwd);

		iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
	} else {
		/* if the iag has all free extents (newly allocated iag),
		 * add the iag to the ag free extent list.
		 */
		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
			if (fwd >= 0)
				aiagp->extfreeback = cpu_to_le32(iagno);

			iagp->extfreefwd = cpu_to_le32(fwd);
			iagp->extfreeback = cpu_to_le32(-1);
			imap->im_agctl[agno].extfree = iagno;
		}
	}

	/* if the iag has no free inodes, add the iag to the
	 * ag free inode list.
	 */
	if (iagp->nfreeinos == 0) {
		if (freei >= 0)
			ciagp->inofreeback = cpu_to_le32(iagno);

		iagp->inofreefwd =
		    cpu_to_le32(imap->im_agctl[agno].inofree);
		iagp->inofreeback = cpu_to_le32(-1);
		imap->im_agctl[agno].inofree = iagno;
	}

	/* initialize the extent descriptor of the extent. */
	PXDlength(&iagp->inoext[extno], imap->im_nbperiext);
	PXDaddress(&iagp->inoext[extno], blkno);

	/* initialize the working and persistent map of the extent.
	 * the working map will be initialized such that
	 * it indicates the first inode of the extent is allocated.
	 */
	iagp->wmap[extno] = cpu_to_le32(HIGHORDER);
	iagp->pmap[extno] = 0;

	/* update the free inode and free extent summary maps
	 * for the extent to indicate the extent has free inodes
	 * and no longer represents a free extent.
	 */
	sword = extno >> L2EXTSPERSUM;
	mask = HIGHORDER >> (extno & (EXTSPERSUM - 1));
	iagp->extsmap[sword] |= cpu_to_le32(mask);
	iagp->inosmap[sword] &= cpu_to_le32(~mask);

	/* update the free inode and free extent counts for the
	 * iag.
	 */
	le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1));
	le32_add_cpu(&iagp->nfreeexts, -1);

	/* update the free and backed inode counts for the ag.
	 */
	imap->im_agctl[agno].numfree += (INOSPEREXT - 1);
	imap->im_agctl[agno].numinos += INOSPEREXT;

	/* update the free and backed inode counts for the inode map.
	 */
	atomic_add(INOSPEREXT - 1, &imap->im_numfree);
	atomic_add(INOSPEREXT, &imap->im_numinos);

	/* write the iags.
	 */
	if (amp)
		write_metapage(amp);
	if (bmp)
		write_metapage(bmp);
	if (cmp)
		write_metapage(cmp);

	return (0);

      error_out:

	/* release the iags.
	 */
	if (amp)
		release_metapage(amp);
	if (bmp)
		release_metapage(bmp);
	if (cmp)
		release_metapage(cmp);

	return (rc);
}


/*
 * NAME:	diNewIAG(imap,iagnop,agno)
 *
 * FUNCTION:	allocate a new iag for an allocation group.
 *
 *		first tries to allocate the iag from the inode map
 *		iagfree list:
 *		if the list has free iags, the head of the list is removed
 *		and returned to satisfy the request.
 *		if the inode map's iag free list is empty, the inode map
 *		is extended to hold a new iag. this new iag is initialized
 *		and returned to satisfy the request.
 *
 * PARAMETERS:
 *	imap	- pointer to inode map control structure.
 *	iagnop	- pointer to an iag number set with the number of the
 *		  newly allocated iag upon successful return.
 *	agno	- allocation group number.
 *	mpp	- Buffer pointer to be filled in with new IAG's buffer
 *
 * RETURN VALUES:
 *	0	- success.
 *	-ENOSPC	- insufficient disk resources.
 *	-EIO	- i/o error.
 *
 * serialization:
 *	AG lock held on entry/exit;
 *	write lock on the map is held inside;
 *	read lock on the map is held on successful completion;
 *
 * note: new iag transaction:
 *	. synchronously write iag;
 *	. write log of xtree and inode of imap;
 *	. commit;
 *	. synchronous write of xtree (right to left, bottom to top);
 *	. at start of logredo(): init in-memory imap with one additional iag page;
 *	.
at end of logredo(): re-read imap inode to determine * new imap size; */ static int diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) { int rc; int iagno, i, xlen; struct inode *ipimap; struct super_block *sb; struct jfs_sb_info *sbi; struct metapage *mp; struct iag *iagp; s64 xaddr = 0; s64 blkno; tid_t tid; struct inode *iplist[1]; /* pick up pointers to the inode map and mount inodes */ ipimap = imap->im_ipimap; sb = ipimap->i_sb; sbi = JFS_SBI(sb); /* acquire the free iag lock */ IAGFREE_LOCK(imap); /* if there are any iags on the inode map free iag list, * allocate the iag from the head of the list. */ if (imap->im_freeiag >= 0) { /* pick up the iag number at the head of the list */ iagno = imap->im_freeiag; /* determine the logical block number of the iag */ blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); } else { /* no free iags. the inode map will have to be extented * to include a new iag. */ /* acquire inode map lock */ IWRITE_LOCK(ipimap, RDWRLOCK_IMAP); if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) { IWRITE_UNLOCK(ipimap); IAGFREE_UNLOCK(imap); jfs_error(imap->im_ipimap->i_sb, "ipimap->i_size is wrong\n"); return -EIO; } /* get the next available iag number */ iagno = imap->im_nextiag; /* make sure that we have not exceeded the maximum inode * number limit. */ if (iagno > (MAXIAGS - 1)) { /* release the inode map lock */ IWRITE_UNLOCK(ipimap); rc = -ENOSPC; goto out; } /* * synchronously append new iag page. 
*/ /* determine the logical address of iag page to append */ blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); /* Allocate extent for new iag page */ xlen = sbi->nbperpage; if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) { /* release the inode map lock */ IWRITE_UNLOCK(ipimap); goto out; } /* * start transaction of update of the inode map * addressing structure pointing to the new iag page; */ tid = txBegin(sb, COMMIT_FORCE); mutex_lock(&JFS_IP(ipimap)->commit_mutex); /* update the inode map addressing structure to point to it */ if ((rc = xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { txEnd(tid); mutex_unlock(&JFS_IP(ipimap)->commit_mutex); /* Free the blocks allocated for the iag since it was * not successfully added to the inode map */ dbFree(ipimap, xaddr, (s64) xlen); /* release the inode map lock */ IWRITE_UNLOCK(ipimap); goto out; } /* update the inode map's inode to reflect the extension */ ipimap->i_size += PSIZE; inode_add_bytes(ipimap, PSIZE); /* assign a buffer for the page */ mp = get_metapage(ipimap, blkno, PSIZE, 0); if (!mp) { /* * This is very unlikely since we just created the * extent, but let's try to handle it correctly */ xtTruncate(tid, ipimap, ipimap->i_size - PSIZE, COMMIT_PWMAP); txAbort(tid, 0); txEnd(tid); mutex_unlock(&JFS_IP(ipimap)->commit_mutex); /* release the inode map lock */ IWRITE_UNLOCK(ipimap); rc = -EIO; goto out; } iagp = (struct iag *) mp->data; /* init the iag */ memset(iagp, 0, sizeof(struct iag)); iagp->iagnum = cpu_to_le32(iagno); iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); iagp->iagfree = cpu_to_le32(-1); iagp->nfreeinos = 0; iagp->nfreeexts = cpu_to_le32(EXTSPERIAG); /* initialize the free inode summary map (free extent * summary map initialization handled by bzero). 
*/ for (i = 0; i < SMAPSZ; i++) iagp->inosmap[i] = cpu_to_le32(ONES); /* * Write and sync the metapage */ flush_metapage(mp); /* * txCommit(COMMIT_FORCE) will synchronously write address * index pages and inode after commit in careful update order * of address index pages (right to left, bottom up); */ iplist[0] = ipimap; rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); txEnd(tid); mutex_unlock(&JFS_IP(ipimap)->commit_mutex); duplicateIXtree(sb, blkno, xlen, &xaddr); /* update the next available iag number */ imap->im_nextiag += 1; /* Add the iag to the iag free list so we don't lose the iag * if a failure happens now. */ imap->im_freeiag = iagno; /* Until we have logredo working, we want the imap inode & * control page to be up to date. */ diSync(ipimap); /* release the inode map lock */ IWRITE_UNLOCK(ipimap); } /* obtain read lock on map */ IREAD_LOCK(ipimap, RDWRLOCK_IMAP); /* read the iag */ if ((rc = diIAGRead(imap, iagno, &mp))) { IREAD_UNLOCK(ipimap); rc = -EIO; goto out; } iagp = (struct iag *) mp->data; /* remove the iag from the iag free list */ imap->im_freeiag = le32_to_cpu(iagp->iagfree); iagp->iagfree = cpu_to_le32(-1); /* set the return iag number and buffer pointer */ *iagnop = iagno; *mpp = mp; out: /* release the iag free lock */ IAGFREE_UNLOCK(imap); return (rc); } /* * NAME: diIAGRead() * * FUNCTION: get the buffer for the specified iag within a fileset * or aggregate inode map. * * PARAMETERS: * imap - pointer to inode map control structure. * iagno - iag number. * bpp - point to buffer pointer to be filled in on successful * exit. * * SERIALIZATION: * must have read lock on imap inode * (When called by diExtendFS, the filesystem is quiesced, therefore * the read lock is unnecessary.) * * RETURN VALUES: * 0 - success. * -EIO - i/o error. */ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) { struct inode *ipimap = imap->im_ipimap; s64 blkno; /* compute the logical block number of the iag. 
*/ blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage); /* read the iag. */ *mpp = read_metapage(ipimap, blkno, PSIZE, 0); if (*mpp == NULL) { return -EIO; } return (0); } /* * NAME: diFindFree() * * FUNCTION: find the first free bit in a word starting at * the specified bit position. * * PARAMETERS: * word - word to be examined. * start - starting bit position. * * RETURN VALUES: * bit position of first free bit in the word or 32 if * no free bits were found. */ static int diFindFree(u32 word, int start) { int bitno; assert(start < 32); /* scan the word for the first free bit. */ for (word <<= start, bitno = start; bitno < 32; bitno++, word <<= 1) { if ((word & HIGHORDER) == 0) break; } return (bitno); } /* * NAME: diUpdatePMap() * * FUNCTION: Update the persistent map in an IAG for the allocation or * freeing of the specified inode. * * PRE CONDITIONS: Working map has already been updated for allocate. * * PARAMETERS: * ipimap - Incore inode map inode * inum - Number of inode to mark in permanent map * is_free - If 'true' indicates inode should be marked freed, otherwise * indicates inode should be marked allocated. 
* * RETURN VALUES: * 0 for success */ int diUpdatePMap(struct inode *ipimap, unsigned long inum, bool is_free, struct tblock * tblk) { int rc; struct iag *iagp; struct metapage *mp; int iagno, ino, extno, bitno; struct inomap *imap; u32 mask; struct jfs_log *log; int lsn, difft, diffp; unsigned long flags; imap = JFS_IP(ipimap)->i_imap; /* get the iag number containing the inode */ iagno = INOTOIAG(inum); /* make sure that the iag is contained within the map */ if (iagno >= imap->im_nextiag) { jfs_error(ipimap->i_sb, "the iag is outside the map\n"); return -EIO; } /* read the iag */ IREAD_LOCK(ipimap, RDWRLOCK_IMAP); rc = diIAGRead(imap, iagno, &mp); IREAD_UNLOCK(ipimap); if (rc) return (rc); metapage_wait_for_io(mp); iagp = (struct iag *) mp->data; /* get the inode number and extent number of the inode within * the iag and the inode number within the extent. */ ino = inum & (INOSPERIAG - 1); extno = ino >> L2INOSPEREXT; bitno = ino & (INOSPEREXT - 1); mask = HIGHORDER >> bitno; /* * mark the inode free in persistent map: */ if (is_free) { /* The inode should have been allocated both in working * map and in persistent map; * the inode will be freed from working map at the release * of last reference release; */ if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { jfs_error(ipimap->i_sb, "inode %ld not marked as allocated in wmap!\n", inum); } if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { jfs_error(ipimap->i_sb, "inode %ld not marked as allocated in pmap!\n", inum); } /* update the bitmap for the extent of the freed inode */ iagp->pmap[extno] &= cpu_to_le32(~mask); } /* * mark the inode allocated in persistent map: */ else { /* The inode should be already allocated in the working map * and should be free in persistent map; */ if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { release_metapage(mp); jfs_error(ipimap->i_sb, "the inode is not allocated in the working map\n"); return -EIO; } if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { release_metapage(mp); 
jfs_error(ipimap->i_sb, "the inode is not free in the persistent map\n"); return -EIO; } /* update the bitmap for the extent of the allocated inode */ iagp->pmap[extno] |= cpu_to_le32(mask); } /* * update iag lsn */ lsn = tblk->lsn; log = JFS_SBI(tblk->sb)->log; LOGSYNC_LOCK(log, flags); if (mp->lsn != 0) { /* inherit older/smaller lsn */ logdiff(difft, lsn, log); logdiff(diffp, mp->lsn, log); if (difft < diffp) { mp->lsn = lsn; /* move mp after tblock in logsync list */ list_move(&mp->synclist, &tblk->synclist); } /* inherit younger/larger clsn */ assert(mp->clsn); logdiff(difft, tblk->clsn, log); logdiff(diffp, mp->clsn, log); if (difft > diffp) mp->clsn = tblk->clsn; } else { mp->log = log; mp->lsn = lsn; /* insert mp after tblock in logsync list */ log->count++; list_add(&mp->synclist, &tblk->synclist); mp->clsn = tblk->clsn; } LOGSYNC_UNLOCK(log, flags); write_metapage(mp); return (0); } /* * diExtendFS() * * function: update imap for extendfs(); * * note: AG size has been increased s.t. each k old contiguous AGs are * coalesced into a new AG; */ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) { int rc, rcx = 0; struct inomap *imap = JFS_IP(ipimap)->i_imap; struct iag *iagp = NULL, *hiagp = NULL; struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap; struct metapage *bp, *hbp; int i, n, head; int numinos, xnuminos = 0, xnumfree = 0; s64 agstart; jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d", imap->im_nextiag, atomic_read(&imap->im_numinos), atomic_read(&imap->im_numfree)); /* * reconstruct imap * * coalesce contiguous k (newAGSize/oldAGSize) AGs; * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; * note: new AG size = old AG size * (2**x). 
*/ /* init per AG control information im_agctl[] */ for (i = 0; i < MAXAG; i++) { imap->im_agctl[i].inofree = -1; imap->im_agctl[i].extfree = -1; imap->im_agctl[i].numinos = 0; /* number of backed inodes */ imap->im_agctl[i].numfree = 0; /* number of free backed inodes */ } /* * process each iag page of the map. * * rebuild AG Free Inode List, AG Free Inode Extent List; */ for (i = 0; i < imap->im_nextiag; i++) { if ((rc = diIAGRead(imap, i, &bp))) { rcx = rc; continue; } iagp = (struct iag *) bp->data; if (le32_to_cpu(iagp->iagnum) != i) { release_metapage(bp); jfs_error(ipimap->i_sb, "unexpected value of iagnum\n"); return -EIO; } /* leave free iag in the free iag list */ if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { release_metapage(bp); continue; } agstart = le64_to_cpu(iagp->agstart); n = agstart >> mp->db_agl2size; iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size); /* compute backed inodes */ numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) << L2INOSPEREXT; if (numinos > 0) { /* merge AG backed inodes */ imap->im_agctl[n].numinos += numinos; xnuminos += numinos; } /* if any backed free inodes, insert at AG free inode list */ if ((int) le32_to_cpu(iagp->nfreeinos) > 0) { if ((head = imap->im_agctl[n].inofree) == -1) { iagp->inofreefwd = cpu_to_le32(-1); iagp->inofreeback = cpu_to_le32(-1); } else { if ((rc = diIAGRead(imap, head, &hbp))) { rcx = rc; goto nextiag; } hiagp = (struct iag *) hbp->data; hiagp->inofreeback = iagp->iagnum; iagp->inofreefwd = cpu_to_le32(head); iagp->inofreeback = cpu_to_le32(-1); write_metapage(hbp); } imap->im_agctl[n].inofree = le32_to_cpu(iagp->iagnum); /* merge AG backed free inodes */ imap->im_agctl[n].numfree += le32_to_cpu(iagp->nfreeinos); xnumfree += le32_to_cpu(iagp->nfreeinos); } /* if any free extents, insert at AG free extent list */ if (le32_to_cpu(iagp->nfreeexts) > 0) { if ((head = imap->im_agctl[n].extfree) == -1) { iagp->extfreefwd = cpu_to_le32(-1); iagp->extfreeback = cpu_to_le32(-1); } else { if 
((rc = diIAGRead(imap, head, &hbp))) { rcx = rc; goto nextiag; } hiagp = (struct iag *) hbp->data; hiagp->extfreeback = iagp->iagnum; iagp->extfreefwd = cpu_to_le32(head); iagp->extfreeback = cpu_to_le32(-1); write_metapage(hbp); } imap->im_agctl[n].extfree = le32_to_cpu(iagp->iagnum); } nextiag: write_metapage(bp); } if (xnuminos != atomic_read(&imap->im_numinos) || xnumfree != atomic_read(&imap->im_numfree)) { jfs_error(ipimap->i_sb, "numinos or numfree incorrect\n"); return -EIO; } return rcx; } /* * duplicateIXtree() * * serialization: IWRITE_LOCK held on entry/exit * * note: shadow page with regular inode (rel.2); */ static void duplicateIXtree(struct super_block *sb, s64 blkno, int xlen, s64 *xaddr) { struct jfs_superblock *j_sb; struct buffer_head *bh; struct inode *ip; tid_t tid; /* if AIT2 ipmap2 is bad, do not try to update it */ if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */ return; ip = diReadSpecial(sb, FILESYSTEM_I, 1); if (ip == NULL) { JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; if (readSuper(sb, &bh)) return; j_sb = (struct jfs_superblock *)bh->b_data; j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT); mark_buffer_dirty(bh); sync_dirty_buffer(bh); brelse(bh); return; } /* start transaction */ tid = txBegin(sb, COMMIT_FORCE); /* update the inode map addressing structure to point to it */ if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) { JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; txAbort(tid, 1); goto cleanup; } /* update the inode map's inode to reflect the extension */ ip->i_size += PSIZE; inode_add_bytes(ip, PSIZE); txCommit(tid, 1, &ip, COMMIT_FORCE); cleanup: txEnd(tid); diFreeSpecial(ip); } /* * NAME: copy_from_dinode() * * FUNCTION: Copies inode info from disk inode to in-memory inode * * RETURN VALUES: * 0 - success * -ENOMEM - insufficient memory */ static int copy_from_dinode(struct dinode * dip, struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); jfs_ip->fileset = le32_to_cpu(dip->di_fileset); 
jfs_ip->mode2 = le32_to_cpu(dip->di_mode); jfs_set_inode_flags(ip); ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; if (sbi->umask != -1) { ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask); /* For directories, add x permission if r is allowed by umask */ if (S_ISDIR(ip->i_mode)) { if (ip->i_mode & 0400) ip->i_mode |= 0100; if (ip->i_mode & 0040) ip->i_mode |= 0010; if (ip->i_mode & 0004) ip->i_mode |= 0001; } } set_nlink(ip, le32_to_cpu(dip->di_nlink)); jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid)); if (!uid_valid(sbi->uid)) ip->i_uid = jfs_ip->saved_uid; else { ip->i_uid = sbi->uid; } jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid)); if (!gid_valid(sbi->gid)) ip->i_gid = jfs_ip->saved_gid; else { ip->i_gid = sbi->gid; } ip->i_size = le64_to_cpu(dip->di_size); inode_set_atime(ip, le32_to_cpu(dip->di_atime.tv_sec), le32_to_cpu(dip->di_atime.tv_nsec)); inode_set_mtime(ip, le32_to_cpu(dip->di_mtime.tv_sec), le32_to_cpu(dip->di_mtime.tv_nsec)); inode_set_ctime(ip, le32_to_cpu(dip->di_ctime.tv_sec), le32_to_cpu(dip->di_ctime.tv_nsec)); ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); ip->i_generation = le32_to_cpu(dip->di_gen); jfs_ip->ixpxd = dip->di_ixpxd; /* in-memory pxd's are little-endian */ jfs_ip->acl = dip->di_acl; /* as are dxd's */ jfs_ip->ea = dip->di_ea; jfs_ip->next_index = le32_to_cpu(dip->di_next_index); jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec); jfs_ip->acltype = le32_to_cpu(dip->di_acltype); if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) { jfs_ip->dev = le32_to_cpu(dip->di_rdev); ip->i_rdev = new_decode_dev(jfs_ip->dev); } if (S_ISDIR(ip->i_mode)) { memcpy(&jfs_ip->u.dir, &dip->u._dir, 384); } else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) { memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288); } else memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128); /* Zero the in-memory-only stuff */ jfs_ip->cflag = 0; jfs_ip->btindex = 0; jfs_ip->btorder = 0; jfs_ip->bxflag = 0; 
jfs_ip->blid = 0; jfs_ip->atlhead = 0; jfs_ip->atltail = 0; jfs_ip->xtlid = 0; return (0); } /* * NAME: copy_to_dinode() * * FUNCTION: Copies inode info from in-memory inode to disk inode */ static void copy_to_dinode(struct dinode * dip, struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); dip->di_fileset = cpu_to_le32(jfs_ip->fileset); dip->di_inostamp = cpu_to_le32(sbi->inostamp); dip->di_number = cpu_to_le32(ip->i_ino); dip->di_gen = cpu_to_le32(ip->i_generation); dip->di_size = cpu_to_le64(ip->i_size); dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); dip->di_nlink = cpu_to_le32(ip->i_nlink); if (!uid_valid(sbi->uid)) dip->di_uid = cpu_to_le32(i_uid_read(ip)); else dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns, jfs_ip->saved_uid)); if (!gid_valid(sbi->gid)) dip->di_gid = cpu_to_le32(i_gid_read(ip)); else dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns, jfs_ip->saved_gid)); /* * mode2 is only needed for storing the higher order bits. 
* Trust i_mode for the lower order ones */ if (sbi->umask == -1) dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | ip->i_mode); else /* Leave the original permissions alone */ dip->di_mode = cpu_to_le32(jfs_ip->mode2); dip->di_atime.tv_sec = cpu_to_le32(inode_get_atime_sec(ip)); dip->di_atime.tv_nsec = cpu_to_le32(inode_get_atime_nsec(ip)); dip->di_ctime.tv_sec = cpu_to_le32(inode_get_ctime_sec(ip)); dip->di_ctime.tv_nsec = cpu_to_le32(inode_get_ctime_nsec(ip)); dip->di_mtime.tv_sec = cpu_to_le32(inode_get_mtime_sec(ip)); dip->di_mtime.tv_nsec = cpu_to_le32(inode_get_mtime_nsec(ip)); dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */ dip->di_acl = jfs_ip->acl; /* as are dxd's */ dip->di_ea = jfs_ip->ea; dip->di_next_index = cpu_to_le32(jfs_ip->next_index); dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime); dip->di_otime.tv_nsec = 0; dip->di_acltype = cpu_to_le32(jfs_ip->acltype); if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) dip->di_rdev = cpu_to_le32(jfs_ip->dev); } |
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_MLD_H
#define LINUX_MLD_H

#include <linux/in6.h>
#include <linux/icmpv6.h>

/* MLDv1 Query/Report/Done */
struct mld_msg {
	struct icmp6hdr		mld_hdr;
	struct in6_addr		mld_mca;
};

#define mld_type		mld_hdr.icmp6_type
#define mld_code		mld_hdr.icmp6_code
#define mld_cksum		mld_hdr.icmp6_cksum
#define mld_maxdelay		mld_hdr.icmp6_maxdelay
#define mld_reserved		mld_hdr.icmp6_dataun.un_data16[1]

/* Multicast Listener Discovery version 2 headers */
/* MLDv2 Report */
struct mld2_grec {
	__u8		grec_type;
	__u8		grec_auxwords;
	__be16		grec_nsrcs;
	struct in6_addr	grec_mca;
	struct in6_addr	grec_src[];
};

struct mld2_report {
	struct icmp6hdr	mld2r_hdr;
	struct mld2_grec mld2r_grec[];
};

#define mld2r_type		mld2r_hdr.icmp6_type
#define mld2r_resv1		mld2r_hdr.icmp6_code
#define mld2r_cksum		mld2r_hdr.icmp6_cksum
#define mld2r_resv2		mld2r_hdr.icmp6_dataun.un_data16[0]
#define mld2r_ngrec		mld2r_hdr.icmp6_dataun.un_data16[1]

/* MLDv2 Query */
struct mld2_query {
	struct icmp6hdr		mld2q_hdr;
	struct in6_addr		mld2q_mca;
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8			mld2q_qrv:3,
				mld2q_suppress:1,
				mld2q_resv2:4;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u8			mld2q_resv2:4,
				mld2q_suppress:1,
				mld2q_qrv:3;
#else
#error "Please fix <asm/byteorder.h>"
#endif
	__u8			mld2q_qqic;
	__be16			mld2q_nsrcs;
	struct in6_addr		mld2q_srcs[];
};

#define mld2q_type		mld2q_hdr.icmp6_type
#define mld2q_code		mld2q_hdr.icmp6_code
#define mld2q_cksum		mld2q_hdr.icmp6_cksum
#define mld2q_mrc		mld2q_hdr.icmp6_maxdelay
#define mld2q_resv1		mld2q_hdr.icmp6_dataun.un_data16[1]

/* RFC3810, 5.1.3. Maximum Response Code:
 *
 * If Maximum Response Code >= 32768, Maximum Response Code represents a
 * floating-point value as follows:
 *
 *  0 1 2 3 4 5 6 7 8 9 A B C D E F
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |1| exp |          mant         |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 */
#define MLDV2_MRC_EXP(value)	(((value) >> 12) & 0x0007)
#define MLDV2_MRC_MAN(value)	((value) & 0x0fff)

/* RFC3810, 5.1.9. QQIC (Querier's Query Interval Code):
 *
 * If QQIC >= 128, QQIC represents a floating-point value as follows:
 *
 *  0 1 2 3 4 5 6 7
 * +-+-+-+-+-+-+-+-+
 * |1| exp | mant  |
 * +-+-+-+-+-+-+-+-+
 */
#define MLDV2_QQIC_EXP(value)	(((value) >> 4) & 0x07)
#define MLDV2_QQIC_MAN(value)	((value) & 0x0f)

#define MLD_EXP_MIN_LIMIT	32768UL
#define MLDV1_MRD_MAX_COMPAT	(MLD_EXP_MIN_LIMIT - 1)

#define MLD_MAX_QUEUE		8
#define MLD_MAX_SKBS		32

/*
 * mldv2_mrc - decode the Maximum Response Code of an MLDv2 query.
 * @mlh2: query header; the 16-bit code is read in network byte order.
 *
 * Codes below MLD_EXP_MIN_LIMIT are returned unchanged.  Larger codes use
 * the exp/mant layout shown above and expand to
 * (mant | 0x1000) << (exp + 3), per RFC3810, 5.1.3.
 */
static inline unsigned long mldv2_mrc(const struct mld2_query *mlh2)
{
	unsigned long code = ntohs(mlh2->mld2q_mrc);
	unsigned long mant, exp;

	/* linear range: the code is the value itself */
	if (code < MLD_EXP_MIN_LIMIT)
		return code;

	/* exponential range */
	exp = MLDV2_MRC_EXP(code);
	mant = MLDV2_MRC_MAN(code);

	return (mant | 0x1000) << (exp + 3);
}

#endif
3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 | // SPDX-License-Identifier: GPL-2.0-only /* ipv6header match - matches IPv6 packets based on whether they contain certain headers */ /* Original idea: Brad Chapman * Rewritten by: Andras Kis-Szabo <kisza@sch.bme.hu> */ /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> #include <linux/types.h> #include <net/checksum.h> #include <net/ipv6.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6/ip6t_ipv6header.h> MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: IPv6 header types match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); static bool ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par) { const struct ip6t_ipv6header_info *info = par->matchinfo; unsigned int temp; int len; u8 nexthdr; unsigned int ptr; /* Make sure this isn't an evil packet */ /* type of the 1st exthdr */ nexthdr = ipv6_hdr(skb)->nexthdr; /* pointer to the 1st exthdr */ ptr = sizeof(struct ipv6hdr); /* available length */ len = skb->len - ptr; temp = 0; while (nf_ip6_ext_hdr(nexthdr)) { const struct ipv6_opt_hdr *hp; struct ipv6_opt_hdr _hdr; int hdrlen; /* No more exthdr -> evaluate */ if (nexthdr == NEXTHDR_NONE) { temp |= MASK_NONE; break; } /* Is there enough space for the next ext header? 
*/ if (len < (int)sizeof(struct ipv6_opt_hdr)) return false; /* ESP -> evaluate */ if (nexthdr == NEXTHDR_ESP) { temp |= MASK_ESP; break; } hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); if (!hp) { par->hotdrop = true; return false; } /* Calculate the header length */ if (nexthdr == NEXTHDR_FRAGMENT) hdrlen = 8; else if (nexthdr == NEXTHDR_AUTH) hdrlen = ipv6_authlen(hp); else hdrlen = ipv6_optlen(hp); /* set the flag */ switch (nexthdr) { case NEXTHDR_HOP: temp |= MASK_HOPOPTS; break; case NEXTHDR_ROUTING: temp |= MASK_ROUTING; break; case NEXTHDR_FRAGMENT: temp |= MASK_FRAGMENT; break; case NEXTHDR_AUTH: temp |= MASK_AH; break; case NEXTHDR_DEST: temp |= MASK_DSTOPTS; break; default: return false; } nexthdr = hp->nexthdr; len -= hdrlen; ptr += hdrlen; if (ptr > skb->len) break; } if (nexthdr != NEXTHDR_NONE && nexthdr != NEXTHDR_ESP) temp |= MASK_PROTO; if (info->modeflag) return !((temp ^ info->matchflags ^ info->invflags) & info->matchflags); else { if (info->invflags) return temp != info->matchflags; else return temp == info->matchflags; } } static int ipv6header_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_ipv6header_info *info = par->matchinfo; /* invflags is 0 or 0xff in hard mode */ if ((!info->modeflag) && info->invflags != 0x00 && info->invflags != 0xFF) return -EINVAL; return 0; } static struct xt_match ipv6header_mt6_reg __read_mostly = { .name = "ipv6header", .family = NFPROTO_IPV6, .match = ipv6header_mt6, .matchsize = sizeof(struct ip6t_ipv6header_info), .checkentry = ipv6header_mt6_check, .destroy = NULL, .me = THIS_MODULE, }; static int __init ipv6header_mt6_init(void) { return xt_register_match(&ipv6header_mt6_reg); } static void __exit ipv6header_mt6_exit(void) { xt_unregister_match(&ipv6header_mt6_reg); } module_init(ipv6header_mt6_init); module_exit(ipv6header_mt6_exit); |
39 42 11 11 1 10 11 11 10 1 1 10 10 10 8 9 10 9 8 10 10 10 6 3 9 1 10 8 9 8 10 2 1 2 1 1 3 2 1 2 3 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 2 1 2 1 24 20 17 2 17 1 18 16 17 || // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Matthias Schiffer */ #include "netlink.h" #include "main.h" #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/cache.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/genetlink.h> #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/limits.h> #include <linux/list.h> #include <linux/minmax.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/printk.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/stddef.h> #include <linux/types.h> #include <net/genetlink.h> #include <net/net_namespace.h> #include <net/netlink.h> #include <net/sock.h> #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "gateway_client.h" #include "gateway_common.h" #include "hard-interface.h" #include "log.h" #include "multicast.h" #include "network-coding.h" #include "originator.h" #include "soft-interface.h" #include "tp_meter.h" #include "translation-table.h" struct genl_family batadv_netlink_family; /* multicast groups */ enum batadv_netlink_multicast_groups { BATADV_NL_MCGRP_CONFIG, BATADV_NL_MCGRP_TPMETER, }; /** * enum batadv_genl_ops_flags - flags for genl_ops's internal_flags */ enum batadv_genl_ops_flags { /** * @BATADV_FLAG_NEED_MESH: request requires valid soft interface in * attribute BATADV_ATTR_MESH_IFINDEX and expects a pointer to it to be * saved in info->user_ptr[0] */ BATADV_FLAG_NEED_MESH = BIT(0), /** * @BATADV_FLAG_NEED_HARDIF: request requires valid hard 
interface in * attribute BATADV_ATTR_HARD_IFINDEX and expects a pointer to it to be * saved in info->user_ptr[1] */ BATADV_FLAG_NEED_HARDIF = BIT(1), /** * @BATADV_FLAG_NEED_VLAN: request requires valid vlan in * attribute BATADV_ATTR_VLANID and expects a pointer to it to be * saved in info->user_ptr[1] */ BATADV_FLAG_NEED_VLAN = BIT(2), }; static const struct genl_multicast_group batadv_netlink_mcgrps[] = { [BATADV_NL_MCGRP_CONFIG] = { .name = BATADV_NL_MCAST_GROUP_CONFIG }, [BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER }, }; static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_VERSION] = { .type = NLA_STRING }, [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING }, [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 }, [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING }, [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN }, [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 }, [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING }, [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN }, [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN }, [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 }, [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 }, [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 }, [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 }, [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG }, [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN }, [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 }, [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 }, [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 }, [BATADV_ATTR_TT_VID] = { .type = NLA_U16 }, [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 }, [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG }, [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 }, [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN }, [BATADV_ATTR_TQ] = { .type = NLA_U8 }, [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 }, [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 }, [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 }, [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN }, 
[BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG }, [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN }, [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 }, [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN }, [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 }, [BATADV_ATTR_DAT_CACHE_IP4ADDRESS] = { .type = NLA_U32 }, [BATADV_ATTR_DAT_CACHE_HWADDRESS] = { .len = ETH_ALEN }, [BATADV_ATTR_DAT_CACHE_VID] = { .type = NLA_U16 }, [BATADV_ATTR_MCAST_FLAGS] = { .type = NLA_U32 }, [BATADV_ATTR_MCAST_FLAGS_PRIV] = { .type = NLA_U32 }, [BATADV_ATTR_VLANID] = { .type = NLA_U16 }, [BATADV_ATTR_AGGREGATED_OGMS_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_AP_ISOLATION_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_ISOLATION_MARK] = { .type = NLA_U32 }, [BATADV_ATTR_ISOLATION_MASK] = { .type = NLA_U32 }, [BATADV_ATTR_BONDING_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_FRAGMENTATION_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_GW_BANDWIDTH_DOWN] = { .type = NLA_U32 }, [BATADV_ATTR_GW_BANDWIDTH_UP] = { .type = NLA_U32 }, [BATADV_ATTR_GW_MODE] = { .type = NLA_U8 }, [BATADV_ATTR_GW_SEL_CLASS] = { .type = NLA_U32 }, [BATADV_ATTR_HOP_PENALTY] = { .type = NLA_U8 }, [BATADV_ATTR_LOG_LEVEL] = { .type = NLA_U32 }, [BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_MULTICAST_FANOUT] = { .type = NLA_U32 }, [BATADV_ATTR_NETWORK_CODING_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_ORIG_INTERVAL] = { .type = NLA_U32 }, [BATADV_ATTR_ELP_INTERVAL] = { .type = NLA_U32 }, [BATADV_ATTR_THROUGHPUT_OVERRIDE] = { .type = NLA_U32 }, }; /** * batadv_netlink_get_ifindex() - Extract an interface index from a message * @nlh: Message header * @attrtype: Attribute which holds an interface index * * Return: interface index, or 0. 
*/ int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) { struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype); return (attr && nla_len(attr) == sizeof(u32)) ? nla_get_u32(attr) : 0; } /** * batadv_netlink_mesh_fill_ap_isolation() - Add ap_isolation softif attribute * @msg: Netlink message to dump into * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success or negative error number in case of failure */ static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg, struct batadv_priv *bat_priv) { struct batadv_softif_vlan *vlan; u8 ap_isolation; vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); if (!vlan) return 0; ap_isolation = atomic_read(&vlan->ap_isolation); batadv_softif_vlan_put(vlan); return nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED, !!ap_isolation); } /** * batadv_netlink_set_mesh_ap_isolation() - Set ap_isolation from genl msg * @attr: parsed BATADV_ATTR_AP_ISOLATION_ENABLED attribute * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success or negative error number in case of failure */ static int batadv_netlink_set_mesh_ap_isolation(struct nlattr *attr, struct batadv_priv *bat_priv) { struct batadv_softif_vlan *vlan; vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); if (!vlan) return -ENOENT; atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr)); batadv_softif_vlan_put(vlan); return 0; } /** * batadv_netlink_mesh_fill() - Fill message with mesh attributes * @msg: Netlink message to dump into * @bat_priv: the bat priv with all the soft interface information * @cmd: type of message to generate * @portid: Port making netlink request * @seq: sequence number for message * @flags: Additional flags for message * * Return: 0 on success or negative error number in case of failure */ static int batadv_netlink_mesh_fill(struct sk_buff *msg, struct batadv_priv *bat_priv, enum batadv_nl_commands cmd, u32 portid, u32 seq, int flags) { 
struct net_device *soft_iface = bat_priv->soft_iface; struct batadv_hard_iface *primary_if = NULL; struct net_device *hard_iface; void *hdr; hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd); if (!hdr) return -ENOBUFS; if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) || nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_priv->algo_ops->name) || nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) || nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) || nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN, soft_iface->dev_addr) || nla_put_u8(msg, BATADV_ATTR_TT_TTVN, (u8)atomic_read(&bat_priv->tt.vn))) goto nla_put_failure; #ifdef CONFIG_BATMAN_ADV_BLA if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC, ntohs(bat_priv->bla.claim_dest.group))) goto nla_put_failure; #endif if (batadv_mcast_mesh_info_put(msg, bat_priv)) goto nla_put_failure; primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) { hard_iface = primary_if->net_dev; if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, hard_iface->ifindex) || nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, hard_iface->name) || nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN, hard_iface->dev_addr)) goto nla_put_failure; } if (nla_put_u8(msg, BATADV_ATTR_AGGREGATED_OGMS_ENABLED, !!atomic_read(&bat_priv->aggregated_ogms))) goto nla_put_failure; if (batadv_netlink_mesh_fill_ap_isolation(msg, bat_priv)) goto nla_put_failure; if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MARK, bat_priv->isolation_mark)) goto nla_put_failure; if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MASK, bat_priv->isolation_mark_mask)) goto nla_put_failure; if (nla_put_u8(msg, BATADV_ATTR_BONDING_ENABLED, !!atomic_read(&bat_priv->bonding))) goto nla_put_failure; #ifdef CONFIG_BATMAN_ADV_BLA if (nla_put_u8(msg, BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED, !!atomic_read(&bat_priv->bridge_loop_avoidance))) goto nla_put_failure; #endif /* CONFIG_BATMAN_ADV_BLA */ #ifdef 
CONFIG_BATMAN_ADV_DAT if (nla_put_u8(msg, BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED, !!atomic_read(&bat_priv->distributed_arp_table))) goto nla_put_failure; #endif /* CONFIG_BATMAN_ADV_DAT */ if (nla_put_u8(msg, BATADV_ATTR_FRAGMENTATION_ENABLED, !!atomic_read(&bat_priv->fragmentation))) goto nla_put_failure; if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_DOWN, atomic_read(&bat_priv->gw.bandwidth_down))) goto nla_put_failure; if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_UP, atomic_read(&bat_priv->gw.bandwidth_up))) goto nla_put_failure; if (nla_put_u8(msg, BATADV_ATTR_GW_MODE, atomic_read(&bat_priv->gw.mode))) goto nla_put_failure; if (bat_priv->algo_ops->gw.get_best_gw_node && bat_priv->algo_ops->gw.is_eligible) { /* GW selection class is not available if the routing algorithm * in use does not implement the GW API */ if (nla_put_u32(msg, BATADV_ATTR_GW_SEL_CLASS, atomic_read(&bat_priv->gw.sel_class))) goto nla_put_failure; } if (nla_put_u8(msg, BATADV_ATTR_HOP_PENALTY, atomic_read(&bat_priv->hop_penalty))) goto nla_put_failure; #ifdef CONFIG_BATMAN_ADV_DEBUG if (nla_put_u32(msg, BATADV_ATTR_LOG_LEVEL, atomic_read(&bat_priv->log_level))) goto nla_put_failure; #endif /* CONFIG_BATMAN_ADV_DEBUG */ #ifdef CONFIG_BATMAN_ADV_MCAST if (nla_put_u8(msg, BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED, !atomic_read(&bat_priv->multicast_mode))) goto nla_put_failure; if (nla_put_u32(msg, BATADV_ATTR_MULTICAST_FANOUT, atomic_read(&bat_priv->multicast_fanout))) goto nla_put_failure; #endif /* CONFIG_BATMAN_ADV_MCAST */ #ifdef CONFIG_BATMAN_ADV_NC if (nla_put_u8(msg, BATADV_ATTR_NETWORK_CODING_ENABLED, !!atomic_read(&bat_priv->network_coding))) goto nla_put_failure; #endif /* CONFIG_BATMAN_ADV_NC */ if (nla_put_u32(msg, BATADV_ATTR_ORIG_INTERVAL, atomic_read(&bat_priv->orig_interval))) goto nla_put_failure; batadv_hardif_put(primary_if); genlmsg_end(msg, hdr); return 0; nla_put_failure: batadv_hardif_put(primary_if); genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** * 
batadv_netlink_notify_mesh() - send softif attributes to listener
 * @bat_priv: the bat priv with all the soft interface information
 *
 * Builds a BATADV_CMD_SET_MESH message and multicasts it to the
 * BATADV_NL_MCGRP_CONFIG group in the net namespace of the soft interface.
 *
 * Return: 0 on success, < 0 on error
 */
static int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv)
{
	struct sk_buff *msg;
	int ret;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_SET_MESH,
				       0, 0, 0);
	if (ret < 0) {
		nlmsg_free(msg);
		return ret;
	}

	/* the result of the multicast is not checked; 0 is returned anyway */
	genlmsg_multicast_netns(&batadv_netlink_family,
				dev_net(bat_priv->soft_iface), msg, 0,
				BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);

	return 0;
}

/**
 * batadv_netlink_get_mesh() - Get softif attributes
 * @skb: Netlink message with request data
 * @info: receiver information
 *
 * Return: 0 on success or negative error number in case of failure
 */
static int batadv_netlink_get_mesh(struct sk_buff *skb, struct genl_info *info)
{
	/* user_ptr[0] is set by batadv_pre_doit() for BATADV_FLAG_NEED_MESH */
	struct batadv_priv *bat_priv = info->user_ptr[0];
	struct sk_buff *msg;
	int ret;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_GET_MESH,
				       info->snd_portid, info->snd_seq, 0);
	if (ret < 0) {
		nlmsg_free(msg);
		return ret;
	}

	ret = genlmsg_reply(msg, info);

	return ret;
}

/**
 * batadv_netlink_set_mesh() - Set softif attributes
 * @skb: Netlink message with request data
 * @info: receiver information
 *
 * Every attribute is optional; only the attributes present in the request
 * are applied. Out-of-range values for BATADV_ATTR_GW_MODE and
 * BATADV_ATTR_GW_SEL_CLASS are silently ignored. A notification is sent to
 * the config multicast group afterwards.
 *
 * Return: 0 on success or negative error number in case of failure
 */
static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
{
	struct batadv_priv *bat_priv = info->user_ptr[0];
	struct nlattr *attr;

	if (info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]) {
		attr = info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED];

		atomic_set(&bat_priv->aggregated_ogms, !!nla_get_u8(attr));
	}

	if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) {
		attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED];

		/* the return value (-ENOENT on a missing vlan) is ignored */
		batadv_netlink_set_mesh_ap_isolation(attr, bat_priv);
	}

	if (info->attrs[BATADV_ATTR_ISOLATION_MARK]) {
		attr = info->attrs[BATADV_ATTR_ISOLATION_MARK];

		bat_priv->isolation_mark = nla_get_u32(attr);
	}

	if (info->attrs[BATADV_ATTR_ISOLATION_MASK]) {
		attr = info->attrs[BATADV_ATTR_ISOLATION_MASK];

		bat_priv->isolation_mark_mask = nla_get_u32(attr);
	}

	if (info->attrs[BATADV_ATTR_BONDING_ENABLED]) {
		attr = info->attrs[BATADV_ATTR_BONDING_ENABLED];

		atomic_set(&bat_priv->bonding, !!nla_get_u8(attr));
	}

#ifdef CONFIG_BATMAN_ADV_BLA
	if (info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]) {
		attr = info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED];

		atomic_set(&bat_priv->bridge_loop_avoidance,
			   !!nla_get_u8(attr));
		batadv_bla_status_update(bat_priv->soft_iface);
	}
#endif /* CONFIG_BATMAN_ADV_BLA */

#ifdef CONFIG_BATMAN_ADV_DAT
	if (info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]) {
		attr = info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED];

		atomic_set(&bat_priv->distributed_arp_table,
			   !!nla_get_u8(attr));
		batadv_dat_status_update(bat_priv->soft_iface);
	}
#endif /* CONFIG_BATMAN_ADV_DAT */

	if (info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]) {
		attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED];

		atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr));

		/* the MTU update is done while holding the rtnl lock */
		rtnl_lock();
		batadv_update_min_mtu(bat_priv->soft_iface);
		rtnl_unlock();
	}

	if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) {
		attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN];

		atomic_set(&bat_priv->gw.bandwidth_down, nla_get_u32(attr));
		batadv_gw_tvlv_container_update(bat_priv);
	}

	if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP]) {
		attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP];

		atomic_set(&bat_priv->gw.bandwidth_up, nla_get_u32(attr));
		batadv_gw_tvlv_container_update(bat_priv);
	}

	if (info->attrs[BATADV_ATTR_GW_MODE]) {
		u8 gw_mode;

		attr = info->attrs[BATADV_ATTR_GW_MODE];
		gw_mode = nla_get_u8(attr);

		/* values above BATADV_GW_MODE_SERVER are ignored */
		if (gw_mode <= BATADV_GW_MODE_SERVER) {
			/* Invoking batadv_gw_reselect() is not enough to really
			 * de-select the current GW. It will only instruct the
			 * gateway client code to perform a re-election the next
			 * time that this is needed.
			 *
			 * When gw client mode is being switched off the current
			 * GW must be de-selected explicitly otherwise no GW_ADD
			 * uevent is thrown on client mode re-activation. This
			 * operation is performed in
			 * batadv_gw_check_client_stop().
			 */
			batadv_gw_reselect(bat_priv);

			/* always call batadv_gw_check_client_stop() before
			 * changing the gateway state
			 */
			batadv_gw_check_client_stop(bat_priv);
			atomic_set(&bat_priv->gw.mode, gw_mode);
			batadv_gw_tvlv_container_update(bat_priv);
		}
	}

	if (info->attrs[BATADV_ATTR_GW_SEL_CLASS] &&
	    bat_priv->algo_ops->gw.get_best_gw_node &&
	    bat_priv->algo_ops->gw.is_eligible) {
		/* setting the GW selection class is allowed only if the routing
		 * algorithm in use implements the GW API
		 */

		u32 sel_class_max = bat_priv->algo_ops->gw.sel_class_max;
		u32 sel_class;

		attr = info->attrs[BATADV_ATTR_GW_SEL_CLASS];
		sel_class = nla_get_u32(attr);

		/* only values in [1, sel_class_max] are accepted */
		if (sel_class >= 1 && sel_class <= sel_class_max) {
			atomic_set(&bat_priv->gw.sel_class, sel_class);
			batadv_gw_reselect(bat_priv);
		}
	}

	if (info->attrs[BATADV_ATTR_HOP_PENALTY]) {
		attr = info->attrs[BATADV_ATTR_HOP_PENALTY];

		atomic_set(&bat_priv->hop_penalty, nla_get_u8(attr));
	}

#ifdef CONFIG_BATMAN_ADV_DEBUG
	if (info->attrs[BATADV_ATTR_LOG_LEVEL]) {
		attr = info->attrs[BATADV_ATTR_LOG_LEVEL];

		/* bits outside of BATADV_DBG_ALL are dropped */
		atomic_set(&bat_priv->log_level,
			   nla_get_u32(attr) & BATADV_DBG_ALL);
	}
#endif /* CONFIG_BATMAN_ADV_DEBUG */

#ifdef CONFIG_BATMAN_ADV_MCAST
	if (info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]) {
		attr = info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED];

		/* stored value is the negation of the received attribute */
		atomic_set(&bat_priv->multicast_mode, !nla_get_u8(attr));
	}

	if (info->attrs[BATADV_ATTR_MULTICAST_FANOUT]) {
		attr = info->attrs[BATADV_ATTR_MULTICAST_FANOUT];

		atomic_set(&bat_priv->multicast_fanout, nla_get_u32(attr));
	}
#endif /* CONFIG_BATMAN_ADV_MCAST */

#ifdef CONFIG_BATMAN_ADV_NC
	if (info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]) {
		attr = info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED];

		atomic_set(&bat_priv->network_coding, !!nla_get_u8(attr));
		batadv_nc_status_update(bat_priv->soft_iface);
	}
#endif /* CONFIG_BATMAN_ADV_NC */

	if (info->attrs[BATADV_ATTR_ORIG_INTERVAL]) {
		u32 orig_interval;

		attr = info->attrs[BATADV_ATTR_ORIG_INTERVAL];
		orig_interval = nla_get_u32(attr);

		/* clamp the interval to [2 * BATADV_JITTER, INT_MAX] */
		orig_interval = min_t(u32, orig_interval, INT_MAX);
		orig_interval = max_t(u32, orig_interval, 2 * BATADV_JITTER);

		atomic_set(&bat_priv->orig_interval, orig_interval);
	}

	/* the notification result is not propagated to the caller */
	batadv_netlink_notify_mesh(bat_priv);

	return 0;
}

/**
 * batadv_netlink_tp_meter_put() - Fill information of started tp_meter session
 * @msg: netlink message to be sent back
 * @cookie: tp meter session cookie
 *
 * Return: 0 on success, < 0 on error
 */
static int
batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie)
{
	if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie))
		return -ENOBUFS;

	return 0;
}

/**
 * batadv_netlink_tpmeter_notify() - send tp_meter result via netlink to client
 * @bat_priv: the bat priv with all the soft interface information
 * @dst: destination of tp_meter session
 * @result: reason for tp meter session stop
 * @test_time: total time of the tp_meter session
 * @total_bytes: bytes acked to the receiver
 * @cookie: cookie of tp_meter session
 *
 * The message is multicast to the BATADV_NL_MCGRP_TPMETER group.
 *
 * Return: 0 on success, < 0 on error
 */
int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
				  u8 result, u32 test_time, u64 total_bytes,
				  u32 cookie)
{
	struct sk_buff *msg;
	void *hdr;
	int ret;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &batadv_netlink_family, 0,
			  BATADV_CMD_TP_METER);
	if (!hdr) {
		ret = -ENOBUFS;
		goto err_genlmsg;
	}

	if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie))
		goto nla_put_failure;

	if (nla_put_u32(msg, BATADV_ATTR_TPMETER_TEST_TIME, test_time))
		goto nla_put_failure;

	if (nla_put_u64_64bit(msg, BATADV_ATTR_TPMETER_BYTES, total_bytes,
			      BATADV_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_u8(msg, BATADV_ATTR_TPMETER_RESULT, result))
		goto nla_put_failure;

	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, dst))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	/* the result of the multicast is not checked; 0 is returned anyway */
	genlmsg_multicast_netns(&batadv_netlink_family,
				dev_net(bat_priv->soft_iface), msg, 0,
				BATADV_NL_MCGRP_TPMETER, GFP_KERNEL);

	return 0;

nla_put_failure:
	genlmsg_cancel(msg, hdr);
	ret = -EMSGSIZE;

err_genlmsg:
	nlmsg_free(msg);

	return ret;
}

/**
 * batadv_netlink_tp_meter_start() - Start a new tp_meter session
 * @skb: received netlink message
 * @info: receiver information
 *
 * Requires BATADV_ATTR_ORIG_ADDRESS and BATADV_ATTR_TPMETER_TEST_TIME in the
 * request. The reply carries the session cookie.
 *
 * Return: 0 on success, < 0 on error
 */
static int
batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
{
	struct batadv_priv *bat_priv = info->user_ptr[0];
	struct sk_buff *msg = NULL;
	u32 test_length;
	void *msg_head;
	u32 cookie;
	u8 *dst;
	int ret;

	if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
		return -EINVAL;

	if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME])
		return -EINVAL;

	dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);

	test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]);

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto out;
	}

	msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
			       &batadv_netlink_family, 0,
			       BATADV_CMD_TP_METER);
	if (!msg_head) {
		ret = -ENOBUFS;
		goto out;
	}

	/* NOTE(review): cookie is presumably always written by
	 * batadv_tp_start() through the pointer - confirm in tp_meter.c
	 */
	batadv_tp_start(bat_priv, dst, test_length, &cookie);

	ret = batadv_netlink_tp_meter_put(msg, cookie);

 out:
	/* ret was assigned on every path which reaches this label */
	if (ret) {
		if (msg)
			nlmsg_free(msg);
		return ret;
	}

	genlmsg_end(msg, msg_head);
	return genlmsg_reply(msg, info);
}

/**
 * batadv_netlink_tp_meter_cancel() - Cancel a running tp_meter session
 * @skb: received netlink message
 * @info: receiver information
 *
 * Return: 0 on success, < 0 on error
 */
static int
batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info)
{
	struct batadv_priv *bat_priv = info->user_ptr[0];
	u8 *dst;
	int ret = 0;

	if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
		return -EINVAL;

	dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);

	batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL);

	/* ret is never modified after its initialization to 0 */
	return ret;
}

/**
 * batadv_netlink_hardif_fill() - Fill message with hardif attributes
 * @msg: Netlink message to dump into
 * @bat_priv: the bat priv with all the soft interface information
 * @hard_iface: hard interface which was modified
 * @cmd: type of message to generate
 * @portid: Port making netlink request
 * @seq: sequence number for message
 * @flags: Additional flags for message
 * @cb: Control block containing additional options
 *
 * Return: 0 on success or negative error number in case of failure
 */
static int batadv_netlink_hardif_fill(struct sk_buff *msg,
				      struct batadv_priv *bat_priv,
				      struct batadv_hard_iface *hard_iface,
				      enum batadv_nl_commands cmd,
				      u32 portid, u32 seq, int flags,
				      struct netlink_callback *cb)
{
	struct net_device *net_dev = hard_iface->net_dev;
	void *hdr;

	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
	if (!hdr)
		return -ENOBUFS;

	/* cb is only non-NULL when called from a dump */
	if (cb)
		genl_dump_check_consistent(cb, hdr);

	if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
			bat_priv->soft_iface->ifindex))
		goto nla_put_failure;

	if (nla_put_string(msg, BATADV_ATTR_MESH_IFNAME,
			   bat_priv->soft_iface->name))
		goto nla_put_failure;

	if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
			net_dev->ifindex) ||
	    nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
			   net_dev->name) ||
	    nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
		    net_dev->dev_addr))
		goto nla_put_failure;

	/* BATADV_ATTR_ACTIVE is a flag: present only for active interfaces */
	if (hard_iface->if_status == BATADV_IF_ACTIVE) {
		if (nla_put_flag(msg, BATADV_ATTR_ACTIVE))
			goto nla_put_failure;
	}

	if (nla_put_u8(msg, BATADV_ATTR_HOP_PENALTY,
		       atomic_read(&hard_iface->hop_penalty)))
		goto nla_put_failure;

#ifdef CONFIG_BATMAN_ADV_BATMAN_V
	if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL,
			atomic_read(&hard_iface->bat_v.elp_interval)))
		goto nla_put_failure;

	if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT_OVERRIDE,
			atomic_read(&hard_iface->bat_v.throughput_override)))
		goto nla_put_failure;
#endif /* CONFIG_BATMAN_ADV_BATMAN_V */

	genlmsg_end(msg, hdr);
	return 0;

nla_put_failure:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

/**
 * batadv_netlink_notify_hardif() - send hardif attributes to listener
 * @bat_priv: the bat priv with all the soft interface information
 * @hard_iface: hard interface which was modified
 *
 * Return: 0 on success, < 0 on error
 */
static int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
					struct batadv_hard_iface *hard_iface)
{
	struct sk_buff *msg;
	int ret;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
					 BATADV_CMD_SET_HARDIF, 0, 0, 0, NULL);
	if (ret < 0) {
		nlmsg_free(msg);
		return ret;
	}

	/* the result of the multicast is not checked; 0 is returned anyway */
	genlmsg_multicast_netns(&batadv_netlink_family,
				dev_net(bat_priv->soft_iface), msg, 0,
				BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);

	return 0;
}

/**
 * batadv_netlink_get_hardif() - Get hardif attributes
 * @skb: Netlink message with request data
 * @info: receiver information
 *
 * Return: 0 on success or negative error number in case of failure
 */
static int batadv_netlink_get_hardif(struct sk_buff *skb,
				     struct genl_info *info)
{
	/* user_ptr[1] is set by batadv_pre_doit() for BATADV_FLAG_NEED_HARDIF */
	struct batadv_hard_iface *hard_iface = info->user_ptr[1];
	struct batadv_priv *bat_priv = info->user_ptr[0];
	struct sk_buff *msg;
	int ret;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
					 BATADV_CMD_GET_HARDIF,
					 info->snd_portid, info->snd_seq, 0,
					 NULL);
	if (ret < 0) {
		nlmsg_free(msg);
		return ret;
	}

	ret = genlmsg_reply(msg, info);

	return ret;
}

/**
 * batadv_netlink_set_hardif() - Set hardif attributes
 * @skb: Netlink message with request data
 * @info: receiver information
 *
 * Return: 0 on success or negative error number in case of failure
 */
static int batadv_netlink_set_hardif(struct sk_buff *skb,
				     struct genl_info *info)
{
	struct batadv_hard_iface *hard_iface = info->user_ptr[1];
	struct batadv_priv *bat_priv = info->user_ptr[0];
	struct nlattr *attr;

	if (info->attrs[BATADV_ATTR_HOP_PENALTY]) {
		attr = info->attrs[BATADV_ATTR_HOP_PENALTY];

		atomic_set(&hard_iface->hop_penalty, nla_get_u8(attr));
	}

#ifdef CONFIG_BATMAN_ADV_BATMAN_V

	if (info->attrs[BATADV_ATTR_ELP_INTERVAL]) {
		attr = info->attrs[BATADV_ATTR_ELP_INTERVAL];

		atomic_set(&hard_iface->bat_v.elp_interval, nla_get_u32(attr));
	}

	if (info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE]) {
		attr = info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE];

		atomic_set(&hard_iface->bat_v.throughput_override,
			   nla_get_u32(attr));
	}
#endif /* CONFIG_BATMAN_ADV_BATMAN_V */

	/* the notification result is not propagated to the caller */
	batadv_netlink_notify_hardif(bat_priv, hard_iface);

	return 0;
}

/**
 * batadv_netlink_dump_hardif() - Dump all hard interfaces into a message
 * @msg: Netlink message to dump into
 * @cb: Parameters from query
 *
 * cb->args[0] holds the number of matching entries already dumped by
 * previous invocations; these are skipped.
 *
 * Return: error code, or length of reply message on success
 */
static int
batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb)
{
	struct net *net = sock_net(cb->skb->sk);
	struct net_device *soft_iface;
	struct batadv_hard_iface *hard_iface;
	struct batadv_priv *bat_priv;
	int ifindex;
	int portid = NETLINK_CB(cb->skb).portid;
	int skip = cb->args[0];
	int i = 0;

	ifindex = batadv_netlink_get_ifindex(cb->nlh,
					     BATADV_ATTR_MESH_IFINDEX);
	if (!ifindex)
		return -EINVAL;

	soft_iface = dev_get_by_index(net, ifindex);
	if (!soft_iface)
		return -ENODEV;

	if (!batadv_softif_is_valid(soft_iface)) {
		dev_put(soft_iface);
		return -ENODEV;
	}

	bat_priv = netdev_priv(soft_iface);

	/* the hardif list is walked while holding the rtnl lock */
	rtnl_lock();
	cb->seq = batadv_hardif_generation << 1 | 1;

	list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
		/* only interfaces attached to the requested mesh are dumped */
		if (hard_iface->soft_iface != soft_iface)
			continue;

		if (i++ < skip)
			continue;

		if (batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
					       BATADV_CMD_GET_HARDIF,
					       portid, cb->nlh->nlmsg_seq,
					       NLM_F_MULTI, cb)) {
			/* entry was not added: do not count it, so that it
			 * is not skipped by the next invocation
			 */
			i--;
			break;
		}
	}

	rtnl_unlock();

	dev_put(soft_iface);

	cb->args[0] = i;

	return msg->len;
}

/**
 * batadv_netlink_vlan_fill() - Fill message with vlan attributes
 * @msg: Netlink message to dump into
 * @bat_priv: the bat priv with all the soft interface information
 * @vlan: vlan
which was modified * @cmd: type of message to generate * @portid: Port making netlink request * @seq: sequence number for message * @flags: Additional flags for message * * Return: 0 on success or negative error number in case of failure */ static int batadv_netlink_vlan_fill(struct sk_buff *msg, struct batadv_priv *bat_priv, struct batadv_softif_vlan *vlan, enum batadv_nl_commands cmd, u32 portid, u32 seq, int flags) { void *hdr; hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd); if (!hdr) return -ENOBUFS; if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, bat_priv->soft_iface->ifindex)) goto nla_put_failure; if (nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, bat_priv->soft_iface->name)) goto nla_put_failure; if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK)) goto nla_put_failure; if (nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED, !!atomic_read(&vlan->ap_isolation))) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** * batadv_netlink_notify_vlan() - send vlan attributes to listener * @bat_priv: the bat priv with all the soft interface information * @vlan: vlan which was modified * * Return: 0 on success, < 0 on error */ static int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, struct batadv_softif_vlan *vlan) { struct sk_buff *msg; int ret; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan, BATADV_CMD_SET_VLAN, 0, 0, 0); if (ret < 0) { nlmsg_free(msg); return ret; } genlmsg_multicast_netns(&batadv_netlink_family, dev_net(bat_priv->soft_iface), msg, 0, BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); return 0; } /** * batadv_netlink_get_vlan() - Get vlan attributes * @skb: Netlink message with request data * @info: receiver information * * Return: 0 on success or negative error number in case of failure */ static int batadv_netlink_get_vlan(struct sk_buff *skb, struct genl_info *info) { 
struct batadv_softif_vlan *vlan = info->user_ptr[1]; struct batadv_priv *bat_priv = info->user_ptr[0]; struct sk_buff *msg; int ret; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan, BATADV_CMD_GET_VLAN, info->snd_portid, info->snd_seq, 0); if (ret < 0) { nlmsg_free(msg); return ret; } ret = genlmsg_reply(msg, info); return ret; } /** * batadv_netlink_set_vlan() - Get vlan attributes * @skb: Netlink message with request data * @info: receiver information * * Return: 0 on success or negative error number in case of failure */ static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info) { struct batadv_softif_vlan *vlan = info->user_ptr[1]; struct batadv_priv *bat_priv = info->user_ptr[0]; struct nlattr *attr; if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) { attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]; atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr)); } batadv_netlink_notify_vlan(bat_priv, vlan); return 0; } /** * batadv_get_softif_from_info() - Retrieve soft interface from genl attributes * @net: the applicable net namespace * @info: receiver information * * Return: Pointer to soft interface (with increased refcnt) on success, error * pointer on error */ static struct net_device * batadv_get_softif_from_info(struct net *net, struct genl_info *info) { struct net_device *soft_iface; int ifindex; if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) return ERR_PTR(-EINVAL); ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); soft_iface = dev_get_by_index(net, ifindex); if (!soft_iface) return ERR_PTR(-ENODEV); if (!batadv_softif_is_valid(soft_iface)) goto err_put_softif; return soft_iface; err_put_softif: dev_put(soft_iface); return ERR_PTR(-EINVAL); } /** * batadv_get_hardif_from_info() - Retrieve hardif from genl attributes * @bat_priv: the bat priv with all the soft interface information * @net: the applicable net namespace * @info: receiver information * 
* Return: Pointer to hard interface (with increased refcnt) on success, error * pointer on error */ static struct batadv_hard_iface * batadv_get_hardif_from_info(struct batadv_priv *bat_priv, struct net *net, struct genl_info *info) { struct batadv_hard_iface *hard_iface; struct net_device *hard_dev; unsigned int hardif_index; if (!info->attrs[BATADV_ATTR_HARD_IFINDEX]) return ERR_PTR(-EINVAL); hardif_index = nla_get_u32(info->attrs[BATADV_ATTR_HARD_IFINDEX]); hard_dev = dev_get_by_index(net, hardif_index); if (!hard_dev) return ERR_PTR(-ENODEV); hard_iface = batadv_hardif_get_by_netdev(hard_dev); if (!hard_iface) goto err_put_harddev; if (hard_iface->soft_iface != bat_priv->soft_iface) goto err_put_hardif; /* hard_dev is referenced by hard_iface and not needed here */ dev_put(hard_dev); return hard_iface; err_put_hardif: batadv_hardif_put(hard_iface); err_put_harddev: dev_put(hard_dev); return ERR_PTR(-EINVAL); } /** * batadv_get_vlan_from_info() - Retrieve vlan from genl attributes * @bat_priv: the bat priv with all the soft interface information * @net: the applicable net namespace * @info: receiver information * * Return: Pointer to vlan on success (with increased refcnt), error pointer * on error */ static struct batadv_softif_vlan * batadv_get_vlan_from_info(struct batadv_priv *bat_priv, struct net *net, struct genl_info *info) { struct batadv_softif_vlan *vlan; u16 vid; if (!info->attrs[BATADV_ATTR_VLANID]) return ERR_PTR(-EINVAL); vid = nla_get_u16(info->attrs[BATADV_ATTR_VLANID]); vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); if (!vlan) return ERR_PTR(-ENOENT); return vlan; } /** * batadv_pre_doit() - Prepare batman-adv genl doit request * @ops: requested netlink operation * @skb: Netlink message with request data * @info: receiver information * * Return: 0 on success or negative error number in case of failure */ static int batadv_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) { struct net 
*net = genl_info_net(info); struct batadv_hard_iface *hard_iface; struct batadv_priv *bat_priv = NULL; struct batadv_softif_vlan *vlan; struct net_device *soft_iface; u8 user_ptr1_flags; u8 mesh_dep_flags; int ret; user_ptr1_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN; if (WARN_ON(hweight8(ops->internal_flags & user_ptr1_flags) > 1)) return -EINVAL; mesh_dep_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN; if (WARN_ON((ops->internal_flags & mesh_dep_flags) && (~ops->internal_flags & BATADV_FLAG_NEED_MESH))) return -EINVAL; if (ops->internal_flags & BATADV_FLAG_NEED_MESH) { soft_iface = batadv_get_softif_from_info(net, info); if (IS_ERR(soft_iface)) return PTR_ERR(soft_iface); bat_priv = netdev_priv(soft_iface); info->user_ptr[0] = bat_priv; } if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF) { hard_iface = batadv_get_hardif_from_info(bat_priv, net, info); if (IS_ERR(hard_iface)) { ret = PTR_ERR(hard_iface); goto err_put_softif; } info->user_ptr[1] = hard_iface; } if (ops->internal_flags & BATADV_FLAG_NEED_VLAN) { vlan = batadv_get_vlan_from_info(bat_priv, net, info); if (IS_ERR(vlan)) { ret = PTR_ERR(vlan); goto err_put_softif; } info->user_ptr[1] = vlan; } return 0; err_put_softif: if (bat_priv) dev_put(bat_priv->soft_iface); return ret; } /** * batadv_post_doit() - End batman-adv genl doit request * @ops: requested netlink operation * @skb: Netlink message with request data * @info: receiver information */ static void batadv_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) { struct batadv_hard_iface *hard_iface; struct batadv_softif_vlan *vlan; struct batadv_priv *bat_priv; if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF && info->user_ptr[1]) { hard_iface = info->user_ptr[1]; batadv_hardif_put(hard_iface); } if (ops->internal_flags & BATADV_FLAG_NEED_VLAN && info->user_ptr[1]) { vlan = info->user_ptr[1]; batadv_softif_vlan_put(vlan); } if (ops->internal_flags & BATADV_FLAG_NEED_MESH && 
info->user_ptr[0]) { bat_priv = info->user_ptr[0]; dev_put(bat_priv->soft_iface); } } static const struct genl_small_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_MESH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* can be retrieved by unprivileged users */ .doit = batadv_netlink_get_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_tp_meter_start, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER_CANCEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_tp_meter_cancel, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_GET_ROUTING_ALGOS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_algo_dump, }, { .cmd = BATADV_CMD_GET_HARDIF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* can be retrieved by unprivileged users */ .dumpit = batadv_netlink_dump_hardif, .doit = batadv_netlink_get_hardif, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_HARDIF, }, { .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_tt_local_dump, }, { .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_tt_global_dump, }, { .cmd = BATADV_CMD_GET_ORIGINATORS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_orig_dump, }, { .cmd = BATADV_CMD_GET_NEIGHBORS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_hardif_neigh_dump, }, { .cmd = BATADV_CMD_GET_GATEWAYS, .validate = GENL_DONT_VALIDATE_STRICT | 
GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_gw_dump, }, { .cmd = BATADV_CMD_GET_BLA_CLAIM, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_bla_claim_dump, }, { .cmd = BATADV_CMD_GET_BLA_BACKBONE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_bla_backbone_dump, }, { .cmd = BATADV_CMD_GET_DAT_CACHE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_dat_cache_dump, }, { .cmd = BATADV_CMD_GET_MCAST_FLAGS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_mcast_flags_dump, }, { .cmd = BATADV_CMD_SET_MESH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_set_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_SET_HARDIF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_set_hardif, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_HARDIF, }, { .cmd = BATADV_CMD_GET_VLAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* can be retrieved by unprivileged users */ .doit = batadv_netlink_get_vlan, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_VLAN, }, { .cmd = BATADV_CMD_SET_VLAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_set_vlan, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_VLAN, }, }; struct genl_family batadv_netlink_family __ro_after_init = { .hdrsize = 0, .name = BATADV_NL_NAME, .version = 1, .maxattr = BATADV_ATTR_MAX, .policy = batadv_netlink_policy, .netnsok = true, .pre_doit = batadv_pre_doit, .post_doit = batadv_post_doit, .module = THIS_MODULE, .small_ops = batadv_netlink_ops, .n_small_ops = 
ARRAY_SIZE(batadv_netlink_ops), .resv_start_op = BATADV_CMD_SET_VLAN + 1, .mcgrps = batadv_netlink_mcgrps, .n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps), }; /** * batadv_netlink_register() - register batadv genl netlink family */ void __init batadv_netlink_register(void) { int ret; ret = genl_register_family(&batadv_netlink_family); if (ret) pr_warn("unable to register netlink family"); } /** * batadv_netlink_unregister() - unregister batadv genl netlink family */ void batadv_netlink_unregister(void) { genl_unregister_family(&batadv_netlink_family); } |
47 47 18 18 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 | // SPDX-License-Identifier: GPL-2.0-only /* * Cryptographic API. */ #include <linux/init.h> #include <linux/module.h> #include <linux/crypto.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/lzo.h> #include <crypto/internal/scompress.h> struct lzo_ctx { void *lzo_comp_mem; }; static void *lzo_alloc_ctx(struct crypto_scomp *tfm) { void *ctx; ctx = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); if (!ctx) return ERR_PTR(-ENOMEM); return ctx; } static int lzo_init(struct crypto_tfm *tfm) { struct lzo_ctx *ctx = crypto_tfm_ctx(tfm); ctx->lzo_comp_mem = lzo_alloc_ctx(NULL); if (IS_ERR(ctx->lzo_comp_mem)) return -ENOMEM; return 0; } static void lzo_free_ctx(struct crypto_scomp *tfm, void *ctx) { kvfree(ctx); } static void lzo_exit(struct crypto_tfm *tfm) { struct lzo_ctx *ctx = crypto_tfm_ctx(tfm); lzo_free_ctx(NULL, ctx->lzo_comp_mem); } static int __lzo_compress(const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) { size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */ int err; err = lzo1x_1_compress(src, slen, dst, &tmp_len, ctx); if (err != LZO_E_OK) return -EINVAL; *dlen = tmp_len; return 0; } static int lzo_compress(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) { struct lzo_ctx *ctx = crypto_tfm_ctx(tfm); return __lzo_compress(src, slen, dst, dlen, ctx->lzo_comp_mem); } static int lzo_scompress(struct crypto_scomp *tfm, const u8 *src, 
unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) { return __lzo_compress(src, slen, dst, dlen, ctx); } static int __lzo_decompress(const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) { int err; size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */ err = lzo1x_decompress_safe(src, slen, dst, &tmp_len); if (err != LZO_E_OK) return -EINVAL; *dlen = tmp_len; return 0; } static int lzo_decompress(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) { return __lzo_decompress(src, slen, dst, dlen); } static int lzo_sdecompress(struct crypto_scomp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) { return __lzo_decompress(src, slen, dst, dlen); } static struct crypto_alg alg = { .cra_name = "lzo", .cra_driver_name = "lzo-generic", .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, .cra_ctxsize = sizeof(struct lzo_ctx), .cra_module = THIS_MODULE, .cra_init = lzo_init, .cra_exit = lzo_exit, .cra_u = { .compress = { .coa_compress = lzo_compress, .coa_decompress = lzo_decompress } } }; static struct scomp_alg scomp = { .alloc_ctx = lzo_alloc_ctx, .free_ctx = lzo_free_ctx, .compress = lzo_scompress, .decompress = lzo_sdecompress, .base = { .cra_name = "lzo", .cra_driver_name = "lzo-scomp", .cra_module = THIS_MODULE, } }; static int __init lzo_mod_init(void) { int ret; ret = crypto_register_alg(&alg); if (ret) return ret; ret = crypto_register_scomp(&scomp); if (ret) { crypto_unregister_alg(&alg); return ret; } return ret; } static void __exit lzo_mod_fini(void) { crypto_unregister_alg(&alg); crypto_unregister_scomp(&scomp); } subsys_initcall(lzo_mod_init); module_exit(lzo_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO Compression Algorithm"); MODULE_ALIAS_CRYPTO("lzo"); |
276 1 3 271 855 1 17 830 830 751 1 190 820 15 3 6 6 6 6 1 1 12 12 99 95 5 98 1 1 98 98 96 94 5 97 103 1 100 2 4 1065 1066 1051 814 1045 24 1036 1042 2 277 22 177 7 1059 24 1037 10 1037 10 3 1041 14 1040 1 20 1026 2 14 869 1039 124 189 1 1004 13 10 1030 16 7 904 189 2 5 183 1 1002 2 758 753 4 752 763 25 760 763 759 13 748 754 3 2 2 2 2 773 772 2 15 761 1 2 8 13 16 763 1 1 174 171 6 173 167 26 284 85 557 523 4 486 3 8 3 473 8 475 479 482 32 1 1 30 1 29 31 31 84 81 2 82 1 83 2 2 2 280 270 10 2 16 252 287 287 1 6 2 9 11 262 6 270 283 78 1 70 65 3 61 45 1 1 42 43 43 43 97 97 94 97 97 3 95 97 98 1 97 5 61 1 1 5 53 1 5 59 60 59 1 22 22 10 15 22 22 22 22 33 6 39 39 36 33 3 36 97 97 7 90 1 1 51 51 51 8 6 77 77 120 120 1 102 20 1 118 102 19 53 77 118 93 9 29 40 108 54 76 102 18 108 6 76 98 93 8 26 85 93 98 2 77 54 9 1 8 1 8 5 3 5 3 6 2 1 5 3 5 3 8 125 3 54 ||