| 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_offload.h> #include <net/netfilter/nf_dup_netdev.h> struct nft_dup_netdev { u8 sreg_dev; }; static void nft_dup_netdev_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_dup_netdev *priv = nft_expr_priv(expr); int oif = regs->data[priv->sreg_dev]; nf_dup_netdev_egress(pkt, oif); } static const struct nla_policy nft_dup_netdev_policy[NFTA_DUP_MAX + 1] = { [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 }, }; static int nft_dup_netdev_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_dup_netdev *priv = nft_expr_priv(expr); if (tb[NFTA_DUP_SREG_DEV] == NULL) return -EINVAL; return nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev, sizeof(int)); } static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { struct nft_dup_netdev *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) goto nla_put_failure; return 0; nla_put_failure: return -1; } static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr) { const struct nft_dup_netdev *priv = nft_expr_priv(expr); int oif = ctx->regs[priv->sreg_dev].data.data[0]; return 
nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif); } static bool nft_dup_netdev_offload_action(const struct nft_expr *expr) { return true; } static struct nft_expr_type nft_dup_netdev_type; static const struct nft_expr_ops nft_dup_netdev_ops = { .type = &nft_dup_netdev_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_netdev)), .eval = nft_dup_netdev_eval, .init = nft_dup_netdev_init, .dump = nft_dup_netdev_dump, .reduce = NFT_REDUCE_READONLY, .offload = nft_dup_netdev_offload, .offload_action = nft_dup_netdev_offload_action, }; static struct nft_expr_type nft_dup_netdev_type __read_mostly = { .family = NFPROTO_NETDEV, .name = "dup", .ops = &nft_dup_netdev_ops, .policy = nft_dup_netdev_policy, .maxattr = NFTA_DUP_MAX, .owner = THIS_MODULE, }; static int __init nft_dup_netdev_module_init(void) { return nft_register_expr(&nft_dup_netdev_type); } static void __exit nft_dup_netdev_module_exit(void) { nft_unregister_expr(&nft_dup_netdev_type); } module_init(nft_dup_netdev_module_init); module_exit(nft_dup_netdev_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NFT_AF_EXPR(5, "dup"); MODULE_DESCRIPTION("nftables netdev packet duplication support"); |
/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef RDMA_CORE_H
#define RDMA_CORE_H

#include <linux/idr.h>
#include <rdma/uverbs_types.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/ib_verbs.h>
#include <linux/mutex.h>

struct ib_uverbs_device;

void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
			     enum rdma_remove_reason reason);

int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs);

/*
 * Get an ib_uobject that corresponds to the given id from ufile, assuming
 * the object is from the given type. Lock it to the required access when
 * applicable.
 * This function could create (access == NEW), destroy (access == DESTROY)
 * or unlock (access == READ || access == WRITE) objects if required.
 * The action will be finalized only when uverbs_finalize_object or
 * uverbs_finalize_objects are called.
 */
struct ib_uobject *
uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
			     s64 id, struct uverbs_attr_bundle *attrs);

void uverbs_finalize_object(struct ib_uobject *uobj,
			    enum uverbs_obj_access access, bool hw_obj_valid,
			    bool commit, struct uverbs_attr_bundle *attrs);

int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx);

void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile);
void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);

struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs);

/*
 * This is the runtime description of the uverbs API, used by the syscall
 * machinery to validate and dispatch calls.
 */

/*
 * Depending on ID the slot pointer in the radix tree points at one of these
 * structs.
 */

/*
 * Radix tree entry describing one ioctl method: an RCU-annotated handler
 * pointer, a bitmap of mandatory attributes and per-method flags.
 * NOTE(review): the exact meaning of the individual flags and of
 * key_bitmap_len/destroy_bkey is defined by their users - confirm there.
 */
struct uverbs_api_ioctl_method {
	int(__rcu *handler)(struct uverbs_attr_bundle *attrs);
	DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN);
	u16 bundle_size;
	u8 use_stack:1;
	u8 driver_method:1;
	u8 disabled:1;
	u8 has_udata:1;
	u8 key_bitmap_len;
	u8 destroy_bkey;
};

/*
 * Entry describing one write() command; these are what the write_methods
 * and write_ex_methods tables of struct uverbs_api point at.
 * NOTE(review): req_size/resp_size are presumably the fixed request and
 * response sizes of the command - confirm against the code filling them in.
 */
struct uverbs_api_write_method {
	int (*handler)(struct uverbs_attr_bundle *attrs);
	u8 disabled:1;
	u8 is_ex:1;
	u8 has_udata:1;
	u8 has_resp:1;
	u8 req_size;
	u8 resp_size;
};

/* Radix tree entry describing one attribute; wraps its uverbs_attr_spec. */
struct uverbs_api_attr {
	struct uverbs_attr_spec spec;
};

struct uverbs_api {
	/* radix tree contains struct uverbs_api_* pointers */
	struct radix_tree_root radix;
	enum rdma_driver_id driver_id;

	/* Number of entries in write_methods / write_ex_methods (see uapi_get_method()) */
	unsigned int num_write;
	unsigned int num_write_ex;
	struct uverbs_api_write_method notsupp_method;
	/* Tables indexed by the masked command number in uapi_get_method() */
	const struct uverbs_api_write_method **write_methods;
	const struct uverbs_api_write_method **write_ex_methods;
};

/*
 * Get an uverbs_api_object that corresponds to the given object_id.
 * Note:
 * -ENOMSG means that any object is allowed to match during lookup.
 * -ENOENT means the radix tree holds no entry for uapi_key_obj(object_id).
 * Both are returned as ERR_PTR() values, never as NULL.
 */
static inline const struct uverbs_api_object *
uapi_get_object(struct uverbs_api *uapi, u16 object_id)
{
	const struct uverbs_api_object *res;

	if (object_id == UVERBS_IDR_ANY_OBJECT)
		return ERR_PTR(-ENOMSG);

	res = radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
	if (!res)
		return ERR_PTR(-ENOENT);

	return res;
}

char *uapi_key_format(char *S, unsigned int key);
struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev);
void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev);
void uverbs_disassociate_api(struct uverbs_api *uapi);
void uverbs_destroy_api(struct uverbs_api *uapi);
void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
			      unsigned int num_attrs);
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);

extern const struct uapi_definition uverbs_def_obj_async_fd[];
extern const struct uapi_definition uverbs_def_obj_counters[];
extern const struct uapi_definition uverbs_def_obj_cq[];
extern const struct uapi_definition uverbs_def_obj_device[];
extern const struct uapi_definition uverbs_def_obj_dm[];
extern const struct uapi_definition uverbs_def_obj_dmah[];
extern const struct uapi_definition uverbs_def_obj_flow_action[];
extern const struct uapi_definition uverbs_def_obj_intf[];
extern const struct uapi_definition uverbs_def_obj_mr[];
extern const struct uapi_definition uverbs_def_obj_qp[];
extern const struct uapi_definition uverbs_def_obj_srq[];
extern const struct uapi_definition uverbs_def_obj_wq[];
extern const struct uapi_definition uverbs_def_write_intf[];

/*
 * Look up the write method for a raw command word.
 *
 * The table index is @command masked with IB_USER_VERBS_CMD_COMMAND_MASK.
 * Returns ERR_PTR(-EINVAL) if @command has any bit set outside
 * IB_USER_VERBS_CMD_FLAG_EXTENDED | IB_USER_VERBS_CMD_COMMAND_MASK, and
 * ERR_PTR(-EOPNOTSUPP) if the index is not below num_write_ex (extended
 * flag set) or num_write (flag clear). Otherwise the entry from
 * write_ex_methods or write_methods is returned.
 */
static inline const struct uverbs_api_write_method *
uapi_get_method(const struct uverbs_api *uapi, u32 command)
{
	u32 cmd_idx = command & IB_USER_VERBS_CMD_COMMAND_MASK;

	/* Reject unknown flag bits before touching either table */
	if (command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
			     IB_USER_VERBS_CMD_COMMAND_MASK))
		return ERR_PTR(-EINVAL);

	if (command & IB_USER_VERBS_CMD_FLAG_EXTENDED) {
		if (cmd_idx >= uapi->num_write_ex)
			return ERR_PTR(-EOPNOTSUPP);
		return uapi->write_ex_methods[cmd_idx];
	}

	if (cmd_idx >= uapi->num_write)
		return ERR_PTR(-EOPNOTSUPP);
	return uapi->write_methods[cmd_idx];
}

void uverbs_fill_udata(struct uverbs_attr_bundle *bundle,
		       struct ib_udata *udata, unsigned int attr_in,
		       unsigned int attr_out);

#endif /* RDMA_CORE_H */
| 6 5 139 140 140 138 139 138 173 174 174 174 174 174 7 6 7 7 7 7 7 7 7 7 7 7 4 4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Advanced Linux Sound Architecture * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/init.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/device.h> #include <linux/module.h> #include <linux/debugfs.h> #include <sound/core.h> #include 
<sound/minors.h> #include <sound/info.h> #include <sound/control.h> #include <sound/initval.h> #include <linux/kmod.h> #include <linux/mutex.h> static int major = CONFIG_SND_MAJOR; int snd_major; EXPORT_SYMBOL(snd_major); static int cards_limit = 1; MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>"); MODULE_DESCRIPTION("Advanced Linux Sound Architecture driver for soundcards."); MODULE_LICENSE("GPL"); module_param(major, int, 0444); MODULE_PARM_DESC(major, "Major # for sound driver."); module_param(cards_limit, int, 0444); MODULE_PARM_DESC(cards_limit, "Count of auto-loadable soundcards."); MODULE_ALIAS_CHARDEV_MAJOR(CONFIG_SND_MAJOR); /* this one holds the actual max. card number currently available. * as default, it's identical with cards_limit option. when more * modules are loaded manually, this limit number increases, too. */ int snd_ecards_limit; EXPORT_SYMBOL(snd_ecards_limit); #ifdef CONFIG_SND_DEBUG struct dentry *sound_debugfs_root; EXPORT_SYMBOL_GPL(sound_debugfs_root); #endif static struct snd_minor *snd_minors[SNDRV_OS_MINORS]; static DEFINE_MUTEX(sound_mutex); #ifdef CONFIG_MODULES /** * snd_request_card - try to load the card module * @card: the card number * * Tries to load the module "snd-card-X" for the given card number * via request_module. Returns immediately if already loaded. */ void snd_request_card(int card) { if (snd_card_locked(card)) return; if (card < 0 || card >= cards_limit) return; request_module("snd-card-%i", card); } EXPORT_SYMBOL(snd_request_card); static void snd_request_other(int minor) { char *str; switch (minor) { case SNDRV_MINOR_SEQUENCER: str = "snd-seq"; break; case SNDRV_MINOR_TIMER: str = "snd-timer"; break; default: return; } request_module(str); } #endif /* modular kernel */ /** * snd_lookup_minor_data - get user data of a registered device * @minor: the minor number * @type: device type (SNDRV_DEVICE_TYPE_XXX) * * Checks that a minor device with the specified type is registered, and returns * its user data pointer. 
* * This function increments the reference counter of the card instance * if an associated instance with the given minor number and type is found. * The caller must call snd_card_unref() appropriately later. * * Return: The user data pointer if the specified device is found. %NULL * otherwise. */ void *snd_lookup_minor_data(unsigned int minor, int type) { struct snd_minor *mreg; void *private_data; if (minor >= ARRAY_SIZE(snd_minors)) return NULL; guard(mutex)(&sound_mutex); mreg = snd_minors[minor]; if (mreg && mreg->type == type) { private_data = mreg->private_data; if (private_data && mreg->card_ptr) get_device(&mreg->card_ptr->card_dev); } else private_data = NULL; return private_data; } EXPORT_SYMBOL(snd_lookup_minor_data); #ifdef CONFIG_MODULES static struct snd_minor *autoload_device(unsigned int minor) { int dev; mutex_unlock(&sound_mutex); /* release lock temporarily */ dev = SNDRV_MINOR_DEVICE(minor); if (dev == SNDRV_MINOR_CONTROL) { /* /dev/aloadC? */ int card = SNDRV_MINOR_CARD(minor); struct snd_card *ref = snd_card_ref(card); if (!ref) snd_request_card(card); else snd_card_unref(ref); } else if (dev == SNDRV_MINOR_GLOBAL) { /* /dev/aloadSEQ */ snd_request_other(minor); } mutex_lock(&sound_mutex); /* reacquire lock */ return snd_minors[minor]; } #else /* !CONFIG_MODULES */ #define autoload_device(minor) NULL #endif /* CONFIG_MODULES */ static int snd_open(struct inode *inode, struct file *file) { unsigned int minor = iminor(inode); struct snd_minor *mptr = NULL; const struct file_operations *new_fops; int err = 0; if (minor >= ARRAY_SIZE(snd_minors)) return -ENODEV; scoped_guard(mutex, &sound_mutex) { mptr = snd_minors[minor]; if (mptr == NULL) { mptr = autoload_device(minor); if (!mptr) return -ENODEV; } new_fops = fops_get(mptr->f_ops); } if (!new_fops) return -ENODEV; replace_fops(file, new_fops); if (file->f_op->open) err = file->f_op->open(inode, file); return err; } static const struct file_operations snd_fops = { .owner = THIS_MODULE, .open = 
snd_open, .llseek = noop_llseek, }; #ifdef CONFIG_SND_DYNAMIC_MINORS static int snd_find_free_minor(int type, struct snd_card *card, int dev) { int minor; /* static minors for module auto loading */ if (type == SNDRV_DEVICE_TYPE_SEQUENCER) return SNDRV_MINOR_SEQUENCER; if (type == SNDRV_DEVICE_TYPE_TIMER) return SNDRV_MINOR_TIMER; for (minor = 0; minor < ARRAY_SIZE(snd_minors); ++minor) { /* skip static minors still used for module auto loading */ if (SNDRV_MINOR_DEVICE(minor) == SNDRV_MINOR_CONTROL) continue; if (minor == SNDRV_MINOR_SEQUENCER || minor == SNDRV_MINOR_TIMER) continue; if (!snd_minors[minor]) return minor; } return -EBUSY; } #else static int snd_find_free_minor(int type, struct snd_card *card, int dev) { int minor; switch (type) { case SNDRV_DEVICE_TYPE_SEQUENCER: case SNDRV_DEVICE_TYPE_TIMER: minor = type; break; case SNDRV_DEVICE_TYPE_CONTROL: if (snd_BUG_ON(!card)) return -EINVAL; minor = SNDRV_MINOR(card->number, type); break; case SNDRV_DEVICE_TYPE_HWDEP: case SNDRV_DEVICE_TYPE_RAWMIDI: case SNDRV_DEVICE_TYPE_PCM_PLAYBACK: case SNDRV_DEVICE_TYPE_PCM_CAPTURE: case SNDRV_DEVICE_TYPE_COMPRESS: if (snd_BUG_ON(!card)) return -EINVAL; minor = SNDRV_MINOR(card->number, type + dev); break; default: return -EINVAL; } if (snd_BUG_ON(minor < 0 || minor >= SNDRV_OS_MINORS)) return -EINVAL; if (snd_minors[minor]) return -EBUSY; return minor; } #endif /** * snd_register_device - Register the ALSA device file for the card * @type: the device type, SNDRV_DEVICE_TYPE_XXX * @card: the card instance * @dev: the device index * @f_ops: the file operations * @private_data: user pointer for f_ops->open() * @device: the device to register * * Registers an ALSA device file for the given card. * The operators have to be set in reg parameter. * * Return: Zero if successful, or a negative error code on failure. 
*/ int snd_register_device(int type, struct snd_card *card, int dev, const struct file_operations *f_ops, void *private_data, struct device *device) { int minor; int err = 0; struct snd_minor *preg; if (snd_BUG_ON(!device)) return -EINVAL; preg = kmalloc(sizeof *preg, GFP_KERNEL); if (preg == NULL) return -ENOMEM; preg->type = type; preg->card = card ? card->number : -1; preg->device = dev; preg->f_ops = f_ops; preg->private_data = private_data; preg->card_ptr = card; guard(mutex)(&sound_mutex); minor = snd_find_free_minor(type, card, dev); if (minor < 0) { err = minor; goto error; } preg->dev = device; device->devt = MKDEV(major, minor); err = device_add(device); if (err < 0) goto error; snd_minors[minor] = preg; error: if (err < 0) kfree(preg); return err; } EXPORT_SYMBOL(snd_register_device); /** * snd_unregister_device - unregister the device on the given card * @dev: the device instance * * Unregisters the device file already registered via * snd_register_device(). * * Return: Zero if successful, or a negative error code on failure. 
*/ int snd_unregister_device(struct device *dev) { int minor; struct snd_minor *preg; guard(mutex)(&sound_mutex); for (minor = 0; minor < ARRAY_SIZE(snd_minors); ++minor) { preg = snd_minors[minor]; if (preg && preg->dev == dev) { snd_minors[minor] = NULL; device_del(dev); kfree(preg); break; } } if (minor >= ARRAY_SIZE(snd_minors)) return -ENOENT; return 0; } EXPORT_SYMBOL(snd_unregister_device); #ifdef CONFIG_SND_PROC_FS /* * INFO PART */ static const char *snd_device_type_name(int type) { switch (type) { case SNDRV_DEVICE_TYPE_CONTROL: return "control"; case SNDRV_DEVICE_TYPE_HWDEP: return "hardware dependent"; case SNDRV_DEVICE_TYPE_RAWMIDI: return "raw midi"; case SNDRV_DEVICE_TYPE_PCM_PLAYBACK: return "digital audio playback"; case SNDRV_DEVICE_TYPE_PCM_CAPTURE: return "digital audio capture"; case SNDRV_DEVICE_TYPE_SEQUENCER: return "sequencer"; case SNDRV_DEVICE_TYPE_TIMER: return "timer"; case SNDRV_DEVICE_TYPE_COMPRESS: return "compress"; default: return "?"; } } static void snd_minor_info_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { int minor; struct snd_minor *mptr; guard(mutex)(&sound_mutex); for (minor = 0; minor < SNDRV_OS_MINORS; ++minor) { mptr = snd_minors[minor]; if (!mptr) continue; if (mptr->card >= 0) { if (mptr->device >= 0) snd_iprintf(buffer, "%3i: [%2i-%2i]: %s\n", minor, mptr->card, mptr->device, snd_device_type_name(mptr->type)); else snd_iprintf(buffer, "%3i: [%2i] : %s\n", minor, mptr->card, snd_device_type_name(mptr->type)); } else snd_iprintf(buffer, "%3i: : %s\n", minor, snd_device_type_name(mptr->type)); } } int __init snd_minor_info_init(void) { struct snd_info_entry *entry; entry = snd_info_create_module_entry(THIS_MODULE, "devices", NULL); if (!entry) return -ENOMEM; entry->c.text.read = snd_minor_info_read; return snd_info_register(entry); /* freed in error path */ } #endif /* CONFIG_SND_PROC_FS */ /* * INIT PART */ static int __init alsa_sound_init(void) { snd_major = major; snd_ecards_limit = 
cards_limit; if (register_chrdev(major, "alsa", &snd_fops)) { pr_err("ALSA core: unable to register native major device number %d\n", major); return -EIO; } if (snd_info_init() < 0) { unregister_chrdev(major, "alsa"); return -ENOMEM; } #ifdef CONFIG_SND_DEBUG sound_debugfs_root = debugfs_create_dir("sound", NULL); #endif #ifndef MODULE pr_info("Advanced Linux Sound Architecture Driver Initialized.\n"); #endif return 0; } static void __exit alsa_sound_exit(void) { #ifdef CONFIG_SND_DEBUG debugfs_remove(sound_debugfs_root); #endif snd_info_done(); unregister_chrdev(major, "alsa"); } subsys_initcall(alsa_sound_init); module_exit(alsa_sound_exit); |
| 4 4 4 205 193 193 23 5 5 4 2 4 4 4 5 5 5 5 5 5 5 5 5 5 68 68 68 2 2 66 68 66 68 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 | // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * * Copyright (c) 2018 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Deepak Rawat <drawat@vmware.com> * Rob Clark <robdclark@gmail.com> * **************************************************************************/ #include <linux/export.h> #include <drm/drm_atomic.h> #include <drm/drm_damage_helper.h> #include <drm/drm_device.h> #include <drm/drm_framebuffer.h> static void convert_clip_rect_to_rect(const struct drm_clip_rect *src, struct drm_mode_rect *dest, uint32_t num_clips, uint32_t src_inc) { while (num_clips > 0) { dest->x1 = src->x1; dest->y1 = src->y1; dest->x2 = src->x2; dest->y2 = src->y2; src += src_inc; dest++; num_clips--; } } /** * drm_atomic_helper_check_plane_damage - Verify plane damage on atomic_check. * @state: The driver state object. * @plane_state: Plane state for which to verify damage. * * This helper function makes sure that damage from plane state is discarded * for full modeset. 
If there are more reasons a driver would want to do a full * plane update rather than processing individual damage regions, then those * cases should be taken care of here. * * Note that &drm_plane_state.fb_damage_clips == NULL in plane state means that * full plane update should happen. It also ensure helper iterator will return * &drm_plane_state.src as damage. */ void drm_atomic_helper_check_plane_damage(struct drm_atomic_state *state, struct drm_plane_state *plane_state) { struct drm_crtc_state *crtc_state; if (plane_state->crtc) { crtc_state = drm_atomic_get_new_crtc_state(state, plane_state->crtc); if (WARN_ON(!crtc_state)) return; if (drm_atomic_crtc_needs_modeset(crtc_state)) { drm_property_blob_put(plane_state->fb_damage_clips); plane_state->fb_damage_clips = NULL; } } } EXPORT_SYMBOL(drm_atomic_helper_check_plane_damage); /** * drm_atomic_helper_dirtyfb - Helper for dirtyfb. * @fb: DRM framebuffer. * @file_priv: Drm file for the ioctl call. * @flags: Dirty fb annotate flags. * @color: Color for annotate fill. * @clips: Dirty region. * @num_clips: Count of clip in clips. * * A helper to implement &drm_framebuffer_funcs.dirty using damage interface * during plane update. If num_clips is 0 then this helper will do a full plane * update. This is the same behaviour expected by DIRTFB IOCTL. * * Note that this helper is blocking implementation. This is what current * drivers and userspace expect in their DIRTYFB IOCTL implementation, as a way * to rate-limit userspace and make sure its rendering doesn't get ahead of * uploading new data too much. * * Return: Zero on success, negative errno on failure. 
*/ int drm_atomic_helper_dirtyfb(struct drm_framebuffer *fb, struct drm_file *file_priv, unsigned int flags, unsigned int color, struct drm_clip_rect *clips, unsigned int num_clips) { struct drm_modeset_acquire_ctx ctx; struct drm_property_blob *damage = NULL; struct drm_mode_rect *rects = NULL; struct drm_atomic_state *state; struct drm_plane *plane; int ret = 0; /* * When called from ioctl, we are interruptible, but not when called * internally (ie. defio worker) */ drm_modeset_acquire_init(&ctx, file_priv ? DRM_MODESET_ACQUIRE_INTERRUPTIBLE : 0); state = drm_atomic_state_alloc(fb->dev); if (!state) { ret = -ENOMEM; goto out_drop_locks; } state->acquire_ctx = &ctx; if (clips) { uint32_t inc = 1; if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY) { inc = 2; num_clips /= 2; } rects = kcalloc(num_clips, sizeof(*rects), GFP_KERNEL); if (!rects) { ret = -ENOMEM; goto out; } convert_clip_rect_to_rect(clips, rects, num_clips, inc); damage = drm_property_create_blob(fb->dev, num_clips * sizeof(*rects), rects); if (IS_ERR(damage)) { ret = PTR_ERR(damage); damage = NULL; goto out; } } retry: drm_for_each_plane(plane, fb->dev) { struct drm_plane_state *plane_state; ret = drm_modeset_lock(&plane->mutex, state->acquire_ctx); if (ret) goto out; if (plane->state->fb != fb) { drm_modeset_unlock(&plane->mutex); continue; } plane_state = drm_atomic_get_plane_state(state, plane); if (IS_ERR(plane_state)) { ret = PTR_ERR(plane_state); goto out; } drm_property_replace_blob(&plane_state->fb_damage_clips, damage); } ret = drm_atomic_commit(state); out: if (ret == -EDEADLK) { drm_atomic_state_clear(state); ret = drm_modeset_backoff(&ctx); if (!ret) goto retry; } drm_property_blob_put(damage); kfree(rects); drm_atomic_state_put(state); out_drop_locks: drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); return ret; } EXPORT_SYMBOL(drm_atomic_helper_dirtyfb); /** * drm_atomic_helper_damage_iter_init - Initialize the damage iterator. * @iter: The iterator to initialize. 
* @old_state: Old plane state for validation. * @state: Plane state from which to iterate the damage clips. * * Initialize an iterator, which clips plane damage * &drm_plane_state.fb_damage_clips to plane &drm_plane_state.src. This iterator * returns full plane src in case damage is not present because either * user-space didn't sent or driver discarded it (it want to do full plane * update). Currently this iterator returns full plane src in case plane src * changed but that can be changed in future to return damage. * * For the case when plane is not visible or plane update should not happen the * first call to iter_next will return false. Note that this helper use clipped * &drm_plane_state.src, so driver calling this helper should have called * drm_atomic_helper_check_plane_state() earlier. */ void drm_atomic_helper_damage_iter_init(struct drm_atomic_helper_damage_iter *iter, const struct drm_plane_state *old_state, const struct drm_plane_state *state) { struct drm_rect src; memset(iter, 0, sizeof(*iter)); if (!state || !state->crtc || !state->fb || !state->visible) return; iter->clips = (struct drm_rect *)drm_plane_get_damage_clips(state); iter->num_clips = drm_plane_get_damage_clips_count(state); /* Round down for x1/y1 and round up for x2/y2 to catch all pixels */ src = drm_plane_state_src(state); iter->plane_src.x1 = src.x1 >> 16; iter->plane_src.y1 = src.y1 >> 16; iter->plane_src.x2 = (src.x2 >> 16) + !!(src.x2 & 0xFFFF); iter->plane_src.y2 = (src.y2 >> 16) + !!(src.y2 & 0xFFFF); if (!iter->clips || state->ignore_damage_clips || !drm_rect_equals(&state->src, &old_state->src)) { iter->clips = NULL; iter->num_clips = 0; iter->full_update = true; } } EXPORT_SYMBOL(drm_atomic_helper_damage_iter_init); /** * drm_atomic_helper_damage_iter_next - Advance the damage iterator. * @iter: The iterator to advance. * @rect: Return a rectangle in fb coordinate clipped to plane src. 
* * Since plane src is in 16.16 fixed point and damage clips are whole number, * this iterator round off clips that intersect with plane src. Round down for * x1/y1 and round up for x2/y2 for the intersected coordinate. Similar rounding * off for full plane src, in case it's returned as damage. This iterator will * skip damage clips outside of plane src. * * Return: True if the output is valid, false if reached the end. * * If the first call to iterator next returns false then it means no need to * update the plane. */ bool drm_atomic_helper_damage_iter_next(struct drm_atomic_helper_damage_iter *iter, struct drm_rect *rect) { bool ret = false; if (iter->full_update) { *rect = iter->plane_src; iter->full_update = false; return true; } while (iter->curr_clip < iter->num_clips) { *rect = iter->clips[iter->curr_clip]; iter->curr_clip++; if (drm_rect_intersect(rect, &iter->plane_src)) { ret = true; break; } } return ret; } EXPORT_SYMBOL(drm_atomic_helper_damage_iter_next); /** * drm_atomic_helper_damage_merged - Merged plane damage * @old_state: Old plane state for validation. * @state: Plane state from which to iterate the damage clips. * @rect: Returns the merged damage rectangle * * This function merges any valid plane damage clips into one rectangle and * returns it in @rect. * * For details see: drm_atomic_helper_damage_iter_init() and * drm_atomic_helper_damage_iter_next(). * * Returns: * True if there is valid plane damage otherwise false. 
*/
bool drm_atomic_helper_damage_merged(const struct drm_plane_state *old_state,
				     const struct drm_plane_state *state,
				     struct drm_rect *rect)
{
	struct drm_atomic_helper_damage_iter damage_iter;
	struct drm_rect damage;
	bool found = false;

	/*
	 * Seed @rect with an "empty" extent so the first clip replaces every
	 * edge. If no clip is produced, @rect keeps these seed values.
	 */
	rect->x1 = INT_MAX;
	rect->y1 = INT_MAX;
	rect->x2 = 0;
	rect->y2 = 0;

	drm_atomic_helper_damage_iter_init(&damage_iter, old_state, state);
	drm_atomic_for_each_plane_damage(&damage_iter, &damage) {
		/* Grow @rect to the bounding box of all clips seen so far. */
		if (damage.x1 < rect->x1)
			rect->x1 = damage.x1;
		if (damage.y1 < rect->y1)
			rect->y1 = damage.y1;
		if (damage.x2 > rect->x2)
			rect->x2 = damage.x2;
		if (damage.y2 > rect->y2)
			rect->y2 = damage.y2;
		found = true;
	}

	return found;
}
EXPORT_SYMBOL(drm_atomic_helper_damage_merged);
| 10298 10284 9 1 8 2 6 1 7 1 6 2 2 1 2 2 4 3 1 3 3 3 2 2 1 1 1 1 1 1 1 1 1 9 5 2 3 2 2 2 5 1 1 1 1883 1182 1176 1883 49 50 5 644 42 1876 1883 207 644 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 | // SPDX-License-Identifier: GPL-2.0-or-later /* * xfrm_device.c - IPsec device offloading code. * * Copyright (c) 2015 secunet Security Networks AG * * Author: * Steffen Klassert <steffen.klassert@secunet.com> */ #include <linux/errno.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <net/dst.h> #include <net/gso.h> #include <net/xfrm.h> #include <linux/notifier.h> #ifdef CONFIG_XFRM_OFFLOAD static void __xfrm_transport_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); skb_reset_mac_len(skb); if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header -= x->props.header_len; pskb_pull(skb, skb_transport_offset(skb) + x->props.header_len); } static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header = skb->network_header + hsize; skb_reset_mac_len(skb); pskb_pull(skb, skb->mac_len + x->props.header_len - x->props.enc_hdr_len); } static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); int phlen = 0; if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header = skb->network_header + hsize; skb_reset_mac_len(skb); if (x->sel.family != AF_INET6) { phlen = IPV4_BEET_PHMAXLEN; if (x->outer_mode.family == AF_INET6) phlen += sizeof(struct ipv6hdr) - sizeof(struct iphdr); } pskb_pull(skb, skb->mac_len + hsize + (x->props.header_len - phlen)); } /* Adjust pointers into 
the packet when IPsec is done at layer2 */ static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) { switch (x->outer_mode.encap) { case XFRM_MODE_IPTFS: case XFRM_MODE_TUNNEL: if (x->outer_mode.family == AF_INET) return __xfrm_mode_tunnel_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_mode_tunnel_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_TRANSPORT: if (x->outer_mode.family == AF_INET) return __xfrm_transport_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_transport_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_BEET: if (x->outer_mode.family == AF_INET) return __xfrm_mode_beet_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_mode_beet_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_IN_TRIGGER: break; } } static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb) { struct xfrm_offload *xo = xfrm_offload(skb); __u32 seq = xo->seq.low; seq += skb_shinfo(skb)->gso_segs; if (unlikely(seq < xo->seq.low)) return true; return false; } struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) { int err; unsigned long flags; struct xfrm_state *x; struct softnet_data *sd; struct sk_buff *skb2, *nskb, *pskb = NULL; netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); struct net_device *dev = skb->dev; struct sec_path *sp; if (!xo || (xo->flags & XFRM_XMIT)) return skb; if (!(features & NETIF_F_HW_ESP)) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); sp = skb_sec_path(skb); x = sp->xvec[sp->len - 1]; if (xo->flags & XFRM_GRO || x->xso.dir == XFRM_DEV_OFFLOAD_IN) return skb; /* The packet was sent to HW IPsec packet offload engine, * but to wrong device. Drop the packet, so it won't skip * XFRM stack. 
*/ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->xso.dev != dev) { kfree_skb(skb); dev_core_stats_tx_dropped_inc(dev); return NULL; } local_irq_save(flags); sd = this_cpu_ptr(&softnet_data); err = !skb_queue_empty(&sd->xfrm_backlog); local_irq_restore(flags); if (err) { *again = true; return skb; } if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || unlikely(xmit_xfrm_check_overflow(skb)))) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. */ esp_features = esp_features & ~(NETIF_F_HW_ESP | NETIF_F_GSO_ESP); segs = skb_gso_segment(skb, esp_features); if (IS_ERR(segs)) { kfree_skb(skb); dev_core_stats_tx_dropped_inc(dev); return NULL; } else { consume_skb(skb); skb = segs; } } if (!skb->next) { esp_features |= skb->dev->gso_partial_features; xfrm_outer_mode_prep(x, skb); xo->flags |= XFRM_DEV_RESUME; err = x->type_offload->xmit(x, skb, esp_features); if (err) { if (err == -EINPROGRESS) return NULL; XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); kfree_skb(skb); return NULL; } skb_push(skb, skb->data - skb_mac_header(skb)); return skb; } skb_list_walk_safe(skb, skb2, nskb) { esp_features |= skb->dev->gso_partial_features; skb_mark_not_on_list(skb2); xo = xfrm_offload(skb2); xo->flags |= XFRM_DEV_RESUME; xfrm_outer_mode_prep(x, skb2); err = x->type_offload->xmit(x, skb2, esp_features); if (!err) { skb2->next = nskb; } else if (err != -EINPROGRESS) { XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); skb2->next = nskb; kfree_skb_list(skb2); return NULL; } else { if (skb == skb2) skb = nskb; else pskb->next = nskb; continue; } skb_push(skb2, skb2->data - skb_mac_header(skb2)); pskb = skb2; } return skb; } EXPORT_SYMBOL_GPL(validate_xmit_xfrm); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo, struct netlink_ext_ack *extack) { int err; struct dst_entry *dst; struct net_device *dev; struct xfrm_dev_offload *xso = &x->xso; xfrm_address_t *saddr; xfrm_address_t *daddr; bool 
is_packet_offload; if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND | XFRM_OFFLOAD_PACKET)) { NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request"); return -EINVAL; } if ((xuo->flags & XFRM_OFFLOAD_INBOUND && x->dir == XFRM_SA_DIR_OUT) || (!(xuo->flags & XFRM_OFFLOAD_INBOUND) && x->dir == XFRM_SA_DIR_IN)) { NL_SET_ERR_MSG(extack, "Mismatched SA and offload direction"); return -EINVAL; } if (xuo->flags & XFRM_OFFLOAD_INBOUND && x->if_id) { NL_SET_ERR_MSG(extack, "XFRM if_id is not supported in RX path"); return -EINVAL; } is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET; /* We don't yet support TFC padding. */ if (x->tfcpad) { NL_SET_ERR_MSG(extack, "TFC padding can't be offloaded"); return -EINVAL; } dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { struct xfrm_dst_lookup_params params; if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { saddr = &x->props.saddr; daddr = &x->id.daddr; } else { saddr = &x->id.daddr; daddr = &x->props.saddr; } memset(¶ms, 0, sizeof(params)); params.net = net; params.saddr = saddr; params.daddr = daddr; params.mark = xfrm_smark_get(0, x); dst = __xfrm_dst_lookup(x->props.family, ¶ms); if (IS_ERR(dst)) return (is_packet_offload) ? -EINVAL : 0; dev = dst->dev; dev_hold(dev); dst_release(dst); } if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) { xso->dev = NULL; dev_put(dev); return (is_packet_offload) ? 
-EINVAL : 0; } if (!is_packet_offload && x->props.flags & XFRM_STATE_ESN && !dev->xfrmdev_ops->xdo_dev_state_advance_esn) { NL_SET_ERR_MSG(extack, "Device doesn't support offload with ESN"); xso->dev = NULL; dev_put(dev); return -EINVAL; } if (!x->type_offload) { NL_SET_ERR_MSG(extack, "Type doesn't support offload"); dev_put(dev); return -EINVAL; } xso->dev = dev; netdev_tracker_alloc(dev, &xso->dev_tracker, GFP_ATOMIC); if (xuo->flags & XFRM_OFFLOAD_INBOUND) xso->dir = XFRM_DEV_OFFLOAD_IN; else xso->dir = XFRM_DEV_OFFLOAD_OUT; if (is_packet_offload) xso->type = XFRM_DEV_OFFLOAD_PACKET; else xso->type = XFRM_DEV_OFFLOAD_CRYPTO; err = dev->xfrmdev_ops->xdo_dev_state_add(dev, x, extack); if (err) { xso->dev = NULL; xso->dir = 0; netdev_put(dev, &xso->dev_tracker); xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; xfrm_unset_type_offload(x); /* User explicitly requested packet offload mode and configured * policy in addition to the XFRM state. So be civil to users, * and return an error instead of taking fallback path. */ if ((err != -EOPNOTSUPP && !is_packet_offload) || is_packet_offload) { NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this state"); return err; } } return 0; } EXPORT_SYMBOL_GPL(xfrm_dev_state_add); int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, struct xfrm_user_offload *xuo, u8 dir, struct netlink_ext_ack *extack) { struct xfrm_dev_offload *xdo = &xp->xdo; struct net_device *dev; int err; if (!xuo->flags || xuo->flags & ~XFRM_OFFLOAD_PACKET) { /* We support only packet offload mode and it means * that user must set XFRM_OFFLOAD_PACKET bit. 
*/ NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request"); return -EINVAL; } dev = dev_get_by_index(net, xuo->ifindex); if (!dev) return -EINVAL; if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_policy_add) { xdo->dev = NULL; dev_put(dev); NL_SET_ERR_MSG(extack, "Policy offload is not supported"); return -EINVAL; } xdo->dev = dev; netdev_tracker_alloc(dev, &xdo->dev_tracker, GFP_ATOMIC); xdo->type = XFRM_DEV_OFFLOAD_PACKET; switch (dir) { case XFRM_POLICY_IN: xdo->dir = XFRM_DEV_OFFLOAD_IN; break; case XFRM_POLICY_OUT: xdo->dir = XFRM_DEV_OFFLOAD_OUT; break; case XFRM_POLICY_FWD: xdo->dir = XFRM_DEV_OFFLOAD_FWD; break; default: xdo->dev = NULL; netdev_put(dev, &xdo->dev_tracker); NL_SET_ERR_MSG(extack, "Unrecognized offload direction"); return -EINVAL; } err = dev->xfrmdev_ops->xdo_dev_policy_add(xp, extack); if (err) { xdo->dev = NULL; xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; xdo->dir = 0; netdev_put(dev, &xdo->dev_tracker); NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this policy"); return err; } return 0; } EXPORT_SYMBOL_GPL(xfrm_dev_policy_add); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) { int mtu; struct dst_entry *dst = skb_dst(skb); struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct net_device *dev = x->xso.dev; bool check_tunnel_size; if (!x->type_offload || (x->xso.type == XFRM_DEV_OFFLOAD_UNSPECIFIED && x->encap)) return false; if ((!dev || dev == xfrm_dst_path(dst)->dev) && !xdst->child->xfrm) { mtu = xfrm_state_mtu(x, xdst->child_mtu_cached); if (skb->len <= mtu) goto ok; if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) goto ok; } return false; ok: if (!dev) return true; check_tunnel_size = x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->props.mode == XFRM_MODE_TUNNEL; switch (x->inner_mode.family) { case AF_INET: /* Check for IPv4 options */ if (ip_hdr(skb)->ihl != 5) return false; if (check_tunnel_size && xfrm4_tunnel_check_size(skb)) return false; break; case AF_INET6: /* Check for IPv6 
extensions */ if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) return false; if (check_tunnel_size && xfrm6_tunnel_check_size(skb)) return false; break; default: break; } if (dev->xfrmdev_ops->xdo_dev_offload_ok) return dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x); return true; } EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); void xfrm_dev_resume(struct sk_buff *skb) { struct net_device *dev = skb->dev; int ret = NETDEV_TX_BUSY; struct netdev_queue *txq; struct softnet_data *sd; unsigned long flags; rcu_read_lock(); txq = netdev_core_pick_tx(dev, skb, NULL); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); if (!dev_xmit_complete(ret)) { local_irq_save(flags); sd = this_cpu_ptr(&softnet_data); skb_queue_tail(&sd->xfrm_backlog, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(xfrm_dev_resume); void xfrm_dev_backlog(struct softnet_data *sd) { struct sk_buff_head *xfrm_backlog = &sd->xfrm_backlog; struct sk_buff_head list; struct sk_buff *skb; if (skb_queue_empty(xfrm_backlog)) return; __skb_queue_head_init(&list); spin_lock(&xfrm_backlog->lock); skb_queue_splice_init(xfrm_backlog, &list); spin_unlock(&xfrm_backlog->lock); while (!skb_queue_empty(&list)) { skb = __skb_dequeue(&list); xfrm_dev_resume(skb); } } #endif static int xfrm_api_check(struct net_device *dev) { #ifdef CONFIG_XFRM_OFFLOAD if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && !(dev->features & NETIF_F_HW_ESP)) return NOTIFY_BAD; if ((dev->features & NETIF_F_HW_ESP) && (!(dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_state_add && dev->xfrmdev_ops->xdo_dev_state_delete))) return NOTIFY_BAD; #else if (dev->features & (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM)) return NOTIFY_BAD; #endif return NOTIFY_DONE; } static int xfrm_dev_down(struct net_device *dev) { if (dev->features & NETIF_F_HW_ESP) { xfrm_dev_state_flush(dev_net(dev), dev, true); 
xfrm_dev_policy_flush(dev_net(dev), dev, true); } return NOTIFY_DONE; } static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_REGISTER: return xfrm_api_check(dev); case NETDEV_FEAT_CHANGE: return xfrm_api_check(dev); case NETDEV_DOWN: case NETDEV_UNREGISTER: return xfrm_dev_down(dev); } return NOTIFY_DONE; } static struct notifier_block xfrm_dev_notifier = { .notifier_call = xfrm_dev_event, }; void __init xfrm_dev_init(void) { register_netdevice_notifier(&xfrm_dev_notifier); } |
| 7 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 | // SPDX-License-Identifier: GPL-2.0-or-later /* */ #include <linux/init.h> #include <linux/usb.h> #include <sound/core.h> #include <sound/info.h> #include <sound/pcm.h> #include "usbaudio.h" #include "helper.h" #include "card.h" #include "endpoint.h" #include "proc.h" /* convert our full speed USB rate into sampling rate in Hz */ static inline unsigned get_full_speed_hz(unsigned int usb_rate) { return (usb_rate * 125 + (1 << 12)) >> 13; } /* convert our high speed USB rate into sampling rate in Hz */ static inline unsigned get_high_speed_hz(unsigned int usb_rate) { return (usb_rate * 125 + (1 << 9)) >> 10; } /* * common proc files to show the usb device info */ static void proc_audio_usbbus_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_usb_audio *chip = entry->private_data; if (!atomic_read(&chip->shutdown)) snd_iprintf(buffer, "%03d/%03d\n", chip->dev->bus->busnum, chip->dev->devnum); } static void proc_audio_usbid_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_usb_audio *chip = entry->private_data; if (!atomic_read(&chip->shutdown)) snd_iprintf(buffer, "%04x:%04x\n", 
USB_ID_VENDOR(chip->usb_id), USB_ID_PRODUCT(chip->usb_id)); } void snd_usb_audio_create_proc(struct snd_usb_audio *chip) { snd_card_ro_proc_new(chip->card, "usbbus", chip, proc_audio_usbbus_read); snd_card_ro_proc_new(chip->card, "usbid", chip, proc_audio_usbid_read); } static const char * const channel_labels[] = { [SNDRV_CHMAP_NA] = "N/A", [SNDRV_CHMAP_MONO] = "MONO", [SNDRV_CHMAP_FL] = "FL", [SNDRV_CHMAP_FR] = "FR", [SNDRV_CHMAP_FC] = "FC", [SNDRV_CHMAP_LFE] = "LFE", [SNDRV_CHMAP_RL] = "RL", [SNDRV_CHMAP_RR] = "RR", [SNDRV_CHMAP_FLC] = "FLC", [SNDRV_CHMAP_FRC] = "FRC", [SNDRV_CHMAP_RC] = "RC", [SNDRV_CHMAP_SL] = "SL", [SNDRV_CHMAP_SR] = "SR", [SNDRV_CHMAP_TC] = "TC", [SNDRV_CHMAP_TFL] = "TFL", [SNDRV_CHMAP_TFC] = "TFC", [SNDRV_CHMAP_TFR] = "TFR", [SNDRV_CHMAP_TRL] = "TRL", [SNDRV_CHMAP_TRC] = "TRC", [SNDRV_CHMAP_TRR] = "TRR", [SNDRV_CHMAP_TFLC] = "TFLC", [SNDRV_CHMAP_TFRC] = "TFRC", [SNDRV_CHMAP_LLFE] = "LLFE", [SNDRV_CHMAP_RLFE] = "RLFE", [SNDRV_CHMAP_TSL] = "TSL", [SNDRV_CHMAP_TSR] = "TSR", [SNDRV_CHMAP_BC] = "BC", [SNDRV_CHMAP_RLC] = "RLC", [SNDRV_CHMAP_RRC] = "RRC", }; /* * proc interface for list the supported pcm formats */ static void proc_dump_substream_formats(struct snd_usb_substream *subs, struct snd_info_buffer *buffer) { struct audioformat *fp; static const char * const sync_types[4] = { "NONE", "ASYNC", "ADAPTIVE", "SYNC" }; list_for_each_entry(fp, &subs->fmt_list, list) { snd_pcm_format_t fmt; snd_iprintf(buffer, " Interface %d\n", fp->iface); snd_iprintf(buffer, " Altset %d\n", fp->altsetting); snd_iprintf(buffer, " Format:"); pcm_for_each_format(fmt) if (fp->formats & pcm_format_to_bits(fmt)) snd_iprintf(buffer, " %s", snd_pcm_format_name(fmt)); snd_iprintf(buffer, "\n"); snd_iprintf(buffer, " Channels: %d\n", fp->channels); snd_iprintf(buffer, " Endpoint: 0x%02x (%d %s) (%s)\n", fp->endpoint, fp->endpoint & USB_ENDPOINT_NUMBER_MASK, fp->endpoint & USB_DIR_IN ? 
"IN" : "OUT", sync_types[(fp->ep_attr & USB_ENDPOINT_SYNCTYPE) >> 2]); if (fp->rates & SNDRV_PCM_RATE_CONTINUOUS) { snd_iprintf(buffer, " Rates: %d - %d (continuous)\n", fp->rate_min, fp->rate_max); } else { unsigned int i; snd_iprintf(buffer, " Rates: "); for (i = 0; i < fp->nr_rates; i++) { if (i > 0) snd_iprintf(buffer, ", "); snd_iprintf(buffer, "%d", fp->rate_table[i]); } snd_iprintf(buffer, "\n"); } if (subs->speed != USB_SPEED_FULL) snd_iprintf(buffer, " Data packet interval: %d us\n", 125 * (1 << fp->datainterval)); snd_iprintf(buffer, " Bits: %d\n", fp->fmt_bits); if (fp->dsd_raw) snd_iprintf(buffer, " DSD raw: DOP=%d, bitrev=%d\n", fp->dsd_dop, fp->dsd_bitrev); if (fp->chmap) { const struct snd_pcm_chmap_elem *map = fp->chmap; int c; snd_iprintf(buffer, " Channel map:"); for (c = 0; c < map->channels; c++) { if (map->map[c] >= ARRAY_SIZE(channel_labels) || !channel_labels[map->map[c]]) snd_iprintf(buffer, " --"); else snd_iprintf(buffer, " %s", channel_labels[map->map[c]]); } snd_iprintf(buffer, "\n"); } if (fp->sync_ep) { snd_iprintf(buffer, " Sync Endpoint: 0x%02x (%d %s)\n", fp->sync_ep, fp->sync_ep & USB_ENDPOINT_NUMBER_MASK, fp->sync_ep & USB_DIR_IN ? "IN" : "OUT"); snd_iprintf(buffer, " Sync EP Interface: %d\n", fp->sync_iface); snd_iprintf(buffer, " Sync EP Altset: %d\n", fp->sync_altsetting); snd_iprintf(buffer, " Implicit Feedback Mode: %s\n", fp->implicit_fb ? "Yes" : "No"); } // snd_iprintf(buffer, " Max Packet Size = %d\n", fp->maxpacksize); // snd_iprintf(buffer, " EP Attribute = %#x\n", fp->attributes); } } static void proc_dump_ep_status(struct snd_usb_substream *subs, struct snd_usb_endpoint *data_ep, struct snd_usb_endpoint *sync_ep, struct snd_info_buffer *buffer) { if (!data_ep) return; snd_iprintf(buffer, " Packet Size = %d\n", data_ep->curpacksize); snd_iprintf(buffer, " Momentary freq = %u Hz (%#x.%04x)\n", subs->speed == USB_SPEED_FULL ? 
get_full_speed_hz(data_ep->freqm) : get_high_speed_hz(data_ep->freqm), data_ep->freqm >> 16, data_ep->freqm & 0xffff); if (sync_ep && data_ep->freqshift != INT_MIN) { int res = 16 - data_ep->freqshift; snd_iprintf(buffer, " Feedback Format = %d.%d\n", (sync_ep->syncmaxsize > 3 ? 32 : 24) - res, res); } } static void proc_dump_substream_status(struct snd_usb_audio *chip, struct snd_usb_substream *subs, struct snd_info_buffer *buffer) { guard(mutex)(&chip->mutex); if (subs->running) { snd_iprintf(buffer, " Status: Running\n"); if (subs->cur_audiofmt) { snd_iprintf(buffer, " Interface = %d\n", subs->cur_audiofmt->iface); snd_iprintf(buffer, " Altset = %d\n", subs->cur_audiofmt->altsetting); } proc_dump_ep_status(subs, subs->data_endpoint, subs->sync_endpoint, buffer); } else { snd_iprintf(buffer, " Status: Stop\n"); } } static void proc_pcm_format_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_usb_stream *stream = entry->private_data; struct snd_usb_audio *chip = stream->chip; snd_iprintf(buffer, "%s : %s\n", chip->card->longname, stream->pcm->name); if (stream->substream[SNDRV_PCM_STREAM_PLAYBACK].num_formats) { snd_iprintf(buffer, "\nPlayback:\n"); proc_dump_substream_status(chip, &stream->substream[SNDRV_PCM_STREAM_PLAYBACK], buffer); proc_dump_substream_formats(&stream->substream[SNDRV_PCM_STREAM_PLAYBACK], buffer); } if (stream->substream[SNDRV_PCM_STREAM_CAPTURE].num_formats) { snd_iprintf(buffer, "\nCapture:\n"); proc_dump_substream_status(chip, &stream->substream[SNDRV_PCM_STREAM_CAPTURE], buffer); proc_dump_substream_formats(&stream->substream[SNDRV_PCM_STREAM_CAPTURE], buffer); } } void snd_usb_proc_pcm_format_add(struct snd_usb_stream *stream) { char name[32]; struct snd_card *card = stream->chip->card; scnprintf(name, sizeof(name), "stream%d", stream->pcm_index); snd_card_ro_proc_new(card, name, stream, proc_pcm_format_read); } |
| 217 217 467 467 153 153 52 52 48 48 472 471 472 143 142 142 143 113 113 253 253 253 253 119 119 119 110 28 119 58 57 58 43 4 1 4 4 4 4 4 58 18 30 30 30 28 28 28 26 35 21 35 5 5 4 66 2 64 575 574 573 575 574 537 586 586 540 584 570 582 582 540 581 582 525 584 524 514 291 262 4 262 4 1 262 8 3 3 5 1 4 2 2 258 68 4 66 66 66 66 66 65 66 201 2 2 2 200 3 200 6 6 5 8 8 246 58 58 47 58 58 58 58 6 6 6 864 863 480 479 3 2 385 384 84 100 15 5 1 2 7 6 6 6 6 6 2 2 1 1 2 10 10 10 10 11 1 11 6 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 
905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 
1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 
1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 
2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 
2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 
2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 
3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 
3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 
4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com */ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/bpf-cgroup.h> #include <linux/cgroup.h> #include <linux/rcupdate.h> #include <linux/random.h> #include <linux/smp.h> #include <linux/topology.h> #include <linux/ktime.h> #include <linux/sched.h> #include <linux/uidgid.h> #include <linux/filter.h> #include 
<linux/ctype.h>
#include <linux/jiffies.h>
#include <linux/pid_namespace.h>
#include <linux/poison.h>
#include <linux/proc_ns.h>
#include <linux/sched/task.h>
#include <linux/security.h>
#include <linux/btf_ids.h>
#include <linux/bpf_mem_alloc.h>
#include <linux/kasan.h>
#include <linux/bpf_verifier.h>
#include <linux/uaccess.h>
#include <linux/verification.h>
#include <linux/task_work.h>
#include <linux/irq_work.h>

#include "../../lib/kstrtox.h"

/* If kernel subsystem is allowing eBPF programs to call this function,
 * inside its own verifier_ops->get_func_proto() callback it should return
 * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
 *
 * Different map implementations will rely on rcu in map methods
 * lookup/update/delete, therefore eBPF programs must run under rcu lock
 * if program is allowed to access maps, so check rcu_read_lock_held() or
 * rcu_read_lock_trace_held() in all three functions.
 */
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
	/* Warn (once) when none of the three RCU read-side checks reports a
	 * held lock; the lookup is still performed afterwards.
	 */
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());
	/* Dispatch through the map's ops table; the result is returned as an
	 * unsigned long.
	 */
	return (unsigned long) map->ops->map_lookup_elem(map, key);
}

/* Prototype for bpf_map_lookup_elem(): declares the return type
 * (RET_PTR_TO_MAP_VALUE_OR_NULL) and the two argument types (map, key).
 */
const struct bpf_func_proto bpf_map_lookup_elem_proto = {
	.func		= bpf_map_lookup_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

/* Update helper: same RCU sanity warning as the lookup helper above, then
 * forwards map, key, value and flags to the map's map_update_elem op and
 * returns whatever that op returns.
 */
BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
	   void *, value, u64, flags)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());
	return map->ops->map_update_elem(map, key, value, flags);
}

/* Prototype for bpf_map_update_elem(): integer return; arguments are
 * map, key, value and an unconstrained (ARG_ANYTHING) flags word.
 */
const struct bpf_func_proto bpf_map_update_elem_proto = {
	.func		= bpf_map_update_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map,
void *, key)
{
	/* Warn (once) when none of the three RCU read-side checks reports a
	 * held lock; the delete is still attempted afterwards.
	 */
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());
	/* Forward to the map implementation's delete op. */
	return map->ops->map_delete_elem(map, key);
}

/* Prototype for bpf_map_delete_elem(): integer return; arguments are
 * a map pointer and a key.
 */
const struct bpf_func_proto bpf_map_delete_elem_proto = {
	.func		= bpf_map_delete_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

/* Push helper: forwards value and flags to the map's map_push_elem op.
 * No RCU check is performed here, unlike lookup/update/delete.
 */
BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
{
	return map->ops->map_push_elem(map, value, flags);
}

/* Prototype for bpf_map_push_elem(): integer return; arguments are
 * map, value and an unconstrained flags word.
 */
const struct bpf_func_proto bpf_map_push_elem_proto = {
	.func		= bpf_map_push_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
	.arg3_type	= ARG_ANYTHING,
};

/* Pop helper: forwards to the map's map_pop_elem op, which receives the
 * caller's value buffer.
 */
BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_pop_elem(map, value);
}

/* Prototype for bpf_map_pop_elem(): the value argument is flagged
 * MEM_UNINIT | MEM_WRITE, i.e. declared as an output buffer.
 */
const struct bpf_func_proto bpf_map_pop_elem_proto = {
	.func		= bpf_map_pop_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
};

/* Peek helper: forwards to the map's map_peek_elem op, which receives the
 * caller's value buffer.
 */
BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_peek_elem(map, value);
}

/* Prototype for bpf_map_peek_elem(): same argument layout as the pop
 * prototype (map, output value buffer).
 */
const struct bpf_func_proto bpf_map_peek_elem_proto = {
	.func		= bpf_map_peek_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
};

/* Per-CPU lookup helper: same RCU sanity warning as the other lookup
 * path, then forwards map, key and the requested cpu to the map's
 * map_lookup_percpu_elem op. The result is returned as an unsigned long.
 */
BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());
	return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
}

/* Prototype for bpf_map_lookup_percpu_elem(): returns a map value pointer
 * or NULL; arguments are map, key and an unconstrained cpu number.
 */
const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = {
	.func		= bpf_map_lookup_percpu_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
	.arg3_type	= ARG_ANYTHING,
};

const struct bpf_func_proto
bpf_get_prandom_u32_proto = {
	/* Backed by bpf_user_rnd_u32, which is not defined in this part of
	 * the file.
	 */
	.func		= bpf_user_rnd_u32,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Returns smp_processor_id() to the calling program. */
BPF_CALL_0(bpf_get_smp_processor_id)
{
	return smp_processor_id();
}

/* Prototype for bpf_get_smp_processor_id(): no arguments, integer return,
 * and the only prototype here that sets allow_fastcall.
 */
const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
	.func		= bpf_get_smp_processor_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.allow_fastcall	= true,
};

/* Returns numa_node_id() to the calling program. */
BPF_CALL_0(bpf_get_numa_node_id)
{
	return numa_node_id();
}

/* Prototype for bpf_get_numa_node_id(): no arguments, integer return. */
const struct bpf_func_proto bpf_get_numa_node_id_proto = {
	.func		= bpf_get_numa_node_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Monotonic clock reading via ktime_get_mono_fast_ns(). */
BPF_CALL_0(bpf_ktime_get_ns)
{
	/* NMI safe access to clock monotonic */
	return ktime_get_mono_fast_ns();
}

/* Prototype for bpf_ktime_get_ns(): no arguments, integer return. */
const struct bpf_func_proto bpf_ktime_get_ns_proto = {
	.func		= bpf_ktime_get_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Boottime clock reading via ktime_get_boot_fast_ns(). */
BPF_CALL_0(bpf_ktime_get_boot_ns)
{
	/* NMI safe access to clock boottime */
	return ktime_get_boot_fast_ns();
}

/* Prototype for bpf_ktime_get_boot_ns(): no arguments, integer return. */
const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
	.func		= bpf_ktime_get_boot_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Clock reading via ktime_get_coarse_ns(). */
BPF_CALL_0(bpf_ktime_get_coarse_ns)
{
	return ktime_get_coarse_ns();
}

/* Prototype for bpf_ktime_get_coarse_ns(): no arguments, integer return. */
const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
	.func		= bpf_ktime_get_coarse_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* TAI clock reading via ktime_get_tai_fast_ns(). */
BPF_CALL_0(bpf_ktime_get_tai_ns)
{
	/* NMI safe access to clock tai */
	return ktime_get_tai_fast_ns();
}

/* Prototype for bpf_ktime_get_tai_ns(): no arguments, integer return. */
const struct bpf_func_proto bpf_ktime_get_tai_ns_proto = {
	.func		= bpf_ktime_get_tai_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Returns the current task's tgid in the upper 32 bits and its pid in the
 * lower bits of a single 64-bit value, or -EINVAL when `current` is NULL.
 */
BPF_CALL_0(bpf_get_current_pid_tgid)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		return -EINVAL;

	return (u64) task->tgid << 32 | task->pid;
}

/* Prototype for bpf_get_current_pid_tgid(): no arguments, integer return. */
const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
	.func		= bpf_get_current_pid_tgid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

/* Returns -EINVAL when `current` is NULL; otherwise fetches the current
 * uid/gid pair and places the gid, translated through init_user_ns, in
 * the upper 32 bits of the result.
 */
BPF_CALL_0(bpf_get_current_uid_gid)
{
	struct task_struct *task = current;
	kuid_t uid;
	kgid_t gid;

	if (unlikely(!task))
		return -EINVAL;

	current_uid_gid(&uid, &gid);
	return (u64) from_kgid(&init_user_ns, gid) << 32 |
from_kuid(&init_user_ns, uid); } const struct bpf_func_proto bpf_get_current_uid_gid_proto = { .func = bpf_get_current_uid_gid, .gpl_only = false, .ret_type = RET_INTEGER, }; BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) { struct task_struct *task = current; if (unlikely(!task)) goto err_clear; /* Verifier guarantees that size > 0 */ strscpy_pad(buf, task->comm, size); return 0; err_clear: memset(buf, 0, size); return -EINVAL; } const struct bpf_func_proto bpf_get_current_comm_proto = { .func = bpf_get_current_comm, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE, }; #if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK) static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) { arch_spinlock_t *l = (void *)lock; union { __u32 val; arch_spinlock_t lock; } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0"); BUILD_BUG_ON(sizeof(*l) != sizeof(__u32)); BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32)); preempt_disable(); arch_spin_lock(l); } static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) { arch_spinlock_t *l = (void *)lock; arch_spin_unlock(l); preempt_enable(); } #else static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) { atomic_t *l = (void *)lock; BUILD_BUG_ON(sizeof(*l) != sizeof(*lock)); do { atomic_cond_read_relaxed(l, !VAL); } while (atomic_xchg(l, 1)); } static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) { atomic_t *l = (void *)lock; atomic_set_release(l, 0); } #endif static DEFINE_PER_CPU(unsigned long, irqsave_flags); static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock) { unsigned long flags; local_irq_save(flags); __bpf_spin_lock(lock); __this_cpu_write(irqsave_flags, flags); } NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) { __bpf_spin_lock_irqsave(lock); return 0; } const struct bpf_func_proto bpf_spin_lock_proto = { .func = 
bpf_spin_lock, .gpl_only = false, .ret_type = RET_VOID, .arg1_type = ARG_PTR_TO_SPIN_LOCK, .arg1_btf_id = BPF_PTR_POISON, }; static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock) { unsigned long flags; flags = __this_cpu_read(irqsave_flags); __bpf_spin_unlock(lock); local_irq_restore(flags); } NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) { __bpf_spin_unlock_irqrestore(lock); return 0; } const struct bpf_func_proto bpf_spin_unlock_proto = { .func = bpf_spin_unlock, .gpl_only = false, .ret_type = RET_VOID, .arg1_type = ARG_PTR_TO_SPIN_LOCK, .arg1_btf_id = BPF_PTR_POISON, }; void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src) { struct bpf_spin_lock *lock; if (lock_src) lock = src + map->record->spin_lock_off; else lock = dst + map->record->spin_lock_off; preempt_disable(); __bpf_spin_lock_irqsave(lock); copy_map_value(map, dst, src); __bpf_spin_unlock_irqrestore(lock); preempt_enable(); } BPF_CALL_0(bpf_jiffies64) { return get_jiffies_64(); } const struct bpf_func_proto bpf_jiffies64_proto = { .func = bpf_jiffies64, .gpl_only = false, .ret_type = RET_INTEGER, }; #ifdef CONFIG_CGROUPS BPF_CALL_0(bpf_get_current_cgroup_id) { struct cgroup *cgrp; u64 cgrp_id; rcu_read_lock(); cgrp = task_dfl_cgroup(current); cgrp_id = cgroup_id(cgrp); rcu_read_unlock(); return cgrp_id; } const struct bpf_func_proto bpf_get_current_cgroup_id_proto = { .func = bpf_get_current_cgroup_id, .gpl_only = false, .ret_type = RET_INTEGER, }; BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level) { struct cgroup *cgrp; struct cgroup *ancestor; u64 cgrp_id; rcu_read_lock(); cgrp = task_dfl_cgroup(current); ancestor = cgroup_ancestor(cgrp, ancestor_level); cgrp_id = ancestor ? 
cgroup_id(ancestor) : 0; rcu_read_unlock(); return cgrp_id; } const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = { .func = bpf_get_current_ancestor_cgroup_id, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, }; #endif /* CONFIG_CGROUPS */ #define BPF_STRTOX_BASE_MASK 0x1F static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags, unsigned long long *res, bool *is_negative) { unsigned int base = flags & BPF_STRTOX_BASE_MASK; const char *cur_buf = buf; size_t cur_len = buf_len; unsigned int consumed; size_t val_len; char str[64]; if (!buf || !buf_len || !res || !is_negative) return -EINVAL; if (base != 0 && base != 8 && base != 10 && base != 16) return -EINVAL; if (flags & ~BPF_STRTOX_BASE_MASK) return -EINVAL; while (cur_buf < buf + buf_len && isspace(*cur_buf)) ++cur_buf; *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-'); if (*is_negative) ++cur_buf; consumed = cur_buf - buf; cur_len -= consumed; if (!cur_len) return -EINVAL; cur_len = min(cur_len, sizeof(str) - 1); memcpy(str, cur_buf, cur_len); str[cur_len] = '\0'; cur_buf = str; cur_buf = _parse_integer_fixup_radix(cur_buf, &base); val_len = _parse_integer(cur_buf, base, res); if (val_len & KSTRTOX_OVERFLOW) return -ERANGE; if (val_len == 0) return -EINVAL; cur_buf += val_len; consumed += cur_buf - str; return consumed; } static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags, long long *res) { unsigned long long _res; bool is_negative; int err; err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); if (err < 0) return err; if (is_negative) { if ((long long)-_res > 0) return -ERANGE; *res = -_res; } else { if ((long long)_res < 0) return -ERANGE; *res = _res; } return err; } BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, s64 *, res) { long long _res; int err; *res = 0; err = __bpf_strtoll(buf, buf_len, flags, &_res); if (err < 0) return err; *res = _res; return err; } const struct bpf_func_proto 
bpf_strtol_proto = { .func = bpf_strtol, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, .arg4_size = sizeof(s64), }; BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, u64 *, res) { unsigned long long _res; bool is_negative; int err; *res = 0; err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); if (err < 0) return err; if (is_negative) return -EINVAL; *res = _res; return err; } const struct bpf_func_proto bpf_strtoul_proto = { .func = bpf_strtoul, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, .arg4_size = sizeof(u64), }; BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2) { return strncmp(s1, s2, s1_sz); } static const struct bpf_func_proto bpf_strncmp_proto = { .func = bpf_strncmp, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_PTR_TO_CONST_STR, }; BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, struct bpf_pidns_info *, nsdata, u32, size) { struct task_struct *task = current; struct pid_namespace *pidns; int err = -EINVAL; if (unlikely(size != sizeof(struct bpf_pidns_info))) goto clear; if (unlikely((u64)(dev_t)dev != dev)) goto clear; if (unlikely(!task)) goto clear; pidns = task_active_pid_ns(task); if (unlikely(!pidns)) { err = -ENOENT; goto clear; } if (!ns_match(&pidns->ns, (dev_t)dev, ino)) goto clear; nsdata->pid = task_pid_nr_ns(task, pidns); nsdata->tgid = task_tgid_nr_ns(task, pidns); return 0; clear: memset((void *)nsdata, 0, (size_t) size); return err; } const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = { .func = bpf_get_ns_current_pid_tgid, 
.gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_UNINIT_MEM, .arg4_type = ARG_CONST_SIZE, }; static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { .func = bpf_get_raw_cpu_id, .gpl_only = false, .ret_type = RET_INTEGER, }; BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, u64, flags, void *, data, u64, size) { if (unlikely(flags & ~(BPF_F_INDEX_MASK))) return -EINVAL; return bpf_event_output(map, flags, data, size, NULL, 0, NULL); } const struct bpf_func_proto bpf_event_output_data_proto = { .func = bpf_event_output_data, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size, const void __user *, user_ptr) { int ret = copy_from_user(dst, user_ptr, size); if (unlikely(ret)) { memset(dst, 0, size); ret = -EFAULT; } return ret; } const struct bpf_func_proto bpf_copy_from_user_proto = { .func = bpf_copy_from_user, .gpl_only = false, .might_sleep = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size, const void __user *, user_ptr, struct task_struct *, tsk, u64, flags) { int ret; /* flags is not used yet */ if (unlikely(flags)) return -EINVAL; if (unlikely(!size)) return 0; ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0); if (ret == size) return 0; memset(dst, 0, size); /* Return -EFAULT for partial read */ return ret < 0 ? 
ret : -EFAULT; } const struct bpf_func_proto bpf_copy_from_user_task_proto = { .func = bpf_copy_from_user_task, .gpl_only = true, .might_sleep = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_BTF_ID, .arg4_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .arg5_type = ARG_ANYTHING }; BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) { if (cpu >= nr_cpu_ids) return (unsigned long)NULL; return (unsigned long)per_cpu_ptr((const void __percpu *)(const uintptr_t)ptr, cpu); } const struct bpf_func_proto bpf_per_cpu_ptr_proto = { .func = bpf_per_cpu_ptr, .gpl_only = false, .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY, .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, .arg2_type = ARG_ANYTHING, }; BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr) { return (unsigned long)this_cpu_ptr((const void __percpu *)(const uintptr_t)percpu_ptr); } const struct bpf_func_proto bpf_this_cpu_ptr_proto = { .func = bpf_this_cpu_ptr, .gpl_only = false, .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY, .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, }; static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, size_t bufsz) { void __user *user_ptr = (__force void __user *)unsafe_ptr; buf[0] = 0; switch (fmt_ptype) { case 's': #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE if ((unsigned long)unsafe_ptr < TASK_SIZE) return strncpy_from_user_nofault(buf, user_ptr, bufsz); fallthrough; #endif case 'k': return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz); case 'u': return strncpy_from_user_nofault(buf, user_ptr, bufsz); } return -EINVAL; } /* Support executing three nested bprintf helper calls on a given CPU */ #define MAX_BPRINTF_NEST_LEVEL 3 static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs); static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); int bpf_try_get_buffers(struct bpf_bprintf_buffers **bufs) 
{ int nest_level; nest_level = this_cpu_inc_return(bpf_bprintf_nest_level); if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) { this_cpu_dec(bpf_bprintf_nest_level); return -EBUSY; } *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]); return 0; } void bpf_put_buffers(void) { if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0)) return; this_cpu_dec(bpf_bprintf_nest_level); } void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) { if (!data->bin_args && !data->buf) return; bpf_put_buffers(); } /* * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers * * Returns a negative value if fmt is an invalid format string or 0 otherwise. * * This can be used in two ways: * - Format string verification only: when data->get_bin_args is false * - Arguments preparation: in addition to the above verification, it writes in * data->bin_args a binary representation of arguments usable by bstr_printf * where pointers from BPF have been sanitized. * * In argument preparation mode, if 0 is returned, safe temporary buffers are * allocated and bpf_bprintf_cleanup should be called to free them after use. 
*/ int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args, u32 num_args, struct bpf_bprintf_data *data) { bool get_buffers = (data->get_bin_args && num_args) || data->get_buf; char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; struct bpf_bprintf_buffers *buffers = NULL; size_t sizeof_cur_arg, sizeof_cur_ip; int err, i, num_spec = 0; u64 cur_arg; char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX"; fmt_end = strnchr(fmt, fmt_size, 0); if (!fmt_end) return -EINVAL; fmt_size = fmt_end - fmt; if (get_buffers && bpf_try_get_buffers(&buffers)) return -EBUSY; if (data->get_bin_args) { if (num_args) tmp_buf = buffers->bin_args; tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS; data->bin_args = (u32 *)tmp_buf; } if (data->get_buf) data->buf = buffers->buf; for (i = 0; i < fmt_size; i++) { if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { err = -EINVAL; goto out; } if (fmt[i] != '%') continue; if (fmt[i + 1] == '%') { i++; continue; } if (num_spec >= num_args) { err = -EINVAL; goto out; } /* The string is zero-terminated so if fmt[i] != 0, we can * always access fmt[i + 1], in the worst case it will be a 0 */ i++; /* skip optional "[0 +-][num]" width formatting field */ while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' || fmt[i] == ' ') i++; if (fmt[i] >= '1' && fmt[i] <= '9') { i++; while (fmt[i] >= '0' && fmt[i] <= '9') i++; } if (fmt[i] == 'p') { sizeof_cur_arg = sizeof(long); if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) || ispunct(fmt[i + 1])) { if (tmp_buf) cur_arg = raw_args[num_spec]; goto nocopy_fmt; } if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') && fmt[i + 2] == 's') { fmt_ptype = fmt[i + 1]; i += 2; goto fmt_str; } if (fmt[i + 1] == 'K' || fmt[i + 1] == 'x' || fmt[i + 1] == 's' || fmt[i + 1] == 'S') { if (tmp_buf) cur_arg = raw_args[num_spec]; i++; goto nocopy_fmt; } if (fmt[i + 1] == 'B') { if (tmp_buf) { err = snprintf(tmp_buf, (tmp_buf_end - tmp_buf), "%pB", (void *)(long)raw_args[num_spec]); tmp_buf += (err + 1); } 
i++; num_spec++; continue; } /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */ if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') || (fmt[i + 2] != '4' && fmt[i + 2] != '6')) { err = -EINVAL; goto out; } i += 2; if (!tmp_buf) goto nocopy_fmt; sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16; if (tmp_buf_end - tmp_buf < sizeof_cur_ip) { err = -ENOSPC; goto out; } unsafe_ptr = (char *)(long)raw_args[num_spec]; err = copy_from_kernel_nofault(cur_ip, unsafe_ptr, sizeof_cur_ip); if (err < 0) memset(cur_ip, 0, sizeof_cur_ip); /* hack: bstr_printf expects IP addresses to be * pre-formatted as strings, ironically, the easiest way * to do that is to call snprintf. */ ip_spec[2] = fmt[i - 1]; ip_spec[3] = fmt[i]; err = snprintf(tmp_buf, tmp_buf_end - tmp_buf, ip_spec, &cur_ip); tmp_buf += err + 1; num_spec++; continue; } else if (fmt[i] == 's') { fmt_ptype = fmt[i]; fmt_str: if (fmt[i + 1] != 0 && !isspace(fmt[i + 1]) && !ispunct(fmt[i + 1])) { err = -EINVAL; goto out; } if (!tmp_buf) goto nocopy_fmt; if (tmp_buf_end == tmp_buf) { err = -ENOSPC; goto out; } unsafe_ptr = (char *)(long)raw_args[num_spec]; err = bpf_trace_copy_string(tmp_buf, unsafe_ptr, fmt_ptype, tmp_buf_end - tmp_buf); if (err < 0) { tmp_buf[0] = '\0'; err = 1; } tmp_buf += err; num_spec++; continue; } else if (fmt[i] == 'c') { if (!tmp_buf) goto nocopy_fmt; if (tmp_buf_end == tmp_buf) { err = -ENOSPC; goto out; } *tmp_buf = raw_args[num_spec]; tmp_buf++; num_spec++; continue; } sizeof_cur_arg = sizeof(int); if (fmt[i] == 'l') { sizeof_cur_arg = sizeof(long); i++; } if (fmt[i] == 'l') { sizeof_cur_arg = sizeof(long long); i++; } if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x' && fmt[i] != 'X') { err = -EINVAL; goto out; } if (tmp_buf) cur_arg = raw_args[num_spec]; nocopy_fmt: if (tmp_buf) { tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32)); if (tmp_buf_end - tmp_buf < sizeof_cur_arg) { err = -ENOSPC; goto out; } if (sizeof_cur_arg == 8) { *(u32 *)tmp_buf = *(u32 *)&cur_arg; *(u32 *)(tmp_buf + 4) = *((u32 
*)&cur_arg + 1); } else { *(u32 *)tmp_buf = (u32)(long)cur_arg; } tmp_buf += sizeof_cur_arg; } num_spec++; } err = 0; out: if (err) bpf_bprintf_cleanup(data); return err; } BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, const void *, args, u32, data_len) { struct bpf_bprintf_data data = { .get_bin_args = true, }; int err, num_args; if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 || (data_len && !args)) return -EINVAL; num_args = data_len / 8; /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we * can safely give an unbounded size. */ err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data); if (err < 0) return err; err = bstr_printf(str, str_size, fmt, data.bin_args); bpf_bprintf_cleanup(&data); return err + 1; } const struct bpf_func_proto bpf_snprintf_proto = { .func = bpf_snprintf, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_MEM_OR_NULL, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_PTR_TO_CONST_STR, .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; static void *map_key_from_value(struct bpf_map *map, void *value, u32 *arr_idx) { if (map->map_type == BPF_MAP_TYPE_ARRAY) { struct bpf_array *array = container_of(map, struct bpf_array, map); *arr_idx = ((char *)value - array->value) / array->elem_size; return arr_idx; } return (void *)value - round_up(map->key_size, 8); } struct bpf_async_cb { struct bpf_map *map; struct bpf_prog *prog; void __rcu *callback_fn; void *value; union { struct rcu_head rcu; struct work_struct delete_work; }; u64 flags; }; /* BPF map elements can contain 'struct bpf_timer'. * Such map owns all of its BPF timers. * 'struct bpf_timer' is allocated as part of map element allocation * and it's zero initialized. * That space is used to keep 'struct bpf_async_kern'. * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and * remembers 'struct bpf_map *' pointer it's part of. 
* bpf_timer_set_callback() increments prog refcnt and assign bpf callback_fn.
 * bpf_timer_start() arms the timer.
 * If user space reference to a map goes to zero at this point
 * ops->map_release_uref callback is responsible for cancelling the timers,
 * freeing their memory, and decrementing prog's refcnts.
 * bpf_timer_cancel() cancels the timer and decrements prog's refcnt.
 * Inner maps can contain bpf timers as well. ops->map_release_uref is
 * freeing the timers when inner map is replaced or deleted by user space.
 */
struct bpf_hrtimer {
	/* Common async state; kept first because code in this file casts
	 * between 'struct bpf_async_cb *' and 'struct bpf_hrtimer *'.
	 */
	struct bpf_async_cb cb;
	struct hrtimer timer;
	/* Count of cancellations of this timer in flight from timer callbacks. */
	atomic_t cancelling;
};

struct bpf_work {
	/* Common async state; kept first, see struct bpf_hrtimer. */
	struct bpf_async_cb cb;
	/* Work item that runs the BPF callback. */
	struct work_struct work;
	/* Work item that cancels 'work' and frees this object. */
	struct work_struct delete_work;
};

/* the actual struct hidden inside uapi struct bpf_timer and bpf_wq */
struct bpf_async_kern {
	/* Three typed views of the same allocated object pointer. */
	union {
		struct bpf_async_cb *cb;
		struct bpf_hrtimer *timer;
		struct bpf_work *work;
	};
	/* bpf_spin_lock is used here instead of spinlock_t to make
	 * sure that it always fits into space reserved by struct bpf_timer
	 * regardless of LOCKDEP and spinlock debug flags.
	 */
	struct bpf_spin_lock lock;
} __attribute__((aligned(8)));

/* Selects which flavour of async object an operation applies to. */
enum bpf_async_type {
	BPF_ASYNC_TYPE_TIMER = 0,
	BPF_ASYNC_TYPE_WQ,
};

/* Per-CPU pointer to the timer whose callback is currently running here. */
static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);

/*
 * bpf_timer_cb - hrtimer expiry handler that invokes the BPF timer callback
 *
 * Always returns HRTIMER_NORESTART.
 */
static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
{
	struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
	struct bpf_map *map = t->cb.map;
	void *value = t->cb.value;
	bpf_callback_t callback_fn;
	void *key;
	u32 idx;

	BTF_TYPE_EMIT(struct bpf_timer);
	callback_fn = rcu_dereference_check(t->cb.callback_fn, rcu_read_lock_bh_held());
	/* No callback installed: nothing to run. */
	if (!callback_fn)
		goto out;

	/* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
	 * cannot be preempted by another bpf_timer_cb() on the same cpu.
	 * Remember the timer this callback is servicing to prevent
	 * deadlock if callback_fn() calls bpf_timer_cancel() or
	 * bpf_map_delete_elem() on the same timer.
*/
	this_cpu_write(hrtimer_running, t);

	key = map_key_from_value(map, value, &idx);

	callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
	/* The verifier checked that return value is zero. */

	/* Callback finished: this CPU is no longer servicing a timer. */
	this_cpu_write(hrtimer_running, NULL);
out:
	return HRTIMER_NORESTART;
}

/*
 * bpf_wq_work - work handler that invokes the BPF workqueue callback
 *
 * Returns without doing anything when no callback is installed. The callback
 * runs inside an RCU-tasks-trace read section with migration disabled.
 */
static void bpf_wq_work(struct work_struct *work)
{
	struct bpf_work *w = container_of(work, struct bpf_work, work);
	struct bpf_async_cb *cb = &w->cb;
	struct bpf_map *map = cb->map;
	bpf_callback_t callback_fn;
	void *value = cb->value;
	void *key;
	u32 idx;

	BTF_TYPE_EMIT(struct bpf_wq);

	callback_fn = READ_ONCE(cb->callback_fn);
	if (!callback_fn)
		return;

	key = map_key_from_value(map, value, &idx);

	rcu_read_lock_trace();
	migrate_disable();

	callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);

	migrate_enable();
	rcu_read_unlock_trace();
}

/* RCU callback: free the async object that embeds @rcu. */
static void bpf_async_cb_rcu_free(struct rcu_head *rcu)
{
	struct bpf_async_cb *cb = container_of(rcu, struct bpf_async_cb, rcu);

	kfree_nolock(cb);
}

/*
 * bpf_wq_delete_work - deferred teardown of a struct bpf_work
 *
 * Synchronously cancels the callback work item, then queues the object for
 * freeing via call_rcu().
 */
static void bpf_wq_delete_work(struct work_struct *work)
{
	struct bpf_work *w = container_of(work, struct bpf_work, delete_work);

	cancel_work_sync(&w->work);

	call_rcu(&w->cb.rcu, bpf_async_cb_rcu_free);
}

/*
 * bpf_timer_delete_work - deferred teardown of a struct bpf_hrtimer
 *
 * Used as the handler of cb.delete_work and also called directly.
 */
static void bpf_timer_delete_work(struct work_struct *work)
{
	struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work);

	/* Cancel the timer and wait for callback to complete if it was running.
	 * If hrtimer_cancel() can be safely called it's safe to call
	 * call_rcu() right after for both preallocated and non-preallocated
	 * maps. The async->cb = NULL was already done and no code path can see
	 * address 't' anymore. Timer if armed for existing bpf_hrtimer before
	 * bpf_timer_cancel_and_free will have been cancelled.
*/
	hrtimer_cancel(&t->timer);

	call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free);
}

/*
 * __bpf_async_init - allocate and publish the kernel object behind a
 * bpf_timer / bpf_wq map field
 * @async: in-map storage holding the object pointer and its lock
 * @map:   map that owns the element containing @async
 * @flags: stored in the object; for timers the low bits select the clock
 * @type:  BPF_ASYNC_TYPE_TIMER or BPF_ASYNC_TYPE_WQ
 *
 * Returns 0 on success, -EOPNOTSUPP in NMI context, -EINVAL for an unknown
 * @type, -EBUSY if @async is already initialized, -ENOMEM on allocation
 * failure, or -EPERM if the map has no user references left.
 */
static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
			    enum bpf_async_type type)
{
	struct bpf_async_cb *cb;
	struct bpf_hrtimer *t;
	struct bpf_work *w;
	clockid_t clockid;
	size_t size;
	int ret = 0;

	if (in_nmi())
		return -EOPNOTSUPP;

	/* Pick the allocation size for the requested flavour. */
	switch (type) {
	case BPF_ASYNC_TYPE_TIMER:
		size = sizeof(struct bpf_hrtimer);
		break;
	case BPF_ASYNC_TYPE_WQ:
		size = sizeof(struct bpf_work);
		break;
	default:
		return -EINVAL;
	}

	__bpf_spin_lock_irqsave(&async->lock);
	t = async->timer;
	/* A non-NULL pointer means this field was already initialized. */
	if (t) {
		ret = -EBUSY;
		goto out;
	}

	cb = bpf_map_kmalloc_nolock(map, size, 0, map->numa_node);
	if (!cb) {
		ret = -ENOMEM;
		goto out;
	}

	switch (type) {
	case BPF_ASYNC_TYPE_TIMER:
		clockid = flags & (MAX_CLOCKS - 1);
		t = (struct bpf_hrtimer *)cb;

		atomic_set(&t->cancelling, 0);
		INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work);
		hrtimer_setup(&t->timer, bpf_timer_cb, clockid, HRTIMER_MODE_REL_SOFT);
		/* Start of the map value that contains this timer field. */
		cb->value = (void *)async - map->record->timer_off;
		break;
	case BPF_ASYNC_TYPE_WQ:
		w = (struct bpf_work *)cb;

		INIT_WORK(&w->work, bpf_wq_work);
		INIT_WORK(&w->delete_work, bpf_wq_delete_work);
		/* Start of the map value that contains this wq field. */
		cb->value = (void *)async - map->record->wq_off;
		break;
	}
	cb->map = map;
	cb->prog = NULL;
	cb->flags = flags;
	rcu_assign_pointer(cb->callback_fn, NULL);

	WRITE_ONCE(async->cb, cb);
	/* Guarantee the order between async->cb and map->usercnt. So
	 * when there are concurrent uref release and bpf timer init, either
	 * bpf_timer_cancel_and_free() called by uref release reads a non-NULL
	 * timer or atomic64_read() below returns a zero usercnt.
	 */
	smp_mb();
	if (!atomic64_read(&map->usercnt)) {
		/* maps with timers must be either held by user space
		 * or pinned in bpffs.
*/ WRITE_ONCE(async->cb, NULL); kfree_nolock(cb); ret = -EPERM; } out: __bpf_spin_unlock_irqrestore(&async->lock); return ret; } BPF_CALL_3(bpf_timer_init, struct bpf_async_kern *, timer, struct bpf_map *, map, u64, flags) { clock_t clockid = flags & (MAX_CLOCKS - 1); BUILD_BUG_ON(MAX_CLOCKS != 16); BUILD_BUG_ON(sizeof(struct bpf_async_kern) > sizeof(struct bpf_timer)); BUILD_BUG_ON(__alignof__(struct bpf_async_kern) != __alignof__(struct bpf_timer)); if (flags >= MAX_CLOCKS || /* similar to timerfd except _ALARM variants are not supported */ (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME && clockid != CLOCK_BOOTTIME)) return -EINVAL; return __bpf_async_init(timer, map, flags, BPF_ASYNC_TYPE_TIMER); } static const struct bpf_func_proto bpf_timer_init_proto = { .func = bpf_timer_init, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_TIMER, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, }; static int __bpf_async_set_callback(struct bpf_async_kern *async, void *callback_fn, struct bpf_prog_aux *aux, unsigned int flags, enum bpf_async_type type) { struct bpf_prog *prev, *prog = aux->prog; struct bpf_async_cb *cb; int ret = 0; if (in_nmi()) return -EOPNOTSUPP; __bpf_spin_lock_irqsave(&async->lock); cb = async->cb; if (!cb) { ret = -EINVAL; goto out; } if (!atomic64_read(&cb->map->usercnt)) { /* maps with timers must be either held by user space * or pinned in bpffs. Otherwise timer might still be * running even when bpf prog is detached and user space * is gone, since map_release_uref won't ever be called. */ ret = -EPERM; goto out; } prev = cb->prog; if (prev != prog) { /* Bump prog refcnt once. Every bpf_timer_set_callback() * can pick different callback_fn-s within the same prog. 
*/
		prog = bpf_prog_inc_not_zero(prog);
		if (IS_ERR(prog)) {
			ret = PTR_ERR(prog);
			goto out;
		}
		if (prev)
			/* Drop prev prog refcnt when swapping with new prog */
			bpf_prog_put(prev);
		cb->prog = prog;
	}
	/* Publish the callback for readers that dereference it under RCU. */
	rcu_assign_pointer(cb->callback_fn, callback_fn);
out:
	__bpf_spin_unlock_irqrestore(&async->lock);
	return ret;
}

/* BPF helper: set the callback of a timer; see __bpf_async_set_callback(). */
BPF_CALL_3(bpf_timer_set_callback, struct bpf_async_kern *, timer, void *, callback_fn,
	   struct bpf_prog_aux *, aux)
{
	return __bpf_async_set_callback(timer, callback_fn, aux, 0, BPF_ASYNC_TYPE_TIMER);
}

static const struct bpf_func_proto bpf_timer_set_callback_proto = {
	.func		= bpf_timer_set_callback,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_TIMER,
	.arg2_type	= ARG_PTR_TO_FUNC,
};

/*
 * bpf_timer_start - BPF helper arming a timer to expire after/at @nsecs
 *
 * Only BPF_F_TIMER_ABS and BPF_F_TIMER_CPU_PIN are accepted in @flags.
 * Returns 0 on success, -EOPNOTSUPP in NMI context, or -EINVAL for unknown
 * flags, an uninitialized timer, or a timer without a callback program.
 */
BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, timer, u64, nsecs, u64, flags)
{
	struct bpf_hrtimer *t;
	int ret = 0;
	enum hrtimer_mode mode;

	if (in_nmi())
		return -EOPNOTSUPP;
	if (flags & ~(BPF_F_TIMER_ABS | BPF_F_TIMER_CPU_PIN))
		return -EINVAL;
	__bpf_spin_lock_irqsave(&timer->lock);
	t = timer->timer;
	if (!t || !t->cb.prog) {
		ret = -EINVAL;
		goto out;
	}

	/* Absolute vs. relative expiry; both use the soft hrtimer modes. */
	if (flags & BPF_F_TIMER_ABS)
		mode = HRTIMER_MODE_ABS_SOFT;
	else
		mode = HRTIMER_MODE_REL_SOFT;

	if (flags & BPF_F_TIMER_CPU_PIN)
		mode |= HRTIMER_MODE_PINNED;

	hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
out:
	__bpf_spin_unlock_irqrestore(&timer->lock);
	return ret;
}

static const struct bpf_func_proto bpf_timer_start_proto = {
	.func		= bpf_timer_start,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_TIMER,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};

/*
 * drop_prog_refcnt - release the program reference held by @async, if any,
 * and clear both the program pointer and the published callback.
 */
static void drop_prog_refcnt(struct bpf_async_cb *async)
{
	struct bpf_prog *prog = async->prog;

	if (prog) {
		bpf_prog_put(prog);
		async->prog = NULL;
		rcu_assign_pointer(async->callback_fn, NULL);
	}
}

/*
 * bpf_timer_cancel - BPF helper cancelling a timer and waiting for its callback
 *
 * Returns -EOPNOTSUPP when called in NMI context.
 */
BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
{
	struct bpf_hrtimer *t, *cur_t;
	bool inc = false;
	int ret = 0;

	if (in_nmi())
		return -EOPNOTSUPP;
	rcu_read_lock();
	__bpf_spin_lock_irqsave(&timer->lock);
	t = timer->timer;
if (!t) {
		/* Timer was never initialized (or was already freed). */
		ret = -EINVAL;
		goto out;
	}

	/* Timer whose callback is running on this CPU, or NULL if none. */
	cur_t = this_cpu_read(hrtimer_running);
	if (cur_t == t) {
		/* If bpf callback_fn is trying to bpf_timer_cancel()
		 * its own timer the hrtimer_cancel() will deadlock
		 * since it waits for callback_fn to finish.
		 */
		ret = -EDEADLK;
		goto out;
	}

	/* Only account in-flight cancellations when invoked from a timer
	 * callback, since we want to avoid waiting only if other _callbacks_
	 * are waiting on us, to avoid introducing lockups. Non-callback paths
	 * are ok, since nobody would synchronously wait for their completion.
	 */
	if (!cur_t)
		goto drop;
	atomic_inc(&t->cancelling);
	/* Need full barrier after relaxed atomic_inc */
	smp_mb__after_atomic();
	inc = true;
	if (atomic_read(&cur_t->cancelling)) {
		/* We're cancelling timer t, while some other timer callback is
		 * attempting to cancel us. In such a case, it might be possible
		 * that timer t belongs to the other callback, or some other
		 * callback waiting upon it (creating transitive dependencies
		 * upon us), and we will enter a deadlock if we continue
		 * cancelling and waiting for it synchronously, since it might
		 * do the same. Bail!
		 */
		ret = -EDEADLK;
		goto out;
	}
drop:
	drop_prog_refcnt(&t->cb);
out:
	__bpf_spin_unlock_irqrestore(&timer->lock);
	/* Cancel the timer and wait for associated callback to finish
	 * if it was running.
	 */
	/* Only reached with ret == 0 when t is non-NULL (see the check above). */
	ret = ret ?: hrtimer_cancel(&t->timer);
	if (inc)
		atomic_dec(&t->cancelling);
	rcu_read_unlock();
	return ret;
}

static const struct bpf_func_proto bpf_timer_cancel_proto = {
	.func		= bpf_timer_cancel,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_TIMER,
};

/*
 * __bpf_async_cancel_and_free - detach the async object from its map field
 *
 * Returns the detached object, or NULL when the field holds none.
 */
static struct bpf_async_cb *__bpf_async_cancel_and_free(struct bpf_async_kern *async)
{
	struct bpf_async_cb *cb;

	/* Performance optimization: read async->cb without lock first.
*/
	if (!READ_ONCE(async->cb))
		return NULL;

	__bpf_spin_lock_irqsave(&async->lock);
	/* re-read it under lock */
	cb = async->cb;
	if (!cb)
		goto out;
	/* Release the program reference and clear the callback first. */
	drop_prog_refcnt(cb);
	/* The subsequent bpf_timer_start/cancel() helpers won't be able to use
	 * this timer, since it won't be initialized.
	 */
	WRITE_ONCE(async->cb, NULL);
out:
	__bpf_spin_unlock_irqrestore(&async->lock);
	return cb;
}

/* This function is called by map_delete/update_elem for individual element and
 * by ops->map_release_uref when the user space reference to a map reaches zero.
 */
void bpf_timer_cancel_and_free(void *val)
{
	struct bpf_hrtimer *t;

	/* Detach the timer object from the map value; NULL means none was set. */
	t = (struct bpf_hrtimer *)__bpf_async_cancel_and_free(val);

	if (!t)
		return;
	/* We check that bpf_map_delete/update_elem() was called from timer
	 * callback_fn. In such case we don't call hrtimer_cancel() (since it
	 * will deadlock) and don't call hrtimer_try_to_cancel() (since it will
	 * just return -1). Though callback_fn is still running on this cpu it's
	 * safe to do kfree(t) because bpf_timer_cb() read everything it needed
	 * from 't'. The bpf subprog callback_fn won't be able to access 't',
	 * since async->cb = NULL was already done. The timer will be
	 * effectively cancelled because bpf_timer_cb() will return
	 * HRTIMER_NORESTART.
	 *
	 * However, it is possible the timer callback_fn calling us armed the
	 * timer _before_ calling us, such that failing to cancel it here will
	 * cause it to possibly use struct hrtimer after freeing bpf_hrtimer.
	 * Therefore, we _need_ to cancel any outstanding timers before we do
	 * call_rcu, even though no more timers can be armed.
	 *
	 * Moreover, we need to schedule work even if timer does not belong to
	 * the calling callback_fn, as on two different CPUs, we can end up in a
	 * situation where both sides run in parallel, try to cancel one
	 * another, and we end up waiting on both sides in hrtimer_cancel
	 * without making forward progress, since timer1 depends on time2
	 * callback to finish, and vice versa.
*
	 * CPU 1 (timer1_cb)			CPU 2 (timer2_cb)
	 * bpf_timer_cancel_and_free(timer2)	bpf_timer_cancel_and_free(timer1)
	 *
	 * To avoid these issues, punt to workqueue context when we are in a
	 * timer callback.
	 */
	if (this_cpu_read(hrtimer_running)) {
		queue_work(system_dfl_wq, &t->cb.delete_work);
		return;
	}

	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
		/* If the timer is running on other CPU, also use a kworker to
		 * wait for the completion of the timer instead of trying to
		 * acquire a sleepable lock in hrtimer_cancel() to wait for its
		 * completion.
		 */
		if (hrtimer_try_to_cancel(&t->timer) >= 0)
			call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free);
		else
			queue_work(system_dfl_wq, &t->cb.delete_work);
	} else {
		/* Cancel synchronously and free via RCU right here. */
		bpf_timer_delete_work(&t->cb.delete_work);
	}
}

/* This function is called by map_delete/update_elem for individual element and
 * by ops->map_release_uref when the user space reference to a map reaches zero.
 */
void bpf_wq_cancel_and_free(void *val)
{
	struct bpf_work *work;

	BTF_TYPE_EMIT(struct bpf_wq);

	/* Detach the work object from the map value; NULL means none was set. */
	work = (struct bpf_work *)__bpf_async_cancel_and_free(val);
	if (!work)
		return;
	/* Trigger cancel of the sleepable work, but *do not* wait for
	 * it to finish if it was running as we might not be in a
	 * sleepable context.
	 * kfree will be called once the work has finished.
	 */
	schedule_work(&work->delete_work);
}

/*
 * bpf_kptr_xchg - BPF helper that atomically stores @ptr at @dst and returns
 * the value previously held there.
 */
BPF_CALL_2(bpf_kptr_xchg, void *, dst, void *, ptr)
{
	unsigned long *kptr = dst;

	/* This helper may be inlined by verifier. */
	return xchg(kptr, (unsigned long)ptr);
}

/* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg()
 * helper is determined dynamically by the verifier. Use BPF_PTR_POISON to
 * denote type that verifier will determine.
*/
static const struct bpf_func_proto bpf_kptr_xchg_proto = {
	.func         = bpf_kptr_xchg,
	.gpl_only     = false,
	.ret_type     = RET_PTR_TO_BTF_ID_OR_NULL,
	.ret_btf_id   = BPF_PTR_POISON,
	.arg1_type    = ARG_KPTR_XCHG_DEST,
	.arg2_type    = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
	.arg2_btf_id  = BPF_PTR_POISON,
};

/* Since the upper 8 bits of dynptr->size are reserved, the
 * maximum supported size is 2^24 - 1.
 */
#define DYNPTR_MAX_SIZE	((1UL << 24) - 1)
/* The dynptr type is stored in ->size starting at this bit. */
#define DYNPTR_TYPE_SHIFT	28
/* Low 24 bits of ->size hold the actual size. */
#define DYNPTR_SIZE_MASK	0xFFFFFF
/* Top bit of ->size marks the dynptr read-only. */
#define DYNPTR_RDONLY_BIT	BIT(31)

/* Return true if the read-only bit is set in @ptr->size. */
bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
{
	return ptr->size & DYNPTR_RDONLY_BIT;
}

/* Mark @ptr read-only by setting the read-only bit in its size word. */
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
{
	ptr->size |= DYNPTR_RDONLY_BIT;
}

/* OR @type into the type bits of @ptr->size; existing bits are not cleared. */
static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
{
	ptr->size |= type << DYNPTR_TYPE_SHIFT;
}

/* Extract the dynptr type from @ptr->size, ignoring the read-only bit. */
static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
{
	return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
}

/* Return the size of @ptr with the type and read-only metadata masked off. */
u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
{
	return ptr->size & DYNPTR_SIZE_MASK;
}

/* Replace the size bits of @ptr while preserving its metadata bits. */
static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
{
	u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;

	/* new_size is OR-ed in as is; it is not masked here. */
	ptr->size = new_size | metadata;
}

/* Return -E2BIG if @size exceeds DYNPTR_MAX_SIZE, 0 otherwise. */
int bpf_dynptr_check_size(u32 size)
{
	return size > DYNPTR_MAX_SIZE ?
-E2BIG : 0;
}

/*
 * bpf_dynptr_init - fill in @ptr with @data, @offset, @size and @type
 *
 * The size is written first and the type is then OR-ed into the same word.
 */
void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
		     enum bpf_dynptr_type type, u32 offset, u32 size)
{
	ptr->data = data;
	ptr->offset = offset;
	ptr->size = size;
	bpf_dynptr_set_type(ptr, type);
}

/* Zero the whole dynptr, which leaves ->data NULL. */
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
{
	memset(ptr, 0, sizeof(*ptr));
}

/*
 * bpf_dynptr_from_mem - BPF helper creating a local dynptr over @data
 *
 * Returns 0 on success. On failure (-E2BIG for an oversized @size, -EINVAL for
 * non-zero @flags) @ptr is zeroed before the error is returned.
 */
BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr)
{
	int err;

	BTF_TYPE_EMIT(struct bpf_dynptr);

	err = bpf_dynptr_check_size(size);
	if (err)
		goto error;

	/* flags is currently unsupported */
	if (flags) {
		err = -EINVAL;
		goto error;
	}

	bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size);

	return 0;

error:
	bpf_dynptr_set_null(ptr);
	return err;
}

static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
	.func		= bpf_dynptr_from_mem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE,
};

/*
 * __bpf_dynptr_read - copy @len bytes at @offset within dynptr @src into @dst
 *
 * Returns -EINVAL if @src has no data or @flags is non-zero, or the error from
 * the offset/length check; otherwise the result depends on the dynptr type.
 */
static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *src,
			     u32 offset, u64 flags)
{
	enum bpf_dynptr_type type;
	int err;

	if (!src->data || flags)
		return -EINVAL;

	err = bpf_dynptr_check_off_len(src, offset, len);
	if (err)
		return err;

	type = bpf_dynptr_get_type(src);

	switch (type) {
	case BPF_DYNPTR_TYPE_LOCAL:
	case BPF_DYNPTR_TYPE_RINGBUF:
		/* Source and destination may possibly overlap, hence use memmove to
		 * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
		 * pointing to overlapping PTR_TO_MAP_VALUE regions.
*/
		memmove(dst, src->data + src->offset + offset, len);
		return 0;
	case BPF_DYNPTR_TYPE_SKB:
		return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
	case BPF_DYNPTR_TYPE_XDP:
		return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len);
	case BPF_DYNPTR_TYPE_SKB_META:
		memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len);
		return 0;
	default:
		WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
		return -EFAULT;
	}
}

/* BPF helper wrapper around __bpf_dynptr_read(). */
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
	   u32, offset, u64, flags)
{
	return __bpf_dynptr_read(dst, len, src, offset, flags);
}

static const struct bpf_func_proto bpf_dynptr_read_proto = {
	.func		= bpf_dynptr_read,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_PTR_TO_DYNPTR | MEM_RDONLY,
	.arg4_type	= ARG_ANYTHING,
	.arg5_type	= ARG_ANYTHING,
};

/*
 * __bpf_dynptr_write - copy @len bytes from @src into dynptr @dst at @offset
 *
 * Returns -EINVAL if @dst has no data or is read-only, or the error from the
 * offset/length check. @flags is only passed on for skb dynptrs; the other
 * types reject non-zero @flags with -EINVAL.
 */
int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
		       u32 len, u64 flags)
{
	enum bpf_dynptr_type type;
	int err;

	if (!dst->data || __bpf_dynptr_is_rdonly(dst))
		return -EINVAL;

	err = bpf_dynptr_check_off_len(dst, offset, len);
	if (err)
		return err;

	type = bpf_dynptr_get_type(dst);

	switch (type) {
	case BPF_DYNPTR_TYPE_LOCAL:
	case BPF_DYNPTR_TYPE_RINGBUF:
		if (flags)
			return -EINVAL;
		/* Source and destination may possibly overlap, hence use memmove to
		 * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
		 * pointing to overlapping PTR_TO_MAP_VALUE regions.
*/
		memmove(dst->data + dst->offset + offset, src, len);
		return 0;
	case BPF_DYNPTR_TYPE_SKB:
		/* skb writes are the only ones that take flags. */
		return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
					     flags);
	case BPF_DYNPTR_TYPE_XDP:
		if (flags)
			return -EINVAL;
		return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len);
	case BPF_DYNPTR_TYPE_SKB_META:
		if (flags)
			return -EINVAL;
		memmove(bpf_skb_meta_pointer(dst->data, dst->offset + offset), src, len);
		return 0;
	default:
		WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
		return -EFAULT;
	}
}

/* BPF helper wrapper around __bpf_dynptr_write(). */
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
	   u32, len, u64, flags)
{
	return __bpf_dynptr_write(dst, offset, src, len, flags);
}

static const struct bpf_func_proto bpf_dynptr_write_proto = {
	.func		= bpf_dynptr_write,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_DYNPTR | MEM_RDONLY,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg5_type	= ARG_ANYTHING,
};

/*
 * bpf_dynptr_data - BPF helper returning a direct pointer into a dynptr
 *
 * Returns 0 (NULL) when the dynptr has no data, the offset/length check
 * fails, the dynptr is read-only, or its type is skb, xdp or skb-meta.
 * For local and ringbuf dynptrs the address of the requested offset is
 * returned.
 */
BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
{
	enum bpf_dynptr_type type;
	int err;

	if (!ptr->data)
		return 0;

	err = bpf_dynptr_check_off_len(ptr, offset, len);
	if (err)
		return 0;

	if (__bpf_dynptr_is_rdonly(ptr))
		return 0;

	type = bpf_dynptr_get_type(ptr);

	switch (type) {
	case BPF_DYNPTR_TYPE_LOCAL:
	case BPF_DYNPTR_TYPE_RINGBUF:
		return (unsigned long)(ptr->data + ptr->offset + offset);
	case BPF_DYNPTR_TYPE_SKB:
	case BPF_DYNPTR_TYPE_XDP:
	case BPF_DYNPTR_TYPE_SKB_META:
		/* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
		return 0;
	default:
		WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
		return 0;
	}
}

static const struct bpf_func_proto bpf_dynptr_data_proto = {
	.func		= bpf_dynptr_data,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_DYNPTR_MEM_OR_NULL,
	.arg1_type	= ARG_PTR_TO_DYNPTR | MEM_RDONLY,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_CONST_ALLOC_SIZE_OR_ZERO,
};

const struct
bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
const struct bpf_func_proto bpf_perf_event_read_proto __weak;
const struct bpf_func_proto bpf_send_signal_proto __weak;
const struct bpf_func_proto bpf_send_signal_thread_proto __weak;
const struct bpf_func_proto bpf_get_task_stack_sleepable_proto __weak;
const struct bpf_func_proto bpf_get_task_stack_proto __weak;
const struct bpf_func_proto bpf_get_branch_snapshot_proto __weak;

/*
 * bpf_base_func_proto - look up the proto of a helper available to @prog
 * @func_id: helper being requested
 * @prog:    program requesting it; its token is used for capability checks
 *
 * Helpers are grouped in three tiers: the first switch needs no capability,
 * the second is reached only if the token is capable of CAP_BPF, and the
 * third only if it is additionally capable of CAP_PERFMON. Returns the
 * matching proto, or NULL when the helper is unknown here or the required
 * capability is missing.
 */
const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	/* Tier 1: no capability required. */
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_map_push_elem:
		return &bpf_map_push_elem_proto;
	case BPF_FUNC_map_pop_elem:
		return &bpf_map_pop_elem_proto;
	case BPF_FUNC_map_peek_elem:
		return &bpf_map_peek_elem_proto;
	case BPF_FUNC_map_lookup_percpu_elem:
		return &bpf_map_lookup_percpu_elem_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_raw_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_ktime_get_boot_ns:
		return &bpf_ktime_get_boot_ns_proto;
	case BPF_FUNC_ktime_get_tai_ns:
		return &bpf_ktime_get_tai_ns_proto;
	case BPF_FUNC_ringbuf_output:
		return &bpf_ringbuf_output_proto;
	case BPF_FUNC_ringbuf_reserve:
		return &bpf_ringbuf_reserve_proto;
	case BPF_FUNC_ringbuf_submit:
		return &bpf_ringbuf_submit_proto;
	case BPF_FUNC_ringbuf_discard:
		return &bpf_ringbuf_discard_proto;
	case BPF_FUNC_ringbuf_query:
		return &bpf_ringbuf_query_proto;
	case BPF_FUNC_strncmp:
		return &bpf_strncmp_proto;
	case BPF_FUNC_strtol:
		return &bpf_strtol_proto;
	case BPF_FUNC_strtoul:
		return &bpf_strtoul_proto;
	case BPF_FUNC_get_current_pid_tgid:
		return &bpf_get_current_pid_tgid_proto;
	case BPF_FUNC_get_ns_current_pid_tgid:
		return &bpf_get_ns_current_pid_tgid_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	default:
		break;
	}

	/* Tier 2: everything below requires CAP_BPF. */
	if (!bpf_token_capable(prog->aux->token, CAP_BPF))
		return NULL;

	switch (func_id) {
	case BPF_FUNC_spin_lock:
		return &bpf_spin_lock_proto;
	case BPF_FUNC_spin_unlock:
		return &bpf_spin_unlock_proto;
	case BPF_FUNC_jiffies64:
		return &bpf_jiffies64_proto;
	case BPF_FUNC_per_cpu_ptr:
		return &bpf_per_cpu_ptr_proto;
	case BPF_FUNC_this_cpu_ptr:
		return &bpf_this_cpu_ptr_proto;
	case BPF_FUNC_timer_init:
		return &bpf_timer_init_proto;
	case BPF_FUNC_timer_set_callback:
		return &bpf_timer_set_callback_proto;
	case BPF_FUNC_timer_start:
		return &bpf_timer_start_proto;
	case BPF_FUNC_timer_cancel:
		return &bpf_timer_cancel_proto;
	case BPF_FUNC_kptr_xchg:
		return &bpf_kptr_xchg_proto;
	case BPF_FUNC_for_each_map_elem:
		return &bpf_for_each_map_elem_proto;
	case BPF_FUNC_loop:
		return &bpf_loop_proto;
	case BPF_FUNC_user_ringbuf_drain:
		return &bpf_user_ringbuf_drain_proto;
	case BPF_FUNC_ringbuf_reserve_dynptr:
		return &bpf_ringbuf_reserve_dynptr_proto;
	case BPF_FUNC_ringbuf_submit_dynptr:
		return &bpf_ringbuf_submit_dynptr_proto;
	case BPF_FUNC_ringbuf_discard_dynptr:
		return &bpf_ringbuf_discard_dynptr_proto;
	case BPF_FUNC_dynptr_from_mem:
		return &bpf_dynptr_from_mem_proto;
	case BPF_FUNC_dynptr_read:
		return &bpf_dynptr_read_proto;
	case BPF_FUNC_dynptr_write:
		return &bpf_dynptr_write_proto;
	case BPF_FUNC_dynptr_data:
		return &bpf_dynptr_data_proto;
#ifdef CONFIG_CGROUPS
	case BPF_FUNC_cgrp_storage_get:
		return &bpf_cgrp_storage_get_proto;
	case BPF_FUNC_cgrp_storage_delete:
		return &bpf_cgrp_storage_delete_proto;
	case BPF_FUNC_get_current_cgroup_id:
		return &bpf_get_current_cgroup_id_proto;
	case BPF_FUNC_get_current_ancestor_cgroup_id:
		return &bpf_get_current_ancestor_cgroup_id_proto;
	case BPF_FUNC_current_task_under_cgroup:
		return &bpf_current_task_under_cgroup_proto;
#endif
#ifdef CONFIG_CGROUP_NET_CLASSID
	case BPF_FUNC_get_cgroup_classid:
		return &bpf_get_cgroup_classid_curr_proto;
#endif
	case BPF_FUNC_task_storage_get:
		/* A separate "_recur" proto is used when
		 * bpf_prog_check_recur() is true for this program.
		 */
		if (bpf_prog_check_recur(prog))
			return &bpf_task_storage_get_recur_proto;
		return &bpf_task_storage_get_proto;
	case BPF_FUNC_task_storage_delete:
		if (bpf_prog_check_recur(prog))
			return &bpf_task_storage_delete_recur_proto;
		return &bpf_task_storage_delete_proto;
	default:
		break;
	}

	/* Tier 3: everything below additionally requires CAP_PERFMON. */
	if (!bpf_token_capable(prog->aux->token, CAP_PERFMON))
		return NULL;

	switch (func_id) {
	case BPF_FUNC_trace_printk:
		return bpf_get_trace_printk_proto();
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
	case BPF_FUNC_get_current_task_btf:
		return &bpf_get_current_task_btf_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
	case BPF_FUNC_probe_read_user:
		return &bpf_probe_read_user_proto;
	case BPF_FUNC_probe_read_kernel:
		/* Not offered when lockdown forbids BPF kernel reads. */
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_kernel_proto;
	case BPF_FUNC_probe_read_user_str:
		return &bpf_probe_read_user_str_proto;
	case BPF_FUNC_probe_read_kernel_str:
		/* Not offered when lockdown forbids BPF kernel reads. */
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_kernel_str_proto;
	case BPF_FUNC_copy_from_user:
		return &bpf_copy_from_user_proto;
	case BPF_FUNC_copy_from_user_task:
		return &bpf_copy_from_user_task_proto;
	case BPF_FUNC_snprintf_btf:
		return &bpf_snprintf_btf_proto;
	case BPF_FUNC_snprintf:
		return &bpf_snprintf_proto;
	case BPF_FUNC_task_pt_regs:
		return &bpf_task_pt_regs_proto;
	case BPF_FUNC_trace_vprintk:
		return bpf_get_trace_vprintk_proto();
	case BPF_FUNC_perf_event_read_value:
		return bpf_get_perf_event_read_value_proto();
	case BPF_FUNC_perf_event_read:
		return &bpf_perf_event_read_proto;
	case BPF_FUNC_send_signal:
		return &bpf_send_signal_proto;
	case BPF_FUNC_send_signal_thread:
		return &bpf_send_signal_thread_proto;
	case BPF_FUNC_get_task_stack:
		return prog->sleepable ? &bpf_get_task_stack_sleepable_proto
				       : &bpf_get_task_stack_proto;
	case BPF_FUNC_get_branch_snapshot:
		return &bpf_get_branch_snapshot_proto;
	case BPF_FUNC_find_vma:
		return &bpf_find_vma_proto;
	default:
		return NULL;
	}
}
EXPORT_SYMBOL_GPL(bpf_base_func_proto);

/*
 * bpf_list_head_free - detach and drop every object linked on a BPF list head
 * @field:     btf field describing the list; supplies the node offset and the
 *             record of the contained type
 * @list_head: the list head stored in the map value
 * @spin_lock: lock taken while the list is detached
 */
void bpf_list_head_free(const struct btf_field *field, void *list_head,
			struct bpf_spin_lock *spin_lock)
{
	struct list_head *head = list_head, *orig_head = list_head;

	/* The kernel list_head must fit in, and not be stricter aligned than,
	 * struct bpf_list_head.
	 */
	BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head));
	BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head));

	/* Do the actual list draining outside the lock to not hold the lock for
	 * too long, and also prevent deadlocks if tracing programs end up
	 * executing on entry/exit of functions called inside the critical
	 * section, and end up doing map ops that call bpf_list_head_free for
	 * the same map value again.
*/
	__bpf_spin_lock_irqsave(spin_lock);
	/* Nothing to drain when the head is still zeroed (NULL next) or empty. */
	if (!head->next || list_empty(head))
		goto unlock;
	head = head->next;
unlock:
	/* Reset the head while locked; the old chain is walked unlocked below. */
	INIT_LIST_HEAD(orig_head);
	__bpf_spin_unlock_irqrestore(spin_lock);

	while (head != orig_head) {
		void *obj = head;

		/* Step back from the embedded node to the start of the object. */
		obj -= field->graph_root.node_offset;
		/* Advance first: obj, which holds the current node, is dropped next. */
		head = head->next;
		/* The contained type can also have resources, including a
		 * bpf_list_head which needs to be freed.
		 */
		__bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
	}
}

/* Like rbtree_postorder_for_each_entry_safe, but 'pos' and 'n' are
 * 'rb_node *', so field name of rb_node within containing struct is not
 * needed.
 *
 * Since bpf_rb_tree's node type has a corresponding struct btf_field with
 * graph_root.node_offset, it's not necessary to know field name
 * or type of node struct
 */
#define bpf_rbtree_postorder_for_each_entry_safe(pos, n, root) \
	for (pos = rb_first_postorder(root); \
	    pos && ({ n = rb_next_postorder(pos); 1; }); \
	    pos = n)

/* Empty the rbtree stored at @rb_root and drop every object it held.
 *
 * The root is copied out and reset to RB_ROOT_CACHED while @spin_lock is
 * held; the copied tree is then walked in postorder with the lock released,
 * dropping each object found at (node - @field->graph_root.node_offset).
 */
void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
		      struct bpf_spin_lock *spin_lock)
{
	struct rb_root_cached orig_root, *root = rb_root;
	struct rb_node *pos, *n;
	void *obj;

	BUILD_BUG_ON(sizeof(struct rb_root_cached) > sizeof(struct bpf_rb_root));
	BUILD_BUG_ON(__alignof__(struct rb_root_cached) > __alignof__(struct bpf_rb_root));

	__bpf_spin_lock_irqsave(spin_lock);
	orig_root = *root;
	*root = RB_ROOT_CACHED;
	__bpf_spin_unlock_irqrestore(spin_lock);

	/* 'n' is fetched before the body runs, so dropping 'pos' is safe. */
	bpf_rbtree_postorder_for_each_entry_safe(pos, n, &orig_root.rb_root) {
		obj = pos;
		obj -= field->graph_root.node_offset;


		__bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
	}
}

__bpf_kfunc_start_defs();

/* Allocate an object from bpf_global_ma. The value of @local_type_id__k is
 * used here directly as the allocation size. When @meta__ign is non-NULL the
 * new object is initialized with bpf_obj_init() using meta->record.
 * Returns the object, or NULL if the allocation failed.
 */
__bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
{
	struct btf_struct_meta *meta = meta__ign;
	u64 size = local_type_id__k;
	void *p;

	p = bpf_mem_alloc(&bpf_global_ma, size);
	if (!p)
		return NULL;
	if (meta)
		bpf_obj_init(meta->record, p);
	return p;
}

/* Per-CPU counterpart of bpf_obj_new_impl(): allocates from
 * bpf_global_percpu_ma and performs no field initialization.
 */
__bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
{
	u64 size = local_type_id__k;

	/*
The verifier has ensured that meta__ign must be NULL */
	return bpf_mem_alloc(&bpf_global_percpu_ma, size);
}

/* Drop one reference to (or, for non-refcounted objects, free) @p.
 *
 * @p:      object to drop.
 * @rec:    record describing @p's special fields, or NULL.
 * @percpu: selects bpf_global_percpu_ma instead of bpf_global_ma for the free.
 *
 * Must be called under migrate_disable(), as required by bpf_mem_free
 */
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
{
	struct bpf_mem_alloc *ma;

	if (rec && rec->refcount_off >= 0 &&
	    !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) {
		/* Object is refcounted and refcount_dec didn't result in 0
		 * refcount. Return without freeing the object
		 */
		return;
	}

	/* Release the object's special fields before its memory goes away. */
	if (rec)
		bpf_obj_free_fields(rec, p);

	if (percpu)
		ma = &bpf_global_percpu_ma;
	else
		ma = &bpf_global_ma;
	bpf_mem_free_rcu(ma, p);
}

/* kfunc wrapper: drop a non-percpu object, using meta->record when
 * @meta__ign is non-NULL.
 */
__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
{
	struct btf_struct_meta *meta = meta__ign;
	void *p = p__alloc;

	__bpf_obj_drop_impl(p, meta ? meta->record : NULL, false);
}

/* kfunc wrapper: free a per-CPU object straight to bpf_global_percpu_ma. */
__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
{
	/* The verifier has ensured that meta__ign must be NULL */
	bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc);
}

/* Take an additional reference on a refcounted object.
 * Returns @p__refcounted_kptr on success, or NULL when the refcount was
 * already zero. @meta__ign is dereferenced unconditionally here.
 */
__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
{
	struct btf_struct_meta *meta = meta__ign;
	struct bpf_refcount *ref;

	/* Could just cast directly to refcount_t *, but need some code using
	 * bpf_refcount type so that it is emitted in vmlinux BTF
	 */
	ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off);
	if (!refcount_inc_not_zero((refcount_t *)ref))
		return NULL;

	/* Verifier strips KF_RET_NULL if input is owned ref, see is_kfunc_ret_null
	 * in verifier.c
	 */
	return (void *)p__refcounted_kptr;
}

/* Link @node into @head, at the tail when @tail is true, otherwise at the
 * front. If the node already has an owner, the containing object (at
 * node - @off) is dropped and -EINVAL is returned. Returns 0 on success.
 */
static int __bpf_list_add(struct bpf_list_node_kern *node,
			  struct bpf_list_head *head,
			  bool tail, struct btf_record *rec, u64 off)
{
	struct list_head *n = &node->list_head, *h = (void *)head;

	/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
	 * called on its fields, so init here
	 */
	if (unlikely(!h->next))
		INIT_LIST_HEAD(h);

	/* node->owner != NULL implies !list_empty(n), no need to separately
	 *
check the latter */
	/* Claim the node: owner goes NULL -> BPF_PTR_POISON while it is linked. */
	if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
		/* Only called from BPF prog, no need to migrate_disable */
		__bpf_obj_drop_impl((void *)n - off, rec, false);
		return -EINVAL;
	}

	tail ? list_add_tail(n, h) : list_add(n, h);
	/* Publish the real owner only after the node is on the list. */
	WRITE_ONCE(node->owner, head);

	return 0;
}

/* kfunc: add @node at the front of @head; see __bpf_list_add() for the
 * return values. @off is the offset of the node inside its object.
 */
__bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head,
					 struct bpf_list_node *node,
					 void *meta__ign, u64 off)
{
	struct bpf_list_node_kern *n = (void *)node;
	struct btf_struct_meta *meta = meta__ign;

	return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off);
}

/* kfunc: add @node at the tail of @head; see __bpf_list_add() for the
 * return values. @off is the offset of the node inside its object.
 */
__bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head,
					struct bpf_list_node *node,
					void *meta__ign, u64 off)
{
	struct bpf_list_node_kern *n = (void *)node;
	struct btf_struct_meta *meta = meta__ign;

	return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off);
}

/* Unlink and return the last (@tail true) or first node of @head.
 * Returns NULL when the list is empty or the node's owner is not @head.
 */
static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
{
	struct list_head *n, *h = (void *)head;
	struct bpf_list_node_kern *node;

	/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
	 * called on its fields, so init here
	 */
	if (unlikely(!h->next))
		INIT_LIST_HEAD(h);
	if (list_empty(h))
		return NULL;

	n = tail ?
h->prev : h->next;
	node = container_of(n, struct bpf_list_node_kern, list_head);
	/* A node found on this list must record this head as its owner. */
	if (WARN_ON_ONCE(READ_ONCE(node->owner) != head))
		return NULL;

	list_del_init(n);
	/* Clearing the owner lets the node be added to a list again. */
	WRITE_ONCE(node->owner, NULL);
	return (struct bpf_list_node *)n;
}

/* kfunc: remove and return the first node of @head, or NULL. */
__bpf_kfunc struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head)
{
	return __bpf_list_del(head, false);
}

/* kfunc: remove and return the last node of @head, or NULL. */
__bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
{
	return __bpf_list_del(head, true);
}

/* kfunc: return the first node of @head without unlinking it.
 * Returns NULL when the list is empty or the head is still zeroed
 * (NULL next pointer).
 */
__bpf_kfunc struct bpf_list_node *bpf_list_front(struct bpf_list_head *head)
{
	struct list_head *h = (struct list_head *)head;

	if (list_empty(h) || unlikely(!h->next))
		return NULL;

	return (struct bpf_list_node *)h->next;
}

/* kfunc: return the last node of @head without unlinking it.
 * Returns NULL when the list is empty or the head is still zeroed
 * (NULL next pointer).
 */
__bpf_kfunc struct bpf_list_node *bpf_list_back(struct bpf_list_head *head)
{
	struct list_head *h = (struct list_head *)head;

	if (list_empty(h) || unlikely(!h->next))
		return NULL;

	return (struct bpf_list_node *)h->prev;
}

/* kfunc: erase @node from the tree @root and return it.
 * Returns NULL when @node's recorded owner is not @root.
 */
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
						  struct bpf_rb_node *node)
{
	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
	struct rb_root_cached *r = (struct rb_root_cached *)root;
	struct rb_node *n = &node_internal->rb_node;

	/* node_internal->owner != root implies either RB_EMPTY_NODE(n) or
	 * n is owned by some other tree.
No need to check RB_EMPTY_NODE(n) */
	if (READ_ONCE(node_internal->owner) != root)
		return NULL;

	rb_erase_cached(n, r);
	RB_CLEAR_NODE(n);
	/* Clearing the owner lets the node be added to a tree again. */
	WRITE_ONCE(node_internal->owner, NULL);
	return (struct bpf_rb_node *)n;
}

/* Insert @node into the tree @root, ordered by the @less callback.
 *
 * @less is invoked as a bpf_callback_t with (node, parent) and a non-zero
 * result sends the new node to the left. If @node already has an owner, the
 * containing object (at node - @off) is dropped and -EINVAL is returned.
 * Returns 0 on success.
 *
 * Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
 * program
 */
static int __bpf_rbtree_add(struct bpf_rb_root *root,
			    struct bpf_rb_node_kern *node,
			    void *less, struct btf_record *rec, u64 off)
{
	struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
	struct rb_node *parent = NULL, *n = &node->rb_node;
	bpf_callback_t cb = (bpf_callback_t)less;
	bool leftmost = true;

	/* node->owner != NULL implies !RB_EMPTY_NODE(n), no need to separately
	 * check the latter
	 */
	if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
		/* Only called from BPF prog, no need to migrate_disable */
		__bpf_obj_drop_impl((void *)n - off, rec, false);
		return -EINVAL;
	}

	/* Descend to a leaf slot; any right turn means the node is not leftmost. */
	while (*link) {
		parent = *link;
		if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = false;
		}
	}

	rb_link_node(n, parent, link);
	rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost);
	/* Publish the real owner only after the node is in the tree. */
	WRITE_ONCE(node->owner, root);
	return 0;
}

/* kfunc: add @node to @root using @less for ordering; see __bpf_rbtree_add()
 * for the return values.
 */
__bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
				    bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
				    void *meta__ign, u64 off)
{
	struct btf_struct_meta *meta = meta__ign;
	struct bpf_rb_node_kern *n = (void *)node;

	return __bpf_rbtree_add(root, n, (void *)less, meta ?
meta->record : NULL, off);
}

/* kfunc: return the result of rb_first_cached() for @root. */
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
{
	struct rb_root_cached *r = (struct rb_root_cached *)root;

	return (struct bpf_rb_node *)rb_first_cached(r);
}

/* kfunc: return the topmost node of @root (its rb_root.rb_node pointer). */
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_root(struct bpf_rb_root *root)
{
	struct rb_root_cached *r = (struct rb_root_cached *)root;

	return (struct bpf_rb_node *)r->rb_root.rb_node;
}

/* kfunc: return @node's left child, or NULL when @node's recorded owner is
 * not @root.
 */
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_left(struct bpf_rb_root *root, struct bpf_rb_node *node)
{
	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;

	if (READ_ONCE(node_internal->owner) != root)
		return NULL;

	return (struct bpf_rb_node *)node_internal->rb_node.rb_left;
}

/* kfunc: return @node's right child, or NULL when @node's recorded owner is
 * not @root.
 */
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_right(struct bpf_rb_root *root, struct bpf_rb_node *node)
{
	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;

	if (READ_ONCE(node_internal->owner) != root)
		return NULL;

	return (struct bpf_rb_node *)node_internal->rb_node.rb_right;
}

/**
 * bpf_task_acquire - Acquire a reference to a task. A task acquired by this
 * kfunc which is not stored in a map as a kptr, must be released by calling
 * bpf_task_release().
 * @p: The task on which a reference is being acquired.
 *
 * Return: @p when its rcu_users count was non-zero and has been incremented,
 * NULL otherwise.
 */
__bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p)
{
	if (refcount_inc_not_zero(&p->rcu_users))
		return p;
	return NULL;
}

/**
 * bpf_task_release - Release the reference acquired on a task.
 * @p: The task on which a reference is being released.
 */
__bpf_kfunc void bpf_task_release(struct task_struct *p)
{
	put_task_struct_rcu_user(p);
}

/* Same release as bpf_task_release(), taking the task as an untyped pointer. */
__bpf_kfunc void bpf_task_release_dtor(void *p)
{
	put_task_struct_rcu_user(p);
}
CFI_NOSEAL(bpf_task_release_dtor);

#ifdef CONFIG_CGROUPS
/**
 * bpf_cgroup_acquire - Acquire a reference to a cgroup. A cgroup acquired by
 * this kfunc which is not stored in a map as a kptr, must be released by
 * calling bpf_cgroup_release().
 * @cgrp: The cgroup on which a reference is being acquired.
*/ __bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp) { return cgroup_tryget(cgrp) ? cgrp : NULL; } /** * bpf_cgroup_release - Release the reference acquired on a cgroup. * If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to * not be freed until the current grace period has ended, even if its refcount * drops to 0. * @cgrp: The cgroup on which a reference is being released. */ __bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp) { cgroup_put(cgrp); } __bpf_kfunc void bpf_cgroup_release_dtor(void *cgrp) { cgroup_put(cgrp); } CFI_NOSEAL(bpf_cgroup_release_dtor); /** * bpf_cgroup_ancestor - Perform a lookup on an entry in a cgroup's ancestor * array. A cgroup returned by this kfunc which is not subsequently stored in a * map, must be released by calling bpf_cgroup_release(). * @cgrp: The cgroup for which we're performing a lookup. * @level: The level of ancestor to look up. */ __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) { struct cgroup *ancestor; if (level > cgrp->level || level < 0) return NULL; /* cgrp's refcnt could be 0 here, but ancestors can still be accessed */ ancestor = cgrp->ancestors[level]; if (!cgroup_tryget(ancestor)) return NULL; return ancestor; } /** * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this * kfunc which is not subsequently stored in a map, must be released by calling * bpf_cgroup_release(). * @cgid: cgroup id. */ __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid) { struct cgroup *cgrp; cgrp = __cgroup_get_from_id(cgid); if (IS_ERR(cgrp)) return NULL; return cgrp; } /** * bpf_task_under_cgroup - wrap task_under_cgroup_hierarchy() as a kfunc, test * task's membership of cgroup ancestry. * @task: the task to be tested * @ancestor: possible ancestor of @task's cgroup * * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor. 
* It follows all the same rules as cgroup_is_descendant, and only applies * to the default hierarchy. */ __bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) { long ret; rcu_read_lock(); ret = task_under_cgroup_hierarchy(task, ancestor); rcu_read_unlock(); return ret; } BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct cgroup *cgrp; if (unlikely(idx >= array->map.max_entries)) return -E2BIG; cgrp = READ_ONCE(array->ptrs[idx]); if (unlikely(!cgrp)) return -EAGAIN; return task_under_cgroup_hierarchy(current, cgrp); } const struct bpf_func_proto bpf_current_task_under_cgroup_proto = { .func = bpf_current_task_under_cgroup, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_ANYTHING, }; /** * bpf_task_get_cgroup1 - Acquires the associated cgroup of a task within a * specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its * hierarchy ID. * @task: The target task * @hierarchy_id: The ID of a cgroup1 hierarchy * * On success, the cgroup is returen. On failure, NULL is returned. */ __bpf_kfunc struct cgroup * bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) { struct cgroup *cgrp = task_get_cgroup1(task, hierarchy_id); if (IS_ERR(cgrp)) return NULL; return cgrp; } #endif /* CONFIG_CGROUPS */ /** * bpf_task_from_pid - Find a struct task_struct from its pid by looking it up * in the root pid namespace idr. If a task is returned, it must either be * stored in a map, or released with bpf_task_release(). * @pid: The pid of the task being looked up. 
*/
__bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
{
	struct task_struct *found, *acquired = NULL;

	/* Lookup and acquire happen inside one RCU read-side section. */
	rcu_read_lock();
	found = find_task_by_pid_ns(pid, &init_pid_ns);
	if (found)
		acquired = bpf_task_acquire(found);
	rcu_read_unlock();

	return acquired;
}

/**
 * bpf_task_from_vpid - Find a struct task_struct from its vpid by looking it up
 * in the pid namespace of the current task. If a task is returned, it must
 * either be stored in a map, or released with bpf_task_release().
 * @vpid: The vpid of the task being looked up.
 */
__bpf_kfunc struct task_struct *bpf_task_from_vpid(s32 vpid)
{
	struct task_struct *found, *acquired = NULL;

	/* Lookup and acquire happen inside one RCU read-side section. */
	rcu_read_lock();
	found = find_task_by_vpid(vpid);
	if (found)
		acquired = bpf_task_acquire(found);
	rcu_read_unlock();

	return acquired;
}

/**
 * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
 * @p: The dynptr whose data slice to retrieve
 * @offset: Offset into the dynptr
 * @buffer__opt: User-provided buffer to copy contents into.  May be NULL
 * @buffer__szk: Size (in bytes) of the buffer if present. This is the
 *               length of the requested slice. This must be a constant.
 *
 * For non-skb and non-xdp type dynptrs, there is no difference between
 * bpf_dynptr_slice and bpf_dynptr_data.
 *
 * If buffer__opt is NULL, the call will fail if buffer__opt was needed.
 *
 * If the intention is to write to the data slice, please use
 * bpf_dynptr_slice_rdwr.
 *
 * The user must check that the returned pointer is not null before using it.
 *
 * Please note that in the case of skb and xdp dynptrs, bpf_dynptr_slice
 * does not change the underlying packet data pointers, so a call to
 * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in
 * the bpf program.
*
 * Return: NULL if the call failed (eg invalid dynptr), pointer to a read-only
 * data slice (can be either direct pointer to the data or a pointer to the user
 * provided buffer, with its contents containing the data, if unable to obtain
 * direct pointer)
 */
__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset,
				   void *buffer__opt, u32 buffer__szk)
{
	const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
	enum bpf_dynptr_type type;
	u32 len = buffer__szk;
	int err;

	if (!ptr->data)
		return NULL;

	/* Any offset/length check failure is reported to the caller as NULL. */
	err = bpf_dynptr_check_off_len(ptr, offset, len);
	if (err)
		return NULL;

	type = bpf_dynptr_get_type(ptr);

	switch (type) {
	case BPF_DYNPTR_TYPE_LOCAL:
	case BPF_DYNPTR_TYPE_RINGBUF:
		/* Direct pointer into the dynptr data; the buffer is not used. */
		return ptr->data + ptr->offset + offset;
	case BPF_DYNPTR_TYPE_SKB:
		if (buffer__opt)
			return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt);
		else
			return skb_pointer_if_linear(ptr->data, ptr->offset + offset, len);
	case BPF_DYNPTR_TYPE_XDP:
	{
		void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
		if (!IS_ERR_OR_NULL(xdp_ptr))
			return xdp_ptr;

		/* No direct pointer: copy into the caller's buffer, if one was given. */
		if (!buffer__opt)
			return NULL;
		bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false);
		return buffer__opt;
	}
	case BPF_DYNPTR_TYPE_SKB_META:
		return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset);
	default:
		WARN_ONCE(true, "unknown dynptr type %d\n", type);
		return NULL;
	}
}

/**
 * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
 * @p: The dynptr whose data slice to retrieve
 * @offset: Offset into the dynptr
 * @buffer__opt: User-provided buffer to copy contents into. May be NULL
 * @buffer__szk: Size (in bytes) of the buffer if present. This is the
 *               length of the requested slice. This must be a constant.
 *
 * For non-skb and non-xdp type dynptrs, there is no difference between
 * bpf_dynptr_slice and bpf_dynptr_data.
 *
 * If buffer__opt is NULL, the call will fail if buffer__opt was needed.
*
 * The returned pointer is writable and may point to either directly the dynptr
 * data at the requested offset or to the buffer if unable to obtain a direct
 * data pointer to (example: the requested slice is to the paged area of an skb
 * packet). In the case where the returned pointer is to the buffer, the user
 * is responsible for persisting writes through calling bpf_dynptr_write(). This
 * usually looks something like this pattern:
 *
 * struct eth_hdr *eth = bpf_dynptr_slice_rdwr(&dynptr, 0, buffer, sizeof(buffer));
 * if (!eth)
 *	return TC_ACT_SHOT;
 *
 * // mutate eth header //
 *
 * if (eth == buffer)
 *	bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0);
 *
 * Please note that, as in the example above, the user must check that the
 * returned pointer is not null before using it.
 *
 * Please also note that in the case of skb and xdp dynptrs, bpf_dynptr_slice_rdwr
 * does not change the underlying packet data pointers, so a call to
 * bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in
 * the bpf program.
 *
 * Return: NULL if the call failed (eg invalid dynptr), pointer to a
 * data slice (can be either direct pointer to the data or a pointer to the user
 * provided buffer, with its contents containing the data, if unable to obtain
 * direct pointer)
 */
__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
					void *buffer__opt, u32 buffer__szk)
{
	const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;

	/* A null or read-only dynptr can never yield a writable slice. */
	if (!ptr->data || __bpf_dynptr_is_rdonly(ptr))
		return NULL;

	/* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice.
	 *
	 * For skb-type dynptrs, it is safe to write into the returned pointer
	 * if the bpf program allows skb data writes. There are two possibilities
	 * that may occur when calling bpf_dynptr_slice_rdwr:
	 *
	 * 1) The requested slice is in the head of the skb.
In this case, the
	 * returned pointer is directly to skb data, and if the skb is cloned, the
	 * verifier will have uncloned it (see bpf_unclone_prologue()) already.
	 * The pointer can be directly written into.
	 *
	 * 2) Some portion of the requested slice is in the paged buffer area.
	 * In this case, the requested data will be copied out into the buffer
	 * and the returned pointer will be a pointer to the buffer. The skb
	 * will not be pulled. To persist the write, the user will need to call
	 * bpf_dynptr_write(), which will pull the skb and commit the write.
	 *
	 * Similarly for xdp programs, if the requested slice is not across xdp
	 * fragments, then a direct pointer will be returned, otherwise the data
	 * will be copied out into the buffer and the user will need to call
	 * bpf_dynptr_write() to commit changes.
	 */
	return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk);
}

/* kfunc: narrow the dynptr @p to the range [@start, @end) of its current
 * contents by advancing its offset by @start and setting its size to
 * @end - @start.
 * Returns -EINVAL for a null dynptr or @start > @end, -ERANGE when either
 * bound exceeds the current size, and 0 on success.
 */
__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end)
{
	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
	u32 size;

	if (!ptr->data || start > end)
		return -EINVAL;

	size = __bpf_dynptr_size(ptr);

	if (start > size || end > size)
		return -ERANGE;

	ptr->offset += start;
	bpf_dynptr_set_size(ptr, end - start);

	return 0;
}

/* kfunc: true when the dynptr @p has no data pointer. */
__bpf_kfunc bool bpf_dynptr_is_null(const struct bpf_dynptr *p)
{
	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;

	return !ptr->data;
}

/* kfunc: true when the dynptr @p is read-only; a null dynptr reports false. */
__bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p)
{
	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;

	if (!ptr->data)
		return false;

	return __bpf_dynptr_is_rdonly(ptr);
}

/* kfunc: return the size of the dynptr @p.
 * NOTE(review): for a null dynptr this returns -EINVAL through a __u32
 * return type, so the caller sees it converted to an unsigned value.
 */
__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr *p)
{
	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;

	if (!ptr->data)
		return -EINVAL;

	return __bpf_dynptr_size(ptr);
}

/* kfunc: copy the dynptr @p into @clone__uninit.
 * For a null source the clone is set to null and -EINVAL is returned;
 * otherwise the whole dynptr structure is copied and 0 is returned.
 */
__bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p,
				 struct bpf_dynptr *clone__uninit)
{
	struct bpf_dynptr_kern *clone = (struct bpf_dynptr_kern *)clone__uninit;
	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;

	if
(!ptr->data) {
		/* Leave the output in a defined (null) state on failure. */
		bpf_dynptr_set_null(clone);
		return -EINVAL;
	}

	*clone = *ptr;

	return 0;
}

/**
 * bpf_dynptr_copy() - Copy data from one dynptr to another.
 * @dst_ptr: Destination dynptr - where data should be copied to
 * @dst_off: Offset into the destination dynptr
 * @src_ptr: Source dynptr - where data should be copied from
 * @src_off: Offset into the source dynptr
 * @size: Length of the data to copy from source to destination
 *
 * Copies data from source dynptr to destination dynptr.
 * Returns 0 on success; negative error, otherwise.
 */
__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off,
				struct bpf_dynptr *src_ptr, u32 src_off, u32 size)
{
	struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr;
	struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr;
	void *src_slice, *dst_slice;
	char buf[256];
	u32 off;

	/* Try for direct pointers on both sides; no bounce buffer is offered. */
	src_slice = bpf_dynptr_slice(src_ptr, src_off, NULL, size);
	dst_slice = bpf_dynptr_slice_rdwr(dst_ptr, dst_off, NULL, size);

	/* Both sides directly addressable: memmove copes with any overlap. */
	if (src_slice && dst_slice) {
		memmove(dst_slice, src_slice, size);
		return 0;
	}

	/* Only one side is directly addressable: use the dynptr accessor for
	 * the other side.
	 */
	if (src_slice)
		return __bpf_dynptr_write(dst, dst_off, src_slice, size, 0);

	if (dst_slice)
		return __bpf_dynptr_read(dst_slice, size, src, src_off, 0);

	if (bpf_dynptr_check_off_len(dst, dst_off, size) ||
	    bpf_dynptr_check_off_len(src, src_off, size))
		return -E2BIG;

	/* Neither side is directly addressable: bounce through buf in chunks
	 * of at most sizeof(buf) bytes.
	 */
	off = 0;
	while (off < size) {
		u32 chunk_sz = min_t(u32, sizeof(buf), size - off);
		int err;

		err = __bpf_dynptr_read(buf, chunk_sz, src, src_off + off, 0);
		if (err)
			return err;
		err = __bpf_dynptr_write(dst, dst_off + off, buf, chunk_sz, 0);
		if (err)
			return err;

		off += chunk_sz;
	}
	return 0;
}

/**
 * bpf_dynptr_memset() - Fill dynptr memory with a constant byte.
 * @p: Destination dynptr - where data will be filled
 * @offset: Offset into the dynptr to start filling from
 * @size: Number of bytes to fill
 * @val: Constant byte to fill the memory with
 *
 * Fills the @size bytes of the memory area pointed to by @p
 * at @offset with the constant byte @val.
* Returns 0 on success; negative error, otherwise.
 */
__bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u32 offset, u32 size, u8 val)
{
	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
	u32 chunk_sz, write_off;
	char buf[256];
	void* slice;
	int err;

	/* Fast path: a direct writable pointer covers the whole range. */
	slice = bpf_dynptr_slice_rdwr(p, offset, NULL, size);
	if (likely(slice)) {
		memset(slice, val, size);
		return 0;
	}

	/* No slice: work out why, so the caller gets a specific error. */
	if (__bpf_dynptr_is_rdonly(ptr))
		return -EINVAL;

	err = bpf_dynptr_check_off_len(ptr, offset, size);
	if (err)
		return err;

	/* Non-linear data under the dynptr, write from a local buffer */
	chunk_sz = min_t(u32, sizeof(buf), size);
	memset(buf, val, chunk_sz);

	/* buf is filled once above; each pass writes at most sizeof(buf) bytes. */
	for (write_off = 0; write_off < size; write_off += chunk_sz) {
		chunk_sz = min_t(u32, sizeof(buf), size - write_off);
		err = __bpf_dynptr_write(ptr, offset + write_off, buf, chunk_sz, 0);
		if (err)
			return err;
	}

	return 0;
}

/* kfunc: returns @obj unchanged. */
__bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
{
	return obj;
}

/* kfunc: returns @obj__ign unchanged (const dropped); @btf_id__k is not
 * used by this body.
 */
__bpf_kfunc void *bpf_rdonly_cast(const void *obj__ign, u32 btf_id__k)
{
	return (void *)obj__ign;
}

/* kfunc wrapper around rcu_read_lock(). */
__bpf_kfunc void bpf_rcu_read_lock(void)
{
	rcu_read_lock();
}

/* kfunc wrapper around rcu_read_unlock(). */
__bpf_kfunc void bpf_rcu_read_unlock(void)
{
	rcu_read_unlock();
}

/* State collected by bpf_stack_walker() for bpf_throw(). */
struct bpf_throw_ctx {
	struct bpf_prog_aux *aux;	/* aux of the first non-subprog BPF frame */
	u64 sp;				/* stack pointer reported for that frame */
	u64 bp;				/* frame pointer reported for that frame */
	int cnt;			/* number of BPF frames seen so far */
};

/* Stack-walk callback used by bpf_throw(). Returning true continues the
 * walk, false stops it. The walk stops at the first BPF frame that is not a
 * subprog (recording its aux/sp/bp), or at the first non-BPF frame once at
 * least one BPF frame has been seen.
 */
static bool bpf_stack_walker(void *cookie, u64 ip, u64 sp, u64 bp)
{
	struct bpf_throw_ctx *ctx = cookie;
	struct bpf_prog *prog;

	/*
	 * The RCU read lock is held to safely traverse the latch tree, but we
	 * don't need its protection when accessing the prog, since it has an
	 * active stack frame on the current stack trace, and won't disappear.
*/
	rcu_read_lock();
	prog = bpf_prog_ksym_find(ip);
	rcu_read_unlock();
	/* Not a BPF frame: keep walking only while no BPF frame has been seen. */
	if (!prog)
		return !ctx->cnt;
	ctx->cnt++;
	/* Subprog frames are skipped; keep looking for the main program. */
	if (bpf_is_subprog(prog))
		return true;
	ctx->aux = prog->aux;
	ctx->sp = sp;
	ctx->bp = bp;
	return false;
}

/* kfunc: walk the stack to find the BPF program frame recorded by
 * bpf_stack_walker(), then invoke that program's bpf_exception_cb with
 * @cookie and the recorded sp/bp. The callback is not expected to return.
 */
__bpf_kfunc void bpf_throw(u64 cookie)
{
	struct bpf_throw_ctx ctx = {};

	arch_bpf_stack_walk(bpf_stack_walker, &ctx);
	WARN_ON_ONCE(!ctx.aux);
	if (ctx.aux)
		WARN_ON_ONCE(!ctx.aux->exception_boundary);
	WARN_ON_ONCE(!ctx.bp);
	WARN_ON_ONCE(!ctx.cnt);
	/* Prevent KASAN false positives for CONFIG_KASAN_STACK by unpoisoning
	 * deeper stack depths than ctx.sp as we do not return from bpf_throw,
	 * which skips compiler generated instrumentation to do the same.
	 */
	kasan_unpoison_task_stack_below((void *)(long)ctx.sp);
	/* NOTE(review): ctx.aux is dereferenced here even if the WARN above
	 * fired for a NULL aux - confirm that case cannot occur in practice.
	 */
	ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp, 0, 0);
	WARN(1, "A call to BPF exception callback should never return\n");
}

/* kfunc: initialize the workqueue object @wq for map @p__map.
 * @flags must be 0, otherwise -EINVAL is returned; the rest is delegated to
 * __bpf_async_init() with BPF_ASYNC_TYPE_WQ.
 */
__bpf_kfunc int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags)
{
	struct bpf_async_kern *async = (struct bpf_async_kern *)wq;
	struct bpf_map *map = p__map;

	BUILD_BUG_ON(sizeof(struct bpf_async_kern) > sizeof(struct bpf_wq));
	BUILD_BUG_ON(__alignof__(struct bpf_async_kern) != __alignof__(struct bpf_wq));

	if (flags)
		return -EINVAL;

	return __bpf_async_init(async, map, flags, BPF_ASYNC_TYPE_WQ);
}

/* kfunc: schedule the work attached to @wq.
 * Returns -EOPNOTSUPP in NMI context, -EINVAL for non-zero @flags or when no
 * work or no callback program is set, and 0 once schedule_work() was called.
 */
__bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags)
{
	struct bpf_async_kern *async = (struct bpf_async_kern *)wq;
	struct bpf_work *w;

	if (in_nmi())
		return -EOPNOTSUPP;
	if (flags)
		return -EINVAL;
	w = READ_ONCE(async->work);
	if (!w || !READ_ONCE(w->cb.prog))
		return -EINVAL;

	schedule_work(&w->work);
	return 0;
}

/* kfunc: set @callback_fn as the callback of @wq.
 * @flags must be 0, otherwise -EINVAL is returned; the rest is delegated to
 * __bpf_async_set_callback() with BPF_ASYNC_TYPE_WQ.
 */
__bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
					 int (callback_fn)(void *map, int *key, void *value),
					 unsigned int flags,
					 void *aux__prog)
{
	struct bpf_prog_aux *aux = (struct bpf_prog_aux *)aux__prog;
	struct bpf_async_kern *async = (struct bpf_async_kern *)wq;

	if (flags)
		return -EINVAL;

	return __bpf_async_set_callback(async, callback_fn, aux, flags, BPF_ASYNC_TYPE_WQ);
}

__bpf_kfunc void
bpf_preempt_disable(void)
{
	/* kfunc wrapper around preempt_disable(). */
	preempt_disable();
}

/* kfunc wrapper around preempt_enable(). */
__bpf_kfunc void bpf_preempt_enable(void)
{
	preempt_enable();
}

/* Opaque, BPF-visible storage for a bits iterator (two 64-bit words). */
struct bpf_iter_bits {
	__u64 __opaque[2];
} __aligned(8);

/* Upper bound on the nr_words argument accepted by bpf_iter_bits_new(). */
#define BITS_ITER_NR_WORDS_MAX 511

/* Kernel-side layout overlaid on struct bpf_iter_bits. */
struct bpf_iter_bits_kern {
	union {
		__u64 *bits;		/* allocated copy, used when nr_bits != 64 */
		__u64 bits_copy;	/* inline copy, used when nr_bits == 64 */
	};
	int nr_bits;			/* total number of bits; 0 if not set up */
	int bit;			/* last bit returned; -1 before the first */
} __aligned(8);

/* On 64-bit hosts, unsigned long and u64 have the same size, so passing
 * a u64 pointer and an unsigned long pointer to find_next_bit() will
 * return the same result, as both point to the same 8-byte area.
 *
 * For 32-bit little-endian hosts, using a u64 pointer or unsigned long
 * pointer also makes no difference. This is because the first iterated
 * unsigned long is composed of bits 0-31 of the u64 and the second unsigned
 * long is composed of bits 32-63 of the u64.
 *
 * However, for 32-bit big-endian hosts, this is not the case. The first
 * iterated unsigned long will be bits 32-63 of the u64, so swap these two
 * ulong values within the u64.
 */
static void swap_ulong_in_u64(u64 *bits, unsigned int nr)
{
#if (BITS_PER_LONG == 32) && defined(__BIG_ENDIAN)
	unsigned int i;

	/* Exchange the upper and lower 32-bit halves of each of the nr words. */
	for (i = 0; i < nr; i++)
		bits[i] = (bits[i] >> 32) | ((u64)(u32)bits[i] << 32);
#endif
}

/**
 * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
 * @it: The new bpf_iter_bits to be created
 * @unsafe_ptr__ign: A pointer pointing to a memory area to be iterated over
 * @nr_words: The size of the specified memory area, measured in 8-byte units.
 * The maximum value of @nr_words is @BITS_ITER_NR_WORDS_MAX. This limit may be
 * further reduced by the BPF memory allocator implementation.
 *
 * This function initializes a new bpf_iter_bits structure for iterating over
 * a memory area which is specified by the @unsafe_ptr__ign and @nr_words. It
 * copies the data of the memory area to the newly created bpf_iter_bits @it for
 * subsequent iteration operations.
 *
 * On success, 0 is returned. On failure, a negative error code is returned.
*/
__bpf_kfunc int
bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words)
{
	struct bpf_iter_bits_kern *kit = (void *)it;
	u32 nr_bytes = nr_words * sizeof(u64);
	u32 nr_bits = BYTES_TO_BITS(nr_bytes);
	int err;

	BUILD_BUG_ON(sizeof(struct bpf_iter_bits_kern) != sizeof(struct bpf_iter_bits));
	BUILD_BUG_ON(__alignof__(struct bpf_iter_bits_kern) !=
		     __alignof__(struct bpf_iter_bits));

	/* Start from an empty iterator so every error path below leaves
	 * nr_bits == 0, which next() and destroy() treat as nothing to do.
	 */
	kit->nr_bits = 0;
	kit->bits_copy = 0;
	kit->bit = -1;

	if (!unsafe_ptr__ign || !nr_words)
		return -EINVAL;
	/* Checked before nr_bytes/nr_bits are used for anything. */
	if (nr_words > BITS_ITER_NR_WORDS_MAX)
		return -E2BIG;

	/* Optimization for u64 mask */
	if (nr_bits == 64) {
		err = bpf_probe_read_kernel_common(&kit->bits_copy, nr_bytes, unsafe_ptr__ign);
		if (err)
			return -EFAULT;

		swap_ulong_in_u64(&kit->bits_copy, nr_words);

		kit->nr_bits = nr_bits;
		return 0;
	}

	if (bpf_mem_alloc_check_size(false, nr_bytes))
		return -E2BIG;

	/* Fallback to memalloc */
	kit->bits = bpf_mem_alloc(&bpf_global_ma, nr_bytes);
	if (!kit->bits)
		return -ENOMEM;

	err = bpf_probe_read_kernel_common(kit->bits, nr_bytes, unsafe_ptr__ign);
	if (err) {
		/* nr_bits is still 0 here, so destroy() will not free this again. */
		bpf_mem_free(&bpf_global_ma, kit->bits);
		return err;
	}

	swap_ulong_in_u64(kit->bits, nr_words);

	kit->nr_bits = nr_bits;
	return 0;
}

/**
 * bpf_iter_bits_next() - Get the next bit in a bpf_iter_bits
 * @it: The bpf_iter_bits to be checked
 *
 * This function returns a pointer to a number representing the value of the
 * next bit in the bits.
 *
 * If there are no further bits available, it returns NULL.
 */
__bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
{
	struct bpf_iter_bits_kern *kit = (void *)it;
	int bit = kit->bit, nr_bits = kit->nr_bits;
	const void *bits;

	/* Iterator was never set up, or a previous call already hit the end. */
	if (!nr_bits || bit >= nr_bits)
		return NULL;

	bits = nr_bits == 64 ?
&kit->bits_copy : kit->bits;
	bit = find_next_bit(bits, nr_bits, bit + 1);
	if (bit >= nr_bits) {
		/* Remember the end position so later calls return NULL early. */
		kit->bit = bit;
		return NULL;
	}

	kit->bit = bit;
	return &kit->bit;
}

/**
 * bpf_iter_bits_destroy() - Destroy a bpf_iter_bits
 * @it: The bpf_iter_bits to be destroyed
 *
 * Destroy the resource associated with the bpf_iter_bits.
 */
__bpf_kfunc void bpf_iter_bits_destroy(struct bpf_iter_bits *it)
{
	struct bpf_iter_bits_kern *kit = (void *)it;

	/* nr_bits <= 64 means the inline copy (or nothing) is in use, so there
	 * is no allocation to free.
	 */
	if (kit->nr_bits <= 64)
		return;
	bpf_mem_free(&bpf_global_ma, kit->bits);
}

/**
 * bpf_copy_from_user_str() - Copy a string from an unsafe user address
 * @dst:             Destination address, in kernel space.  This buffer must be
 *                   at least @dst__sz bytes long.
 * @dst__sz:         Maximum number of bytes to copy, includes the trailing NUL.
 * @unsafe_ptr__ign: Source address, in user space.
 * @flags:           The only supported flag is BPF_F_PAD_ZEROS
 *
 * Copies a NUL-terminated string from userspace to BPF space. If user string is
 * too long this will still ensure zero termination in the dst buffer unless
 * buffer size is 0.
 *
 * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success and
 * memset all of @dst on failure.
 *
 * Return: The number of bytes written including the NUL terminator on
 * success, 0 when @dst__sz is 0, -EINVAL for unsupported @flags, or the
 * negative error returned by strncpy_from_user().
 */
__bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user *unsafe_ptr__ign, u64 flags)
{
	int ret;

	if (unlikely(flags & ~BPF_F_PAD_ZEROS))
		return -EINVAL;

	if (unlikely(!dst__sz))
		return 0;

	/* Copy at most dst__sz - 1 bytes so there is always room for the NUL. */
	ret = strncpy_from_user(dst, unsafe_ptr__ign, dst__sz - 1);
	if (ret < 0) {
		if (flags & BPF_F_PAD_ZEROS)
			memset((char *)dst, 0, dst__sz);

		return ret;
	}

	/* Either zero the whole tail, or write just the terminating NUL. */
	if (flags & BPF_F_PAD_ZEROS)
		memset((char *)dst + ret, 0, dst__sz - ret);
	else
		((char *)dst)[ret] = '\0';

	return ret + 1;
}

/**
 * bpf_copy_from_user_task_str() - Copy a string from a task's address space
 * @dst:             Destination address, in kernel space.  This buffer must be
 *                   at least @dst__sz bytes long.
 * @dst__sz:         Maximum number of bytes to copy, includes the trailing NUL.
 * @unsafe_ptr__ign: Source address in the task's address space.
* @tsk: The task whose address space will be used * @flags: The only supported flag is BPF_F_PAD_ZEROS * * Copies a NUL terminated string from a task's address space to @dst__sz * buffer. If user string is too long this will still ensure zero termination * in the @dst__sz buffer unless buffer size is 0. * * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst__sz to 0 on success * and memset all of @dst__sz on failure. * * Return: The number of copied bytes on success including the NUL terminator. * A negative error code on failure. */ __bpf_kfunc int bpf_copy_from_user_task_str(void *dst, u32 dst__sz, const void __user *unsafe_ptr__ign, struct task_struct *tsk, u64 flags) { int ret; if (unlikely(flags & ~BPF_F_PAD_ZEROS)) return -EINVAL; if (unlikely(dst__sz == 0)) return 0; ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_ptr__ign, dst, dst__sz, 0); if (ret < 0) { if (flags & BPF_F_PAD_ZEROS) memset(dst, 0, dst__sz); return ret; } if (flags & BPF_F_PAD_ZEROS) memset(dst + ret, 0, dst__sz - ret); return ret + 1; } /* Keep unsinged long in prototype so that kfunc is usable when emitted to * vmlinux.h in BPF programs directly, but note that while in BPF prog, the * unsigned long always points to 8-byte region on stack, the kernel may only * read and write the 4-bytes on 32-bit. */ __bpf_kfunc void bpf_local_irq_save(unsigned long *flags__irq_flag) { local_irq_save(*flags__irq_flag); } __bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag) { local_irq_restore(*flags__irq_flag); } __bpf_kfunc void __bpf_trap(void) { } /* * Kfuncs for string operations. * * Since strings are not necessarily %NUL-terminated, we cannot directly call * in-kernel implementations. Instead, we open-code the implementations using * __get_kernel_nofault instead of plain dereference to make them safe. 
*/ static int __bpf_strcasecmp(const char *s1, const char *s2, bool ignore_case) { char c1, c2; int i; if (!copy_from_kernel_nofault_allowed(s1, 1) || !copy_from_kernel_nofault_allowed(s2, 1)) { return -ERANGE; } guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) { __get_kernel_nofault(&c1, s1, char, err_out); __get_kernel_nofault(&c2, s2, char, err_out); if (ignore_case) { c1 = tolower(c1); c2 = tolower(c2); } if (c1 != c2) return c1 < c2 ? -1 : 1; if (c1 == '\0') return 0; s1++; s2++; } return -E2BIG; err_out: return -EFAULT; } /** * bpf_strcmp - Compare two strings * @s1__ign: One string * @s2__ign: Another string * * Return: * * %0 - Strings are equal * * %-1 - @s1__ign is smaller * * %1 - @s2__ign is smaller * * %-EFAULT - Cannot read one of the strings * * %-E2BIG - One of strings is too large * * %-ERANGE - One of strings is outside of kernel address space */ __bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign) { return __bpf_strcasecmp(s1__ign, s2__ign, false); } /** * bpf_strcasecmp - Compare two strings, ignoring the case of the characters * @s1__ign: One string * @s2__ign: Another string * * Return: * * %0 - Strings are equal * * %-1 - @s1__ign is smaller * * %1 - @s2__ign is smaller * * %-EFAULT - Cannot read one of the strings * * %-E2BIG - One of strings is too large * * %-ERANGE - One of strings is outside of kernel address space */ __bpf_kfunc int bpf_strcasecmp(const char *s1__ign, const char *s2__ign) { return __bpf_strcasecmp(s1__ign, s2__ign, true); } /** * bpf_strnchr - Find a character in a length limited string * @s__ign: The string to be searched * @count: The number of characters to be searched * @c: The character to search for * * Note that the %NUL-terminator is considered part of the string, and can * be searched for. 
* * Return: * * >=0 - Index of the first occurrence of @c within @s__ign * * %-ENOENT - @c not found in the first @count characters of @s__ign * * %-EFAULT - Cannot read @s__ign * * %-E2BIG - @s__ign is too large * * %-ERANGE - @s__ign is outside of kernel address space */ __bpf_kfunc int bpf_strnchr(const char *s__ign, size_t count, char c) { char sc; int i; if (!copy_from_kernel_nofault_allowed(s__ign, 1)) return -ERANGE; guard(pagefault)(); for (i = 0; i < count && i < XATTR_SIZE_MAX; i++) { __get_kernel_nofault(&sc, s__ign, char, err_out); if (sc == c) return i; if (sc == '\0') return -ENOENT; s__ign++; } return i == XATTR_SIZE_MAX ? -E2BIG : -ENOENT; err_out: return -EFAULT; } /** * bpf_strchr - Find the first occurrence of a character in a string * @s__ign: The string to be searched * @c: The character to search for * * Note that the %NUL-terminator is considered part of the string, and can * be searched for. * * Return: * * >=0 - The index of the first occurrence of @c within @s__ign * * %-ENOENT - @c not found in @s__ign * * %-EFAULT - Cannot read @s__ign * * %-E2BIG - @s__ign is too large * * %-ERANGE - @s__ign is outside of kernel address space */ __bpf_kfunc int bpf_strchr(const char *s__ign, char c) { return bpf_strnchr(s__ign, XATTR_SIZE_MAX, c); } /** * bpf_strchrnul - Find and return a character in a string, or end of string * @s__ign: The string to be searched * @c: The character to search for * * Return: * * >=0 - Index of the first occurrence of @c within @s__ign or index of * the null byte at the end of @s__ign when @c is not found * * %-EFAULT - Cannot read @s__ign * * %-E2BIG - @s__ign is too large * * %-ERANGE - @s__ign is outside of kernel address space */ __bpf_kfunc int bpf_strchrnul(const char *s__ign, char c) { char sc; int i; if (!copy_from_kernel_nofault_allowed(s__ign, 1)) return -ERANGE; guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) { __get_kernel_nofault(&sc, s__ign, char, err_out); if (sc == '\0' || sc == c) return i; 
s__ign++; } return -E2BIG; err_out: return -EFAULT; } /** * bpf_strrchr - Find the last occurrence of a character in a string * @s__ign: The string to be searched * @c: The character to search for * * Return: * * >=0 - Index of the last occurrence of @c within @s__ign * * %-ENOENT - @c not found in @s__ign * * %-EFAULT - Cannot read @s__ign * * %-E2BIG - @s__ign is too large * * %-ERANGE - @s__ign is outside of kernel address space */ __bpf_kfunc int bpf_strrchr(const char *s__ign, int c) { char sc; int i, last = -ENOENT; if (!copy_from_kernel_nofault_allowed(s__ign, 1)) return -ERANGE; guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) { __get_kernel_nofault(&sc, s__ign, char, err_out); if (sc == c) last = i; if (sc == '\0') return last; s__ign++; } return -E2BIG; err_out: return -EFAULT; } /** * bpf_strnlen - Calculate the length of a length-limited string * @s__ign: The string * @count: The maximum number of characters to count * * Return: * * >=0 - The length of @s__ign * * %-EFAULT - Cannot read @s__ign * * %-E2BIG - @s__ign is too large * * %-ERANGE - @s__ign is outside of kernel address space */ __bpf_kfunc int bpf_strnlen(const char *s__ign, size_t count) { char c; int i; if (!copy_from_kernel_nofault_allowed(s__ign, 1)) return -ERANGE; guard(pagefault)(); for (i = 0; i < count && i < XATTR_SIZE_MAX; i++) { __get_kernel_nofault(&c, s__ign, char, err_out); if (c == '\0') return i; s__ign++; } return i == XATTR_SIZE_MAX ? 
-E2BIG : i; err_out: return -EFAULT; } /** * bpf_strlen - Calculate the length of a string * @s__ign: The string * * Return: * * >=0 - The length of @s__ign * * %-EFAULT - Cannot read @s__ign * * %-E2BIG - @s__ign is too large * * %-ERANGE - @s__ign is outside of kernel address space */ __bpf_kfunc int bpf_strlen(const char *s__ign) { return bpf_strnlen(s__ign, XATTR_SIZE_MAX); } /** * bpf_strspn - Calculate the length of the initial substring of @s__ign which * only contains letters in @accept__ign * @s__ign: The string to be searched * @accept__ign: The string to search for * * Return: * * >=0 - The length of the initial substring of @s__ign which only * contains letters from @accept__ign * * %-EFAULT - Cannot read one of the strings * * %-E2BIG - One of the strings is too large * * %-ERANGE - One of the strings is outside of kernel address space */ __bpf_kfunc int bpf_strspn(const char *s__ign, const char *accept__ign) { char cs, ca; int i, j; if (!copy_from_kernel_nofault_allowed(s__ign, 1) || !copy_from_kernel_nofault_allowed(accept__ign, 1)) { return -ERANGE; } guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) { __get_kernel_nofault(&cs, s__ign, char, err_out); if (cs == '\0') return i; for (j = 0; j < XATTR_SIZE_MAX; j++) { __get_kernel_nofault(&ca, accept__ign + j, char, err_out); if (cs == ca || ca == '\0') break; } if (j == XATTR_SIZE_MAX) return -E2BIG; if (ca == '\0') return i; s__ign++; } return -E2BIG; err_out: return -EFAULT; } /** * bpf_strcspn - Calculate the length of the initial substring of @s__ign which * does not contain letters in @reject__ign * @s__ign: The string to be searched * @reject__ign: The string to search for * * Return: * * >=0 - The length of the initial substring of @s__ign which does not * contain letters from @reject__ign * * %-EFAULT - Cannot read one of the strings * * %-E2BIG - One of the strings is too large * * %-ERANGE - One of the strings is outside of kernel address space */ __bpf_kfunc int bpf_strcspn(const 
char *s__ign, const char *reject__ign) { char cs, cr; int i, j; if (!copy_from_kernel_nofault_allowed(s__ign, 1) || !copy_from_kernel_nofault_allowed(reject__ign, 1)) { return -ERANGE; } guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) { __get_kernel_nofault(&cs, s__ign, char, err_out); if (cs == '\0') return i; for (j = 0; j < XATTR_SIZE_MAX; j++) { __get_kernel_nofault(&cr, reject__ign + j, char, err_out); if (cs == cr || cr == '\0') break; } if (j == XATTR_SIZE_MAX) return -E2BIG; if (cr != '\0') return i; s__ign++; } return -E2BIG; err_out: return -EFAULT; } /** * bpf_strnstr - Find the first substring in a length-limited string * @s1__ign: The string to be searched * @s2__ign: The string to search for * @len: the maximum number of characters to search * * Return: * * >=0 - Index of the first character of the first occurrence of @s2__ign * within the first @len characters of @s1__ign * * %-ENOENT - @s2__ign not found in the first @len characters of @s1__ign * * %-EFAULT - Cannot read one of the strings * * %-E2BIG - One of the strings is too large * * %-ERANGE - One of the strings is outside of kernel address space */ __bpf_kfunc int bpf_strnstr(const char *s1__ign, const char *s2__ign, size_t len) { char c1, c2; int i, j; if (!copy_from_kernel_nofault_allowed(s1__ign, 1) || !copy_from_kernel_nofault_allowed(s2__ign, 1)) { return -ERANGE; } guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) { for (j = 0; i + j <= len && j < XATTR_SIZE_MAX; j++) { __get_kernel_nofault(&c2, s2__ign + j, char, err_out); if (c2 == '\0') return i; /* * We allow reading an extra byte from s2 (note the * `i + j <= len` above) to cover the case when s2 is * a suffix of the first len chars of s1. 
*/ if (i + j == len) break; __get_kernel_nofault(&c1, s1__ign + j, char, err_out); if (c1 == '\0') return -ENOENT; if (c1 != c2) break; } if (j == XATTR_SIZE_MAX) return -E2BIG; if (i + j == len) return -ENOENT; s1__ign++; } return -E2BIG; err_out: return -EFAULT; } /** * bpf_strstr - Find the first substring in a string * @s1__ign: The string to be searched * @s2__ign: The string to search for * * Return: * * >=0 - Index of the first character of the first occurrence of @s2__ign * within @s1__ign * * %-ENOENT - @s2__ign is not a substring of @s1__ign * * %-EFAULT - Cannot read one of the strings * * %-E2BIG - One of the strings is too large * * %-ERANGE - One of the strings is outside of kernel address space */ __bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign) { return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX); } #ifdef CONFIG_KEYS /** * bpf_lookup_user_key - lookup a key by its serial * @serial: key handle serial number * @flags: lookup-specific flags * * Search a key with a given *serial* and the provided *flags*. * If found, increment the reference count of the key by one, and * return it in the bpf_key structure. * * The bpf_key structure must be passed to bpf_key_put() when done * with it, so that the key reference count is decremented and the * bpf_key structure is freed. * * Permission checks are deferred to the time the key is used by * one of the available key-specific kfuncs. * * Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested * special keyring (e.g. session keyring), if it doesn't yet exist. * Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting * for the key construction, and to retrieve uninstantiated keys (keys * without data attached to them). * * Return: a bpf_key pointer with a valid key pointer if the key is found, a * NULL pointer otherwise. 
*/
__bpf_kfunc struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags)
{
	struct bpf_key *handle;
	struct key *key;
	key_ref_t ref;

	/* Refuse any flag bit outside KEY_LOOKUP_ALL. */
	if (flags & ~KEY_LOOKUP_ALL)
		return NULL;

	/*
	 * The caller's intent is not known at lookup time, so the permission
	 * check is postponed until the key is actually used.
	 */
	ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK);
	if (IS_ERR(ref))
		return NULL;

	key = key_ref_to_ptr(ref);

	handle = kmalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle) {
		/* Give back the reference taken by the lookup. */
		key_put(key);
		return NULL;
	}

	handle->key = key;
	handle->has_ref = true;

	return handle;
}

/**
 * bpf_lookup_system_key - lookup a key by a system-defined ID
 * @id: key ID
 *
 * Obtain a bpf_key structure with a key pointer set to the passed key ID.
 * The key pointer is marked as invalid, to prevent bpf_key_put() from
 * attempting to decrement the key reference count on that pointer. The key
 * pointer set in such way is currently understood only by
 * verify_pkcs7_signature().
 *
 * Set *id* to one of the values defined in include/linux/verification.h:
 * 0 for the primary keyring (immutable keyring of system keys);
 * VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring
 * (where keys can be added only if they are vouched for by existing keys
 * in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform
 * keyring (primarily used by the integrity subsystem to verify a kexec'ed
 * kernel image and, possibly, the initramfs signature).
*
 * Return: a bpf_key pointer with an invalid key pointer set from the
 * pre-determined ID on success, a NULL pointer otherwise
 */
__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
{
	struct bpf_key *bkey;

	if (system_keyring_id_check(id) < 0)
		return NULL;

	bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC);
	if (!bkey)
		return NULL;

	/* The ID itself is stored in the pointer; has_ref = false keeps
	 * bpf_key_put() from calling key_put() on it.
	 */
	bkey->key = (struct key *)(unsigned long)id;
	bkey->has_ref = false;

	return bkey;
}

/**
 * bpf_key_put - decrement key reference count if key is valid and free bpf_key
 * @bkey: bpf_key structure
 *
 * Decrement the reference count of the key inside *bkey*, if the pointer
 * is valid, and free *bkey*.
 */
__bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
{
	if (bkey->has_ref)
		key_put(bkey->key);

	kfree(bkey);
}

/**
 * bpf_verify_pkcs7_signature - verify a PKCS#7 signature
 * @data_p: data to verify
 * @sig_p: signature of the data
 * @trusted_keyring: keyring with keys trusted for signature verification
 *
 * Verify the PKCS#7 signature in @sig_p against the data supplied in @data_p
 * with keys in a keyring referenced by *trusted_keyring*.
 *
 * Return: 0 on success, a negative value on error (-EOPNOTSUPP when
 * CONFIG_SYSTEM_DATA_VERIFICATION is not enabled).
 */
__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
			       struct bpf_dynptr *sig_p,
			       struct bpf_key *trusted_keyring)
{
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
	struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p;
	struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p;
	const void *data, *sig;
	u32 data_len, sig_len;
	int ret;

	if (trusted_keyring->has_ref) {
		/*
		 * Do the permission check deferred in bpf_lookup_user_key().
		 * See bpf_lookup_user_key() for more details.
		 *
		 * A call to key_task_permission() here would be redundant, as
		 * it is already done by keyring_search() called by
		 * find_asymmetric_key().
		 */
		ret = key_validate(trusted_keyring->key);
		if (ret < 0)
			return ret;
	}

	/* Resolve both dynptrs to (pointer, length) pairs. */
	data_len = __bpf_dynptr_size(data_ptr);
	data = __bpf_dynptr_data(data_ptr, data_len);
	sig_len = __bpf_dynptr_size(sig_ptr);
	sig = __bpf_dynptr_data(sig_ptr, sig_len);

	return verify_pkcs7_signature(data, data_len, sig, sig_len,
				      trusted_keyring->key,
				      VERIFYING_BPF_SIGNATURE, NULL,
				      NULL);
#else
	return -EOPNOTSUPP;
#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
}
#endif /* CONFIG_KEYS */

/* Signature of the BPF callback invoked from task work. */
typedef int (*bpf_task_work_callback_t)(struct bpf_map *map, void *key, void *value);

/* States of a bpf_task_work_ctx; transitions are performed with cmpxchg/xchg
 * on ctx->state by the functions below.
 */
enum bpf_task_work_state {
	/* bpf_task_work is ready to be used */
	BPF_TW_STANDBY = 0,
	/* irq work scheduling in progress */
	BPF_TW_PENDING,
	/* task work scheduling in progress */
	BPF_TW_SCHEDULING,
	/* task work is scheduled successfully */
	BPF_TW_SCHEDULED,
	/* callback is running */
	BPF_TW_RUNNING,
	/* associated BPF map value is deleted */
	BPF_TW_FREED,
};

/* Refcounted bookkeeping for one struct bpf_task_work embedded in a map value. */
struct bpf_task_work_ctx {
	enum bpf_task_work_state state;
	refcount_t refcnt;
	struct callback_head work;
	struct irq_work irq_work;
	/* bpf_prog that schedules task work */
	struct bpf_prog *prog;
	/* task for which callback is scheduled */
	struct task_struct *task;
	/* the map and map value associated with this context */
	struct bpf_map *map;
	void *map_val;
	enum task_work_notify_mode mode;
	bpf_task_work_callback_t callback_fn;
	struct rcu_head rcu;
} __aligned(8);

/* Actual type for struct bpf_task_work */
struct bpf_task_work_kern {
	struct bpf_task_work_ctx *ctx;
};

/* Drop the prog and task references held by @ctx, if any, and clear the
 * pointers so a second call is a no-op.
 */
static void bpf_task_work_ctx_reset(struct bpf_task_work_ctx *ctx)
{
	if (ctx->prog) {
		bpf_prog_put(ctx->prog);
		ctx->prog = NULL;
	}
	if (ctx->task) {
		bpf_task_release(ctx->task);
		ctx->task = NULL;
	}
}

/* Take a reference on @ctx unless its refcount already dropped to zero. */
static bool bpf_task_work_ctx_tryget(struct bpf_task_work_ctx *ctx)
{
	return refcount_inc_not_zero(&ctx->refcnt);
}

/* Drop one reference on @ctx; the last put releases prog/task and frees @ctx. */
static void bpf_task_work_ctx_put(struct bpf_task_work_ctx *ctx)
{
	if (!refcount_dec_and_test(&ctx->refcnt))
		return;

	bpf_task_work_ctx_reset(ctx);

	/* bpf_mem_free expects migration to be disabled */
	migrate_disable();
	bpf_mem_free(&bpf_global_ma, ctx);
	migrate_enable();
}

/* Try to cancel the queued task_work for @ctx. */
static void bpf_task_work_cancel(struct bpf_task_work_ctx *ctx)
{
	/*
	 * Scheduled task_work callback holds ctx ref, so if we successfully
	 * cancelled, we put that ref on callback's behalf. If we couldn't
	 * cancel, callback will inevitably run or has already completed
	 * running, and it would have taken care of its ctx ref itself.
	 */
	if (task_work_cancel(ctx->task, &ctx->work))
		bpf_task_work_ctx_put(ctx);
}

/* task_work entry point: moves the ctx to RUNNING, invokes the BPF callback
 * with the map key/value, then tries to return the ctx to STANDBY and drops
 * the callback's reference.
 */
static void bpf_task_work_callback(struct callback_head *cb)
{
	struct bpf_task_work_ctx *ctx = container_of(cb, struct bpf_task_work_ctx, work);
	enum bpf_task_work_state state;
	u32 idx;
	void *key;

	/* Read lock is needed to protect ctx and map key/value access */
	guard(rcu_tasks_trace)();
	/*
	 * This callback may start running before bpf_task_work_irq() switched to
	 * SCHEDULED state, so handle both transition variants SCHEDULING|SCHEDULED -> RUNNING.
	 */
	state = cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_RUNNING);
	if (state == BPF_TW_SCHEDULED)
		state = cmpxchg(&ctx->state, BPF_TW_SCHEDULED, BPF_TW_RUNNING);
	/* Map value was deleted in the meantime: skip the callback. */
	if (state == BPF_TW_FREED) {
		bpf_task_work_ctx_put(ctx);
		return;
	}

	key = (void *)map_key_from_value(ctx->map, ctx->map_val, &idx);

	migrate_disable();
	ctx->callback_fn(ctx->map, key, ctx->map_val);
	migrate_enable();

	bpf_task_work_ctx_reset(ctx);
	/* Only goes back to STANDBY if the state is still RUNNING. */
	(void)cmpxchg(&ctx->state, BPF_TW_RUNNING, BPF_TW_STANDBY);

	bpf_task_work_ctx_put(ctx);
}

/* irq_work entry point: moves the ctx from PENDING to SCHEDULING and queues
 * the task_work via task_work_add().
 */
static void bpf_task_work_irq(struct irq_work *irq_work)
{
	struct bpf_task_work_ctx *ctx = container_of(irq_work, struct bpf_task_work_ctx, irq_work);
	enum bpf_task_work_state state;
	int err;

	guard(rcu_tasks_trace)();

	/* Not PENDING any more: drop the ref taken for the callback and stop. */
	if (cmpxchg(&ctx->state, BPF_TW_PENDING, BPF_TW_SCHEDULING) != BPF_TW_PENDING) {
		bpf_task_work_ctx_put(ctx);
		return;
	}

	err = task_work_add(ctx->task, &ctx->work, ctx->mode);
	if (err) {
		bpf_task_work_ctx_reset(ctx);
		/*
		 * try to switch back to STANDBY for another task_work reuse, but we might have
		 * gone to FREED already, which is fine as we already cleaned up after ourselves
		 */
		(void)cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_STANDBY);
		bpf_task_work_ctx_put(ctx);
		return;
	}

	/*
	 * It's technically possible for the just scheduled task_work callback
	 * to complete running by now, going SCHEDULING -> RUNNING and then
	 * dropping its ctx refcount. Instead of capturing an extra ref just to
	 * protect the ctx->state access below, we rely on RCU protection to
	 * perform the SCHEDULING -> SCHEDULED attempt below.
	 */
	state = cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_SCHEDULED);
	if (state == BPF_TW_FREED)
		bpf_task_work_cancel(ctx); /* clean up if we switched into FREED state */
}

/* Return the ctx attached to @tw, allocating and installing a new one (with
 * refcount 1, state STANDBY) if none is set yet. Returns ERR_PTR(-ENOMEM) on
 * allocation failure.
 */
static struct bpf_task_work_ctx *bpf_task_work_fetch_ctx(struct bpf_task_work *tw,
							 struct bpf_map *map)
{
	struct bpf_task_work_kern *twk = (void *)tw;
	struct bpf_task_work_ctx *ctx, *old_ctx;

	ctx = READ_ONCE(twk->ctx);
	if (ctx)
		return ctx;

	ctx = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_task_work_ctx));
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	memset(ctx, 0, sizeof(*ctx));
	refcount_set(&ctx->refcnt, 1); /* map's own ref */
	ctx->state = BPF_TW_STANDBY;

	old_ctx = cmpxchg(&twk->ctx, NULL, ctx);
	if (old_ctx) {
		/*
		 * tw->ctx is set by concurrent BPF program, release allocated
		 * memory and try to reuse already set context.
		 */
		bpf_mem_free(&bpf_global_ma, ctx);
		return old_ctx;
	}

	return ctx; /* Success */
}

/* Fetch the ctx for @tw, take a reference for the future callback and claim
 * it by moving STANDBY -> PENDING. Returns ERR_PTR(-EBUSY) when the ctx cannot
 * be claimed or the map has no user references left.
 */
static struct bpf_task_work_ctx *bpf_task_work_acquire_ctx(struct bpf_task_work *tw,
							   struct bpf_map *map)
{
	struct bpf_task_work_ctx *ctx;

	ctx = bpf_task_work_fetch_ctx(tw, map);
	if (IS_ERR(ctx))
		return ctx;

	/* try to get ref for task_work callback to hold */
	if (!bpf_task_work_ctx_tryget(ctx))
		return ERR_PTR(-EBUSY);

	if (cmpxchg(&ctx->state, BPF_TW_STANDBY, BPF_TW_PENDING) != BPF_TW_STANDBY) {
		/* lost acquiring race or map_release_uref() stole it from us, put ref and bail */
		bpf_task_work_ctx_put(ctx);
		return ERR_PTR(-EBUSY);
	}

	/*
	 * If no process or bpffs is holding a reference to the map, no new callbacks should be
	 * scheduled. This does not address any race or correctness issue, but rather is a policy
	 * choice: dropping user references should stop everything.
	 */
	if (!atomic64_read(&map->usercnt)) {
		/* drop ref we just got for task_work callback itself */
		bpf_task_work_ctx_put(ctx);
		/* transfer map's ref into cancel_and_free() */
		bpf_task_work_cancel_and_free(tw);
		return ERR_PTR(-EBUSY);
	}

	return ctx;
}

/* Common implementation of the bpf_task_work_schedule_*() kfuncs: takes
 * references on the program and the task, claims the ctx of @tw, fills it in
 * and queues the irq_work that will add the task_work.
 *
 * Returns 0 on success, -EBADF if the prog or task reference cannot be taken,
 * or the error from bpf_task_work_acquire_ctx().
 */
static int bpf_task_work_schedule(struct task_struct *task, struct bpf_task_work *tw,
				  struct bpf_map *map, bpf_task_work_callback_t callback_fn,
				  struct bpf_prog_aux *aux, enum task_work_notify_mode mode)
{
	struct bpf_prog *prog;
	struct bpf_task_work_ctx *ctx;
	int err;

	BTF_TYPE_EMIT(struct bpf_task_work);

	prog = bpf_prog_inc_not_zero(aux->prog);
	if (IS_ERR(prog))
		return -EBADF;
	task = bpf_task_acquire(task);
	if (!task) {
		err = -EBADF;
		goto release_prog;
	}

	ctx = bpf_task_work_acquire_ctx(tw, map);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto release_all;
	}

	/* From here on the prog and task references are owned by ctx. */
	ctx->task = task;
	ctx->callback_fn = callback_fn;
	ctx->prog = prog;
	ctx->mode = mode;
	ctx->map = map;
	/* Map value start = address of tw minus its offset inside the value. */
	ctx->map_val = (void *)tw - map->record->task_work_off;
	init_task_work(&ctx->work, bpf_task_work_callback);
	init_irq_work(&ctx->irq_work, bpf_task_work_irq);

	irq_work_queue(&ctx->irq_work);
	return 0;

release_all:
	bpf_task_release(task);
release_prog:
	bpf_prog_put(prog);
	return err;
}

/**
 * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode
 * @task: Task struct for which callback should be scheduled
 * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
 * @map__map: bpf_map that embeds struct bpf_task_work in the values
 * @callback: pointer to BPF subprogram to call
 * @aux__prog: user should pass NULL
 *
 * Return: 0 if task work has been scheduled successfully, negative error code otherwise
 */
__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw,
					      void *map__map, bpf_task_work_callback_t callback,
					      void *aux__prog)
{
	return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_SIGNAL);
}

/**
 * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME mode
 * @task: Task struct for which callback should be scheduled
 * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
 * @map__map: bpf_map that embeds struct bpf_task_work in the values
 * @callback: pointer to BPF subprogram to call
 * @aux__prog: user should pass NULL
 *
 * Return: 0 if task work has been scheduled successfully, negative error code otherwise
 */
__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct bpf_task_work *tw,
					      void *map__map, bpf_task_work_callback_t callback,
					      void *aux__prog)
{
	return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME);
}

__bpf_kfunc_end_defs();

/* irq_work handler queued by bpf_task_work_cancel_and_free() for a ctx that
 * was in SCHEDULED state.
 */
static void bpf_task_work_cancel_scheduled(struct irq_work *irq_work)
{
	struct bpf_task_work_ctx *ctx = container_of(irq_work, struct bpf_task_work_ctx, irq_work);

	bpf_task_work_cancel(ctx); /* this might put task_work callback's ref */
	bpf_task_work_ctx_put(ctx); /* and here we put map's own ref that was transferred to us */
}

/* Detach the ctx from the struct bpf_task_work at @val, mark it FREED and
 * drop the map's reference (deferred to irq_work when a task_work is already
 * scheduled). Does nothing if no ctx is attached.
 */
void bpf_task_work_cancel_and_free(void *val)
{
	struct bpf_task_work_kern *twk = val;
	struct bpf_task_work_ctx *ctx;
	enum bpf_task_work_state state;

	ctx = xchg(&twk->ctx, NULL);
	if (!ctx)
		return;

	state = xchg(&ctx->state, BPF_TW_FREED);
	if (state == BPF_TW_SCHEDULED) {
		/* run in irq_work to avoid locks in NMI */
		init_irq_work(&ctx->irq_work, bpf_task_work_cancel_scheduled);
		irq_work_queue(&ctx->irq_work);
		return;
	}

	bpf_task_work_ctx_put(ctx); /* put bpf map's ref */
}

BTF_KFUNCS_START(generic_btf_ids)
#ifdef CONFIG_CRASH_DUMP
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL | KF_RCU) BTF_ID_FLAGS(func, bpf_list_push_front_impl) BTF_ID_FLAGS(func, bpf_list_push_back_impl) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_rbtree_add_impl) BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_rbtree_root, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_rbtree_left, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_rbtree_right, KF_RET_NULL) #ifdef CONFIG_CGROUPS BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU) BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_from_vpid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_throw) #ifdef CONFIG_BPF_EVENTS BTF_ID_FLAGS(func, bpf_send_signal_task, KF_TRUSTED_ARGS) #endif #ifdef CONFIG_KEYS BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE) #ifdef CONFIG_SYSTEM_DATA_VERIFICATION BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE) #endif #endif BTF_KFUNCS_END(generic_btf_ids) static const struct btf_kfunc_id_set generic_kfunc_set = { .owner = THIS_MODULE, .set = &generic_btf_ids, }; BTF_ID_LIST(generic_dtor_ids) BTF_ID(struct, 
task_struct) BTF_ID(func, bpf_task_release_dtor) #ifdef CONFIG_CGROUPS BTF_ID(struct, cgroup) BTF_ID(func, bpf_cgroup_release_dtor) #endif BTF_KFUNCS_START(common_btf_ids) BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx, KF_FASTCALL) BTF_ID_FLAGS(func, bpf_rdonly_cast, KF_FASTCALL) BTF_ID_FLAGS(func, bpf_rcu_read_lock) BTF_ID_FLAGS(func, bpf_rcu_read_unlock) BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW | KF_RCU) BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY) #ifdef CONFIG_CGROUPS BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED) BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY) #endif BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED) BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_dynptr_adjust) BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) BTF_ID_FLAGS(func, bpf_dynptr_size) BTF_ID_FLAGS(func, bpf_dynptr_clone) BTF_ID_FLAGS(func, bpf_dynptr_copy) BTF_ID_FLAGS(func, bpf_dynptr_memset) #ifdef CONFIG_NET BTF_ID_FLAGS(func, bpf_modify_return_test_tp) #endif BTF_ID_FLAGS(func, bpf_wq_init) BTF_ID_FLAGS(func, bpf_wq_set_callback_impl) BTF_ID_FLAGS(func, bpf_wq_start) BTF_ID_FLAGS(func, bpf_preempt_disable) BTF_ID_FLAGS(func, 
bpf_preempt_enable) BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_copy_from_user_task_str, KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_get_kmem_cache) BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_local_irq_save) BTF_ID_FLAGS(func, bpf_local_irq_restore) BTF_ID_FLAGS(func, bpf_probe_read_user_dynptr) BTF_ID_FLAGS(func, bpf_probe_read_kernel_dynptr) BTF_ID_FLAGS(func, bpf_probe_read_user_str_dynptr) BTF_ID_FLAGS(func, bpf_probe_read_kernel_str_dynptr) BTF_ID_FLAGS(func, bpf_copy_from_user_dynptr, KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_copy_from_user_str_dynptr, KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_copy_from_user_task_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_copy_from_user_task_str_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS) #ifdef CONFIG_DMA_SHARED_BUFFER BTF_ID_FLAGS(func, bpf_iter_dmabuf_new, KF_ITER_NEW | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) #endif BTF_ID_FLAGS(func, __bpf_trap) BTF_ID_FLAGS(func, bpf_strcmp); BTF_ID_FLAGS(func, bpf_strcasecmp); BTF_ID_FLAGS(func, bpf_strchr); BTF_ID_FLAGS(func, bpf_strchrnul); BTF_ID_FLAGS(func, bpf_strnchr); BTF_ID_FLAGS(func, bpf_strrchr); BTF_ID_FLAGS(func, bpf_strlen); BTF_ID_FLAGS(func, bpf_strnlen); BTF_ID_FLAGS(func, bpf_strspn); BTF_ID_FLAGS(func, bpf_strcspn); BTF_ID_FLAGS(func, bpf_strstr); BTF_ID_FLAGS(func, bpf_strnstr); #if defined(CONFIG_BPF_LSM) && defined(CONFIG_CGROUPS) BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU) #endif BTF_ID_FLAGS(func, 
bpf_stream_vprintk, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_TRUSTED_ARGS) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { .owner = THIS_MODULE, .set = &common_btf_ids, }; static int __init kfunc_init(void) { int ret; const struct btf_id_dtor_kfunc generic_dtors[] = { { .btf_id = generic_dtor_ids[0], .kfunc_btf_id = generic_dtor_ids[1] }, #ifdef CONFIG_CGROUPS { .btf_id = generic_dtor_ids[2], .kfunc_btf_id = generic_dtor_ids[3] }, #endif }; ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &generic_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &generic_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &generic_kfunc_set); ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors, ARRAY_SIZE(generic_dtors), THIS_MODULE); return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set); } late_initcall(kfunc_init); /* Get a pointer to dynptr data up to len bytes for read only access. If * the dynptr doesn't have continuous data up to len bytes, return NULL. */ const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) { const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr; return bpf_dynptr_slice(p, 0, NULL, len); } /* Get a pointer to dynptr data up to len bytes for read write access. If * the dynptr doesn't have continuous data up to len bytes, or the dynptr * is read only, return NULL. */ void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len) { if (__bpf_dynptr_is_rdonly(ptr)) return NULL; return (void *)__bpf_dynptr_data(ptr, len); } |
| 17 17 2 16 10 8 1 11 8 7 2 7 6 2 6 5 1 4 4 4 3 3 4 4 13 13 13 12 12 13 13 13 13 13 13 13 13 3 3 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 21 21 14 12 10 9 8 15 18 8 7 10 7 6 2 1 4 8 4 3 3 7 4 2 2 2 2 1 4 21 4 12 8 7 7 3 4 4 8 46 47 1 1 2 3 2 1 8 3 3 2 1 4 3 2 1 2 1 1 1 1 21 20 21 1 1 3 1 1 1 1 1 47 27 19 5 5 5 5 17 17 17 19 12 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 
444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 
944 945 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2001 Paul Stewart * Copyright (c) 2001 Vojtech Pavlik * * HID char devices, giving access to raw HID device events. */ /* * * Should you need to contact me, the author, you can do so either by * e-mail - mail your message to Paul Stewart <stewart@wetlogic.net> */ #include <linux/poll.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/module.h> #include <linux/init.h> #include <linux/input.h> #include <linux/usb.h> #include <linux/hid.h> #include <linux/hiddev.h> #include <linux/compat.h> #include <linux/vmalloc.h> #include <linux/nospec.h> #include "usbhid.h" #ifdef CONFIG_USB_DYNAMIC_MINORS #define HIDDEV_MINOR_BASE 0 #define HIDDEV_MINORS 256 #else #define HIDDEV_MINOR_BASE 96 #define HIDDEV_MINORS 16 #endif #define HIDDEV_BUFFER_SIZE 2048 struct hiddev_list { struct hiddev_usage_ref buffer[HIDDEV_BUFFER_SIZE]; int head; int tail; unsigned flags; struct fasync_struct *fasync; struct hiddev *hiddev; struct list_head node; struct mutex thread_lock; }; /* * Find a report, given the report's type and ID. The ID can be specified * indirectly by REPORT_ID_FIRST (which returns the first report of the given * type) or by (REPORT_ID_NEXT | old_id), which returns the next report of the * given type which follows old_id. 
*/
static struct hid_report *
hiddev_lookup_report(struct hid_device *hid, struct hiddev_report_info *rinfo)
{
	/* split the caller's report_id into selector flags and the raw id */
	unsigned int selector = rinfo->report_id & ~HID_REPORT_ID_MASK;
	unsigned int id = rinfo->report_id & HID_REPORT_ID_MASK;
	struct hid_report_enum *renum;
	struct hid_report *cur;
	struct list_head *pos = NULL;

	if (rinfo->report_type < HID_REPORT_TYPE_MIN ||
	    rinfo->report_type > HID_REPORT_TYPE_MAX)
		return NULL;

	renum = &hid->report_enum[rinfo->report_type - HID_REPORT_TYPE_MIN];

	if (selector == HID_REPORT_ID_FIRST) {
		/* first report on the list, if there is one */
		if (list_empty(&renum->report_list))
			return NULL;
		pos = renum->report_list.next;
	} else if (selector == HID_REPORT_ID_NEXT) {
		/* successor of the report whose id was passed in */
		cur = renum->report_id_hash[id];
		if (!cur)
			return NULL;
		pos = cur->list.next;
		if (pos == &renum->report_list)
			return NULL;
	} else if (selector != 0) {
		/* unknown selector bits */
		return NULL;
	}

	/* a selector of 0 leaves rinfo->report_id exactly as passed in */
	if (pos)
		rinfo->report_id = list_entry(pos, struct hid_report, list)->id;

	return renum->report_id_hash[rinfo->report_id];
}

/*
 * Perform an exhaustive search of the report table for a usage, given its
 * type and usage id.
*/
static struct hid_field *
hiddev_lookup_usage(struct hid_device *hid, struct hiddev_usage_ref *uref)
{
	int i, j;
	struct hid_report *report;
	struct hid_report_enum *report_enum;
	struct hid_field *field;

	if (uref->report_type < HID_REPORT_TYPE_MIN ||
	    uref->report_type > HID_REPORT_TYPE_MAX)
		return NULL;

	report_enum = hid->report_enum +
		(uref->report_type - HID_REPORT_TYPE_MIN);

	/* first match wins; on a hit the uref is filled in with its location */
	list_for_each_entry(report, &report_enum->report_list, list) {
		for (i = 0; i < report->maxfield; i++) {
			field = report->field[i];
			for (j = 0; j < field->maxusage; j++) {
				if (field->usage[j].hid == uref->usage_code) {
					uref->report_id = report->id;
					uref->field_index = i;
					uref->usage_index = j;
					return field;
				}
			}
		}
	}

	return NULL;
}

/*
 * Queue *uref on every open file of this hiddev that should see it, signal
 * fasync listeners, then wake anyone sleeping on hiddev->wait.
 *
 * An event whose field_index is HID_FIELD_INDEX_NONE is only queued for
 * files that have HIDDEV_FLAG_REPORT set.  head is advanced modulo
 * HIDDEV_BUFFER_SIZE; no check against tail is made here.
 */
static void hiddev_send_event(struct hid_device *hid,
			      struct hiddev_usage_ref *uref)
{
	struct hiddev *hiddev = hid->hiddev;
	struct hiddev_list *list;
	unsigned long flags;

	spin_lock_irqsave(&hiddev->list_lock, flags);
	list_for_each_entry(list, &hiddev->list, node) {
		if (uref->field_index != HID_FIELD_INDEX_NONE ||
		    (list->flags & HIDDEV_FLAG_REPORT) != 0) {
			list->buffer[list->head] = *uref;
			list->head = (list->head + 1) &
				(HIDDEV_BUFFER_SIZE - 1);
			kill_fasync(&list->fasync, SIGIO, POLL_IN);
		}
	}
	spin_unlock_irqrestore(&hiddev->list_lock, flags);

	wake_up_interruptible(&hiddev->wait);
}

/*
 * This is where hid.c calls into hiddev to pass an event that occurred over
 * the interrupt pipe
 */
void hiddev_hid_event(struct hid_device *hid, struct hid_field *field,
		      struct hid_usage *usage, __s32 value)
{
	unsigned type = field->report_type;
	struct hiddev_usage_ref uref;

	/* map the HID core report type to the hiddev one; 0 if unrecognised */
	uref.report_type =
	  (type == HID_INPUT_REPORT) ? HID_REPORT_TYPE_INPUT :
	  ((type == HID_OUTPUT_REPORT) ? HID_REPORT_TYPE_OUTPUT :
	   ((type == HID_FEATURE_REPORT) ? HID_REPORT_TYPE_FEATURE : 0));
	uref.report_id = field->report->id;
	uref.field_index = field->index;
	uref.usage_index = (usage - field->usage);
	uref.usage_code = usage->hid;
	uref.value = value;

	hiddev_send_event(hid, &uref);
}
EXPORT_SYMBOL_GPL(hiddev_hid_event);

/*
 * Queue a report-level event: a zeroed usage reference carrying only the
 * report type and id, with field_index set to HID_FIELD_INDEX_NONE.
 */
void hiddev_report_event(struct hid_device *hid, struct hid_report *report)
{
	unsigned type = report->type;
	struct hiddev_usage_ref uref;

	memset(&uref, 0, sizeof(uref));
	uref.report_type =
	  (type == HID_INPUT_REPORT) ? HID_REPORT_TYPE_INPUT :
	  ((type == HID_OUTPUT_REPORT) ? HID_REPORT_TYPE_OUTPUT :
	   ((type == HID_FEATURE_REPORT) ? HID_REPORT_TYPE_FEATURE : 0));
	uref.report_id = report->id;
	uref.field_index = HID_FIELD_INDEX_NONE;

	hiddev_send_event(hid, &uref);
}

/*
 * fasync file op
 */
static int hiddev_fasync(int fd, struct file *file, int on)
{
	struct hiddev_list *list = file->private_data;

	return fasync_helper(fd, file, on, &list->fasync);
}


/*
 * release file op
 */
static int hiddev_release(struct inode * inode, struct file * file)
{
	struct hiddev_list *list = file->private_data;
	unsigned long flags;

	/* stop event delivery to this file before tearing it down */
	spin_lock_irqsave(&list->hiddev->list_lock, flags);
	list_del(&list->node);
	spin_unlock_irqrestore(&list->hiddev->list_lock, flags);

	mutex_lock(&list->hiddev->existancelock);
	if (!--list->hiddev->open) {
		if (list->hiddev->exist) {
			hid_hw_close(list->hiddev->hid);
			hid_hw_power(list->hiddev->hid, PM_HINT_NORMAL);
		} else {
			/*
			 * Last opener and exist is already clear: free the
			 * hiddev itself here.  The lock is dropped first
			 * because it lives inside the structure being freed.
			 */
			mutex_unlock(&list->hiddev->existancelock);
			kfree(list->hiddev);
			vfree(list);
			return 0;
		}
	}

	mutex_unlock(&list->hiddev->existancelock);
	vfree(list);

	return 0;
}

/*
 * Allocate the per-file hiddev_list, power up and open the hardware on the
 * first open, and link the list into hiddev->list.  Must be called with
 * hiddev->existancelock held (asserted below).  Returns 0 or a negative
 * errno; on failure the open count is restored and the list is freed.
 */
static int __hiddev_open(struct hiddev *hiddev, struct file *file)
{
	struct hiddev_list *list;
	int error;

	lockdep_assert_held(&hiddev->existancelock);

	list = vzalloc(sizeof(*list));
	if (!list)
		return -ENOMEM;

	mutex_init(&list->thread_lock);
	list->hiddev = hiddev;

	if (!hiddev->open++) {
		error = hid_hw_power(hiddev->hid, PM_HINT_FULLON);
		if (error < 0)
			goto err_drop_count;

		error = hid_hw_open(hiddev->hid);
		if (error < 0)
			goto err_normal_power;
	}

	spin_lock_irq(&hiddev->list_lock);
	list_add_tail(&list->node, &hiddev->list);
	spin_unlock_irq(&hiddev->list_lock);

	file->private_data = list;

	return 0;

err_normal_power:
	hid_hw_power(hiddev->hid, PM_HINT_NORMAL);
err_drop_count:
	hiddev->open--;
	vfree(list);
	return error;
}

/*
 * open file op
 */
static int hiddev_open(struct inode *inode, struct file *file)
{
	struct usb_interface *intf;
	struct hid_device *hid;
	struct hiddev *hiddev;
	int res;

	intf = usbhid_find_interface(iminor(inode));
	if (!intf)
		return -ENODEV;

	hid = usb_get_intfdata(intf);
	hiddev = hid->hiddev;

	/* refuse the open once hiddev_disconnect() has cleared exist */
	mutex_lock(&hiddev->existancelock);
	res = hiddev->exist ? __hiddev_open(hiddev, file) : -ENODEV;
	mutex_unlock(&hiddev->existancelock);

	return res;
}

/*
 * "write" file op
 */
static ssize_t hiddev_write(struct file * file, const char __user * buffer, size_t count, loff_t *ppos)
{
	return -EINVAL;
}

/*
 * "read" file op
 */
static ssize_t hiddev_read(struct file * file, char __user * buffer, size_t count, loff_t *ppos)
{
	DEFINE_WAIT(wait);
	struct hiddev_list *list = file->private_data;
	int event_size;
	int retval;

	/* record size depends on whether the file asked for full urefs */
	event_size = ((list->flags & HIDDEV_FLAG_UREF) != 0) ?
		sizeof(struct hiddev_usage_ref) : sizeof(struct hiddev_event);

	if (count < event_size)
		return 0;

	/* lock against other threads */
	retval = mutex_lock_interruptible(&list->thread_lock);
	if (retval)
		return -ERESTARTSYS;

	/* loop until at least one record was copied or an error is set */
	while (retval == 0) {
		if (list->head == list->tail) {
			prepare_to_wait(&list->hiddev->wait, &wait, TASK_INTERRUPTIBLE);

			while (list->head == list->tail) {
				if (signal_pending(current)) {
					retval = -ERESTARTSYS;
					break;
				}
				if (!list->hiddev->exist) {
					retval = -EIO;
					break;
				}
				if (file->f_flags & O_NONBLOCK) {
					retval = -EAGAIN;
					break;
				}

				/* let O_NONBLOCK tasks run */
				mutex_unlock(&list->thread_lock);
				schedule();
				if (mutex_lock_interruptible(&list->thread_lock)) {
					finish_wait(&list->hiddev->wait, &wait);
					return -EINTR;
				}
				set_current_state(TASK_INTERRUPTIBLE);
			}
			finish_wait(&list->hiddev->wait, &wait);

		}

		if (retval) {
			mutex_unlock(&list->thread_lock);
			return retval;
		}


		/*
		 * Drain queued entries while another record still fits.
		 * tail always advances, so entries filtered out below are
		 * consumed without being copied.
		 */
		while (list->head != list->tail &&
		       retval + event_size <= count) {
			if ((list->flags & HIDDEV_FLAG_UREF) == 0) {
				if (list->buffer[list->tail].field_index != HID_FIELD_INDEX_NONE) {
					struct hiddev_event event;

					event.hid = list->buffer[list->tail].usage_code;
					event.value = list->buffer[list->tail].value;
					if (copy_to_user(buffer + retval, &event, sizeof(struct hiddev_event))) {
						mutex_unlock(&list->thread_lock);
						return -EFAULT;
					}
					retval += sizeof(struct hiddev_event);
				}
			} else {
				if (list->buffer[list->tail].field_index != HID_FIELD_INDEX_NONE ||
				    (list->flags & HIDDEV_FLAG_REPORT) != 0) {

					if (copy_to_user(buffer + retval, list->buffer + list->tail, sizeof(struct hiddev_usage_ref))) {
						mutex_unlock(&list->thread_lock);
						return -EFAULT;
					}
					retval += sizeof(struct hiddev_usage_ref);
				}
			}
			list->tail = (list->tail + 1) & (HIDDEV_BUFFER_SIZE - 1);
		}

	}
	mutex_unlock(&list->thread_lock);

	return retval;
}

/*
 * "poll" file op
 * No kernel lock - fine
 */
static __poll_t hiddev_poll(struct file *file, poll_table *wait)
{
	struct hiddev_list *list = file->private_data;

	poll_wait(file, &list->hiddev->wait, wait);
	if (list->head != list->tail)
		return EPOLLIN | EPOLLRDNORM | EPOLLOUT;
	if (!list->hiddev->exist)
		return EPOLLERR | EPOLLHUP;
	return 0;
}

/*
 * "ioctl" file op
 */
/*
 * Handler for the usage ioctls that hiddev_ioctl() forwards here
 * (HIDIOCGUCODE, HIDIOCGUSAGE, HIDIOCSUSAGE, HIDIOCGUSAGES, HIDIOCSUSAGES,
 * HIDIOCGCOLLECTIONINDEX).  The user argument is copied into a kmalloc'ed
 * hiddev_usage_ref_multi that is freed on every exit path.  Returns 0, the
 * collection index for HIDIOCGCOLLECTIONINDEX, or -ENOMEM / -EFAULT /
 * -EINVAL.
 */
static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, void __user *user_arg)
{
	struct hid_device *hid = hiddev->hid;
	struct hiddev_report_info rinfo;
	struct hiddev_usage_ref_multi *uref_multi = NULL;
	struct hiddev_usage_ref *uref;
	struct hid_report *report;
	struct hid_field *field;
	int i;

	uref_multi = kmalloc(sizeof(struct hiddev_usage_ref_multi), GFP_KERNEL);
	if (!uref_multi)
		return -ENOMEM;
	uref = &uref_multi->uref;
	/* only the multi-value commands copy in the whole multi structure */
	if (cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) {
		if (copy_from_user(uref_multi, user_arg,
				   sizeof(*uref_multi)))
			goto fault;
	} else {
		if (copy_from_user(uref, user_arg, sizeof(*uref)))
			goto fault;
	}

	switch (cmd) {
	case HIDIOCGUCODE:
		rinfo.report_type = uref->report_type;
		rinfo.report_id = uref->report_id;
		if ((report = hiddev_lookup_report(hid, &rinfo)) == NULL)
			goto inval;

		if (uref->field_index >= report->maxfield)
			goto inval;
		uref->field_index = array_index_nospec(uref->field_index,
						       report->maxfield);

		field = report->field[uref->field_index];
		if (uref->usage_index >= field->maxusage)
			goto inval;
		uref->usage_index = array_index_nospec(uref->usage_index,
						       field->maxusage);

		uref->usage_code = field->usage[uref->usage_index].hid;

		if (copy_to_user(user_arg, uref, sizeof(*uref)))
			goto fault;

		goto goodreturn;

	default:
		/* of the commands reaching here, only the two getters may target an input report */
		if (cmd != HIDIOCGUSAGE &&
		    cmd != HIDIOCGUSAGES &&
		    uref->report_type == HID_REPORT_TYPE_INPUT)
			goto inval;

		if (uref->report_id == HID_REPORT_ID_UNKNOWN) {
			field = hiddev_lookup_usage(hid, uref);
			if (field == NULL)
				goto inval;
		} else {
			rinfo.report_type = uref->report_type;
			rinfo.report_id = uref->report_id;
			if ((report = hiddev_lookup_report(hid, &rinfo)) == NULL)
				goto inval;

			if (uref->field_index >= report->maxfield)
				goto inval;
			uref->field_index = array_index_nospec(uref->field_index,
							       report->maxfield);

			field = report->field[uref->field_index];

			if (cmd == HIDIOCGCOLLECTIONINDEX) {
				if (uref->usage_index >= field->maxusage)
					goto inval;
				uref->usage_index =
					array_index_nospec(uref->usage_index,
							   field->maxusage);
			} else if (uref->usage_index >= field->report_count)
				goto inval;
		}

		/* multi-value access must stay inside field->value[] */
		if (cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) {
			if (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
			    uref->usage_index + uref_multi->num_values >
			    field->report_count)
				goto inval;

			uref->usage_index =
				array_index_nospec(uref->usage_index,
						   field->report_count -
						   uref_multi->num_values);
		}

		switch (cmd) {
		case HIDIOCGUSAGE:
			if (uref->usage_index >= field->report_count)
				goto inval;
			uref->value = field->value[uref->usage_index];
			if (copy_to_user(user_arg, uref, sizeof(*uref)))
				goto fault;
			goto goodreturn;

		case HIDIOCSUSAGE:
			if (uref->usage_index >= field->report_count)
				goto inval;
			field->value[uref->usage_index] = uref->value;
			goto goodreturn;

		case HIDIOCGCOLLECTIONINDEX:
			i = field->usage[uref->usage_index].collection_index;
			kfree(uref_multi);
			return i;
		case HIDIOCGUSAGES:
			for (i = 0; i < uref_multi->num_values; i++)
				uref_multi->values[i] =
				    field->value[uref->usage_index + i];
			if (copy_to_user(user_arg, uref_multi,
					 sizeof(*uref_multi)))
				goto fault;
			goto goodreturn;
		case HIDIOCSUSAGES:
			for (i = 0; i < uref_multi->num_values; i++)
				field->value[uref->usage_index + i] =
				    uref_multi->values[i];
			goto goodreturn;
		}

goodreturn:
		kfree(uref_multi);
		return 0;
fault:
		kfree(uref_multi);
		return -EFAULT;
inval:
		kfree(uref_multi);
		return -EINVAL;
	}
}

/*
 * HIDIOCGSTRING helper: reads an int string index from the start of the user
 * buffer, fetches that string with usb_string() and copies len + 1 bytes
 * back just past the index.  Returns the length reported by usb_string(),
 * or -EFAULT / -ENOMEM / -EINVAL.
 */
static noinline int hiddev_ioctl_string(struct hiddev *hiddev, unsigned int cmd, void __user *user_arg)
{
	struct hid_device *hid = hiddev->hid;
	struct usb_device *dev = hid_to_usb_dev(hid);
	int idx, len;
	char *buf;

	if (get_user(idx, (int __user *)user_arg))
		return -EFAULT;

	if ((buf = kmalloc(HID_STRING_SIZE, GFP_KERNEL)) == NULL)
		return -ENOMEM;

	if ((len = usb_string(dev, idx, buf, HID_STRING_SIZE-1)) < 0) {
		kfree(buf);
		return -EINVAL;
	}

	if (copy_to_user(user_arg+sizeof(int), buf, len+1)) {
		kfree(buf);
		return -EFAULT;
	}

	kfree(buf);

	return len;
}

/*
 * Main ioctl dispatcher.  Runs entirely under hiddev->existancelock and
 * returns -ENODEV once exist has been cleared.  r starts as -EINVAL, so any
 * case that breaks out without assigning r reports -EINVAL.
 */
static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct hiddev_list *list = file->private_data;
	struct hiddev *hiddev = list->hiddev;
	struct hid_device *hid;
	struct hiddev_collection_info cinfo;
	struct hiddev_report_info rinfo;
	struct hiddev_field_info finfo;
	struct hiddev_devinfo dinfo;
	struct hid_report *report;
	struct hid_field *field;
	void __user *user_arg = (void __user *)arg;
	int i, r = -EINVAL;

	/* Called without BKL by compat methods so no BKL taken */

	mutex_lock(&hiddev->existancelock);
	if (!hiddev->exist) {
		r = -ENODEV;
		goto ret_unlock;
	}

	hid = hiddev->hid;

	switch (cmd) {

	case HIDIOCGVERSION:
		r = put_user(HID_VERSION, (int __user *)arg) ?
			-EFAULT : 0;
		break;

	case HIDIOCAPPLICATION:
		if (arg >= hid->maxapplication)
			break;

		/* count down arg over application-type collections */
		for (i = 0; i < hid->maxcollection; i++)
			if (hid->collection[i].type ==
			    HID_COLLECTION_APPLICATION && arg-- == 0)
				break;

		if (i < hid->maxcollection)
			r = hid->collection[i].usage;
		break;

	case HIDIOCGDEVINFO:
		{
			struct usb_device *dev = hid_to_usb_dev(hid);
			struct usbhid_device *usbhid = hid->driver_data;

			memset(&dinfo, 0, sizeof(dinfo));

			dinfo.bustype = BUS_USB;
			dinfo.busnum = dev->bus->busnum;
			dinfo.devnum = dev->devnum;
			dinfo.ifnum = usbhid->ifnum;
			dinfo.vendor = le16_to_cpu(dev->descriptor.idVendor);
			dinfo.product = le16_to_cpu(dev->descriptor.idProduct);
			dinfo.version = le16_to_cpu(dev->descriptor.bcdDevice);
			dinfo.num_applications = hid->maxapplication;

			r = copy_to_user(user_arg, &dinfo, sizeof(dinfo)) ?
				-EFAULT : 0;
			break;
		}

	case HIDIOCGFLAG:
		r = put_user(list->flags, (int __user *)arg) ?
			-EFAULT : 0;
		break;

	case HIDIOCSFLAG:
		{
			int newflags;

			if (get_user(newflags, (int __user *)arg)) {
				r = -EFAULT;
				break;
			}

			/* reject unknown bits, and REPORT without UREF */
			if ((newflags & ~HIDDEV_FLAGS) != 0 ||
			    ((newflags & HIDDEV_FLAG_REPORT) != 0 &&
			     (newflags & HIDDEV_FLAG_UREF) == 0))
				break;

			list->flags = newflags;

			r = 0;
			break;
		}

	case HIDIOCGSTRING:
		r = hiddev_ioctl_string(hiddev, cmd, user_arg);
		break;

	case HIDIOCINITREPORT:
		usbhid_init_reports(hid);
		hiddev->initialized = true;
		r = 0;
		break;

	case HIDIOCGREPORT:
		if (copy_from_user(&rinfo, user_arg, sizeof(rinfo))) {
			r = -EFAULT;
			break;
		}

		if (rinfo.report_type == HID_REPORT_TYPE_OUTPUT)
			break;

		report = hiddev_lookup_report(hid, &rinfo);
		if (report == NULL)
			break;

		hid_hw_request(hid, report, HID_REQ_GET_REPORT);
		hid_hw_wait(hid);

		r = 0;
		break;

	case HIDIOCSREPORT:
		if (copy_from_user(&rinfo, user_arg, sizeof(rinfo))) {
			r = -EFAULT;
			break;
		}

		if (rinfo.report_type == HID_REPORT_TYPE_INPUT)
			break;

		report = hiddev_lookup_report(hid, &rinfo);
		if (report == NULL)
			break;

		hid_hw_request(hid, report, HID_REQ_SET_REPORT);
		hid_hw_wait(hid);

		r = 0;
		break;

	case HIDIOCGREPORTINFO:
		if (copy_from_user(&rinfo, user_arg, sizeof(rinfo))) {
			r = -EFAULT;
			break;
		}

		report = hiddev_lookup_report(hid, &rinfo);
		if (report == NULL)
			break;

		rinfo.num_fields = report->maxfield;

		r = copy_to_user(user_arg, &rinfo, sizeof(rinfo)) ?
			-EFAULT : 0;
		break;

	case HIDIOCGFIELDINFO:
		if (copy_from_user(&finfo, user_arg, sizeof(finfo))) {
			r = -EFAULT;
			break;
		}

		rinfo.report_type = finfo.report_type;
		rinfo.report_id = finfo.report_id;

		report = hiddev_lookup_report(hid, &rinfo);
		if (report == NULL)
			break;

		if (finfo.field_index >= report->maxfield)
			break;
		finfo.field_index = array_index_nospec(finfo.field_index,
						       report->maxfield);

		field = report->field[finfo.field_index];

		/* rebuild the reply from scratch so no user-supplied bytes leak back */
		memset(&finfo, 0, sizeof(finfo));

		finfo.report_type = rinfo.report_type;
		finfo.report_id = rinfo.report_id;
		/* NOTE(review): field_index is reported as report_count - 1, not the looked-up index — confirm intended ABI */
		finfo.field_index = field->report_count - 1;
		finfo.maxusage = field->maxusage;
		finfo.flags = field->flags;
		finfo.physical = field->physical;
		finfo.logical = field->logical;
		finfo.application = field->application;
		finfo.logical_minimum = field->logical_minimum;
		finfo.logical_maximum = field->logical_maximum;
		finfo.physical_minimum = field->physical_minimum;
		finfo.physical_maximum = field->physical_maximum;
		finfo.unit_exponent = field->unit_exponent;
		finfo.unit = field->unit;

		r = copy_to_user(user_arg, &finfo, sizeof(finfo)) ?
			-EFAULT : 0;
		break;

	case HIDIOCGUCODE:
	case HIDIOCGUSAGE:
	case HIDIOCSUSAGE:
	case HIDIOCGUSAGES:
	case HIDIOCSUSAGES:
	case HIDIOCGCOLLECTIONINDEX:
		/* initialise the reports on first use if not done already */
		if (!hiddev->initialized) {
			usbhid_init_reports(hid);
			hiddev->initialized = true;
		}
		r = hiddev_ioctl_usage(hiddev, cmd, user_arg);
		break;

	case HIDIOCGCOLLECTIONINFO:
		if (copy_from_user(&cinfo, user_arg, sizeof(cinfo))) {
			r = -EFAULT;
			break;
		}

		if (cinfo.index >= hid->maxcollection)
			break;
		cinfo.index = array_index_nospec(cinfo.index,
						 hid->maxcollection);

		cinfo.type = hid->collection[cinfo.index].type;
		cinfo.usage = hid->collection[cinfo.index].usage;
		cinfo.level = hid->collection[cinfo.index].level;

		r = copy_to_user(user_arg, &cinfo, sizeof(cinfo)) ?
			-EFAULT : 0;
		break;

	default:
		/* variable-size read ioctls: the copy is capped at _IOC_SIZE(cmd) */
		if (_IOC_TYPE(cmd) != 'H' || _IOC_DIR(cmd) != _IOC_READ)
			break;

		if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGNAME(0))) {
			int len = strlen(hid->name) + 1;
			if (len > _IOC_SIZE(cmd))
				 len = _IOC_SIZE(cmd);
			r = copy_to_user(user_arg, hid->name, len) ?
				-EFAULT : len;
			break;
		}

		if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGPHYS(0))) {
			int len = strlen(hid->phys) + 1;
			if (len > _IOC_SIZE(cmd))
				len = _IOC_SIZE(cmd);
			r = copy_to_user(user_arg, hid->phys, len) ?
				-EFAULT : len;
			break;
		}
	}

ret_unlock:
	mutex_unlock(&hiddev->existancelock);
	return r;
}

/* File operations for the hiddev character device. */
static const struct file_operations hiddev_fops = {
	.owner =	THIS_MODULE,
	.read =		hiddev_read,
	.write =	hiddev_write,
	.poll =		hiddev_poll,
	.open =		hiddev_open,
	.release =	hiddev_release,
	.unlocked_ioctl =	hiddev_ioctl,
	.fasync =	hiddev_fasync,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};

/* Build the device node name as "usb/<device name>". */
static char *hiddev_devnode(const struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
}

/* USB class driver description handed to usb_register_dev(). */
static struct usb_class_driver hiddev_class = {
	.name =		"hiddev%d",
	.devnode =	hiddev_devnode,
	.fops =		&hiddev_fops,
	.minor_base =	HIDDEV_MINOR_BASE,
};

/*
 * This is where hid.c calls us to connect a hid device to the hiddev driver
 */
int hiddev_connect(struct hid_device *hid, unsigned int force)
{
	struct hiddev *hiddev;
	struct usbhid_device *usbhid = hid->driver_data;
	int retval;

	/*
	 * Unless forced, require at least one application collection whose
	 * usage is not an input application.
	 */
	if (!force) {
		unsigned int i;
		for (i = 0; i < hid->maxcollection; i++)
			if (hid->collection[i].type ==
			    HID_COLLECTION_APPLICATION &&
			    !IS_INPUT_APPLICATION(hid->collection[i].usage))
				break;

		if (i == hid->maxcollection)
			return -EINVAL;
	}

	if (!(hiddev = kzalloc(sizeof(struct hiddev), GFP_KERNEL)))
		return -ENOMEM;

	init_waitqueue_head(&hiddev->wait);
	INIT_LIST_HEAD(&hiddev->list);
	spin_lock_init(&hiddev->list_lock);
	mutex_init(&hiddev->existancelock);
	hid->hiddev = hiddev;
	hiddev->hid = hid;
	hiddev->exist = 1;
	retval = usb_register_dev(usbhid->intf, &hiddev_class);
	if (retval) {
		hid_err(hid, "Not able to get a minor for this device\n");
		hid->hiddev = NULL;
		kfree(hiddev);
		return retval;
	}

	/*
	 * If HID_QUIRK_NO_INIT_REPORTS is set, make sure we don't initialize
	 * the reports.
	 */
	hiddev->initialized = hid->quirks & HID_QUIRK_NO_INIT_REPORTS;

	hiddev->minor = usbhid->intf->minor;

	return 0;
}

/*
 * This is where hid.c calls us to disconnect a hiddev device from the
 * corresponding hid device (usually because the usb device has disconnected)
 */
static struct usb_class_driver hiddev_class;
void hiddev_disconnect(struct hid_device *hid)
{
	struct hiddev *hiddev = hid->hiddev;
	struct usbhid_device *usbhid = hid->driver_data;

	usb_deregister_dev(usbhid->intf, &hiddev_class);

	mutex_lock(&hiddev->existancelock);
	hiddev->exist = 0;

	/*
	 * With files still open, close the hardware and wake sleepers; the
	 * structure is then freed by the last hiddev_release().  Otherwise
	 * free it here.
	 */
	if (hiddev->open) {
		hid_hw_close(hiddev->hid);
		wake_up_interruptible(&hiddev->wait);
		mutex_unlock(&hiddev->existancelock);
	} else {
		mutex_unlock(&hiddev->existancelock);
		kfree(hiddev);
	}
}
|
| 4 4 12 11 12 5 5 5 5 3 4 4 4 4 5 4 4 3 7 7 7 5 6 6 7 6 6 6 14 14 7 14 14 14 14 14 7 6 8 14 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | // SPDX-License-Identifier: GPL-2.0-only /* * This file contains vfs directory ops for the 9P2000 protocol. 
* * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/uio.h> #include <linux/fscache.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include "v9fs.h" #include "v9fs_vfs.h" #include "fid.h" /** * struct p9_rdir - readdir accounting * @head: start offset of current dirread buffer * @tail: end offset of current dirread buffer * @buf: dirread buffer * * private structure for keeping track of readdir * allocated on demand */ struct p9_rdir { int head; int tail; uint8_t buf[]; }; /** * dt_type - return file type * @mistat: mistat structure * */ static inline int dt_type(struct p9_wstat *mistat) { unsigned long perm = mistat->mode; int rettype = DT_REG; if (perm & P9_DMDIR) rettype = DT_DIR; if (perm & P9_DMSYMLINK) rettype = DT_LNK; return rettype; } /** * v9fs_alloc_rdir_buf - Allocate buffer used for read and readdir * @filp: opened file structure * @buflen: Length in bytes of buffer to allocate * */ static struct p9_rdir *v9fs_alloc_rdir_buf(struct file *filp, int buflen) { struct p9_fid *fid = filp->private_data; if (!fid->rdir) fid->rdir = kzalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL); return fid->rdir; } /** * v9fs_dir_readdir - iterate through a directory * @file: opened file structure * @ctx: actor we feed the entries to * */ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) { bool over; struct p9_wstat st; int err = 0; struct p9_fid *fid; int buflen; struct p9_rdir *rdir; struct kvec kvec; p9_debug(P9_DEBUG_VFS, "name %pD\n", file); fid = file->private_data; buflen = fid->clnt->msize - P9_IOHDRSZ; rdir = v9fs_alloc_rdir_buf(file, buflen); if (!rdir) return -ENOMEM; kvec.iov_base = rdir->buf; kvec.iov_len = buflen; while (1) { if (rdir->tail == 
rdir->head) { struct iov_iter to; int n; iov_iter_kvec(&to, ITER_DEST, &kvec, 1, buflen); n = p9_client_read(file->private_data, ctx->pos, &to, &err); if (err) return err; if (n == 0) return 0; rdir->head = 0; rdir->tail = n; } while (rdir->head < rdir->tail) { err = p9stat_read(fid->clnt, rdir->buf + rdir->head, rdir->tail - rdir->head, &st); if (err <= 0) { p9_debug(P9_DEBUG_VFS, "returned %d\n", err); return -EIO; } over = !dir_emit(ctx, st.name, strlen(st.name), QID2INO(&st.qid), dt_type(&st)); p9stat_free(&st); if (over) return 0; rdir->head += err; ctx->pos += err; } } } /** * v9fs_dir_readdir_dotl - iterate through a directory * @file: opened file structure * @ctx: actor we feed the entries to * */ static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx) { int err = 0; struct p9_fid *fid; int buflen; struct p9_rdir *rdir; struct p9_dirent curdirent; p9_debug(P9_DEBUG_VFS, "name %pD\n", file); fid = file->private_data; buflen = fid->clnt->msize - P9_READDIRHDRSZ; rdir = v9fs_alloc_rdir_buf(file, buflen); if (!rdir) return -ENOMEM; while (1) { if (rdir->tail == rdir->head) { err = p9_client_readdir(fid, rdir->buf, buflen, ctx->pos); if (err <= 0) return err; rdir->head = 0; rdir->tail = err; } while (rdir->head < rdir->tail) { err = p9dirent_read(fid->clnt, rdir->buf + rdir->head, rdir->tail - rdir->head, &curdirent); if (err < 0) { p9_debug(P9_DEBUG_VFS, "returned %d\n", err); return -EIO; } if (!dir_emit(ctx, curdirent.d_name, strlen(curdirent.d_name), QID2INO(&curdirent.qid), curdirent.d_type)) return 0; ctx->pos = curdirent.d_off; rdir->head += err; } } } /** * v9fs_dir_release - close a directory or a file * @inode: inode of the directory or file * @filp: file pointer to a directory or file * */ int v9fs_dir_release(struct inode *inode, struct file *filp) { struct v9fs_inode *v9inode = V9FS_I(inode); struct p9_fid *fid; __le32 version; loff_t i_size; int retval = 0, put_err; fid = filp->private_data; p9_debug(P9_DEBUG_VFS, "inode: %p 
filp: %p fid: %d\n", inode, filp, fid ? fid->fid : -1); if (fid) { if ((S_ISREG(inode->i_mode)) && (filp->f_mode & FMODE_WRITE)) retval = filemap_fdatawrite(inode->i_mapping); spin_lock(&inode->i_lock); hlist_del(&fid->ilist); spin_unlock(&inode->i_lock); put_err = p9_fid_put(fid); retval = retval < 0 ? retval : put_err; } if ((filp->f_mode & FMODE_WRITE)) { version = cpu_to_le32(v9inode->qid.version); i_size = i_size_read(inode); fscache_unuse_cookie(v9fs_inode_cookie(v9inode), &version, &i_size); } else { fscache_unuse_cookie(v9fs_inode_cookie(v9inode), NULL, NULL); } return retval; } const struct file_operations v9fs_dir_operations = { .read = generic_read_dir, .llseek = generic_file_llseek, .iterate_shared = v9fs_dir_readdir, .open = v9fs_file_open, .release = v9fs_dir_release, }; const struct file_operations v9fs_dir_operations_dotl = { .read = generic_read_dir, .llseek = generic_file_llseek, .iterate_shared = v9fs_dir_readdir_dotl, .open = v9fs_file_open, .release = v9fs_dir_release, .fsync = v9fs_file_fsync_dotl, }; |
| 2 4 4 3 2 2 1 3 3 3 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 | // SPDX-License-Identifier: GPL-2.0-only /* * Transparent proxy support for Linux/iptables * * Copyright (c) 2006-2010 BalaBit IT Ltd. 
* Author: Balazs Scheidler, Krisztian Kovacs */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <net/checksum.h> #include <net/udp.h> #include <net/tcp.h> #include <net/inet_sock.h> #include <net/inet_hashtables.h> #include <linux/inetdevice.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <net/netfilter/ipv4/nf_defrag_ipv4.h> #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) #define XT_TPROXY_HAVE_IPV6 1 #include <net/if_inet6.h> #include <net/addrconf.h> #include <net/inet6_hashtables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif #include <net/netfilter/nf_tproxy.h> #include <linux/netfilter/xt_TPROXY.h> static unsigned int tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, u_int32_t mark_mask, u_int32_t mark_value) { const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp; struct sock *sk; hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); if (hp == NULL) return NF_DROP; /* check if there's an ongoing connection on the packet * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol, iph->saddr, iph->daddr, hp->source, hp->dest, skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED); laddr = nf_tproxy_laddr4(skb, laddr, iph->daddr); if (!lport) lport = hp->dest; /* UDP has no TCP_TIME_WAIT state, so we never enter here */ if (sk && sk->sk_state == TCP_TIME_WAIT) /* reopening a TIME_WAIT connection needs special handling */ sk = nf_tproxy_handle_time_wait4(net, skb, laddr, lport, sk); else if (!sk) /* no, there's no established connection, check if * there's a listener on the redirected addr/port */ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol, iph->saddr, laddr, hp->source, lport, skb->dev, NF_TPROXY_LOOKUP_LISTENER); /* NOTE: 
assign_sock consumes our sk reference */ if (sk && nf_tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~mark_mask) ^ mark_value; nf_tproxy_assign_sock(skb, sk); return NF_ACCEPT; } return NF_DROP; } static unsigned int tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi = par->targinfo; return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); } static unsigned int tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); } #ifdef XT_TPROXY_HAVE_IPV6 static unsigned int tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; struct udphdr _hdr, *hp; struct sock *sk; const struct in6_addr *laddr; __be16 lport; int thoff = 0; int tproto; tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); if (tproto < 0) return NF_DROP; hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); if (!hp) return NF_DROP; /* check if there's an ongoing connection on the packet * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED); laddr = nf_tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr); lport = tgi->lport ? 
tgi->lport : hp->dest; /* UDP has no TCP_TIME_WAIT state, so we never enter here */ if (sk && sk->sk_state == TCP_TIME_WAIT) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; /* reopening a TIME_WAIT connection needs special handling */ sk = nf_tproxy_handle_time_wait6(skb, tproto, thoff, xt_net(par), &tgi->laddr.in6, tgi->lport, sk); } else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, tproto, &iph->saddr, laddr, hp->source, lport, xt_in(par), NF_TPROXY_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ if (sk && nf_tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; nf_tproxy_assign_sock(skb, sk); return NF_ACCEPT; } return NF_DROP; } static int tproxy_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_ip6 *i = par->entryinfo; int err; err = nf_defrag_ipv6_enable(par->net); if (err) return err; if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && !(i->invflags & IP6T_INV_PROTO)) return 0; pr_info_ratelimited("Can be used only with -p tcp or -p udp\n"); return -EINVAL; } static void tproxy_tg6_destroy(const struct xt_tgdtor_param *par) { nf_defrag_ipv6_disable(par->net); } #endif static int tproxy_tg4_check(const struct xt_tgchk_param *par) { const struct ipt_ip *i = par->entryinfo; int err; err = nf_defrag_ipv4_enable(par->net); if (err) return err; if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && !(i->invflags & IPT_INV_PROTO)) return 0; pr_info_ratelimited("Can be used only with -p tcp or -p udp\n"); return -EINVAL; } static void tproxy_tg4_destroy(const struct xt_tgdtor_param *par) { nf_defrag_ipv4_disable(par->net); } static struct xt_target tproxy_tg_reg[] __read_mostly = { { .name = "TPROXY", .family = NFPROTO_IPV4, .table = "mangle", .target = 
tproxy_tg4_v0, .revision = 0, .targetsize = sizeof(struct xt_tproxy_target_info), .checkentry = tproxy_tg4_check, .destroy = tproxy_tg4_destroy, .hooks = 1 << NF_INET_PRE_ROUTING, .me = THIS_MODULE, }, { .name = "TPROXY", .family = NFPROTO_IPV4, .table = "mangle", .target = tproxy_tg4_v1, .revision = 1, .targetsize = sizeof(struct xt_tproxy_target_info_v1), .checkentry = tproxy_tg4_check, .destroy = tproxy_tg4_destroy, .hooks = 1 << NF_INET_PRE_ROUTING, .me = THIS_MODULE, }, #ifdef XT_TPROXY_HAVE_IPV6 { .name = "TPROXY", .family = NFPROTO_IPV6, .table = "mangle", .target = tproxy_tg6_v1, .revision = 1, .targetsize = sizeof(struct xt_tproxy_target_info_v1), .checkentry = tproxy_tg6_check, .destroy = tproxy_tg6_destroy, .hooks = 1 << NF_INET_PRE_ROUTING, .me = THIS_MODULE, }, #endif }; static int __init tproxy_tg_init(void) { return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg)); } static void __exit tproxy_tg_exit(void) { xt_unregister_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg)); } module_init(tproxy_tg_init); module_exit(tproxy_tg_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs"); MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module."); MODULE_ALIAS("ipt_TPROXY"); MODULE_ALIAS("ip6t_TPROXY"); |
| 16 67 1 2 2 32 2 2 2 1 8 8 4 4 4 4 3 2 1 1 1 4 4 4 4 9 4 5 1 1 6 6 6 4 30 16 16 1 12 23 15 14 34 25 14 25 7 1 7 16 7 16 39 16 20 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 
493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 
993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 
1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 
1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 
2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 
2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 
2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 
3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 
3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 
4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 
4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 
4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 
5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 
5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * IEEE 802.11 defines * * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen * <jkmaline@cc.hut.fi> * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi> * Copyright (c) 2005, Devicescape Software, Inc. 
* Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net> * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH * Copyright (c) 2018 - 2025 Intel Corporation */ #ifndef LINUX_IEEE80211_H #define LINUX_IEEE80211_H #include <linux/types.h> #include <linux/if_ether.h> #include <linux/etherdevice.h> #include <linux/bitfield.h> #include <asm/byteorder.h> #include <linux/unaligned.h> /* * DS bit usage * * TA = transmitter address * RA = receiver address * DA = destination address * SA = source address * * ToDS FromDS A1(RA) A2(TA) A3 A4 Use * ----------------------------------------------------------------- * 0 0 DA SA BSSID - IBSS/DLS * 0 1 DA BSSID SA - AP -> STA * 1 0 BSSID SA DA - AP <- STA * 1 1 RA TA DA SA unspecified (WDS) */ #define FCS_LEN 4 #define IEEE80211_FCTL_VERS 0x0003 #define IEEE80211_FCTL_FTYPE 0x000c #define IEEE80211_FCTL_STYPE 0x00f0 #define IEEE80211_FCTL_TODS 0x0100 #define IEEE80211_FCTL_FROMDS 0x0200 #define IEEE80211_FCTL_MOREFRAGS 0x0400 #define IEEE80211_FCTL_RETRY 0x0800 #define IEEE80211_FCTL_PM 0x1000 #define IEEE80211_FCTL_MOREDATA 0x2000 #define IEEE80211_FCTL_PROTECTED 0x4000 #define IEEE80211_FCTL_ORDER 0x8000 #define IEEE80211_FCTL_CTL_EXT 0x0f00 #define IEEE80211_SCTL_FRAG 0x000F #define IEEE80211_SCTL_SEQ 0xFFF0 #define IEEE80211_FTYPE_MGMT 0x0000 #define IEEE80211_FTYPE_CTL 0x0004 #define IEEE80211_FTYPE_DATA 0x0008 #define IEEE80211_FTYPE_EXT 0x000c /* management */ #define IEEE80211_STYPE_ASSOC_REQ 0x0000 #define IEEE80211_STYPE_ASSOC_RESP 0x0010 #define IEEE80211_STYPE_REASSOC_REQ 0x0020 #define IEEE80211_STYPE_REASSOC_RESP 0x0030 #define IEEE80211_STYPE_PROBE_REQ 0x0040 #define IEEE80211_STYPE_PROBE_RESP 0x0050 #define IEEE80211_STYPE_BEACON 0x0080 #define IEEE80211_STYPE_ATIM 0x0090 #define IEEE80211_STYPE_DISASSOC 0x00A0 #define IEEE80211_STYPE_AUTH 0x00B0 #define IEEE80211_STYPE_DEAUTH 0x00C0 #define IEEE80211_STYPE_ACTION 0x00D0 /* control */ #define 
IEEE80211_STYPE_TRIGGER 0x0020 #define IEEE80211_STYPE_CTL_EXT 0x0060 #define IEEE80211_STYPE_BACK_REQ 0x0080 #define IEEE80211_STYPE_BACK 0x0090 #define IEEE80211_STYPE_PSPOLL 0x00A0 #define IEEE80211_STYPE_RTS 0x00B0 #define IEEE80211_STYPE_CTS 0x00C0 #define IEEE80211_STYPE_ACK 0x00D0 #define IEEE80211_STYPE_CFEND 0x00E0 #define IEEE80211_STYPE_CFENDACK 0x00F0 /* data */ #define IEEE80211_STYPE_DATA 0x0000 #define IEEE80211_STYPE_DATA_CFACK 0x0010 #define IEEE80211_STYPE_DATA_CFPOLL 0x0020 #define IEEE80211_STYPE_DATA_CFACKPOLL 0x0030 #define IEEE80211_STYPE_NULLFUNC 0x0040 #define IEEE80211_STYPE_CFACK 0x0050 #define IEEE80211_STYPE_CFPOLL 0x0060 #define IEEE80211_STYPE_CFACKPOLL 0x0070 #define IEEE80211_STYPE_QOS_DATA 0x0080 #define IEEE80211_STYPE_QOS_DATA_CFACK 0x0090 #define IEEE80211_STYPE_QOS_DATA_CFPOLL 0x00A0 #define IEEE80211_STYPE_QOS_DATA_CFACKPOLL 0x00B0 #define IEEE80211_STYPE_QOS_NULLFUNC 0x00C0 #define IEEE80211_STYPE_QOS_CFACK 0x00D0 #define IEEE80211_STYPE_QOS_CFPOLL 0x00E0 #define IEEE80211_STYPE_QOS_CFACKPOLL 0x00F0 /* extension, added by 802.11ad */ #define IEEE80211_STYPE_DMG_BEACON 0x0000 #define IEEE80211_STYPE_S1G_BEACON 0x0010 /* bits unique to S1G beacon */ #define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 #define IEEE80211_S1G_BCN_CSSID 0x200 #define IEEE80211_S1G_BCN_ANO 0x400 /* see 802.11ah-2016 9.9 NDP CMAC frames */ #define IEEE80211_S1G_1MHZ_NDP_BITS 25 #define IEEE80211_S1G_1MHZ_NDP_BYTES 4 #define IEEE80211_S1G_2MHZ_NDP_BITS 37 #define IEEE80211_S1G_2MHZ_NDP_BYTES 5 #define IEEE80211_NDP_FTYPE_CTS 0 #define IEEE80211_NDP_FTYPE_CF_END 0 #define IEEE80211_NDP_FTYPE_PS_POLL 1 #define IEEE80211_NDP_FTYPE_ACK 2 #define IEEE80211_NDP_FTYPE_PS_POLL_ACK 3 #define IEEE80211_NDP_FTYPE_BA 4 #define IEEE80211_NDP_FTYPE_BF_REPORT_POLL 5 #define IEEE80211_NDP_FTYPE_PAGING 6 #define IEEE80211_NDP_FTYPE_PREQ 7 #define SM64(f, v) ((((u64)v) << f##_S) & f) /* NDP CMAC frame fields */ #define IEEE80211_NDP_FTYPE 0x0000000000000007 #define 
IEEE80211_NDP_FTYPE_S 0x0000000000000000 /* 1M Probe Request 11ah 9.9.3.1.1 */ #define IEEE80211_NDP_1M_PREQ_ANO 0x0000000000000008 #define IEEE80211_NDP_1M_PREQ_ANO_S 3 #define IEEE80211_NDP_1M_PREQ_CSSID 0x00000000000FFFF0 #define IEEE80211_NDP_1M_PREQ_CSSID_S 4 #define IEEE80211_NDP_1M_PREQ_RTYPE 0x0000000000100000 #define IEEE80211_NDP_1M_PREQ_RTYPE_S 20 #define IEEE80211_NDP_1M_PREQ_RSV 0x0000000001E00000 #define IEEE80211_NDP_1M_PREQ_RSV 0x0000000001E00000 /* 2M Probe Request 11ah 9.9.3.1.2 */ #define IEEE80211_NDP_2M_PREQ_ANO 0x0000000000000008 #define IEEE80211_NDP_2M_PREQ_ANO_S 3 #define IEEE80211_NDP_2M_PREQ_CSSID 0x0000000FFFFFFFF0 #define IEEE80211_NDP_2M_PREQ_CSSID_S 4 #define IEEE80211_NDP_2M_PREQ_RTYPE 0x0000001000000000 #define IEEE80211_NDP_2M_PREQ_RTYPE_S 36 #define IEEE80211_ANO_NETTYPE_WILD 15 /* control extension - for IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTL_EXT */ #define IEEE80211_CTL_EXT_POLL 0x2000 #define IEEE80211_CTL_EXT_SPR 0x3000 #define IEEE80211_CTL_EXT_GRANT 0x4000 #define IEEE80211_CTL_EXT_DMG_CTS 0x5000 #define IEEE80211_CTL_EXT_DMG_DTS 0x6000 #define IEEE80211_CTL_EXT_SSW 0x8000 #define IEEE80211_CTL_EXT_SSW_FBACK 0x9000 #define IEEE80211_CTL_EXT_SSW_ACK 0xa000 #define IEEE80211_SN_MASK ((IEEE80211_SCTL_SEQ) >> 4) #define IEEE80211_MAX_SN IEEE80211_SN_MASK #define IEEE80211_SN_MODULO (IEEE80211_MAX_SN + 1) /* PV1 Layout IEEE 802.11-2020 9.8.3.1 */ #define IEEE80211_PV1_FCTL_VERS 0x0003 #define IEEE80211_PV1_FCTL_FTYPE 0x001c #define IEEE80211_PV1_FCTL_STYPE 0x00e0 #define IEEE80211_PV1_FCTL_FROMDS 0x0100 #define IEEE80211_PV1_FCTL_MOREFRAGS 0x0200 #define IEEE80211_PV1_FCTL_PM 0x0400 #define IEEE80211_PV1_FCTL_MOREDATA 0x0800 #define IEEE80211_PV1_FCTL_PROTECTED 0x1000 #define IEEE80211_PV1_FCTL_END_SP 0x2000 #define IEEE80211_PV1_FCTL_RELAYED 0x4000 #define IEEE80211_PV1_FCTL_ACK_POLICY 0x8000 #define IEEE80211_PV1_FCTL_CTL_EXT 0x0f00 static inline bool ieee80211_sn_less(u16 sn1, u16 sn2) { return ((sn1 - sn2) & 
IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1); } static inline bool ieee80211_sn_less_eq(u16 sn1, u16 sn2) { return ((sn2 - sn1) & IEEE80211_SN_MASK) <= (IEEE80211_SN_MODULO >> 1); } static inline u16 ieee80211_sn_add(u16 sn1, u16 sn2) { return (sn1 + sn2) & IEEE80211_SN_MASK; } static inline u16 ieee80211_sn_inc(u16 sn) { return ieee80211_sn_add(sn, 1); } static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) { return (sn1 - sn2) & IEEE80211_SN_MASK; } #define IEEE80211_SEQ_TO_SN(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) #define IEEE80211_SN_TO_SEQ(ssn) (((ssn) << 4) & IEEE80211_SCTL_SEQ) /* miscellaneous IEEE 802.11 constants */ #define IEEE80211_MAX_FRAG_THRESHOLD 2352 #define IEEE80211_MAX_RTS_THRESHOLD 2353 #define IEEE80211_MAX_AID 2007 #define IEEE80211_MAX_AID_S1G 8191 #define IEEE80211_MAX_TIM_LEN 251 #define IEEE80211_MAX_MESH_PEERINGS 63 /* S1G encoding types */ #define IEEE80211_S1G_TIM_ENC_MODE_BLOCK 0 #define IEEE80211_S1G_TIM_ENC_MODE_SINGLE 1 #define IEEE80211_S1G_TIM_ENC_MODE_OLB 2 /* Maximum size for the MA-UNITDATA primitive, 802.11 standard section 6.2.1.1.2. 802.11e clarifies the figure in section 7.1.2. The frame body is up to 2304 octets long (maximum MSDU size) plus any crypt overhead. 
*/ #define IEEE80211_MAX_DATA_LEN 2304 /* 802.11ad extends maximum MSDU size for DMG (freq > 40Ghz) networks * to 7920 bytes, see 8.2.3 General frame format */ #define IEEE80211_MAX_DATA_LEN_DMG 7920 /* 30 byte 4 addr hdr, 2 byte QoS, 2304 byte MSDU, 12 byte crypt, 4 byte FCS */ #define IEEE80211_MAX_FRAME_LEN 2352 /* Maximal size of an A-MSDU that can be transported in a HT BA session */ #define IEEE80211_MAX_MPDU_LEN_HT_BA 4095 /* Maximal size of an A-MSDU */ #define IEEE80211_MAX_MPDU_LEN_HT_3839 3839 #define IEEE80211_MAX_MPDU_LEN_HT_7935 7935 #define IEEE80211_MAX_MPDU_LEN_VHT_3895 3895 #define IEEE80211_MAX_MPDU_LEN_VHT_7991 7991 #define IEEE80211_MAX_MPDU_LEN_VHT_11454 11454 #define IEEE80211_MAX_SSID_LEN 32 #define IEEE80211_MAX_MESH_ID_LEN 32 #define IEEE80211_FIRST_TSPEC_TSID 8 #define IEEE80211_NUM_TIDS 16 /* number of user priorities 802.11 uses */ #define IEEE80211_NUM_UPS 8 /* number of ACs */ #define IEEE80211_NUM_ACS 4 #define IEEE80211_QOS_CTL_LEN 2 /* 1d tag mask */ #define IEEE80211_QOS_CTL_TAG1D_MASK 0x0007 /* TID mask */ #define IEEE80211_QOS_CTL_TID_MASK 0x000f /* EOSP */ #define IEEE80211_QOS_CTL_EOSP 0x0010 /* ACK policy */ #define IEEE80211_QOS_CTL_ACK_POLICY_NORMAL 0x0000 #define IEEE80211_QOS_CTL_ACK_POLICY_NOACK 0x0020 #define IEEE80211_QOS_CTL_ACK_POLICY_NO_EXPL 0x0040 #define IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK 0x0060 #define IEEE80211_QOS_CTL_ACK_POLICY_MASK 0x0060 /* A-MSDU 802.11n */ #define IEEE80211_QOS_CTL_A_MSDU_PRESENT 0x0080 /* Mesh Control 802.11s */ #define IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT 0x0100 /* Mesh Power Save Level */ #define IEEE80211_QOS_CTL_MESH_PS_LEVEL 0x0200 /* Mesh Receiver Service Period Initiated */ #define IEEE80211_QOS_CTL_RSPI 0x0400 /* U-APSD queue for WMM IEs sent by AP */ #define IEEE80211_WMM_IE_AP_QOSINFO_UAPSD (1<<7) #define IEEE80211_WMM_IE_AP_QOSINFO_PARAM_SET_CNT_MASK 0x0f /* U-APSD queues for WMM IEs sent by STA */ #define IEEE80211_WMM_IE_STA_QOSINFO_AC_VO (1<<0) #define 
IEEE80211_WMM_IE_STA_QOSINFO_AC_VI	(1<<1)
#define IEEE80211_WMM_IE_STA_QOSINFO_AC_BK	(1<<2)
#define IEEE80211_WMM_IE_STA_QOSINFO_AC_BE	(1<<3)
#define IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK	0x0f

/* U-APSD max SP length for WMM IEs sent by STA */
#define IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL	0x00
#define IEEE80211_WMM_IE_STA_QOSINFO_SP_2	0x01
#define IEEE80211_WMM_IE_STA_QOSINFO_SP_4	0x02
#define IEEE80211_WMM_IE_STA_QOSINFO_SP_6	0x03
#define IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK	0x03
#define IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT	5

#define IEEE80211_HT_CTL_LEN		4

/* trigger type within common_info of trigger frame */
#define IEEE80211_TRIGGER_TYPE_MASK		0xf
#define IEEE80211_TRIGGER_TYPE_BASIC		0x0
#define IEEE80211_TRIGGER_TYPE_BFRP		0x1
#define IEEE80211_TRIGGER_TYPE_MU_BAR		0x2
#define IEEE80211_TRIGGER_TYPE_MU_RTS		0x3
#define IEEE80211_TRIGGER_TYPE_BSRP		0x4
#define IEEE80211_TRIGGER_TYPE_GCR_MU_BAR	0x5
#define IEEE80211_TRIGGER_TYPE_BQRP		0x6
#define IEEE80211_TRIGGER_TYPE_NFRP		0x7

/* UL-bandwidth within common_info of trigger frame */
#define IEEE80211_TRIGGER_ULBW_MASK		0xc0000
#define IEEE80211_TRIGGER_ULBW_20MHZ		0x0
#define IEEE80211_TRIGGER_ULBW_40MHZ		0x1
#define IEEE80211_TRIGGER_ULBW_80MHZ		0x2
#define IEEE80211_TRIGGER_ULBW_160_80P80MHZ	0x3

/*
 * 802.11 frame header with all four address fields. addr1..addr3 are
 * additionally grouped under the name "addrs" via struct_group().
 */
struct ieee80211_hdr {
	__le16 frame_control;
	__le16 duration_id;
	struct_group(addrs,
		u8 addr1[ETH_ALEN];
		u8 addr2[ETH_ALEN];
		u8 addr3[ETH_ALEN];
	);
	__le16 seq_ctrl;
	u8 addr4[ETH_ALEN];
} __packed __aligned(2);

/* Same leading layout as struct ieee80211_hdr, but without addr4. */
struct ieee80211_hdr_3addr {
	__le16 frame_control;
	__le16 duration_id;
	u8 addr1[ETH_ALEN];
	u8 addr2[ETH_ALEN];
	u8 addr3[ETH_ALEN];
	__le16 seq_ctrl;
} __packed __aligned(2);

/* Three-address header followed by a 16-bit qos_ctrl field. */
struct ieee80211_qos_hdr {
	__le16 frame_control;
	__le16 duration_id;
	u8 addr1[ETH_ALEN];
	u8 addr2[ETH_ALEN];
	u8 addr3[ETH_ALEN];
	__le16 seq_ctrl;
	__le16 qos_ctrl;
} __packed __aligned(2);

/* Four-address header followed by a 16-bit qos_ctrl field. */
struct ieee80211_qos_hdr_4addr {
	__le16 frame_control;
	__le16 duration_id;
	u8 addr1[ETH_ALEN];
	u8 addr2[ETH_ALEN];
	u8 addr3[ETH_ALEN];
	__le16 seq_ctrl;
	u8 addr4[ETH_ALEN];
	__le16 qos_ctrl;
} __packed __aligned(2);

/*
 * Trigger frame layout: frame control, duration, RA, TA, a 64-bit
 * common_info word (see the IEEE80211_TRIGGER_* masks for the trigger type
 * and UL-bandwidth subfields) and trailing variable-length data.
 */
struct ieee80211_trigger {
	__le16 frame_control;
	__le16 duration;
	u8 ra[ETH_ALEN];
	u8 ta[ETH_ALEN];
	__le64 common_info;
	u8 variable[];
} __packed __aligned(2);

/**
 * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set
 * @fc: frame control bytes in little-endian byteorder
 * Return: whether or not the frame has to-DS set
 */
static inline bool ieee80211_has_tods(__le16 fc)
{
	return (fc & cpu_to_le16(IEEE80211_FCTL_TODS)) != 0;
}

/**
 * ieee80211_has_fromds - check if IEEE80211_FCTL_FROMDS is set
 * @fc: frame control bytes in little-endian byteorder
 * Return: whether or not the frame has from-DS set
 */
static inline bool ieee80211_has_fromds(__le16 fc)
{
	return (fc & cpu_to_le16(IEEE80211_FCTL_FROMDS)) != 0;
}

/**
 * ieee80211_has_a4 - check if IEEE80211_FCTL_TODS and IEEE80211_FCTL_FROMDS are set
 * @fc: frame control bytes in little-endian byteorder
 * Return: whether or not it's a 4-address frame (from-DS and to-DS set)
 */
static inline bool ieee80211_has_a4(__le16 fc)
{
	__le16 tmp = cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS);
	return (fc & tmp) == tmp;
}

/**
 * ieee80211_has_morefrags - check if IEEE80211_FCTL_MOREFRAGS is set
 * @fc: frame control bytes in little-endian byteorder
 * Return: whether or not the frame has more fragments (more frags bit set)
 */
static inline bool ieee80211_has_morefrags(__le16 fc)
{
	return (fc & cpu_to_le16(IEEE80211_FCTL_MOREFRAGS)) != 0;
}

/**
 * ieee80211_has_retry - check if IEEE80211_FCTL_RETRY is set
 * @fc: frame control bytes in little-endian byteorder
 * Return: whether or not the retry flag is set
 */
static inline bool ieee80211_has_retry(__le16 fc)
{
	return (fc & cpu_to_le16(IEEE80211_FCTL_RETRY)) != 0;
}

/**
 * ieee80211_has_pm - check if IEEE80211_FCTL_PM is set
 * @fc: frame control bytes in little-endian byteorder
 * Return: whether or not the power management flag is set
 */
static inline bool ieee80211_has_pm(__le16 fc)
{
	return (fc &
cpu_to_le16(IEEE80211_FCTL_PM)) != 0; } /** * ieee80211_has_moredata - check if IEEE80211_FCTL_MOREDATA is set * @fc: frame control bytes in little-endian byteorder * Return: whether or not the more data flag is set */ static inline bool ieee80211_has_moredata(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_MOREDATA)) != 0; } /** * ieee80211_has_protected - check if IEEE80211_FCTL_PROTECTED is set * @fc: frame control bytes in little-endian byteorder * Return: whether or not the protected flag is set */ static inline bool ieee80211_has_protected(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_PROTECTED)) != 0; } /** * ieee80211_has_order - check if IEEE80211_FCTL_ORDER is set * @fc: frame control bytes in little-endian byteorder * Return: whether or not the order flag is set */ static inline bool ieee80211_has_order(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_ORDER)) != 0; } /** * ieee80211_is_mgmt - check if type is IEEE80211_FTYPE_MGMT * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame type is management */ static inline bool ieee80211_is_mgmt(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT); } /** * ieee80211_is_ctl - check if type is IEEE80211_FTYPE_CTL * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame type is control */ static inline bool ieee80211_is_ctl(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL); } /** * ieee80211_is_data - check if type is IEEE80211_FTYPE_DATA * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a data frame */ static inline bool ieee80211_is_data(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA); } /** * ieee80211_is_ext - check if type is IEEE80211_FTYPE_EXT * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame type is 
extended
 */
static inline bool ieee80211_is_ext(__le16 fc)
{
	return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) ==
	       cpu_to_le16(IEEE80211_FTYPE_EXT);
}


/**
 * ieee80211_is_data_qos - check if type is IEEE80211_FTYPE_DATA and IEEE80211_STYPE_QOS_DATA is set
 * @fc: frame control bytes in little-endian byteorder
 * Return: whether or not the frame is a QoS data frame
 */
static inline bool ieee80211_is_data_qos(__le16 fc)
{
	/*
	 * mask with QOS_DATA rather than IEEE80211_FCTL_STYPE as we just need
	 * to check the one bit
	 */
	return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_STYPE_QOS_DATA)) ==
	       cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_DATA);
}

/**
 * ieee80211_is_data_present - check if type is IEEE80211_FTYPE_DATA and has data
 * @fc: frame control bytes in little-endian byteorder
 * Return: whether or not the frame is a data frame (QoS or not; the QoS
 *	subtype bit is not part of the mask) that has data
 *	(i.e. is not null data)
 */
static inline bool ieee80211_is_data_present(__le16 fc)
{
	/*
	 * mask with 0x40 and test that that bit is clear to only return true
	 * for the data-containing subtypes.
*/ return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | 0x40)) == cpu_to_le16(IEEE80211_FTYPE_DATA); } /** * ieee80211_is_assoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_REQ * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an association request */ static inline bool ieee80211_is_assoc_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ASSOC_REQ); } /** * ieee80211_is_assoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_RESP * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an association response */ static inline bool ieee80211_is_assoc_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ASSOC_RESP); } /** * ieee80211_is_reassoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_REQ * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a reassociation request */ static inline bool ieee80211_is_reassoc_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_REASSOC_REQ); } /** * ieee80211_is_reassoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_RESP * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a reassociation response */ static inline bool ieee80211_is_reassoc_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_REASSOC_RESP); } /** * ieee80211_is_probe_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_REQ * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a probe request */ static inline bool ieee80211_is_probe_req(__le16 fc) { return 
(fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ); } /** * ieee80211_is_probe_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_RESP * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a probe response */ static inline bool ieee80211_is_probe_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_RESP); } /** * ieee80211_is_beacon - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_BEACON * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a (regular, not S1G) beacon */ static inline bool ieee80211_is_beacon(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); } /** * ieee80211_is_s1g_beacon - check if IEEE80211_FTYPE_EXT && * IEEE80211_STYPE_S1G_BEACON * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an S1G beacon */ static inline bool ieee80211_is_s1g_beacon(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON); } /** * ieee80211_s1g_has_next_tbtt - check if IEEE80211_S1G_BCN_NEXT_TBTT * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame contains the variable-length * next TBTT field */ static inline bool ieee80211_s1g_has_next_tbtt(__le16 fc) { return ieee80211_is_s1g_beacon(fc) && (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); } /** * ieee80211_s1g_has_ano - check if IEEE80211_S1G_BCN_ANO * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame contains the variable-length * ANO field */ static inline bool ieee80211_s1g_has_ano(__le16 fc) { return ieee80211_is_s1g_beacon(fc) && (fc & 
cpu_to_le16(IEEE80211_S1G_BCN_ANO)); } /** * ieee80211_s1g_has_cssid - check if IEEE80211_S1G_BCN_CSSID * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame contains the variable-length * compressed SSID field */ static inline bool ieee80211_s1g_has_cssid(__le16 fc) { return ieee80211_is_s1g_beacon(fc) && (fc & cpu_to_le16(IEEE80211_S1G_BCN_CSSID)); } /** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an ATIM frame */ static inline bool ieee80211_is_atim(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ATIM); } /** * ieee80211_is_disassoc - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DISASSOC * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a disassociation frame */ static inline bool ieee80211_is_disassoc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_DISASSOC); } /** * ieee80211_is_auth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_AUTH * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an authentication frame */ static inline bool ieee80211_is_auth(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH); } /** * ieee80211_is_deauth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DEAUTH * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a deauthentication frame */ static inline bool ieee80211_is_deauth(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_DEAUTH); } /** * ieee80211_is_action - check if IEEE80211_FTYPE_MGMT && 
IEEE80211_STYPE_ACTION * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an action frame */ static inline bool ieee80211_is_action(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); } /** * ieee80211_is_back_req - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK_REQ * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a block-ACK request frame */ static inline bool ieee80211_is_back_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK_REQ); } /** * ieee80211_is_back - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a block-ACK frame */ static inline bool ieee80211_is_back(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK); } /** * ieee80211_is_pspoll - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_PSPOLL * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a PS-poll frame */ static inline bool ieee80211_is_pspoll(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL); } /** * ieee80211_is_rts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_RTS * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an RTS frame */ static inline bool ieee80211_is_rts(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS); } /** * ieee80211_is_cts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CTS * @fc: frame control bytes in little-endian byteorder * Return: whether or not 
the frame is a CTS frame */ static inline bool ieee80211_is_cts(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS); } /** * ieee80211_is_ack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_ACK * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an ACK frame */ static inline bool ieee80211_is_ack(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_ACK); } /** * ieee80211_is_cfend - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFEND * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a CF-end frame */ static inline bool ieee80211_is_cfend(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CFEND); } /** * ieee80211_is_cfendack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFENDACK * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a CF-end-ack frame */ static inline bool ieee80211_is_cfendack(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CFENDACK); } /** * ieee80211_is_nullfunc - check if frame is a regular (non-QoS) nullfunc frame * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a nullfunc frame */ static inline bool ieee80211_is_nullfunc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC); } /** * ieee80211_is_qos_nullfunc - check if frame is a QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a QoS nullfunc frame */ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) { return (fc & 
cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) ==
	       cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC);
}

/**
 * ieee80211_is_trigger - check if frame is trigger frame
 * @fc: frame control field in little-endian byteorder
 * Return: whether or not the frame is a trigger frame
 */
static inline bool ieee80211_is_trigger(__le16 fc)
{
	return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) ==
	       cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_TRIGGER);
}

/**
 * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame
 * @fc: frame control bytes in little-endian byteorder
 * Return: whether or not the frame is a nullfunc or QoS nullfunc frame
 */
static inline bool ieee80211_is_any_nullfunc(__le16 fc)
{
	return (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc));
}

/**
 * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set
 * @seq_ctrl: frame sequence control bytes in little-endian byteorder
 * Return: whether or not the frame is the first fragment (also true if
 *	it's not fragmented at all)
 */
static inline bool ieee80211_is_first_frag(__le16 seq_ctrl)
{
	return (seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG)) == 0;
}

/**
 * ieee80211_is_frag - check if a frame is a fragment
 * @hdr: 802.11 header of the frame
 * Return: whether or not the frame is a fragment
 */
static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr)
{
	/* a fragment either announces more fragments or has a non-zero fragment number */
	return ieee80211_has_morefrags(hdr->frame_control) ||
	       hdr->seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG);
}

/**
 * ieee80211_get_sn - get the sequence number field of a frame
 * @hdr: 802.11 header of the frame
 * Return: the IEEE80211_SCTL_SEQ bits of @hdr->seq_ctrl, extracted with
 *	le16_get_bits()
 */
static inline u16 ieee80211_get_sn(struct ieee80211_hdr *hdr)
{
	return le16_get_bits(hdr->seq_ctrl, IEEE80211_SCTL_SEQ);
}

/*
 * NOTE(review): looks like the 802.11s mesh control header (flags, TTL,
 * sequence number and two extension addresses) - confirm against users.
 */
struct ieee80211s_hdr {
	u8 flags;
	u8 ttl;
	__le32 seqnum;
	u8 eaddr1[ETH_ALEN];
	u8 eaddr2[ETH_ALEN];
} __packed __aligned(2);

/* Mesh flags */
#define MESH_FLAGS_AE_A4 	0x1
#define MESH_FLAGS_AE_A5_A6	0x2
#define MESH_FLAGS_AE		0x3
#define MESH_FLAGS_PS_DEEP	0x4

/**
 * enum ieee80211_preq_flags - mesh PREQ element flags
 *
 * @IEEE80211_PREQ_PROACTIVE_PREP_FLAG:
proactive PREP subfield */ enum ieee80211_preq_flags { IEEE80211_PREQ_PROACTIVE_PREP_FLAG = 1<<2, }; /** * enum ieee80211_preq_target_flags - mesh PREQ element per target flags * * @IEEE80211_PREQ_TO_FLAG: target only subfield * @IEEE80211_PREQ_USN_FLAG: unknown target HWMP sequence number subfield */ enum ieee80211_preq_target_flags { IEEE80211_PREQ_TO_FLAG = 1<<0, IEEE80211_PREQ_USN_FLAG = 1<<2, }; /** * struct ieee80211_quiet_ie - Quiet element * @count: Quiet Count * @period: Quiet Period * @duration: Quiet Duration * @offset: Quiet Offset * * This structure represents the payload of the "Quiet element" as * described in IEEE Std 802.11-2020 section 9.4.2.22. */ struct ieee80211_quiet_ie { u8 count; u8 period; __le16 duration; __le16 offset; } __packed; /** * struct ieee80211_msrment_ie - Measurement element * @token: Measurement Token * @mode: Measurement Report Mode * @type: Measurement Type * @request: Measurement Request or Measurement Report * * This structure represents the payload of both the "Measurement * Request element" and the "Measurement Report element" as described * in IEEE Std 802.11-2020 sections 9.4.2.20 and 9.4.2.21. */ struct ieee80211_msrment_ie { u8 token; u8 mode; u8 type; u8 request[]; } __packed; /** * struct ieee80211_channel_sw_ie - Channel Switch Announcement element * @mode: Channel Switch Mode * @new_ch_num: New Channel Number * @count: Channel Switch Count * * This structure represents the payload of the "Channel Switch * Announcement element" as described in IEEE Std 802.11-2020 section * 9.4.2.18. 
*/
struct ieee80211_channel_sw_ie {
	u8 mode;
	u8 new_ch_num;
	u8 count;
} __packed;

/**
 * struct ieee80211_ext_chansw_ie - Extended Channel Switch Announcement element
 * @mode: Channel Switch Mode
 * @new_operating_class: New Operating Class
 * @new_ch_num: New Channel Number
 * @count: Channel Switch Count
 *
 * This structure represents the "Extended Channel Switch Announcement
 * element" as described in IEEE Std 802.11-2020 section 9.4.2.52.
 */
struct ieee80211_ext_chansw_ie {
	u8 mode;
	u8 new_operating_class;
	u8 new_ch_num;
	u8 count;
} __packed;

/**
 * struct ieee80211_sec_chan_offs_ie - secondary channel offset IE
 * @sec_chan_offs: secondary channel offset, uses IEEE80211_HT_PARAM_CHA_SEC_*
 *	values here
 *
 * This structure represents the "Secondary Channel Offset element".
 */
struct ieee80211_sec_chan_offs_ie {
	u8 sec_chan_offs;
} __packed;

/**
 * struct ieee80211_mesh_chansw_params_ie - mesh channel switch parameters IE
 * @mesh_ttl: Time To Live
 * @mesh_flags: Flags
 * @mesh_reason: Reason Code
 * @mesh_pre_value: Precedence Value
 *
 * This structure represents the payload of the "Mesh Channel Switch
 * Parameters element" as described in IEEE Std 802.11-2020 section
 * 9.4.2.102.
 */
struct ieee80211_mesh_chansw_params_ie {
	u8 mesh_ttl;
	u8 mesh_flags;
	__le16 mesh_reason;	/* little-endian */
	__le16 mesh_pre_value;	/* little-endian */
} __packed;

/**
 * struct ieee80211_wide_bw_chansw_ie - wide bandwidth channel switch IE
 * @new_channel_width: New Channel Width
 * @new_center_freq_seg0: New Channel Center Frequency Segment 0
 * @new_center_freq_seg1: New Channel Center Frequency Segment 1
 *
 * This structure represents the payload of the "Wide Bandwidth
 * Channel Switch element" as described in IEEE Std 802.11-2020
 * section 9.4.2.160.
*/
struct ieee80211_wide_bw_chansw_ie {
	u8 new_channel_width;
	u8 new_center_freq_seg0, new_center_freq_seg1;
} __packed;

/**
 * struct ieee80211_tim_ie - Traffic Indication Map information element
 * @dtim_count: DTIM Count
 * @dtim_period: DTIM Period
 * @bitmap_ctrl: Bitmap Control
 * @required_octet: "Syntactic sugar" to force the struct size to the
 *                  minimum valid size when carried in a non-S1G PPDU
 * @virtual_map: Partial Virtual Bitmap
 *
 * This structure represents the payload of the "TIM element" as
 * described in IEEE Std 802.11-2020 section 9.4.2.5. Note that this
 * definition is only applicable when the element is carried in a
 * non-S1G PPDU. When the TIM is carried in an S1G PPDU, the Bitmap
 * Control and Partial Virtual Bitmap may not be present.
 */
struct ieee80211_tim_ie {
	u8 dtim_count;
	u8 dtim_period;
	u8 bitmap_ctrl;
	/*
	 * Both union members start at the same offset; required_octet
	 * overlays the first octet of virtual_map.
	 */
	union {
		u8 required_octet;
		DECLARE_FLEX_ARRAY(u8, virtual_map);
	};
} __packed;

/**
 * struct ieee80211_meshconf_ie - Mesh Configuration element
 * @meshconf_psel: Active Path Selection Protocol Identifier
 * @meshconf_pmetric: Active Path Selection Metric Identifier
 * @meshconf_congest: Congestion Control Mode Identifier
 * @meshconf_synch: Synchronization Method Identifier
 * @meshconf_auth: Authentication Protocol Identifier
 * @meshconf_form: Mesh Formation Info
 * @meshconf_cap: Mesh Capability (see &enum mesh_config_capab_flags)
 *
 * This structure represents the payload of the "Mesh Configuration
 * element" as described in IEEE Std 802.11-2020 section 9.4.2.97.
*/
struct ieee80211_meshconf_ie {
	u8 meshconf_psel;
	u8 meshconf_pmetric;
	u8 meshconf_congest;
	u8 meshconf_synch;
	u8 meshconf_auth;
	u8 meshconf_form;
	u8 meshconf_cap;
} __packed;

/**
 * enum mesh_config_capab_flags - Mesh Configuration IE capability field flags
 *
 * @IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS: STA is willing to establish
 *	additional mesh peerings with other mesh STAs
 * @IEEE80211_MESHCONF_CAPAB_FORWARDING: the STA forwards MSDUs
 * @IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING: TBTT adjustment procedure
 *	is ongoing
 * @IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL: STA is in deep sleep mode or has
 *	neighbors in deep sleep mode
 *
 * Enumerates the "Mesh Capability" as described in IEEE Std
 * 802.11-2020 section 9.4.2.97.7.
 */
enum mesh_config_capab_flags {
	IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS		= 0x01,
	IEEE80211_MESHCONF_CAPAB_FORWARDING		= 0x08,
	IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING		= 0x20,
	IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL	= 0x40,
};

/*
 * NOTE(review): presumably a bit of the Mesh Formation Info octet
 * (meshconf_form of struct ieee80211_meshconf_ie) - confirm against users.
 */
#define IEEE80211_MESHCONF_FORM_CONNECTED_TO_GATE 0x1

/*
 * Flag bits of the mesh channel switch parameters element
 * (presumably the mesh_flags octet of
 * struct ieee80211_mesh_chansw_params_ie - confirm against users).
 */
#define WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT BIT(0)
#define WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR BIT(1)
#define WLAN_EID_CHAN_SWITCH_PARAM_REASON BIT(2)

/**
 * struct ieee80211_rann_ie - RANN (root announcement) element
 * @rann_flags: Flags
 * @rann_hopcount: Hop Count
 * @rann_ttl: Element TTL
 * @rann_addr: Root Mesh STA Address
 * @rann_seq: HWMP Sequence Number
 * @rann_interval: Interval
 * @rann_metric: Metric
 *
 * This structure represents the payload of the "RANN element" as
 * described in IEEE Std 802.11-2020 section 9.4.2.111.
*/
struct ieee80211_rann_ie {
	u8 rann_flags;
	u8 rann_hopcount;
	u8 rann_ttl;
	u8 rann_addr[ETH_ALEN];
	__le32 rann_seq;
	__le32 rann_interval;
	__le32 rann_metric;
} __packed;

/*
 * RANN flag bits.
 * NOTE(review): presumably applied to rann_flags of
 * struct ieee80211_rann_ie - confirm against users.
 */
enum ieee80211_rann_flags {
	RANN_FLAG_IS_GATE = 1 << 0,	/* bit 0 */
};

/*
 * HT channel width values.
 * NOTE(review): the consumer of these values is not visible next to this
 * definition - confirm where they are used before relying on them.
 */
enum ieee80211_ht_chanwidth_values {
	IEEE80211_HT_CHANWIDTH_20MHZ = 0,
	IEEE80211_HT_CHANWIDTH_ANY = 1,
};

/**
 * enum ieee80211_vht_opmode_bits - VHT operating mode field bits
 * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK: channel width mask
 * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: 20 MHz channel width
 * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: 40 MHz channel width
 * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: 80 MHz channel width
 * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: 160 MHz or 80+80 MHz channel width
 * @IEEE80211_OPMODE_NOTIF_BW_160_80P80: 160 / 80+80 MHz indicator flag
 * @IEEE80211_OPMODE_NOTIF_RX_NSS_MASK: number of spatial streams mask
 *	(the NSS value is the value of this field + 1)
 * @IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT: number of spatial streams shift
 * @IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF: indicates streams in SU-MIMO PPDU
 *	using a beamforming steering matrix
 */
enum ieee80211_vht_opmode_bits {
	IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK	= 0x03,	/* bits 0-1 */
	IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ	= 0,
	IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ	= 1,
	IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ	= 2,
	IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ	= 3,
	IEEE80211_OPMODE_NOTIF_BW_160_80P80	= 0x04,	/* bit 2 */
	IEEE80211_OPMODE_NOTIF_RX_NSS_MASK	= 0x70,	/* bits 4-6 */
	IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT	= 4,
	IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF	= 0x80,	/* bit 7 */
};

/**
 * enum ieee80211_s1g_chanwidth - S1G channel widths
 * These are defined in IEEE802.11-2016ah Table 10-20
 * as BSS Channel Width
 *
 * @IEEE80211_S1G_CHANWIDTH_1MHZ: 1MHz operating channel
 * @IEEE80211_S1G_CHANWIDTH_2MHZ: 2MHz operating channel
 * @IEEE80211_S1G_CHANWIDTH_4MHZ: 4MHz operating channel
 * @IEEE80211_S1G_CHANWIDTH_8MHZ: 8MHz operating channel
 * @IEEE80211_S1G_CHANWIDTH_16MHZ: 16MHz operating channel
 */
enum ieee80211_s1g_chanwidth {
IEEE80211_S1G_CHANWIDTH_1MHZ = 0,
	IEEE80211_S1G_CHANWIDTH_2MHZ = 1,
	IEEE80211_S1G_CHANWIDTH_4MHZ = 3,
	IEEE80211_S1G_CHANWIDTH_8MHZ = 7,
	IEEE80211_S1G_CHANWIDTH_16MHZ = 15,
};

/**
 * enum ieee80211_s1g_pri_chanwidth - S1G primary channel widths
 *	described in IEEE80211-2024 Table 10-39.
 *
 * @IEEE80211_S1G_PRI_CHANWIDTH_2MHZ: 2MHz primary channel
 * @IEEE80211_S1G_PRI_CHANWIDTH_1MHZ: 1MHz primary channel
 */
enum ieee80211_s1g_pri_chanwidth {
	IEEE80211_S1G_PRI_CHANWIDTH_2MHZ = 0,
	IEEE80211_S1G_PRI_CHANWIDTH_1MHZ = 1,
};

/* Length of the trans_id array in the sa_query action body of struct ieee80211_mgmt */
#define WLAN_SA_QUERY_TR_ID_LEN 2
/* Lengths of the membership and position arrays in the vht_group_notif action body */
#define WLAN_MEMBERSHIP_LEN 8
#define WLAN_USER_POSITION_LEN 16

/**
 * struct ieee80211_tpc_report_ie - TPC Report element
 * @tx_power: Transmit Power
 * @link_margin: Link Margin
 *
 * This structure represents the payload of the "TPC Report element" as
 * described in IEEE Std 802.11-2020 section 9.4.2.16.
 */
struct ieee80211_tpc_report_ie {
	u8 tx_power;
	u8 link_margin;
} __packed;

#define IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK	GENMASK(2, 1)
#define IEEE80211_ADDBA_EXT_FRAG_LEVEL_SHIFT	1
#define IEEE80211_ADDBA_EXT_NO_FRAG		BIT(0)
#define IEEE80211_ADDBA_EXT_BUF_SIZE_MASK	GENMASK(7, 5)
/*
 * NOTE(review): this shift (10) is not the bit position of
 * IEEE80211_ADDBA_EXT_BUF_SIZE_MASK (bits 5-7). Presumably it positions the
 * extracted bits above another value rather than extracting them - confirm
 * against the users before changing or reusing it.
 */
#define IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT	10

/*
 * Single-octet ADDBA extension payload.
 * NOTE(review): presumably decoded with the IEEE80211_ADDBA_EXT_* masks
 * above - confirm against users.
 */
struct ieee80211_addba_ext_ie {
	u8 data;
} __packed;

/**
 * struct ieee80211_s1g_bcn_compat_ie - S1G Beacon Compatibility element
 * @compat_info: Compatibility Information
 * @beacon_int: Beacon Interval
 * @tsf_completion: TSF Completion
 *
 * This structure represents the payload of the "S1G Beacon
 * Compatibility element" as described in IEEE Std 802.11-2020 section
 * 9.4.2.196.
*/
struct ieee80211_s1g_bcn_compat_ie {
	__le16 compat_info;
	__le16 beacon_int;
	__le32 tsf_completion;
} __packed;

/**
 * struct ieee80211_s1g_oper_ie - S1G Operation element
 * @ch_width: S1G Operation Information Channel Width
 * @oper_class: S1G Operation Information Operating Class
 * @primary_ch: S1G Operation Information Primary Channel Number
 * @oper_ch: S1G Operation Information Channel Center Frequency
 * @basic_mcs_nss: Basic S1G-MCS and NSS Set
 *
 * This structure represents the payload of the "S1G Operation
 * element" as described in IEEE Std 802.11-2020 section 9.4.2.212.
 */
struct ieee80211_s1g_oper_ie {
	u8 ch_width;
	u8 oper_class;
	u8 primary_ch;
	u8 oper_ch;
	__le16 basic_mcs_nss;
} __packed;

/**
 * struct ieee80211_aid_response_ie - AID Response element
 * @aid: AID/Group AID
 * @switch_count: AID Switch Count
 * @response_int: AID Response Interval
 *
 * This structure represents the payload of the "AID Response element"
 * as described in IEEE Std 802.11-2020 section 9.4.2.194.
 */
struct ieee80211_aid_response_ie {
	__le16 aid;
	u8 switch_count;
	__le16 response_int;
} __packed;

/*
 * S1G capabilities: two fixed-size octet arrays, 15 octets in total.
 * NOTE(review): presumably the payload of the S1G Capabilities element
 * (capability information followed by the supported S1G-MCS and NSS
 * set) - confirm against the specification.
 */
struct ieee80211_s1g_cap {
	u8 capab_info[10];
	u8 supp_mcs_nss[5];
} __packed;

/*
 * Frame layout with a frame control and duration field followed by a
 * type-specific body; the only body defined here is the S1G beacon.
 */
struct ieee80211_ext {
	__le16 frame_control;
	__le16 duration;
	union {
		struct {
			u8 sa[ETH_ALEN];
			__le32 timestamp;
			u8 change_seq;
			/* optional fixed-length fields, then the elements */
			u8 variable[];
		} __packed s1g_beacon;
	} u;
} __packed __aligned(2);

/**
 * ieee80211_s1g_optional_len - determine length of optional S1G beacon fields
 * @fc: frame control bytes in little-endian byteorder
 * Return: total length in bytes of the optional fixed-length fields
 *
 * S1G beacons may contain up to three optional fixed-length fields that
 * precede the variable-length elements. Whether these fields are present
 * is indicated by flags in the frame control field.
* * From IEEE 802.11-2024 section 9.3.4.3: * - Next TBTT field may be 0 or 3 bytes * - Short SSID field may be 0 or 4 bytes * - Access Network Options (ANO) field may be 0 or 1 byte */ static inline size_t ieee80211_s1g_optional_len(__le16 fc) { size_t len = 0; if (ieee80211_s1g_has_next_tbtt(fc)) len += 3; if (ieee80211_s1g_has_cssid(fc)) len += 4; if (ieee80211_s1g_has_ano(fc)) len += 1; return len; } #define IEEE80211_TWT_CONTROL_NDP BIT(0) #define IEEE80211_TWT_CONTROL_RESP_MODE BIT(1) #define IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST BIT(3) #define IEEE80211_TWT_CONTROL_RX_DISABLED BIT(4) #define IEEE80211_TWT_CONTROL_WAKE_DUR_UNIT BIT(5) #define IEEE80211_TWT_REQTYPE_REQUEST BIT(0) #define IEEE80211_TWT_REQTYPE_SETUP_CMD GENMASK(3, 1) #define IEEE80211_TWT_REQTYPE_TRIGGER BIT(4) #define IEEE80211_TWT_REQTYPE_IMPLICIT BIT(5) #define IEEE80211_TWT_REQTYPE_FLOWTYPE BIT(6) #define IEEE80211_TWT_REQTYPE_FLOWID GENMASK(9, 7) #define IEEE80211_TWT_REQTYPE_WAKE_INT_EXP GENMASK(14, 10) #define IEEE80211_TWT_REQTYPE_PROTECTION BIT(15) enum ieee80211_twt_setup_cmd { TWT_SETUP_CMD_REQUEST, TWT_SETUP_CMD_SUGGEST, TWT_SETUP_CMD_DEMAND, TWT_SETUP_CMD_GROUPING, TWT_SETUP_CMD_ACCEPT, TWT_SETUP_CMD_ALTERNATE, TWT_SETUP_CMD_DICTATE, TWT_SETUP_CMD_REJECT, }; struct ieee80211_twt_params { __le16 req_type; __le64 twt; u8 min_twt_dur; __le16 mantissa; u8 channel; } __packed; struct ieee80211_twt_setup { u8 dialog_token; u8 element_id; u8 length; u8 control; u8 params[]; } __packed; #define IEEE80211_TTLM_MAX_CNT 2 #define IEEE80211_TTLM_CONTROL_DIRECTION 0x03 #define IEEE80211_TTLM_CONTROL_DEF_LINK_MAP 0x04 #define IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT 0x08 #define IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT 0x10 #define IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE 0x20 #define IEEE80211_TTLM_DIRECTION_DOWN 0 #define IEEE80211_TTLM_DIRECTION_UP 1 #define IEEE80211_TTLM_DIRECTION_BOTH 2 /** * struct ieee80211_ttlm_elem - TID-To-Link Mapping element * * Defined in section 9.4.2.314 in 
P802.11be_D4
 *
 * @control: the first part of control field
 * @optional: the second part of control field
 */
struct ieee80211_ttlm_elem {
	u8 control;
	u8 optional[];
} __packed;

/**
 * struct ieee80211_bss_load_elem - BSS Load element
 *
 * Defined in section 9.4.2.26 in IEEE 802.11-REVme D4.1
 *
 * @sta_count: total number of STAs currently associated with the AP.
 * @channel_util: Percentage of time that the access point sensed the channel
 *	was busy. This value is in range [0, 255], the highest value means
 *	100% busy.
 * @avail_admission_capa: remaining amount of medium time used for admission
 *	control.
 */
struct ieee80211_bss_load_elem {
	__le16 sta_count;
	u8 channel_util;
	__le16 avail_admission_capa;
} __packed;

/*
 * Management frame layout: a fixed header (frame control, duration, three
 * addresses, sequence control) followed by a union holding one body layout
 * per management frame type.
 */
struct ieee80211_mgmt {
	__le16 frame_control;
	__le16 duration;
	u8 da[ETH_ALEN];
	u8 sa[ETH_ALEN];
	u8 bssid[ETH_ALEN];
	__le16 seq_ctrl;
	/* Frame body; which member applies depends on the frame type. */
	union {
		struct {
			__le16 auth_alg;
			__le16 auth_transaction;
			__le16 status_code;
			/* possibly followed by Challenge text */
			u8 variable[];
		} __packed auth;
		struct {
			__le16 reason_code;
		} __packed deauth;
		struct {
			__le16 capab_info;
			__le16 listen_interval;
			/* followed by SSID and Supported rates */
			u8 variable[];
		} __packed assoc_req;
		struct {
			__le16 capab_info;
			__le16 status_code;
			__le16 aid;
			/* followed by Supported rates */
			u8 variable[];
		} __packed assoc_resp, reassoc_resp;
		/* Same as assoc_resp/reassoc_resp but without the aid field. */
		struct {
			__le16 capab_info;
			__le16 status_code;
			u8 variable[];
		} __packed s1g_assoc_resp, s1g_reassoc_resp;
		struct {
			__le16 capab_info;
			__le16 listen_interval;
			u8 current_ap[ETH_ALEN];
			/* followed by SSID and Supported rates */
			u8 variable[];
		} __packed reassoc_req;
		struct {
			__le16 reason_code;
		} __packed disassoc;
		struct {
			__le64 timestamp;
			__le16 beacon_int;
			__le16 capab_info;
			/* followed by some of SSID, Supported rates,
			 * FH Params, DS Params, CF Params, IBSS Params, TIM */
			u8 variable[];
		} __packed beacon;
		struct {
			/* only variable items: SSID, Supported rates */
			DECLARE_FLEX_ARRAY(u8, variable);
		} __packed probe_req;
		struct {
			__le64 timestamp;
			__le16
beacon_int;
			__le16 capab_info;
			/* followed by some of SSID, Supported rates,
			 * FH Params, DS Params, CF Params, IBSS Params */
			u8 variable[];
		} __packed probe_resp;
		/*
		 * Action frame body: a category octet followed by a union
		 * with one layout per kind of action frame.
		 */
		struct {
			u8 category;
			union {
				struct {
					u8 action_code;
					u8 dialog_token;
					u8 status_code;
					u8 variable[];
				} __packed wme_action;
				struct{
					u8 action_code;
					u8 variable[];
				} __packed chan_switch;
				struct{
					u8 action_code;
					struct ieee80211_ext_chansw_ie data;
					u8 variable[];
				} __packed ext_chan_switch;
				struct{
					u8 action_code;
					u8 dialog_token;
					u8 element_id;
					u8 length;
					/* ends in a flexible array, so it must stay last */
					struct ieee80211_msrment_ie msr_elem;
				} __packed measurement;
				struct{
					u8 action_code;
					u8 dialog_token;
					__le16 capab;
					__le16 timeout;
					__le16 start_seq_num;
					/* followed by BA Extension */
					u8 variable[];
				} __packed addba_req;
				struct{
					u8 action_code;
					u8 dialog_token;
					__le16 status;
					__le16 capab;
					__le16 timeout;
					/* followed by BA Extension */
					u8 variable[];
				} __packed addba_resp;
				struct{
					u8 action_code;
					__le16 params;
					__le16 reason_code;
				} __packed delba;
				struct {
					u8 action_code;
					u8 variable[];
				} __packed self_prot;
				struct{
					u8 action_code;
					u8 variable[];
				} __packed mesh_action;
				struct {
					u8 action;
					u8 trans_id[WLAN_SA_QUERY_TR_ID_LEN];
				} __packed sa_query;
				struct {
					u8 action;
					u8 smps_control;
				} __packed ht_smps;
				struct {
					u8 action_code;
					u8 chanwidth;
				} __packed ht_notify_cw;
				struct {
					u8 action_code;
					u8 dialog_token;
					__le16 capability;
					u8 variable[];
				} __packed tdls_discover_resp;
				struct {
					u8 action_code;
					u8 operating_mode;
				} __packed vht_opmode_notif;
				struct {
					u8 action_code;
					u8 membership[WLAN_MEMBERSHIP_LEN];
					u8 position[WLAN_USER_POSITION_LEN];
				} __packed vht_group_notif;
				struct {
					u8 action_code;
					u8 dialog_token;
					u8 tpc_elem_id;
					u8 tpc_elem_length;
					struct ieee80211_tpc_report_ie tpc;
				} __packed tpc_report;
				struct {
					u8 action_code;
					u8 dialog_token;
					u8 follow_up;
					u8 tod[6];
					u8 toa[6];
					__le16 tod_error;
					__le16 toa_error;
					u8 variable[];
				} __packed ftm;
				struct {
					u8 action_code;
					u8 variable[];
				} __packed s1g;
				struct {
					u8 action_code;
					u8 dialog_token;
					u8
follow_up;
					/*
					 * NOTE(review): tod/toa are plain u32 while
					 * other multi-byte fields in this structure
					 * use __le16/__le64 - confirm how users
					 * handle the byte order of these two.
					 */
					u32 tod;
					u32 toa;
					u8 max_tod_error;
					u8 max_toa_error;
				} __packed wnm_timing_msr;
				struct {
					u8 action_code;
					u8 dialog_token;
					u8 variable[];
				} __packed ttlm_req;
				struct {
					u8 action_code;
					u8 dialog_token;
					__le16 status_code;
					u8 variable[];
				} __packed ttlm_res;
				struct {
					u8 action_code;
				} __packed ttlm_tear_down;
				struct {
					u8 action_code;
					u8 dialog_token;
					u8 variable[];
				} __packed ml_reconf_req;
				struct {
					u8 action_code;
					u8 dialog_token;
					u8 count;
					u8 variable[];
				} __packed ml_reconf_resp;
				struct {
					u8 action_code;
					u8 variable[];
				} __packed epcs;
			} u;
		} __packed action;
		DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */
	} u;
} __packed __aligned(2);

/* Supported rates membership selectors */
#define BSS_MEMBERSHIP_SELECTOR_HT_PHY	127
#define BSS_MEMBERSHIP_SELECTOR_VHT_PHY	126
#define BSS_MEMBERSHIP_SELECTOR_GLK	125
#define BSS_MEMBERSHIP_SELECTOR_EPD	124
#define BSS_MEMBERSHIP_SELECTOR_SAE_H2E 123
#define BSS_MEMBERSHIP_SELECTOR_HE_PHY	122
#define BSS_MEMBERSHIP_SELECTOR_EHT_PHY	121

/* Lowest selector value defined above (EHT PHY, 121) */
#define BSS_MEMBERSHIP_SELECTOR_MIN	BSS_MEMBERSHIP_SELECTOR_EHT_PHY

/* mgmt header + 1 byte category code */
#define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u)

/* Management MIC information element (IEEE 802.11w) */
struct ieee80211_mmie {
	u8 element_id;
	u8 length;
	__le16 key_id;
	u8 sequence_number[6];
	u8 mic[8];
} __packed;

/* Management MIC information element (IEEE 802.11w) for GMAC and CMAC-256 */
struct ieee80211_mmie_16 {
	u8 element_id;
	u8 length;
	__le16 key_id;
	u8 sequence_number[6];
	u8 mic[16];	/* 16 octets here, versus 8 in struct ieee80211_mmie */
} __packed;

/* Fixed leading part of a vendor element: ID, length, 3-octet OUI and OUI type */
struct ieee80211_vendor_ie {
	u8 element_id;
	u8 len;
	u8 oui[3];
	u8 oui_type;
} __packed;

/* Parameters of one WMM access category (see ac[] in struct ieee80211_wmm_param_ie) */
struct ieee80211_wmm_ac_param {
	u8 aci_aifsn; /* AIFSN, ACM, ACI */
	u8 cw; /* ECWmin, ECWmax (CW = 2^ECW - 1) */
	__le16 txop_limit;
} __packed;

struct ieee80211_wmm_param_ie {
	u8 element_id; /* Element ID: 221 (0xdd); */
	u8 len; /* Length: 24 */
	/* required fields for WMM version 1 */
	u8 oui[3]; /* 00:50:f2 */
	u8 oui_type; /* 2 */
	u8 oui_subtype; /* 1 */
	u8
version; /* 1 for WMM version 1.0 */
	u8 qos_info; /* AP/STA specific QoS info */
	u8 reserved; /* 0 */
	/* AC_BE, AC_BK, AC_VI, AC_VO */
	struct ieee80211_wmm_ac_param ac[4];
} __packed;

/* Control frames */

/* RTS frame: frame control, duration, receiver and transmitter address */
struct ieee80211_rts {
	__le16 frame_control;
	__le16 duration;
	u8 ra[ETH_ALEN];
	u8 ta[ETH_ALEN];
} __packed __aligned(2);

/* CTS frame: like struct ieee80211_rts but without the transmitter address */
struct ieee80211_cts {
	__le16 frame_control;
	__le16 duration;
	u8 ra[ETH_ALEN];
} __packed __aligned(2);

/* PS-Poll frame: an aid field takes the place of the duration field */
struct ieee80211_pspoll {
	__le16 frame_control;
	__le16 aid;
	u8 bssid[ETH_ALEN];
	u8 ta[ETH_ALEN];
} __packed __aligned(2);

/* TDLS */

/* Channel switch timing */
struct ieee80211_ch_switch_timing {
	__le16 switch_time;
	__le16 switch_timeout;
} __packed;

/* Link-id information element */
struct ieee80211_tdls_lnkie {
	u8 ie_type; /* Link Identifier IE */
	u8 ie_len;
	u8 bssid[ETH_ALEN];
	u8 init_sta[ETH_ALEN];
	u8 resp_sta[ETH_ALEN];
} __packed;

/*
 * TDLS frame: destination and source address, a big-endian ether_type,
 * payload type, category and action code, followed by a union with one
 * body layout per kind of TDLS frame.
 */
struct ieee80211_tdls_data {
	u8 da[ETH_ALEN];
	u8 sa[ETH_ALEN];
	__be16 ether_type;	/* big-endian, unlike the __le16 fields below */
	u8 payload_type;
	u8 category;
	u8 action_code;
	union {
		struct {
			u8 dialog_token;
			__le16 capability;
			u8 variable[];
		} __packed setup_req;
		struct {
			__le16 status_code;
			u8 dialog_token;
			__le16 capability;
			u8 variable[];
		} __packed setup_resp;
		struct {
			__le16 status_code;
			u8 dialog_token;
			u8 variable[];
		} __packed setup_cfm;
		struct {
			__le16 reason_code;
			u8 variable[];
		} __packed teardown;
		struct {
			u8 dialog_token;
			u8 variable[];
		} __packed discover_req;
		struct {
			u8 target_channel;
			u8 oper_class;
			u8 variable[];
		} __packed chan_switch_req;
		struct {
			__le16 status_code;
			u8 variable[];
		} __packed chan_switch_resp;
	} u;
} __packed;

/*
 * Peer-to-Peer IE attribute related definitions.
 */
/*
 * enum ieee80211_p2p_attr_id - identifies type of peer-to-peer attribute.
*/
enum ieee80211_p2p_attr_id {
	IEEE80211_P2P_ATTR_STATUS = 0,
	IEEE80211_P2P_ATTR_MINOR_REASON,
	IEEE80211_P2P_ATTR_CAPABILITY,
	IEEE80211_P2P_ATTR_DEVICE_ID,
	IEEE80211_P2P_ATTR_GO_INTENT,
	IEEE80211_P2P_ATTR_GO_CONFIG_TIMEOUT,
	IEEE80211_P2P_ATTR_LISTEN_CHANNEL,
	IEEE80211_P2P_ATTR_GROUP_BSSID,
	IEEE80211_P2P_ATTR_EXT_LISTEN_TIMING,
	IEEE80211_P2P_ATTR_INTENDED_IFACE_ADDR,
	IEEE80211_P2P_ATTR_MANAGABILITY,
	IEEE80211_P2P_ATTR_CHANNEL_LIST,
	IEEE80211_P2P_ATTR_ABSENCE_NOTICE,
	IEEE80211_P2P_ATTR_DEVICE_INFO,
	IEEE80211_P2P_ATTR_GROUP_INFO,
	IEEE80211_P2P_ATTR_GROUP_ID,
	IEEE80211_P2P_ATTR_INTERFACE,
	IEEE80211_P2P_ATTR_OPER_CHANNEL,
	IEEE80211_P2P_ATTR_INVITE_FLAGS,	/* 18, last sequential value */
	/* 19 - 220: Reserved */
	IEEE80211_P2P_ATTR_VENDOR_SPECIFIC = 221,

	IEEE80211_P2P_ATTR_MAX	/* 222: one past the vendor specific ID */
};

/* Notice of Absence attribute - described in P2P spec 4.1.14 */
/* Typical max value used here */
#define IEEE80211_P2P_NOA_DESC_MAX	4

/* One Notice of Absence descriptor; the 32-bit fields are little-endian */
struct ieee80211_p2p_noa_desc {
	u8 count;
	__le32 duration;
	__le32 interval;
	__le32 start_time;
} __packed;

/*
 * Notice of Absence attribute body: two octets followed by a fixed array
 * of IEEE80211_P2P_NOA_DESC_MAX descriptors.
 */
struct ieee80211_p2p_noa_attr {
	u8 index;
	/* presumably decoded with the IEEE80211_P2P_OPPPS_* masks below - confirm */
	u8 oppps_ctwindow;
	struct ieee80211_p2p_noa_desc desc[IEEE80211_P2P_NOA_DESC_MAX];
} __packed;

#define IEEE80211_P2P_OPPPS_ENABLE_BIT		BIT(7)
#define IEEE80211_P2P_OPPPS_CTWINDOW_MASK	0x7F	/* bits 0-6 */

/**
 * struct ieee80211_bar - Block Ack Request frame format
 * @frame_control: Frame Control
 * @duration: Duration
 * @ra: RA
 * @ta: TA
 * @control: BAR Control
 * @start_seq_num: Starting Sequence Number (see Figure 9-37)
 *
 * This structure represents the "BlockAckReq frame format"
 * as described in IEEE Std 802.11-2020 section 9.3.1.7.
*/
struct ieee80211_bar {
	__le16 frame_control;
	__le16 duration;
	__u8 ra[ETH_ALEN];
	__u8 ta[ETH_ALEN];
	__le16 control;
	__le16 start_seq_num;
} __packed;

/* 802.11 BAR control masks */
#define IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL	0x0000
#define IEEE80211_BAR_CTRL_MULTI_TID		0x0002
#define IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA	0x0004
#define IEEE80211_BAR_CTRL_TID_INFO_MASK	0xf000	/* bits 12-15 */
#define IEEE80211_BAR_CTRL_TID_INFO_SHIFT	12

/* Size in octets of rx_mask in struct ieee80211_mcs_info */
#define IEEE80211_HT_MCS_MASK_LEN		10

/**
 * struct ieee80211_mcs_info - Supported MCS Set field
 * @rx_mask: RX mask
 * @rx_highest: highest supported RX rate. If set represents
 *	the highest supported RX data rate in units of 1 Mbps.
 *	If this field is 0 this value should not be used to
 *	consider the highest RX data rate supported.
 * @tx_params: TX parameters
 * @reserved: Reserved bits
 *
 * This structure represents the "Supported MCS Set field" as
 * described in IEEE Std 802.11-2020 section 9.4.2.55.4.
 */
struct ieee80211_mcs_info {
	u8 rx_mask[IEEE80211_HT_MCS_MASK_LEN];
	__le16 rx_highest;
	u8 tx_params;
	u8 reserved[3];
} __packed;

/* 802.11n HT capability MCS set */
#define IEEE80211_HT_MCS_RX_HIGHEST_MASK	0x3ff
#define IEEE80211_HT_MCS_TX_DEFINED		0x01
#define IEEE80211_HT_MCS_TX_RX_DIFF		0x02
/* value 0 == 1 stream etc */
#define IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK	0x0C
#define IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT	2
#define		IEEE80211_HT_MCS_TX_MAX_STREAMS	4
#define IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION	0x10

#define IEEE80211_HT_MCS_CHAINS(mcs) ((mcs) == 32 ?
1 : (1 + ((mcs) >> 3)))

/*
 * 802.11n D5.0 20.3.5 / 20.6 says:
 * - indices 0 to 7 and 32 are single spatial stream
 * - 8 to 31 are multiple spatial streams using equal modulation
 *   [8..15 for two streams, 16..23 for three and 24..31 for four]
 * - remainder are multiple spatial streams using unequal modulation
 */
#define IEEE80211_HT_MCS_UNEQUAL_MODULATION_START 33
/* Index of the octet that holds bit 33 when the MCS set is read as a bitmap */
#define IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE \
	(IEEE80211_HT_MCS_UNEQUAL_MODULATION_START / 8)

/**
 * struct ieee80211_ht_cap - HT capabilities element
 * @cap_info: HT Capability Information
 * @ampdu_params_info: A-MPDU Parameters
 * @mcs: Supported MCS Set
 * @extended_ht_cap_info: HT Extended Capabilities
 * @tx_BF_cap_info: Transmit Beamforming Capabilities
 * @antenna_selection_info: ASEL Capability
 *
 * This structure represents the payload of the "HT Capabilities
 * element" as described in IEEE Std 802.11-2020 section 9.4.2.55.
 */
struct ieee80211_ht_cap {
	__le16 cap_info;
	u8 ampdu_params_info;

	/* 16 bytes MCS information */
	struct ieee80211_mcs_info mcs;

	__le16 extended_ht_cap_info;
	__le32 tx_BF_cap_info;
	u8 antenna_selection_info;
} __packed;

/* 802.11n HT capabilities masks (for cap_info) */
#define IEEE80211_HT_CAP_LDPC_CODING		0x0001
#define IEEE80211_HT_CAP_SUP_WIDTH_20_40	0x0002
#define IEEE80211_HT_CAP_SM_PS			0x000C	/* bits 2-3 */
#define		IEEE80211_HT_CAP_SM_PS_SHIFT	2
#define IEEE80211_HT_CAP_GRN_FLD		0x0010
#define IEEE80211_HT_CAP_SGI_20			0x0020
#define IEEE80211_HT_CAP_SGI_40			0x0040
#define IEEE80211_HT_CAP_TX_STBC		0x0080
#define IEEE80211_HT_CAP_RX_STBC		0x0300	/* bits 8-9 */
#define		IEEE80211_HT_CAP_RX_STBC_SHIFT	8
#define IEEE80211_HT_CAP_DELAY_BA		0x0400
#define IEEE80211_HT_CAP_MAX_AMSDU		0x0800
#define IEEE80211_HT_CAP_DSSSCCK40		0x1000
#define IEEE80211_HT_CAP_RESERVED		0x2000
#define IEEE80211_HT_CAP_40MHZ_INTOLERANT	0x4000
#define IEEE80211_HT_CAP_LSIG_TXOP_PROT		0x8000

/* 802.11n HT extended capabilities masks (for extended_ht_cap_info) */
#define IEEE80211_HT_EXT_CAP_PCO		0x0001
#define
IEEE80211_HT_EXT_CAP_PCO_TIME 0x0006
#define		IEEE80211_HT_EXT_CAP_PCO_TIME_SHIFT	1
#define IEEE80211_HT_EXT_CAP_MCS_FB		0x0300	/* bits 8-9 */
#define		IEEE80211_HT_EXT_CAP_MCS_FB_SHIFT	8
#define IEEE80211_HT_EXT_CAP_HTC_SUP		0x0400
#define IEEE80211_HT_EXT_CAP_RD_RESPONDER	0x0800

/* 802.11n HT capability AMPDU settings (for ampdu_params_info) */
#define IEEE80211_HT_AMPDU_PARM_FACTOR		0x03	/* bits 0-1 */
#define IEEE80211_HT_AMPDU_PARM_DENSITY		0x1C	/* bits 2-4 */
#define		IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT	2

/*
 * Maximum length of AMPDU that the STA can receive in high-throughput (HT).
 * Length = 2 ^ (13 + max_ampdu_length_exp) - 1 (octets)
 */
enum ieee80211_max_ampdu_length_exp {
	IEEE80211_HT_MAX_AMPDU_8K = 0,
	IEEE80211_HT_MAX_AMPDU_16K = 1,
	IEEE80211_HT_MAX_AMPDU_32K = 2,
	IEEE80211_HT_MAX_AMPDU_64K = 3
};

/*
 * Maximum length of AMPDU that the STA can receive in VHT.
 * Length = 2 ^ (13 + max_ampdu_length_exp) - 1 (octets)
 */
enum ieee80211_vht_max_ampdu_length_exp {
	IEEE80211_VHT_MAX_AMPDU_8K = 0,
	IEEE80211_VHT_MAX_AMPDU_16K = 1,
	IEEE80211_VHT_MAX_AMPDU_32K = 2,
	IEEE80211_VHT_MAX_AMPDU_64K = 3,
	IEEE80211_VHT_MAX_AMPDU_128K = 4,
	IEEE80211_VHT_MAX_AMPDU_256K = 5,
	IEEE80211_VHT_MAX_AMPDU_512K = 6,
	IEEE80211_VHT_MAX_AMPDU_1024K = 7
};

/* The constant 13 of the "2 ^ (13 + max_ampdu_length_exp)" formulas above */
#define IEEE80211_HT_MAX_AMPDU_FACTOR 13

/* Minimum MPDU start spacing */
enum ieee80211_min_mpdu_spacing {
	IEEE80211_HT_MPDU_DENSITY_NONE = 0,	/* No restriction */
	IEEE80211_HT_MPDU_DENSITY_0_25 = 1,	/* 1/4 usec */
	IEEE80211_HT_MPDU_DENSITY_0_5 = 2,	/* 1/2 usec */
	IEEE80211_HT_MPDU_DENSITY_1 = 3,	/* 1 usec */
	IEEE80211_HT_MPDU_DENSITY_2 = 4,	/* 2 usec */
	IEEE80211_HT_MPDU_DENSITY_4 = 5,	/* 4 usec */
	IEEE80211_HT_MPDU_DENSITY_8 = 6,	/* 8 usec */
	IEEE80211_HT_MPDU_DENSITY_16 = 7	/* 16 usec */
};

/**
 * struct ieee80211_ht_operation - HT operation IE
 * @primary_chan: Primary Channel
 * @ht_param: HT Operation Information parameters
 * @operation_mode: HT Operation Information operation mode
 * @stbc_param: HT Operation Information STBC params
 * @basic_set: Basic HT-MCS Set
 *
 * This
structure represents the payload of the "HT Operation
 * element" as described in IEEE Std 802.11-2020 section 9.4.2.56.
 */
struct ieee80211_ht_operation {
	u8 primary_chan;
	u8 ht_param;
	__le16 operation_mode;
	__le16 stbc_param;
	u8 basic_set[16];
} __packed;

/* for ht_param */
#define IEEE80211_HT_PARAM_CHA_SEC_OFFSET		0x03	/* bits 0-1 */
#define		IEEE80211_HT_PARAM_CHA_SEC_NONE		0x00
#define		IEEE80211_HT_PARAM_CHA_SEC_ABOVE	0x01
#define		IEEE80211_HT_PARAM_CHA_SEC_BELOW	0x03
#define IEEE80211_HT_PARAM_CHAN_WIDTH_ANY		0x04
#define IEEE80211_HT_PARAM_RIFS_MODE			0x08

/* for operation_mode */
#define IEEE80211_HT_OP_MODE_PROTECTION			0x0003	/* bits 0-1 */
#define		IEEE80211_HT_OP_MODE_PROTECTION_NONE		0
#define		IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER	1
#define		IEEE80211_HT_OP_MODE_PROTECTION_20MHZ		2
#define		IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED	3
#define IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT		0x0004
#define IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT		0x0010
/* The CCFS2 mask covers bits 5-12, matching the shift of 5 */
#define IEEE80211_HT_OP_MODE_CCFS2_SHIFT		5
#define IEEE80211_HT_OP_MODE_CCFS2_MASK			0x1fe0

/* for stbc_param */
#define IEEE80211_HT_STBC_PARAM_DUAL_BEACON		0x0040
#define IEEE80211_HT_STBC_PARAM_DUAL_CTS_PROT		0x0080
#define IEEE80211_HT_STBC_PARAM_STBC_BEACON		0x0100
#define IEEE80211_HT_STBC_PARAM_LSIG_TXOP_FULLPROT	0x0200
#define IEEE80211_HT_STBC_PARAM_PCO_ACTIVE		0x0400
#define IEEE80211_HT_STBC_PARAM_PCO_PHASE		0x0800

/* block-ack parameters */
#define IEEE80211_ADDBA_PARAM_AMSDU_MASK	0x0001	/* bit 0 */
#define IEEE80211_ADDBA_PARAM_POLICY_MASK	0x0002	/* bit 1 */
#define IEEE80211_ADDBA_PARAM_TID_MASK		0x003C	/* bits 2-5 */
#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK	0xFFC0	/* bits 6-15 */
#define IEEE80211_DELBA_PARAM_TID_MASK		0xF000	/* bits 12-15 */
#define IEEE80211_DELBA_PARAM_INITIATOR_MASK	0x0800	/* bit 11 */

/*
 * A-MPDU buffer sizes
 * According to HT size varies from 8 to 64 frames
 * HE adds the ability to have up to 256 frames.
 * EHT adds the ability to have up to 1K frames.
*/
#define IEEE80211_MIN_AMPDU_BUF		0x8	/* 8 frames */
#define IEEE80211_MAX_AMPDU_BUF_HT	0x40	/* 64 frames */
#define IEEE80211_MAX_AMPDU_BUF_HE	0x100	/* 256 frames */
#define IEEE80211_MAX_AMPDU_BUF_EHT	0x400	/* 1024 frames */

/* Spatial Multiplexing Power Save Modes (for capability) */
#define WLAN_HT_CAP_SM_PS_STATIC	0
#define WLAN_HT_CAP_SM_PS_DYNAMIC	1
#define WLAN_HT_CAP_SM_PS_INVALID	2
#define WLAN_HT_CAP_SM_PS_DISABLED	3

/*
 * for SM power control field lower two bits
 * (note: the numbering differs from the WLAN_HT_CAP_SM_PS_* values above,
 * and the value 2 is not defined here)
 */
#define WLAN_HT_SMPS_CONTROL_DISABLED	0
#define WLAN_HT_SMPS_CONTROL_STATIC	1
#define WLAN_HT_SMPS_CONTROL_DYNAMIC	3

/**
 * struct ieee80211_vht_mcs_info - VHT MCS information
 * @rx_mcs_map: RX MCS map 2 bits for each stream, total 8 streams
 * @rx_highest: Indicates highest long GI VHT PPDU data rate
 *	STA can receive. Rate expressed in units of 1 Mbps.
 *	If this field is 0 this value should not be used to
 *	consider the highest RX data rate supported.
 *	The top 3 bits of this field indicate the Maximum NSTS,total
 *	(a beamformee capability.)
 * @tx_mcs_map: TX MCS map 2 bits for each stream, total 8 streams
 * @tx_highest: Indicates highest long GI VHT PPDU data rate
 *	STA can transmit. Rate expressed in units of 1 Mbps.
 *	If this field is 0 this value should not be used to
 *	consider the highest TX data rate supported.
 *	The top 2 bits of this field are reserved, the
 *	3rd bit from the top indiciates VHT Extended NSS BW
 *	Capability.
*/
struct ieee80211_vht_mcs_info {
	__le16 rx_mcs_map;
	__le16 rx_highest;
	__le16 tx_mcs_map;
	__le16 tx_highest;
} __packed;

/* for rx_highest */
#define IEEE80211_VHT_MAX_NSTS_TOTAL_SHIFT	13
/* bits 13-15, i.e. the top 3 bits of the 16-bit field */
#define IEEE80211_VHT_MAX_NSTS_TOTAL_MASK	(7 << IEEE80211_VHT_MAX_NSTS_TOTAL_SHIFT)

/* for tx_highest */
#define IEEE80211_VHT_EXT_NSS_BW_CAPABLE	(1 << 13)	/* bit 13 */

/**
 * enum ieee80211_vht_mcs_support - VHT MCS support definitions
 * @IEEE80211_VHT_MCS_SUPPORT_0_7: MCSes 0-7 are supported for the
 *	number of streams
 * @IEEE80211_VHT_MCS_SUPPORT_0_8: MCSes 0-8 are supported
 * @IEEE80211_VHT_MCS_SUPPORT_0_9: MCSes 0-9 are supported
 * @IEEE80211_VHT_MCS_NOT_SUPPORTED: This number of streams isn't supported
 *
 * These definitions are used in each 2-bit subfield of the @rx_mcs_map
 * and @tx_mcs_map fields of &struct ieee80211_vht_mcs_info, which are
 * both split into 8 subfields by number of streams. These values indicate
 * which MCSes are supported for the number of streams the value appears
 * for.
 */
enum ieee80211_vht_mcs_support {
	IEEE80211_VHT_MCS_SUPPORT_0_7	= 0,
	IEEE80211_VHT_MCS_SUPPORT_0_8	= 1,
	IEEE80211_VHT_MCS_SUPPORT_0_9	= 2,
	IEEE80211_VHT_MCS_NOT_SUPPORTED	= 3,
};

/**
 * struct ieee80211_vht_cap - VHT capabilities
 *
 * This structure is the "VHT capabilities element" as
 * described in 802.11ac D3.0 8.4.2.160
 * @vht_cap_info: VHT capability info
 * @supp_mcs: VHT MCS supported rates
 */
struct ieee80211_vht_cap {
	__le32 vht_cap_info;
	struct ieee80211_vht_mcs_info supp_mcs;
} __packed;

/**
 * enum ieee80211_vht_chanwidth - VHT channel width
 * @IEEE80211_VHT_CHANWIDTH_USE_HT: use the HT operation IE to
 *	determine the channel width (20 or 40 MHz)
 * @IEEE80211_VHT_CHANWIDTH_80MHZ: 80 MHz bandwidth
 * @IEEE80211_VHT_CHANWIDTH_160MHZ: 160 MHz bandwidth
 * @IEEE80211_VHT_CHANWIDTH_80P80MHZ: 80+80 MHz bandwidth
 */
enum ieee80211_vht_chanwidth {
	IEEE80211_VHT_CHANWIDTH_USE_HT		= 0,
	IEEE80211_VHT_CHANWIDTH_80MHZ		= 1,
	IEEE80211_VHT_CHANWIDTH_160MHZ		= 2,
	IEEE80211_VHT_CHANWIDTH_80P80MHZ	= 3,
};

/**
 *
struct ieee80211_vht_operation - VHT operation IE * * This structure is the "VHT operation element" as * described in 802.11ac D3.0 8.4.2.161 * @chan_width: Operating channel width * @center_freq_seg0_idx: center freq segment 0 index * @center_freq_seg1_idx: center freq segment 1 index * @basic_mcs_set: VHT Basic MCS rate set */ struct ieee80211_vht_operation { u8 chan_width; u8 center_freq_seg0_idx; u8 center_freq_seg1_idx; __le16 basic_mcs_set; } __packed; /** * struct ieee80211_he_cap_elem - HE capabilities element * @mac_cap_info: HE MAC Capabilities Information * @phy_cap_info: HE PHY Capabilities Information * * This structure represents the fixed fields of the payload of the * "HE capabilities element" as described in IEEE Std 802.11ax-2021 * sections 9.4.2.248.2 and 9.4.2.248.3. */ struct ieee80211_he_cap_elem { u8 mac_cap_info[6]; u8 phy_cap_info[11]; } __packed; #define IEEE80211_TX_RX_MCS_NSS_DESC_MAX_LEN 5 /** * enum ieee80211_he_mcs_support - HE MCS support definitions * @IEEE80211_HE_MCS_SUPPORT_0_7: MCSes 0-7 are supported for the * number of streams * @IEEE80211_HE_MCS_SUPPORT_0_9: MCSes 0-9 are supported * @IEEE80211_HE_MCS_SUPPORT_0_11: MCSes 0-11 are supported * @IEEE80211_HE_MCS_NOT_SUPPORTED: This number of streams isn't supported * * These definitions are used in each 2-bit subfield of the rx_mcs_* * and tx_mcs_* fields of &struct ieee80211_he_mcs_nss_supp, which are * both split into 8 subfields by number of streams. These values indicate * which MCSes are supported for the number of streams the value appears * for. 
*/
enum ieee80211_he_mcs_support {
	IEEE80211_HE_MCS_SUPPORT_0_7	= 0,
	IEEE80211_HE_MCS_SUPPORT_0_9	= 1,
	IEEE80211_HE_MCS_SUPPORT_0_11	= 2,
	IEEE80211_HE_MCS_NOT_SUPPORTED	= 3,
};

/**
 * struct ieee80211_he_mcs_nss_supp - HE Tx/Rx HE MCS NSS Support Field
 *
 * This structure holds the data required for the Tx/Rx HE MCS NSS Support Field
 * described in P802.11ax_D2.0 section 9.4.2.237.4
 *
 * Each 2-bit subfield of the maps below holds an
 * &enum ieee80211_he_mcs_support value.
 *
 * @rx_mcs_80: Rx MCS map 2 bits for each stream, total 8 streams, for channel
 *     widths less than 80MHz.
 * @tx_mcs_80: Tx MCS map 2 bits for each stream, total 8 streams, for channel
 *     widths less than 80MHz.
 * @rx_mcs_160: Rx MCS map 2 bits for each stream, total 8 streams, for channel
 *     width 160MHz.
 * @tx_mcs_160: Tx MCS map 2 bits for each stream, total 8 streams, for channel
 *     width 160MHz.
 * @rx_mcs_80p80: Rx MCS map 2 bits for each stream, total 8 streams, for
 *     channel width 80p80MHz.
 * @tx_mcs_80p80: Tx MCS map 2 bits for each stream, total 8 streams, for
 *     channel width 80p80MHz.
 */
struct ieee80211_he_mcs_nss_supp {
	__le16 rx_mcs_80;
	__le16 tx_mcs_80;
	__le16 rx_mcs_160;
	__le16 tx_mcs_160;
	__le16 rx_mcs_80p80;
	__le16 tx_mcs_80p80;
} __packed;

/**
 * struct ieee80211_he_operation - HE Operation element
 * @he_oper_params: HE Operation Parameters + BSS Color Information
 * @he_mcs_nss_set: Basic HE-MCS And NSS Set
 * @optional: Optional fields VHT Operation Information, Max Co-Hosted
 *            BSSID Indicator, and 6 GHz Operation Information
 *
 * This structure represents the payload of the "HE Operation
 * element" as described in IEEE Std 802.11ax-2021 section 9.4.2.249.
*/
struct ieee80211_he_operation {
	/* bit layout: IEEE80211_HE_OPERATION_* masks */
	__le32 he_oper_params;
	__le16 he_mcs_nss_set;
	u8 optional[];
} __packed;

/**
 * struct ieee80211_he_spr - Spatial Reuse Parameter Set element
 * @he_sr_control: SR Control
 * @optional: Optional fields Non-SRG OBSS PD Max Offset, SRG OBSS PD
 *            Min Offset, SRG OBSS PD Max Offset, SRG BSS Color
 *            Bitmap, and SRG Partial BSSID Bitmap
 *
 * This structure represents the payload of the "Spatial Reuse
 * Parameter Set element" as described in IEEE Std 802.11ax-2021
 * section 9.4.2.252.
 */
struct ieee80211_he_spr {
	/* bit layout: IEEE80211_HE_SPR_* flags */
	u8 he_sr_control;
	u8 optional[];
} __packed;

/**
 * struct ieee80211_he_mu_edca_param_ac_rec - MU AC Parameter Record field
 * @aifsn: ACI/AIFSN
 * @ecw_min_max: ECWmin/ECWmax
 * @mu_edca_timer: MU EDCA Timer
 *
 * This structure represents the "MU AC Parameter Record" as described
 * in IEEE Std 802.11ax-2021 section 9.4.2.251, Figure 9-788p.
 */
struct ieee80211_he_mu_edca_param_ac_rec {
	u8 aifsn;
	u8 ecw_min_max;
	u8 mu_edca_timer;
} __packed;

/**
 * struct ieee80211_mu_edca_param_set - MU EDCA Parameter Set element
 * @mu_qos_info: QoS Info
 * @ac_be: MU AC_BE Parameter Record
 * @ac_bk: MU AC_BK Parameter Record
 * @ac_vi: MU AC_VI Parameter Record
 * @ac_vo: MU AC_VO Parameter Record
 *
 * This structure represents the payload of the "MU EDCA Parameter Set
 * element" as described in IEEE Std 802.11ax-2021 section 9.4.2.251.
 */
struct ieee80211_mu_edca_param_set {
	u8 mu_qos_info;
	struct ieee80211_he_mu_edca_param_ac_rec ac_be;
	struct ieee80211_he_mu_edca_param_ac_rec ac_bk;
	struct ieee80211_he_mu_edca_param_ac_rec ac_vi;
	struct ieee80211_he_mu_edca_param_ac_rec ac_vo;
} __packed;

/* Rx (low nibble) / Tx (high nibble) masks for the EHT rx_tx_*_max_nss bytes */
#define IEEE80211_EHT_MCS_NSS_RX	0x0f
#define IEEE80211_EHT_MCS_NSS_TX	0xf0

/**
 * struct ieee80211_eht_mcs_nss_supp_20mhz_only - EHT 20MHz only station max
 * supported NSS per MCS.
 *
 * For each field below, bits 0 - 3 indicate the maximal number of spatial
 * streams for Rx, and bits 4 - 7 indicate the maximal number of spatial streams
 * for Tx.
*
 * @rx_tx_mcs7_max_nss: indicates the maximum number of spatial streams
 *     supported for reception and the maximum number of spatial streams
 *     supported for transmission for MCS 0 - 7.
 * @rx_tx_mcs9_max_nss: indicates the maximum number of spatial streams
 *     supported for reception and the maximum number of spatial streams
 *     supported for transmission for MCS 8 - 9.
 * @rx_tx_mcs11_max_nss: indicates the maximum number of spatial streams
 *     supported for reception and the maximum number of spatial streams
 *     supported for transmission for MCS 10 - 11.
 * @rx_tx_mcs13_max_nss: indicates the maximum number of spatial streams
 *     supported for reception and the maximum number of spatial streams
 *     supported for transmission for MCS 12 - 13.
 * @rx_tx_max_nss: array of the previous fields for easier loop access
 */
struct ieee80211_eht_mcs_nss_supp_20mhz_only {
	/* the named bytes and the array alias the same four octets */
	union {
		struct {
			u8 rx_tx_mcs7_max_nss;
			u8 rx_tx_mcs9_max_nss;
			u8 rx_tx_mcs11_max_nss;
			u8 rx_tx_mcs13_max_nss;
		};
		u8 rx_tx_max_nss[4];
	};
};

/**
 * struct ieee80211_eht_mcs_nss_supp_bw - EHT max supported NSS per MCS (except
 * 20MHz only stations).
 *
 * For each field below, bits 0 - 3 indicate the maximal number of spatial
 * streams for Rx, and bits 4 - 7 indicate the maximal number of spatial streams
 * for Tx.
 *
 * @rx_tx_mcs9_max_nss: indicates the maximum number of spatial streams
 *     supported for reception and the maximum number of spatial streams
 *     supported for transmission for MCS 0 - 9.
 * @rx_tx_mcs11_max_nss: indicates the maximum number of spatial streams
 *     supported for reception and the maximum number of spatial streams
 *     supported for transmission for MCS 10 - 11.
 * @rx_tx_mcs13_max_nss: indicates the maximum number of spatial streams
 *     supported for reception and the maximum number of spatial streams
 *     supported for transmission for MCS 12 - 13.
* @rx_tx_max_nss: array of the previous fields for easier loop access
 */
struct ieee80211_eht_mcs_nss_supp_bw {
	/* the named bytes and the array alias the same three octets */
	union {
		struct {
			u8 rx_tx_mcs9_max_nss;
			u8 rx_tx_mcs11_max_nss;
			u8 rx_tx_mcs13_max_nss;
		};
		u8 rx_tx_max_nss[3];
	};
};

/**
 * struct ieee80211_eht_cap_elem_fixed - EHT capabilities fixed data
 *
 * This structure is the "EHT Capabilities element" fixed fields as
 * described in P802.11be_D2.0 section 9.4.2.313.
 *
 * @mac_cap_info: MAC capabilities, see IEEE80211_EHT_MAC_CAP*
 * @phy_cap_info: PHY capabilities, see IEEE80211_EHT_PHY_CAP*
 */
struct ieee80211_eht_cap_elem_fixed {
	u8 mac_cap_info[2];
	u8 phy_cap_info[9];
} __packed;

/**
 * struct ieee80211_eht_cap_elem - EHT capabilities element
 * @fixed: fixed parts, see &ieee80211_eht_cap_elem_fixed
 * @optional: optional parts
 */
struct ieee80211_eht_cap_elem {
	struct ieee80211_eht_cap_elem_fixed fixed;

	/*
	 * Followed by:
	 * Supported EHT-MCS And NSS Set field: 4, 3, 6 or 9 octets.
	 * EHT PPE Thresholds field: variable length.
	 */
	u8 optional[];
} __packed;

/* Bits of the @params byte of &struct ieee80211_eht_operation */
#define IEEE80211_EHT_OPER_INFO_PRESENT				0x01
#define IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT	0x02
#define IEEE80211_EHT_OPER_EHT_DEF_PE_DURATION			0x04
#define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_LIMIT		0x08
#define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_EXP_MASK	0x30
#define IEEE80211_EHT_OPER_MCS15_DISABLE			0x40

/**
 * struct ieee80211_eht_operation - eht operation element
 *
 * This structure is the "EHT Operation Element" fields as
 * described in P802.11be_D2.0 section 9.4.2.311
 *
 * @params: EHT operation element parameters. See &IEEE80211_EHT_OPER_*
 * @basic_mcs_nss: indicates the EHT-MCSs for each number of spatial streams in
 *     EHT PPDUs that are supported by all EHT STAs in the BSS in transmit and
 *     receive.
* @optional: optional parts
 */
struct ieee80211_eht_operation {
	u8 params;
	struct ieee80211_eht_mcs_nss_supp_20mhz_only basic_mcs_nss;
	u8 optional[];
} __packed;

/**
 * struct ieee80211_eht_operation_info - eht operation information
 *
 * @control: EHT operation information control.
 * @ccfs0: defines a channel center frequency for a 20, 40, 80, 160, or 320 MHz
 *     EHT BSS.
 * @ccfs1: defines a channel center frequency for a 160 or 320 MHz EHT BSS.
 * @optional: optional parts
 */
struct ieee80211_eht_operation_info {
	u8 control;
	u8 ccfs0;
	u8 ccfs1;
	u8 optional[];
} __packed;

/* 802.11ac VHT Capabilities */
#define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895			0x00000000
#define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991			0x00000001
#define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454			0x00000002
#define IEEE80211_VHT_CAP_MAX_MPDU_MASK				0x00000003
#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ		0x00000004
#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ	0x00000008
#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK			0x0000000C
#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_SHIFT			2
#define IEEE80211_VHT_CAP_RXLDPC				0x00000010
#define IEEE80211_VHT_CAP_SHORT_GI_80				0x00000020
#define IEEE80211_VHT_CAP_SHORT_GI_160				0x00000040
#define IEEE80211_VHT_CAP_TXSTBC				0x00000080
#define IEEE80211_VHT_CAP_RXSTBC_1				0x00000100
#define IEEE80211_VHT_CAP_RXSTBC_2				0x00000200
#define IEEE80211_VHT_CAP_RXSTBC_3				0x00000300
#define IEEE80211_VHT_CAP_RXSTBC_4				0x00000400
#define IEEE80211_VHT_CAP_RXSTBC_MASK				0x00000700
#define IEEE80211_VHT_CAP_RXSTBC_SHIFT				8
#define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE			0x00000800
#define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE			0x00001000
#define IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT                  13
#define IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK			\
		(7 << IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT)
#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT		16
#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK		\
		(7 << IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT)
#define IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE			0x00080000
#define IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE			0x00100000
#define IEEE80211_VHT_CAP_VHT_TXOP_PS				0x00200000
#define IEEE80211_VHT_CAP_HTC_VHT				0x00400000
#define IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT	23
#define IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK	\
		(7 << IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT)
#define IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB	0x08000000
#define IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB	0x0c000000
#define IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN			0x10000000
#define IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN			0x20000000
#define IEEE80211_VHT_CAP_EXT_NSS_BW_SHIFT			30
#define IEEE80211_VHT_CAP_EXT_NSS_BW_MASK			0xc0000000

/**
 * ieee80211_get_vht_max_nss - return max NSS for a given bandwidth/MCS
 * @cap: VHT capabilities of the peer
 * @bw: bandwidth to use
 * @mcs: MCS index to use
 * @ext_nss_bw_capable: indicates whether or not the local transmitter
 *	(rate scaling algorithm) can deal with the new logic
 *	(dot11VHTExtendedNSSBWCapable)
 * @max_vht_nss: current maximum NSS as advertised by the STA in
 *	operating mode notification, can be 0 in which case the
 *	capability data will be used to derive this (from MCS support)
 * Return: The maximum NSS that can be used for the given bandwidth/MCS
 *	combination
 *
 * Due to the VHT Extended NSS Bandwidth Support, the maximum NSS can
 * vary for a given BW/MCS. This function parses the data.
 *
 * Note: This function is exported by cfg80211.
*/
int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
			      enum ieee80211_vht_chanwidth bw,
			      int mcs, bool ext_nss_bw_capable,
			      unsigned int max_vht_nss);

/* 802.11ax HE MAC capabilities */
#define IEEE80211_HE_MAC_CAP0_HTC_HE				0x01
#define IEEE80211_HE_MAC_CAP0_TWT_REQ				0x02
#define IEEE80211_HE_MAC_CAP0_TWT_RES				0x04
#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_NOT_SUPP		0x00
#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_1		0x08
#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_2		0x10
#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_3		0x18
#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_MASK			0x18
#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_1		0x00
#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_2		0x20
#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_4		0x40
#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_8		0x60
#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_16		0x80
#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_32		0xa0
#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_64		0xc0
#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_UNLIMITED	0xe0
#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_MASK		0xe0

#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_UNLIMITED		0x00
#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_128			0x01
#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_256			0x02
#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_512			0x03
#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_MASK		0x03
#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_0US		0x00
#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_8US		0x04
#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US		0x08
#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_MASK		0x0c
#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_1		0x00
#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_2		0x10
#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_3		0x20
#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_4		0x30
#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_5		0x40
#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_6		0x50
#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_7		0x60
#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8		0x70
#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_MASK		0x70

/* Link adaptation is split between byte HE_MAC_CAP1 and
 * HE_MAC_CAP2. It should be set only if IEEE80211_HE_MAC_CAP0_HTC_HE
 * is set, in which case the following values apply:
 * 0 = No feedback.
 * 1 = reserved.
 * 2 = Unsolicited feedback.
 * 3 = both
 */
#define IEEE80211_HE_MAC_CAP1_LINK_ADAPTATION			0x80

#define IEEE80211_HE_MAC_CAP2_LINK_ADAPTATION			0x01
#define IEEE80211_HE_MAC_CAP2_ALL_ACK				0x02
#define IEEE80211_HE_MAC_CAP2_TRS				0x04
#define IEEE80211_HE_MAC_CAP2_BSR				0x08
#define IEEE80211_HE_MAC_CAP2_BCAST_TWT				0x10
#define IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP			0x20
#define IEEE80211_HE_MAC_CAP2_MU_CASCADING			0x40
#define IEEE80211_HE_MAC_CAP2_ACK_EN				0x80

#define IEEE80211_HE_MAC_CAP3_OMI_CONTROL			0x02
#define IEEE80211_HE_MAC_CAP3_OFDMA_RA				0x04

/* The maximum length of an A-MPDU is defined by the combination of the Maximum
 * A-MPDU Length Exponent field in the HT capabilities, VHT capabilities and the
 * same field in the HE capabilities.
*/
#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_0		0x00
#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_1		0x08
#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2		0x10
#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3		0x18
#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK		0x18
#define IEEE80211_HE_MAC_CAP3_AMSDU_FRAG			0x20
#define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED			0x40
#define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS		0x80

#define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG		0x01
#define IEEE80211_HE_MAC_CAP4_QTP				0x02
#define IEEE80211_HE_MAC_CAP4_BQR				0x04
#define IEEE80211_HE_MAC_CAP4_PSR_RESP				0x08
#define IEEE80211_HE_MAC_CAP4_NDP_FB_REP			0x10
#define IEEE80211_HE_MAC_CAP4_OPS				0x20
#define IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU			0x40
/* Multi TID agg TX is split between byte #4 and #5
 * The value is a combination of B39,B40,B41
 */
#define IEEE80211_HE_MAC_CAP4_MULTI_TID_AGG_TX_QOS_B39		0x80

#define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B40		0x01
#define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B41		0x02
#define IEEE80211_HE_MAC_CAP5_SUBCHAN_SELECTIVE_TRANSMISSION	0x04
#define IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU			0x08
#define IEEE80211_HE_MAC_CAP5_OM_CTRL_UL_MU_DATA_DIS_RX		0x10
#define IEEE80211_HE_MAC_CAP5_HE_DYNAMIC_SM_PS			0x20
#define IEEE80211_HE_MAC_CAP5_PUNCTURED_SOUNDING		0x40
#define IEEE80211_HE_MAC_CAP5_HT_VHT_TRIG_FRAME_RX		0x80

#define IEEE80211_HE_VHT_MAX_AMPDU_FACTOR	20
#define IEEE80211_HE_HT_MAX_AMPDU_FACTOR	16
#define IEEE80211_HE_6GHZ_MAX_AMPDU_FACTOR	13

/* 802.11ax HE PHY capabilities */
#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G		0x02
#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G	0x04
#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G		0x08
#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G	0x10
#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL		0x1e

#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_2G	0x20
#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_5G	0x40
#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK			0xfe

#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_20MHZ	0x01
#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_40MHZ	0x02
#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_20MHZ	0x04
#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_40MHZ	0x08
#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK			0x0f
#define IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A				0x10
#define IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD			0x20
#define IEEE80211_HE_PHY_CAP1_HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US		0x40
/* Midamble RX/TX Max NSTS is split between byte #2 and byte #3 */
#define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS			0x80

#define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_TX_MAX_NSTS			0x01
#define IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US			0x02
#define IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ			0x04
#define IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ			0x08
#define IEEE80211_HE_PHY_CAP2_DOPPLER_TX				0x10
#define IEEE80211_HE_PHY_CAP2_DOPPLER_RX				0x20

/* Note that the meaning of UL MU below is different between an AP and a non-AP
 * sta, where in the AP case it indicates support for Rx and in the non-AP sta
 * case it indicates support for Tx.
*/ #define IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO 0x40 #define IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO 0x80 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM 0x00 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_BPSK 0x01 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_QPSK 0x02 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_16_QAM 0x03 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_MASK 0x03 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_1 0x00 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_2 0x04 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_NO_DCM 0x00 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_BPSK 0x08 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_QPSK 0x10 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_16_QAM 0x18 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_MASK 0x18 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_1 0x00 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_2 0x20 #define IEEE80211_HE_PHY_CAP3_RX_PARTIAL_BW_SU_IN_20MHZ_MU 0x40 #define IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER 0x80 #define IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE 0x01 #define IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER 0x02 /* Minimal allowed value of Max STS under 80MHz is 3 */ #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4 0x0c #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_5 0x10 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_6 0x14 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_7 0x18 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_8 0x1c #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_MASK 0x1c /* Minimal allowed value of Max STS above 80MHz is 3 */ #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_4 0x60 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_5 0x80 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_6 0xa0 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_7 0xc0 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_8 0xe0 #define 
IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_MASK 0xe0 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_1 0x00 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2 0x01 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_3 0x02 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_4 0x03 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_5 0x04 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_6 0x05 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_7 0x06 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_8 0x07 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK 0x07 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_1 0x00 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2 0x08 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_3 0x10 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_4 0x18 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_5 0x20 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_6 0x28 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_7 0x30 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_8 0x38 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_MASK 0x38 #define IEEE80211_HE_PHY_CAP5_NG16_SU_FEEDBACK 0x40 #define IEEE80211_HE_PHY_CAP5_NG16_MU_FEEDBACK 0x80 #define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU 0x01 #define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU 0x02 #define IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMING_FB 0x04 #define IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMING_PARTIAL_BW_FB 0x08 #define IEEE80211_HE_PHY_CAP6_TRIG_CQI_FB 0x10 #define IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE 0x20 #define IEEE80211_HE_PHY_CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO 0x40 #define IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT 0x80 #define IEEE80211_HE_PHY_CAP7_PSR_BASED_SR 0x01 #define 
IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_SUPP 0x02 #define IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI 0x04 #define IEEE80211_HE_PHY_CAP7_MAX_NC_1 0x08 #define IEEE80211_HE_PHY_CAP7_MAX_NC_2 0x10 #define IEEE80211_HE_PHY_CAP7_MAX_NC_3 0x18 #define IEEE80211_HE_PHY_CAP7_MAX_NC_4 0x20 #define IEEE80211_HE_PHY_CAP7_MAX_NC_5 0x28 #define IEEE80211_HE_PHY_CAP7_MAX_NC_6 0x30 #define IEEE80211_HE_PHY_CAP7_MAX_NC_7 0x38 #define IEEE80211_HE_PHY_CAP7_MAX_NC_MASK 0x38 #define IEEE80211_HE_PHY_CAP7_STBC_TX_ABOVE_80MHZ 0x40 #define IEEE80211_HE_PHY_CAP7_STBC_RX_ABOVE_80MHZ 0x80 #define IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI 0x01 #define IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G 0x02 #define IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU 0x04 #define IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU 0x08 #define IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI 0x10 #define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_TX_2X_AND_1XLTF 0x20 #define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_242 0x00 #define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_484 0x40 #define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_996 0x80 #define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_2x996 0xc0 #define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_MASK 0xc0 #define IEEE80211_HE_PHY_CAP9_LONGER_THAN_16_SIGB_OFDM_SYM 0x01 #define IEEE80211_HE_PHY_CAP9_NON_TRIGGERED_CQI_FEEDBACK 0x02 #define IEEE80211_HE_PHY_CAP9_TX_1024_QAM_LESS_THAN_242_TONE_RU 0x04 #define IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU 0x08 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB 0x10 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB 0x20 #define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_0US 0x0 #define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_8US 0x1 #define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US 0x2 #define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_RESERVED 0x3 #define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_POS 6 #define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK 0xc0 #define 
IEEE80211_HE_PHY_CAP10_HE_MU_M1RU_MAX_LTF 0x01 /* 802.11ax HE TX/RX MCS NSS Support */ #define IEEE80211_TX_RX_MCS_NSS_SUPP_HIGHEST_MCS_POS (3) #define IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_POS (6) #define IEEE80211_TX_RX_MCS_NSS_SUPP_RX_BITMAP_POS (11) #define IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_MASK 0x07c0 #define IEEE80211_TX_RX_MCS_NSS_SUPP_RX_BITMAP_MASK 0xf800 /* TX/RX HE MCS Support field Highest MCS subfield encoding */ enum ieee80211_he_highest_mcs_supported_subfield_enc { HIGHEST_MCS_SUPPORTED_MCS7 = 0, HIGHEST_MCS_SUPPORTED_MCS8, HIGHEST_MCS_SUPPORTED_MCS9, HIGHEST_MCS_SUPPORTED_MCS10, HIGHEST_MCS_SUPPORTED_MCS11, }; /* Calculate 802.11ax HE capabilities IE Tx/Rx HE MCS NSS Support Field size */ static inline u8 ieee80211_he_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap) { u8 count = 4; if (he_cap->phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G) count += 4; if (he_cap->phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) count += 4; return count; } /* 802.11ax HE PPE Thresholds */ #define IEEE80211_PPE_THRES_NSS_SUPPORT_2NSS (1) #define IEEE80211_PPE_THRES_NSS_POS (0) #define IEEE80211_PPE_THRES_NSS_MASK (7) #define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_2x966_AND_966_RU \ (BIT(5) | BIT(6)) #define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK 0x78 #define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_POS (3) #define IEEE80211_PPE_THRES_INFO_PPET_SIZE (3) #define IEEE80211_HE_PPE_THRES_INFO_HEADER_SIZE (7) /* * Calculate 802.11ax HE capabilities IE PPE field size * Input: Header byte of ppe_thres (first byte), and HE capa IE's PHY cap u8* */ static inline u8 ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) { u8 n; if ((phy_cap_info[6] & IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) == 0) return 0; n = hweight8(ppe_thres_hdr & IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK); n *= (1 + ((ppe_thres_hdr & IEEE80211_PPE_THRES_NSS_MASK) >> IEEE80211_PPE_THRES_NSS_POS)); /* * Each pair is 6 bits, and we 
need to add the 7 "header" bits to the * total size. */ n = (n * IEEE80211_PPE_THRES_INFO_PPET_SIZE * 2) + 7; n = DIV_ROUND_UP(n, 8); return n; } static inline bool ieee80211_he_capa_size_ok(const u8 *data, u8 len) { const struct ieee80211_he_cap_elem *he_cap_ie_elem = (const void *)data; u8 needed = sizeof(*he_cap_ie_elem); if (len < needed) return false; needed += ieee80211_he_mcs_nss_size(he_cap_ie_elem); if (len < needed) return false; if (he_cap_ie_elem->phy_cap_info[6] & IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) { if (len < needed + 1) return false; needed += ieee80211_he_ppe_size(data[needed], he_cap_ie_elem->phy_cap_info); } return len >= needed; } /* HE Operation defines */ #define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK 0x00000007 #define IEEE80211_HE_OPERATION_TWT_REQUIRED 0x00000008 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK 0x00003ff0 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET 4 #define IEEE80211_HE_OPERATION_VHT_OPER_INFO 0x00004000 #define IEEE80211_HE_OPERATION_CO_HOSTED_BSS 0x00008000 #define IEEE80211_HE_OPERATION_ER_SU_DISABLE 0x00010000 #define IEEE80211_HE_OPERATION_6GHZ_OP_INFO 0x00020000 #define IEEE80211_HE_OPERATION_BSS_COLOR_MASK 0x3f000000 #define IEEE80211_HE_OPERATION_BSS_COLOR_OFFSET 24 #define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x40000000 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED 0x80000000 #define IEEE80211_6GHZ_CTRL_REG_LPI_AP 0 #define IEEE80211_6GHZ_CTRL_REG_SP_AP 1 #define IEEE80211_6GHZ_CTRL_REG_VLP_AP 2 #define IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP 3 #define IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP_OLD 4 #define IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP 8 /** * struct ieee80211_he_6ghz_oper - HE 6 GHz operation Information field * @primary: primary channel * @control: control flags * @ccfs0: channel center frequency segment 0 * @ccfs1: channel center frequency segment 1 * @minrate: minimum rate (in 1 Mbps units) */ struct ieee80211_he_6ghz_oper { u8 primary; #define 
IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH 0x3 #define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ 0 #define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ 1 #define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ 2 #define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ 3 #define IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON 0x4 #define IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO 0x78 u8 control; u8 ccfs0; u8 ccfs1; u8 minrate; } __packed; /** * enum ieee80211_reg_conn_bits - represents Regulatory connectivity field bits. * * This enumeration defines bit flags used to represent regulatory connectivity * field bits. * * @IEEE80211_REG_CONN_LPI_VALID: Indicates whether the LPI bit is valid. * @IEEE80211_REG_CONN_LPI_VALUE: Represents the value of the LPI bit. * @IEEE80211_REG_CONN_SP_VALID: Indicates whether the SP bit is valid. * @IEEE80211_REG_CONN_SP_VALUE: Represents the value of the SP bit. */ enum ieee80211_reg_conn_bits { IEEE80211_REG_CONN_LPI_VALID = BIT(0), IEEE80211_REG_CONN_LPI_VALUE = BIT(1), IEEE80211_REG_CONN_SP_VALID = BIT(2), IEEE80211_REG_CONN_SP_VALUE = BIT(3), }; /* transmit power interpretation type of transmit power envelope element */ enum ieee80211_tx_power_intrpt_type { IEEE80211_TPE_LOCAL_EIRP, IEEE80211_TPE_LOCAL_EIRP_PSD, IEEE80211_TPE_REG_CLIENT_EIRP, IEEE80211_TPE_REG_CLIENT_EIRP_PSD, }; /* category type of transmit power envelope element */ enum ieee80211_tx_power_category_6ghz { IEEE80211_TPE_CAT_6GHZ_DEFAULT = 0, IEEE80211_TPE_CAT_6GHZ_SUBORDINATE = 1, }; /* * For IEEE80211_TPE_LOCAL_EIRP / IEEE80211_TPE_REG_CLIENT_EIRP, * setting to 63.5 dBm means no constraint. */ #define IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT 127 /* * For IEEE80211_TPE_LOCAL_EIRP_PSD / IEEE80211_TPE_REG_CLIENT_EIRP_PSD, * setting to 127 indicates no PSD limit for the 20 MHz channel. 
*/ #define IEEE80211_TPE_PSD_NO_LIMIT 127 /** * struct ieee80211_tx_pwr_env - Transmit Power Envelope * @info: Transmit Power Information field * @variable: Maximum Transmit Power field * * This structure represents the payload of the "Transmit Power * Envelope element" as described in IEEE Std 802.11ax-2021 section * 9.4.2.161 */ struct ieee80211_tx_pwr_env { u8 info; u8 variable[]; } __packed; #define IEEE80211_TX_PWR_ENV_INFO_COUNT 0x7 #define IEEE80211_TX_PWR_ENV_INFO_INTERPRET 0x38 #define IEEE80211_TX_PWR_ENV_INFO_CATEGORY 0xC0 #define IEEE80211_TX_PWR_ENV_EXT_COUNT 0xF static inline bool ieee80211_valid_tpe_element(const u8 *data, u8 len) { const struct ieee80211_tx_pwr_env *env = (const void *)data; u8 count, interpret, category; u8 needed = sizeof(*env); u8 N; /* also called N in the spec */ if (len < needed) return false; count = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_COUNT); interpret = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_INTERPRET); category = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_CATEGORY); switch (category) { case IEEE80211_TPE_CAT_6GHZ_DEFAULT: case IEEE80211_TPE_CAT_6GHZ_SUBORDINATE: break; default: return false; } switch (interpret) { case IEEE80211_TPE_LOCAL_EIRP: case IEEE80211_TPE_REG_CLIENT_EIRP: if (count > 3) return false; /* count == 0 encodes 1 value for 20 MHz, etc. */ needed += count + 1; if (len < needed) return false; /* there can be extension fields not accounted for in 'count' */ return true; case IEEE80211_TPE_LOCAL_EIRP_PSD: case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: if (count > 4) return false; N = count ? 
1 << (count - 1) : 1; needed += N; if (len < needed) return false; if (len > needed) { u8 K = u8_get_bits(env->variable[N], IEEE80211_TX_PWR_ENV_EXT_COUNT); needed += 1 + K; if (len < needed) return false; } return true; } return false; } /* * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size * @he_oper_ie: byte data of the He Operations IE, stating from the byte * after the ext ID byte. It is assumed that he_oper_ie has at least * sizeof(struct ieee80211_he_operation) bytes, the caller must have * validated this. * @return the actual size of the IE data (not including header), or 0 on error */ static inline u8 ieee80211_he_oper_size(const u8 *he_oper_ie) { const struct ieee80211_he_operation *he_oper = (const void *)he_oper_ie; u8 oper_len = sizeof(struct ieee80211_he_operation); u32 he_oper_params; /* Make sure the input is not NULL */ if (!he_oper_ie) return 0; /* Calc required length */ he_oper_params = le32_to_cpu(he_oper->he_oper_params); if (he_oper_params & IEEE80211_HE_OPERATION_VHT_OPER_INFO) oper_len += 3; if (he_oper_params & IEEE80211_HE_OPERATION_CO_HOSTED_BSS) oper_len++; if (he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO) oper_len += sizeof(struct ieee80211_he_6ghz_oper); /* Add the first byte (extension ID) to the total length */ oper_len++; return oper_len; } /** * ieee80211_he_6ghz_oper - obtain 6 GHz operation field * @he_oper: HE operation element (must be pre-validated for size) * but may be %NULL * * Return: a pointer to the 6 GHz operation field, or %NULL */ static inline const struct ieee80211_he_6ghz_oper * ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) { const u8 *ret; u32 he_oper_params; if (!he_oper) return NULL; ret = (const void *)&he_oper->optional; he_oper_params = le32_to_cpu(he_oper->he_oper_params); if (!(he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO)) return NULL; if (he_oper_params & IEEE80211_HE_OPERATION_VHT_OPER_INFO) ret += 3; if (he_oper_params & 
IEEE80211_HE_OPERATION_CO_HOSTED_BSS) ret++; return (const void *)ret; } /* HE Spatial Reuse defines */ #define IEEE80211_HE_SPR_PSR_DISALLOWED BIT(0) #define IEEE80211_HE_SPR_NON_SRG_OBSS_PD_SR_DISALLOWED BIT(1) #define IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT BIT(2) #define IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT BIT(3) #define IEEE80211_HE_SPR_HESIGA_SR_VAL15_ALLOWED BIT(4) /* * ieee80211_he_spr_size - calculate 802.11ax HE Spatial Reuse IE size * @he_spr_ie: byte data of the He Spatial Reuse IE, stating from the byte * after the ext ID byte. It is assumed that he_spr_ie has at least * sizeof(struct ieee80211_he_spr) bytes, the caller must have validated * this * @return the actual size of the IE data (not including header), or 0 on error */ static inline u8 ieee80211_he_spr_size(const u8 *he_spr_ie) { const struct ieee80211_he_spr *he_spr = (const void *)he_spr_ie; u8 spr_len = sizeof(struct ieee80211_he_spr); u8 he_spr_params; /* Make sure the input is not NULL */ if (!he_spr_ie) return 0; /* Calc required length */ he_spr_params = he_spr->he_sr_control; if (he_spr_params & IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT) spr_len++; if (he_spr_params & IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) spr_len += 18; /* Add the first byte (extension ID) to the total length */ spr_len++; return spr_len; } /* S1G Capabilities Information field */ #define IEEE80211_S1G_CAPABILITY_LEN 15 #define S1G_CAP0_S1G_LONG BIT(0) #define S1G_CAP0_SGI_1MHZ BIT(1) #define S1G_CAP0_SGI_2MHZ BIT(2) #define S1G_CAP0_SGI_4MHZ BIT(3) #define S1G_CAP0_SGI_8MHZ BIT(4) #define S1G_CAP0_SGI_16MHZ BIT(5) #define S1G_CAP0_SUPP_CH_WIDTH GENMASK(7, 6) #define S1G_SUPP_CH_WIDTH_2 0 #define S1G_SUPP_CH_WIDTH_4 1 #define S1G_SUPP_CH_WIDTH_8 2 #define S1G_SUPP_CH_WIDTH_16 3 #define S1G_SUPP_CH_WIDTH_MAX(cap) ((1 << FIELD_GET(S1G_CAP0_SUPP_CH_WIDTH, \ cap[0])) << 1) #define S1G_CAP1_RX_LDPC BIT(0) #define S1G_CAP1_TX_STBC BIT(1) #define S1G_CAP1_RX_STBC BIT(2) #define S1G_CAP1_SU_BFER BIT(3) #define 
S1G_CAP1_SU_BFEE	BIT(4)
#define S1G_CAP1_BFEE_STS	GENMASK(7, 5)

/*
 * NOTE(review): the CAPn prefix presumably selects octet n of the S1G
 * capabilities array, the way S1G_SUPP_CH_WIDTH_MAX() applies a CAP0 mask
 * to cap[0] - confirm against the users of these masks.
 */
#define S1G_CAP2_SOUNDING_DIMENSIONS	GENMASK(2, 0)
#define S1G_CAP2_MU_BFER	BIT(3)
#define S1G_CAP2_MU_BFEE	BIT(4)
#define S1G_CAP2_PLUS_HTC_VHT	BIT(5)
#define S1G_CAP2_TRAVELING_PILOT	GENMASK(7, 6)

#define S1G_CAP3_RD_RESPONDER	BIT(0)
#define S1G_CAP3_HT_DELAYED_BA	BIT(1)
#define S1G_CAP3_MAX_MPDU_LEN	BIT(2)
#define S1G_CAP3_MAX_AMPDU_LEN_EXP	GENMASK(4, 3)
#define S1G_CAP3_MIN_MPDU_START	GENMASK(7, 5)

#define S1G_CAP4_UPLINK_SYNC	BIT(0)
#define S1G_CAP4_DYNAMIC_AID	BIT(1)
#define S1G_CAP4_BAT	BIT(2)
#define S1G_CAP4_TIME_ADE	BIT(3)
#define S1G_CAP4_NON_TIM	BIT(4)
#define S1G_CAP4_GROUP_AID	BIT(5)
#define S1G_CAP4_STA_TYPE	GENMASK(7, 6)

#define S1G_CAP5_CENT_AUTH_CONTROL	BIT(0)
#define S1G_CAP5_DIST_AUTH_CONTROL	BIT(1)
#define S1G_CAP5_AMSDU	BIT(2)
#define S1G_CAP5_AMPDU	BIT(3)
#define S1G_CAP5_ASYMMETRIC_BA	BIT(4)
#define S1G_CAP5_FLOW_CONTROL	BIT(5)
#define S1G_CAP5_SECTORIZED_BEAM	GENMASK(7, 6)

#define S1G_CAP6_OBSS_MITIGATION	BIT(0)
#define S1G_CAP6_FRAGMENT_BA	BIT(1)
#define S1G_CAP6_NDP_PS_POLL	BIT(2)
#define S1G_CAP6_RAW_OPERATION	BIT(3)
#define S1G_CAP6_PAGE_SLICING	BIT(4)
#define S1G_CAP6_TXOP_SHARING_IMP_ACK	BIT(5)
#define S1G_CAP6_VHT_LINK_ADAPT	GENMASK(7, 6)

#define S1G_CAP7_TACK_AS_PS_POLL	BIT(0)
#define S1G_CAP7_DUP_1MHZ	BIT(1)
#define S1G_CAP7_MCS_NEGOTIATION	BIT(2)
#define S1G_CAP7_1MHZ_CTL_RESPONSE_PREAMBLE	BIT(3)
#define S1G_CAP7_NDP_BFING_REPORT_POLL	BIT(4)
#define S1G_CAP7_UNSOLICITED_DYN_AID	BIT(5)
#define S1G_CAP7_SECTOR_TRAINING_OPERATION	BIT(6)
#define S1G_CAP7_TEMP_PS_MODE_SWITCH	BIT(7)

#define S1G_CAP8_TWT_GROUPING	BIT(0)
#define S1G_CAP8_BDT	BIT(1)
#define S1G_CAP8_COLOR	GENMASK(4, 2)
#define S1G_CAP8_TWT_REQUEST	BIT(5)
#define S1G_CAP8_TWT_RESPOND	BIT(6)
#define S1G_CAP8_PV1_FRAME	BIT(7)

#define S1G_CAP9_LINK_ADAPT_PER_CONTROL_RESPONSE	BIT(0)

/* Bit layout of an S1G operating channel width octet (bits 0, 1-4 and 5) */
#define S1G_OPER_CH_WIDTH_PRIMARY	BIT(0)
#define S1G_OPER_CH_WIDTH_OPER	GENMASK(4, 1)
#define S1G_OPER_CH_PRIMARY_LOCATION	BIT(5)
#define S1G_2M_PRIMARY_LOCATION_LOWER	0
#define S1G_2M_PRIMARY_LOCATION_UPPER	1

/* EHT MAC capabilities as defined in P802.11be_D2.0 section 9.4.2.313.2 */
#define IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS	0x01
#define IEEE80211_EHT_MAC_CAP0_OM_CONTROL	0x02
#define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1	0x04
#define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2	0x08
#define IEEE80211_EHT_MAC_CAP0_RESTRICTED_TWT	0x10
#define IEEE80211_EHT_MAC_CAP0_SCS_TRAFFIC_DESC	0x20
#define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK	0xc0
/* values of the MAX_MPDU_LEN field (not shifted into mask position) */
#define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_3895	0
#define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_7991	1
#define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454	2

#define IEEE80211_EHT_MAC_CAP1_MAX_AMPDU_LEN_MASK	0x01
#define IEEE80211_EHT_MAC_CAP1_EHT_TRS	0x02
#define IEEE80211_EHT_MAC_CAP1_TXOP_RET	0x04
#define IEEE80211_EHT_MAC_CAP1_TWO_BQRS	0x08
#define IEEE80211_EHT_MAC_CAP1_EHT_LINK_ADAPT_MASK	0x30
#define IEEE80211_EHT_MAC_CAP1_UNSOL_EPCS_PRIO_ACCESS	0x40

/* EHT PHY capabilities as defined in P802.11be_D2.0 section 9.4.2.313.3 */
#define IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ	0x02
#define IEEE80211_EHT_PHY_CAP0_242_TONE_RU_GT20MHZ	0x04
#define IEEE80211_EHT_PHY_CAP0_NDP_4_EHT_LFT_32_GI	0x08
#define IEEE80211_EHT_PHY_CAP0_PARTIAL_BW_UL_MU_MIMO	0x10
#define IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER	0x20
#define IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE	0x40

/*
 * EHT beamformee number of spatial streams <= 80MHz is split between the
 * CAP0 mask (0x80) and the CAP1 mask (0x03)
 */
#define IEEE80211_EHT_PHY_CAP0_BEAMFORMEE_SS_80MHZ_MASK	0x80
#define IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_80MHZ_MASK	0x03

#define IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_160MHZ_MASK	0x1c
#define IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_320MHZ_MASK	0xe0

#define IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_80MHZ_MASK	0x07
#define IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_160MHZ_MASK	0x38

/*
 * EHT number of sounding dimensions for 320MHz is split between the
 * CAP2 mask (0xc0) and the CAP3 mask (0x01)
 */
#define IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK	0xc0
#define IEEE80211_EHT_PHY_CAP3_SOUNDING_DIM_320MHZ_MASK	0x01

#define IEEE80211_EHT_PHY_CAP3_NG_16_SU_FEEDBACK	0x02
#define IEEE80211_EHT_PHY_CAP3_NG_16_MU_FEEDBACK	0x04
#define IEEE80211_EHT_PHY_CAP3_CODEBOOK_4_2_SU_FDBK	0x08
#define IEEE80211_EHT_PHY_CAP3_CODEBOOK_7_5_MU_FDBK	0x10
#define IEEE80211_EHT_PHY_CAP3_TRIG_SU_BF_FDBK	0x20
#define IEEE80211_EHT_PHY_CAP3_TRIG_MU_BF_PART_BW_FDBK	0x40
#define IEEE80211_EHT_PHY_CAP3_TRIG_CQI_FDBK	0x80

#define IEEE80211_EHT_PHY_CAP4_PART_BW_DL_MU_MIMO	0x01
#define IEEE80211_EHT_PHY_CAP4_PSR_SR_SUPP	0x02
#define IEEE80211_EHT_PHY_CAP4_POWER_BOOST_FACT_SUPP	0x04
#define IEEE80211_EHT_PHY_CAP4_EHT_MU_PPDU_4_EHT_LTF_08_GI	0x08
#define IEEE80211_EHT_PHY_CAP4_MAX_NC_MASK	0xf0

#define IEEE80211_EHT_PHY_CAP5_NON_TRIG_CQI_FEEDBACK	0x01
#define IEEE80211_EHT_PHY_CAP5_TX_LESS_242_TONE_RU_SUPP	0x02
#define IEEE80211_EHT_PHY_CAP5_RX_LESS_242_TONE_RU_SUPP	0x04
#define IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT	0x08
#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_MASK	0x30
/* values of the COMMON_NOMINAL_PKT_PAD field (not shifted into mask position) */
#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_0US	0
#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_8US	1
#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_16US	2
#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_20US	3

/*
 * Maximum number of supported EHT LTF is split between the CAP5 mask (0xc0)
 * and the CAP6 mask (0x07)
 */
#define IEEE80211_EHT_PHY_CAP5_MAX_NUM_SUPP_EHT_LTF_MASK	0xc0
/* NOTE(review): 0x40 lies inside the 0xc0 mask above - confirm intended */
#define IEEE80211_EHT_PHY_CAP5_SUPP_EXTRA_EHT_LTF	0x40
#define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK	0x07

#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ	0x08
#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ	0x30
#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ	0x40
/* 0x78 is the union of the three MCS15_SUPP values above */
#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK	0x78
#define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP	0x80

#define IEEE80211_EHT_PHY_CAP7_20MHZ_STA_RX_NDP_WIDER_BW	0x01
#define IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_80MHZ	0x02
#define IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_160MHZ	0x04
#define IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_320MHZ	0x08
#define
IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ 0x10 #define IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ 0x20 #define IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ 0x40 #define IEEE80211_EHT_PHY_CAP7_TB_SOUNDING_FDBK_RATE_LIMIT 0x80 #define IEEE80211_EHT_PHY_CAP8_RX_1024QAM_WIDER_BW_DL_OFDMA 0x01 #define IEEE80211_EHT_PHY_CAP8_RX_4096QAM_WIDER_BW_DL_OFDMA 0x02 /* * EHT operation channel width as defined in P802.11be_D2.0 section 9.4.2.311 */ #define IEEE80211_EHT_OPER_CHAN_WIDTH 0x7 #define IEEE80211_EHT_OPER_CHAN_WIDTH_20MHZ 0 #define IEEE80211_EHT_OPER_CHAN_WIDTH_40MHZ 1 #define IEEE80211_EHT_OPER_CHAN_WIDTH_80MHZ 2 #define IEEE80211_EHT_OPER_CHAN_WIDTH_160MHZ 3 #define IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ 4 /* Calculate 802.11be EHT capabilities IE Tx/Rx EHT MCS NSS Support Field size */ static inline u8 ieee80211_eht_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap, const struct ieee80211_eht_cap_elem_fixed *eht_cap, bool from_ap) { u8 count = 0; /* on 2.4 GHz, if it supports 40 MHz, the result is 3 */ if (he_cap->phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G) return 3; /* on 2.4 GHz, these three bits are reserved, so should be 0 */ if (he_cap->phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G) count += 3; if (he_cap->phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G) count += 3; if (eht_cap->phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) count += 3; if (count) return count; return from_ap ? 
3 : 4; } /* 802.11be EHT PPE Thresholds */ #define IEEE80211_EHT_PPE_THRES_NSS_POS 0 #define IEEE80211_EHT_PPE_THRES_NSS_MASK 0xf #define IEEE80211_EHT_PPE_THRES_RU_INDEX_BITMASK_MASK 0x1f0 #define IEEE80211_EHT_PPE_THRES_INFO_PPET_SIZE 3 #define IEEE80211_EHT_PPE_THRES_INFO_HEADER_SIZE 9 /* * Calculate 802.11be EHT capabilities IE EHT field size */ static inline u8 ieee80211_eht_ppe_size(u16 ppe_thres_hdr, const u8 *phy_cap_info) { u32 n; if (!(phy_cap_info[5] & IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT)) return 0; n = hweight16(ppe_thres_hdr & IEEE80211_EHT_PPE_THRES_RU_INDEX_BITMASK_MASK); n *= 1 + u16_get_bits(ppe_thres_hdr, IEEE80211_EHT_PPE_THRES_NSS_MASK); /* * Each pair is 6 bits, and we need to add the 9 "header" bits to the * total size. */ n = n * IEEE80211_EHT_PPE_THRES_INFO_PPET_SIZE * 2 + IEEE80211_EHT_PPE_THRES_INFO_HEADER_SIZE; return DIV_ROUND_UP(n, 8); } static inline bool ieee80211_eht_capa_size_ok(const u8 *he_capa, const u8 *data, u8 len, bool from_ap) { const struct ieee80211_eht_cap_elem_fixed *elem = (const void *)data; u8 needed = sizeof(struct ieee80211_eht_cap_elem_fixed); if (len < needed || !he_capa) return false; needed += ieee80211_eht_mcs_nss_size((const void *)he_capa, (const void *)data, from_ap); if (len < needed) return false; if (elem->phy_cap_info[5] & IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT) { u16 ppe_thres_hdr; if (len < needed + sizeof(ppe_thres_hdr)) return false; ppe_thres_hdr = get_unaligned_le16(data + needed); needed += ieee80211_eht_ppe_size(ppe_thres_hdr, elem->phy_cap_info); } return len >= needed; } static inline bool ieee80211_eht_oper_size_ok(const u8 *data, u8 len) { const struct ieee80211_eht_operation *elem = (const void *)data; u8 needed = sizeof(*elem); if (len < needed) return false; if (elem->params & IEEE80211_EHT_OPER_INFO_PRESENT) { needed += 3; if (elem->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT) needed += 2; } return len >= needed; } /* must validate 
ieee80211_eht_oper_size_ok() first */ static inline u16 ieee80211_eht_oper_dis_subchan_bitmap(const struct ieee80211_eht_operation *eht_oper) { const struct ieee80211_eht_operation_info *info = (const void *)eht_oper->optional; if (!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) return 0; if (!(eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) return 0; return get_unaligned_le16(info->optional); } #define IEEE80211_BW_IND_DIS_SUBCH_PRESENT BIT(1) struct ieee80211_bandwidth_indication { u8 params; struct ieee80211_eht_operation_info info; } __packed; static inline bool ieee80211_bandwidth_indication_size_ok(const u8 *data, u8 len) { const struct ieee80211_bandwidth_indication *bwi = (const void *)data; if (len < sizeof(*bwi)) return false; if (bwi->params & IEEE80211_BW_IND_DIS_SUBCH_PRESENT && len < sizeof(*bwi) + 2) return false; return true; } #define LISTEN_INT_USF GENMASK(15, 14) #define LISTEN_INT_UI GENMASK(13, 0) #define IEEE80211_MAX_USF FIELD_MAX(LISTEN_INT_USF) #define IEEE80211_MAX_UI FIELD_MAX(LISTEN_INT_UI) /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 #define WLAN_AUTH_FT 2 #define WLAN_AUTH_SAE 3 #define WLAN_AUTH_FILS_SK 4 #define WLAN_AUTH_FILS_SK_PFS 5 #define WLAN_AUTH_FILS_PK 6 #define WLAN_AUTH_LEAP 128 #define WLAN_AUTH_CHALLENGE_LEN 128 #define WLAN_CAPABILITY_ESS (1<<0) #define WLAN_CAPABILITY_IBSS (1<<1) /* * A mesh STA sets the ESS and IBSS capability bits to zero. * however, this holds true for p2p probe responses (in the p2p_find * phase) as well. 
*/
/* Evaluates to true when neither the ESS nor the IBSS bit is set in cap */
#define WLAN_CAPABILITY_IS_STA_BSS(cap)	\
	(!((cap) & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)))

#define WLAN_CAPABILITY_CF_POLLABLE	(1<<2)
#define WLAN_CAPABILITY_CF_POLL_REQUEST	(1<<3)
#define WLAN_CAPABILITY_PRIVACY	(1<<4)
#define WLAN_CAPABILITY_SHORT_PREAMBLE	(1<<5)
#define WLAN_CAPABILITY_PBCC	(1<<6)
#define WLAN_CAPABILITY_CHANNEL_AGILITY	(1<<7)

/* 802.11h */
#define WLAN_CAPABILITY_SPECTRUM_MGMT	(1<<8)
#define WLAN_CAPABILITY_QOS	(1<<9)
#define WLAN_CAPABILITY_SHORT_SLOT_TIME	(1<<10)
#define WLAN_CAPABILITY_APSD	(1<<11)
#define WLAN_CAPABILITY_RADIO_MEASURE	(1<<12)
#define WLAN_CAPABILITY_DSSS_OFDM	(1<<13)
#define WLAN_CAPABILITY_DEL_BACK	(1<<14)
#define WLAN_CAPABILITY_IMM_BACK	(1<<15)

/* DMG (60 GHz) 802.11ad */
/* type - bits 0..1 */
#define WLAN_CAPABILITY_DMG_TYPE_MASK	(3<<0)
#define WLAN_CAPABILITY_DMG_TYPE_IBSS	(1<<0) /* Tx by: STA */
#define WLAN_CAPABILITY_DMG_TYPE_PBSS	(2<<0) /* Tx by: PCP */
#define WLAN_CAPABILITY_DMG_TYPE_AP	(3<<0) /* Tx by: AP */

#define WLAN_CAPABILITY_DMG_CBAP_ONLY	(1<<2)
#define WLAN_CAPABILITY_DMG_CBAP_SOURCE	(1<<3)
#define WLAN_CAPABILITY_DMG_PRIVACY	(1<<4)
#define WLAN_CAPABILITY_DMG_ECPAC	(1<<5)

#define WLAN_CAPABILITY_DMG_SPECTRUM_MGMT	(1<<8)
#define WLAN_CAPABILITY_DMG_RADIO_MEASURE	(1<<12)

/* measurement */
/* report mode flags */
#define IEEE80211_SPCT_MSR_RPRT_MODE_LATE	(1<<0)
#define IEEE80211_SPCT_MSR_RPRT_MODE_INCAPABLE	(1<<1)
#define IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED	(1<<2)

/* report type values */
#define IEEE80211_SPCT_MSR_RPRT_TYPE_BASIC	0
#define IEEE80211_SPCT_MSR_RPRT_TYPE_CCA	1
#define IEEE80211_SPCT_MSR_RPRT_TYPE_RPI	2
#define IEEE80211_SPCT_MSR_RPRT_TYPE_LCI	8
#define IEEE80211_SPCT_MSR_RPRT_TYPE_CIVIC	11

/* 802.11g ERP information element */
#define WLAN_ERP_NON_ERP_PRESENT	(1<<0)
#define WLAN_ERP_USE_PROTECTION	(1<<1)
#define WLAN_ERP_BARKER_PREAMBLE	(1<<2)

/* WLAN_ERP_BARKER_PREAMBLE values */
enum {
	WLAN_ERP_PREAMBLE_SHORT = 0,
	WLAN_ERP_PREAMBLE_LONG = 1,
};

/* Band ID, 802.11ad #8.4.1.45 */
enum {
	IEEE80211_BANDID_TV_WS = 0, /* TV white
spaces */
	IEEE80211_BANDID_SUB1 = 1, /* Sub-1 GHz (excluding TV white spaces) */
	IEEE80211_BANDID_2G = 2, /* 2.4 GHz */
	IEEE80211_BANDID_3G = 3, /* 3.6 GHz */
	IEEE80211_BANDID_5G = 4, /* 4.9 and 5 GHz */
	IEEE80211_BANDID_60G = 5, /* 60 GHz */
};

/*
 * Status codes
 *
 * The entries are grouped by the amendment that introduced them, not
 * sorted by value. Two 802.11ad names deliberately share a value with an
 * 802.11e name (39 and 47).
 */
enum ieee80211_statuscode {
	WLAN_STATUS_SUCCESS = 0,
	WLAN_STATUS_UNSPECIFIED_FAILURE = 1,
	WLAN_STATUS_CAPS_UNSUPPORTED = 10,
	WLAN_STATUS_REASSOC_NO_ASSOC = 11,
	WLAN_STATUS_ASSOC_DENIED_UNSPEC = 12,
	WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG = 13,
	WLAN_STATUS_UNKNOWN_AUTH_TRANSACTION = 14,
	WLAN_STATUS_CHALLENGE_FAIL = 15,
	WLAN_STATUS_AUTH_TIMEOUT = 16,
	WLAN_STATUS_AP_UNABLE_TO_HANDLE_NEW_STA = 17,
	WLAN_STATUS_ASSOC_DENIED_RATES = 18,
	/* 802.11b */
	WLAN_STATUS_ASSOC_DENIED_NOSHORTPREAMBLE = 19,
	WLAN_STATUS_ASSOC_DENIED_NOPBCC = 20,
	WLAN_STATUS_ASSOC_DENIED_NOAGILITY = 21,
	/* 802.11h */
	WLAN_STATUS_ASSOC_DENIED_NOSPECTRUM = 22,
	WLAN_STATUS_ASSOC_REJECTED_BAD_POWER = 23,
	WLAN_STATUS_ASSOC_REJECTED_BAD_SUPP_CHAN = 24,
	/* 802.11g */
	WLAN_STATUS_ASSOC_DENIED_NOSHORTTIME = 25,
	WLAN_STATUS_ASSOC_DENIED_NODSSSOFDM = 26,
	/* 802.11w */
	WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY = 30,
	WLAN_STATUS_ROBUST_MGMT_FRAME_POLICY_VIOLATION = 31,
	/* 802.11i */
	WLAN_STATUS_INVALID_IE = 40,
	WLAN_STATUS_INVALID_GROUP_CIPHER = 41,
	WLAN_STATUS_INVALID_PAIRWISE_CIPHER = 42,
	WLAN_STATUS_INVALID_AKMP = 43,
	WLAN_STATUS_UNSUPP_RSN_VERSION = 44,
	WLAN_STATUS_INVALID_RSN_IE_CAP = 45,
	WLAN_STATUS_CIPHER_SUITE_REJECTED = 46,
	/* 802.11e */
	WLAN_STATUS_UNSPECIFIED_QOS = 32,
	WLAN_STATUS_ASSOC_DENIED_NOBANDWIDTH = 33,
	WLAN_STATUS_ASSOC_DENIED_LOWACK = 34,
	WLAN_STATUS_ASSOC_DENIED_UNSUPP_QOS = 35,
	WLAN_STATUS_REQUEST_DECLINED = 37,
	WLAN_STATUS_INVALID_QOS_PARAM = 38,
	WLAN_STATUS_CHANGE_TSPEC = 39,
	WLAN_STATUS_WAIT_TS_DELAY = 47,
	WLAN_STATUS_NO_DIRECT_LINK = 48,
	WLAN_STATUS_STA_NOT_PRESENT = 49,
	WLAN_STATUS_STA_NOT_QSTA = 50,
	/* 802.11s */
	WLAN_STATUS_ANTI_CLOG_REQUIRED = 76,
	/*
	 * Unsupported finite cyclic group is status 77 in IEEE 802.11;
	 * 78 belongs to WLAN_STATUS_STA_NO_TBTT only.
	 */
	WLAN_STATUS_FCG_NOT_SUPP = 77,
	WLAN_STATUS_STA_NO_TBTT = 78,
	/* 802.11ad */
	WLAN_STATUS_REJECTED_WITH_SUGGESTED_CHANGES = 39,
	WLAN_STATUS_REJECTED_FOR_DELAY_PERIOD = 47,
	WLAN_STATUS_REJECT_WITH_SCHEDULE = 83,
	WLAN_STATUS_PENDING_ADMITTING_FST_SESSION = 86,
	WLAN_STATUS_PERFORMING_FST_NOW = 87,
	WLAN_STATUS_PENDING_GAP_IN_BA_WINDOW = 88,
	WLAN_STATUS_REJECT_U_PID_SETTING = 89,
	WLAN_STATUS_REJECT_DSE_BAND = 96,
	WLAN_STATUS_DENIED_WITH_SUGGESTED_BAND_AND_CHANNEL = 99,
	WLAN_STATUS_DENIED_DUE_TO_SPECTRUM_MANAGEMENT = 103,
	/* 802.11ai */
	WLAN_STATUS_FILS_AUTHENTICATION_FAILURE = 108,
	WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 109,
	WLAN_STATUS_SAE_HASH_TO_ELEMENT = 126,
	WLAN_STATUS_SAE_PK = 127,
	WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING = 133,
	WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED = 134,
};

/* Reason codes */
enum ieee80211_reasoncode {
	WLAN_REASON_UNSPECIFIED = 1,
	WLAN_REASON_PREV_AUTH_NOT_VALID = 2,
	WLAN_REASON_DEAUTH_LEAVING = 3,
	WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY = 4,
	WLAN_REASON_DISASSOC_AP_BUSY = 5,
	WLAN_REASON_CLASS2_FRAME_FROM_NONAUTH_STA = 6,
	WLAN_REASON_CLASS3_FRAME_FROM_NONASSOC_STA = 7,
	WLAN_REASON_DISASSOC_STA_HAS_LEFT = 8,
	WLAN_REASON_STA_REQ_ASSOC_WITHOUT_AUTH = 9,
	/* 802.11h */
	WLAN_REASON_DISASSOC_BAD_POWER = 10,
	WLAN_REASON_DISASSOC_BAD_SUPP_CHAN = 11,
	/* 802.11i */
	WLAN_REASON_INVALID_IE = 13,
	WLAN_REASON_MIC_FAILURE = 14,
	WLAN_REASON_4WAY_HANDSHAKE_TIMEOUT = 15,
	WLAN_REASON_GROUP_KEY_HANDSHAKE_TIMEOUT = 16,
	WLAN_REASON_IE_DIFFERENT = 17,
	WLAN_REASON_INVALID_GROUP_CIPHER = 18,
	WLAN_REASON_INVALID_PAIRWISE_CIPHER = 19,
	WLAN_REASON_INVALID_AKMP = 20,
	WLAN_REASON_UNSUPP_RSN_VERSION = 21,
	WLAN_REASON_INVALID_RSN_IE_CAP = 22,
	WLAN_REASON_IEEE8021X_FAILED = 23,
	WLAN_REASON_CIPHER_SUITE_REJECTED = 24,
	/* TDLS (802.11z) */
	WLAN_REASON_TDLS_TEARDOWN_UNREACHABLE = 25,
	WLAN_REASON_TDLS_TEARDOWN_UNSPECIFIED = 26,
	/* 802.11e */
	WLAN_REASON_DISASSOC_UNSPECIFIED_QOS = 32,
	WLAN_REASON_DISASSOC_QAP_NO_BANDWIDTH = 33,
	WLAN_REASON_DISASSOC_LOW_ACK = 34,
	WLAN_REASON_DISASSOC_QAP_EXCEED_TXOP = 35,
	WLAN_REASON_QSTA_LEAVE_QBSS = 36,
	WLAN_REASON_QSTA_NOT_USE = 37,
	WLAN_REASON_QSTA_REQUIRE_SETUP = 38,
	WLAN_REASON_QSTA_TIMEOUT = 39,
	WLAN_REASON_QSTA_CIPHER_NOT_SUPP = 45,
	/* 802.11s */
	WLAN_REASON_MESH_PEER_CANCELED = 52,
	WLAN_REASON_MESH_MAX_PEERS = 53,
	WLAN_REASON_MESH_CONFIG = 54,
	WLAN_REASON_MESH_CLOSE = 55,
	WLAN_REASON_MESH_MAX_RETRIES = 56,
	WLAN_REASON_MESH_CONFIRM_TIMEOUT = 57,
	WLAN_REASON_MESH_INVALID_GTK = 58,
	WLAN_REASON_MESH_INCONSISTENT_PARAM = 59,
	WLAN_REASON_MESH_INVALID_SECURITY = 60,
	WLAN_REASON_MESH_PATH_ERROR = 61,
	WLAN_REASON_MESH_PATH_NOFORWARD = 62,
	WLAN_REASON_MESH_PATH_DEST_UNREACHABLE = 63,
	WLAN_REASON_MAC_EXISTS_IN_MBSS = 64,
	WLAN_REASON_MESH_CHAN_REGULATORY = 65,
	WLAN_REASON_MESH_CHAN = 66,
};

/*
 * Information Element IDs
 *
 * Values listed in ascending order; gaps are either commented as reserved
 * or simply not defined here. ID 255 (WLAN_EID_EXTENSION) is the one the
 * extension IDs of enum ieee80211_eid_ext belong to.
 */
enum ieee80211_eid {
	WLAN_EID_SSID = 0,
	WLAN_EID_SUPP_RATES = 1,
	WLAN_EID_FH_PARAMS = 2, /* reserved now */
	WLAN_EID_DS_PARAMS = 3,
	WLAN_EID_CF_PARAMS = 4,
	WLAN_EID_TIM = 5,
	WLAN_EID_IBSS_PARAMS = 6,
	WLAN_EID_COUNTRY = 7,
	/* 8, 9 reserved */
	WLAN_EID_REQUEST = 10,
	WLAN_EID_QBSS_LOAD = 11,
	WLAN_EID_EDCA_PARAM_SET = 12,
	WLAN_EID_TSPEC = 13,
	WLAN_EID_TCLAS = 14,
	WLAN_EID_SCHEDULE = 15,
	WLAN_EID_CHALLENGE = 16,
	/* 17-31 reserved for challenge text extension */
	WLAN_EID_PWR_CONSTRAINT = 32,
	WLAN_EID_PWR_CAPABILITY = 33,
	WLAN_EID_TPC_REQUEST = 34,
	WLAN_EID_TPC_REPORT = 35,
	WLAN_EID_SUPPORTED_CHANNELS = 36,
	WLAN_EID_CHANNEL_SWITCH = 37,
	WLAN_EID_MEASURE_REQUEST = 38,
	WLAN_EID_MEASURE_REPORT = 39,
	WLAN_EID_QUIET = 40,
	WLAN_EID_IBSS_DFS = 41,
	WLAN_EID_ERP_INFO = 42,
	WLAN_EID_TS_DELAY = 43,
	WLAN_EID_TCLAS_PROCESSING = 44,
	WLAN_EID_HT_CAPABILITY = 45,
	WLAN_EID_QOS_CAPA = 46,
	/* 47 reserved for Broadcom */
	WLAN_EID_RSN = 48,
	WLAN_EID_802_15_COEX = 49,
	WLAN_EID_EXT_SUPP_RATES = 50,
	WLAN_EID_AP_CHAN_REPORT = 51,
	WLAN_EID_NEIGHBOR_REPORT = 52,
	WLAN_EID_RCPI = 53,
	WLAN_EID_MOBILITY_DOMAIN = 54,
	WLAN_EID_FAST_BSS_TRANSITION = 55,
	WLAN_EID_TIMEOUT_INTERVAL = 56,
	WLAN_EID_RIC_DATA = 57,
	WLAN_EID_DSE_REGISTERED_LOCATION = 58,
	WLAN_EID_SUPPORTED_REGULATORY_CLASSES = 59,
	WLAN_EID_EXT_CHANSWITCH_ANN = 60,
	WLAN_EID_HT_OPERATION = 61,
	WLAN_EID_SECONDARY_CHANNEL_OFFSET = 62,
	WLAN_EID_BSS_AVG_ACCESS_DELAY = 63,
	WLAN_EID_ANTENNA_INFO = 64,
	WLAN_EID_RSNI = 65,
	WLAN_EID_MEASUREMENT_PILOT_TX_INFO = 66,
	WLAN_EID_BSS_AVAILABLE_CAPACITY = 67,
	WLAN_EID_BSS_AC_ACCESS_DELAY = 68,
	WLAN_EID_TIME_ADVERTISEMENT = 69,
	WLAN_EID_RRM_ENABLED_CAPABILITIES = 70,
	WLAN_EID_MULTIPLE_BSSID = 71,
	WLAN_EID_BSS_COEX_2040 = 72,
	WLAN_EID_BSS_INTOLERANT_CHL_REPORT = 73,
	WLAN_EID_OVERLAP_BSS_SCAN_PARAM = 74,
	WLAN_EID_RIC_DESCRIPTOR = 75,
	WLAN_EID_MMIE = 76,
	WLAN_EID_ASSOC_COMEBACK_TIME = 77,
	WLAN_EID_EVENT_REQUEST = 78,
	WLAN_EID_EVENT_REPORT = 79,
	WLAN_EID_DIAGNOSTIC_REQUEST = 80,
	WLAN_EID_DIAGNOSTIC_REPORT = 81,
	WLAN_EID_LOCATION_PARAMS = 82,
	WLAN_EID_NON_TX_BSSID_CAP = 83,
	WLAN_EID_SSID_LIST = 84,
	WLAN_EID_MULTI_BSSID_IDX = 85,
	WLAN_EID_FMS_DESCRIPTOR = 86,
	WLAN_EID_FMS_REQUEST = 87,
	WLAN_EID_FMS_RESPONSE = 88,
	WLAN_EID_QOS_TRAFFIC_CAPA = 89,
	WLAN_EID_BSS_MAX_IDLE_PERIOD = 90,
	WLAN_EID_TSF_REQUEST = 91,
	WLAN_EID_TSF_RESPOSNE = 92, /* sic: misspelling is part of the name */
	WLAN_EID_WNM_SLEEP_MODE = 93,
	WLAN_EID_TIM_BCAST_REQ = 94,
	WLAN_EID_TIM_BCAST_RESP = 95,
	WLAN_EID_COLL_IF_REPORT = 96,
	WLAN_EID_CHANNEL_USAGE = 97,
	WLAN_EID_TIME_ZONE = 98,
	WLAN_EID_DMS_REQUEST = 99,
	WLAN_EID_DMS_RESPONSE = 100,
	WLAN_EID_LINK_ID = 101,
	WLAN_EID_WAKEUP_SCHEDUL = 102, /* distinct from WLAN_EID_WAKEUP_SCHEDULE (143) */
	/* 103 reserved */
	WLAN_EID_CHAN_SWITCH_TIMING = 104,
	WLAN_EID_PTI_CONTROL = 105,
	WLAN_EID_PU_BUFFER_STATUS = 106,
	WLAN_EID_INTERWORKING = 107,
	WLAN_EID_ADVERTISEMENT_PROTOCOL = 108,
	WLAN_EID_EXPEDITED_BW_REQ = 109,
	WLAN_EID_QOS_MAP_SET = 110,
	WLAN_EID_ROAMING_CONSORTIUM = 111,
	WLAN_EID_EMERGENCY_ALERT = 112,
	WLAN_EID_MESH_CONFIG = 113,
	WLAN_EID_MESH_ID = 114,
	WLAN_EID_LINK_METRIC_REPORT = 115,
	WLAN_EID_CONGESTION_NOTIFICATION = 116,
	WLAN_EID_PEER_MGMT = 117,
	WLAN_EID_CHAN_SWITCH_PARAM = 118,
	WLAN_EID_MESH_AWAKE_WINDOW = 119,
	WLAN_EID_BEACON_TIMING = 120,
	WLAN_EID_MCCAOP_SETUP_REQ = 121,
	WLAN_EID_MCCAOP_SETUP_RESP = 122,
	WLAN_EID_MCCAOP_ADVERT = 123,
	WLAN_EID_MCCAOP_TEARDOWN = 124,
	WLAN_EID_GANN = 125,
	WLAN_EID_RANN = 126,
	WLAN_EID_EXT_CAPABILITY = 127,
	/* 128, 129 reserved for Agere */
	WLAN_EID_PREQ = 130,
	WLAN_EID_PREP = 131,
	WLAN_EID_PERR = 132,
	/* 133-136 reserved for Cisco */
	WLAN_EID_PXU = 137,
	WLAN_EID_PXUC = 138,
	WLAN_EID_AUTH_MESH_PEER_EXCH = 139,
	WLAN_EID_MIC = 140,
	WLAN_EID_DESTINATION_URI = 141,
	WLAN_EID_UAPSD_COEX = 142,
	WLAN_EID_WAKEUP_SCHEDULE = 143,
	WLAN_EID_EXT_SCHEDULE = 144,
	WLAN_EID_STA_AVAILABILITY = 145,
	WLAN_EID_DMG_TSPEC = 146,
	WLAN_EID_DMG_AT = 147,
	WLAN_EID_DMG_CAP = 148,
	/* 149 reserved for Cisco */
	WLAN_EID_CISCO_VENDOR_SPECIFIC = 150,
	WLAN_EID_DMG_OPERATION = 151,
	WLAN_EID_DMG_BSS_PARAM_CHANGE = 152,
	WLAN_EID_DMG_BEAM_REFINEMENT = 153,
	WLAN_EID_CHANNEL_MEASURE_FEEDBACK = 154,
	/* 155-156 reserved for Cisco */
	WLAN_EID_AWAKE_WINDOW = 157,
	WLAN_EID_MULTI_BAND = 158,
	WLAN_EID_ADDBA_EXT = 159,
	WLAN_EID_NEXT_PCP_LIST = 160,
	WLAN_EID_PCP_HANDOVER = 161,
	WLAN_EID_DMG_LINK_MARGIN = 162,
	WLAN_EID_SWITCHING_STREAM = 163,
	WLAN_EID_SESSION_TRANSITION = 164,
	WLAN_EID_DYN_TONE_PAIRING_REPORT = 165,
	WLAN_EID_CLUSTER_REPORT = 166,
	WLAN_EID_RELAY_CAP = 167,
	WLAN_EID_RELAY_XFER_PARAM_SET = 168,
	WLAN_EID_BEAM_LINK_MAINT = 169,
	WLAN_EID_MULTIPLE_MAC_ADDR = 170,
	WLAN_EID_U_PID = 171,
	WLAN_EID_DMG_LINK_ADAPT_ACK = 172,
	/* 173 reserved for Symbol */
	WLAN_EID_MCCAOP_ADV_OVERVIEW = 174,
	WLAN_EID_QUIET_PERIOD_REQ = 175,
	/* 176 reserved for Symbol */
	WLAN_EID_QUIET_PERIOD_RESP = 177,
	/* 178-179 reserved for Symbol */
	/* 180 reserved for ISO/IEC 20011 */
	WLAN_EID_EPAC_POLICY = 182,
	WLAN_EID_CLISTER_TIME_OFF = 183, /* sic: misspelling is part of the name */
	WLAN_EID_INTER_AC_PRIO = 184,
	WLAN_EID_SCS_DESCRIPTOR = 185,
	WLAN_EID_QLOAD_REPORT = 186,
	WLAN_EID_HCCA_TXOP_UPDATE_COUNT = 187,
	WLAN_EID_HL_STREAM_ID = 188,
	WLAN_EID_GCR_GROUP_ADDR = 189,
	WLAN_EID_ANTENNA_SECTOR_ID_PATTERN = 190,
	WLAN_EID_VHT_CAPABILITY = 191,
	WLAN_EID_VHT_OPERATION = 192,
	WLAN_EID_EXTENDED_BSS_LOAD = 193,
	WLAN_EID_WIDE_BW_CHANNEL_SWITCH = 194,
	WLAN_EID_TX_POWER_ENVELOPE = 195,
	WLAN_EID_CHANNEL_SWITCH_WRAPPER = 196,
	WLAN_EID_AID = 197,
	WLAN_EID_QUIET_CHANNEL = 198,
	WLAN_EID_OPMODE_NOTIF = 199,

	WLAN_EID_REDUCED_NEIGHBOR_REPORT = 201,

	WLAN_EID_AID_REQUEST = 210,
	WLAN_EID_AID_RESPONSE = 211,
	WLAN_EID_S1G_BCN_COMPAT = 213,
	WLAN_EID_S1G_SHORT_BCN_INTERVAL = 214,
	WLAN_EID_S1G_TWT = 216,
	WLAN_EID_S1G_CAPABILITIES = 217,
	WLAN_EID_VENDOR_SPECIFIC = 221,
	WLAN_EID_QOS_PARAMETER = 222,
	WLAN_EID_S1G_OPERATION = 232,
	WLAN_EID_CAG_NUMBER = 237,
	WLAN_EID_AP_CSN = 239,
	WLAN_EID_FILS_INDICATION = 240,
	WLAN_EID_DILS = 241,
	WLAN_EID_FRAGMENT = 242,
	WLAN_EID_RSNX = 244,
	WLAN_EID_EXTENSION = 255
};

/* Element ID Extensions for Element ID 255 */
enum ieee80211_eid_ext {
	WLAN_EID_EXT_ASSOC_DELAY_INFO = 1,
	WLAN_EID_EXT_FILS_REQ_PARAMS = 2,
	WLAN_EID_EXT_FILS_KEY_CONFIRM = 3,
	WLAN_EID_EXT_FILS_SESSION = 4,
	WLAN_EID_EXT_FILS_HLP_CONTAINER = 5,
	WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN = 6,
	WLAN_EID_EXT_KEY_DELIVERY = 7,
	WLAN_EID_EXT_FILS_WRAPPED_DATA = 8,
	WLAN_EID_EXT_FILS_PUBLIC_KEY = 12,
	WLAN_EID_EXT_FILS_NONCE = 13,
	WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE = 14,
	WLAN_EID_EXT_DH_PARAMETER = 32,
	WLAN_EID_EXT_HE_CAPABILITY = 35,
	WLAN_EID_EXT_HE_OPERATION = 36,
	WLAN_EID_EXT_UORA = 37,
	WLAN_EID_EXT_HE_MU_EDCA = 38,
	WLAN_EID_EXT_HE_SPR = 39,
	WLAN_EID_EXT_NDP_FEEDBACK_REPORT_PARAMSET = 41,
	WLAN_EID_EXT_BSS_COLOR_CHG_ANN = 42,
	WLAN_EID_EXT_QUIET_TIME_PERIOD_SETUP = 43,
	WLAN_EID_EXT_ESS_REPORT = 45,
	WLAN_EID_EXT_OPS = 46,
	WLAN_EID_EXT_HE_BSS_LOAD = 47,
	WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME = 52,
	WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION = 55,
	WLAN_EID_EXT_NON_INHERITANCE = 56,
	WLAN_EID_EXT_KNOWN_BSSID = 57,
	WLAN_EID_EXT_SHORT_SSID_LIST = 58,
	WLAN_EID_EXT_HE_6GHZ_CAPA = 59,
	WLAN_EID_EXT_UL_MU_POWER_CAPA = 60,
	WLAN_EID_EXT_EHT_OPERATION = 106,
	WLAN_EID_EXT_EHT_MULTI_LINK = 107,
	WLAN_EID_EXT_EHT_CAPABILITY = 108,
	WLAN_EID_EXT_TID_TO_LINK_MAPPING = 109,
	WLAN_EID_EXT_BANDWIDTH_INDICATION = 135,
	WLAN_EID_EXT_KNOWN_STA_IDENTIFCATION = 136, /* sic: misspelling is part of the name */
	WLAN_EID_EXT_NON_AP_STA_REG_CON = 137,
};

/* Action
category code */
enum ieee80211_category {
	WLAN_CATEGORY_SPECTRUM_MGMT = 0,
	WLAN_CATEGORY_QOS = 1,
	WLAN_CATEGORY_DLS = 2,
	WLAN_CATEGORY_BACK = 3,
	WLAN_CATEGORY_PUBLIC = 4,
	WLAN_CATEGORY_RADIO_MEASUREMENT = 5,
	WLAN_CATEGORY_FAST_BBS_TRANSITION = 6, /* sic: "BBS" is part of the name */
	WLAN_CATEGORY_HT = 7,
	WLAN_CATEGORY_SA_QUERY = 8,
	WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9,
	WLAN_CATEGORY_WNM = 10,
	WLAN_CATEGORY_WNM_UNPROTECTED = 11,
	WLAN_CATEGORY_TDLS = 12,
	WLAN_CATEGORY_MESH_ACTION = 13,
	WLAN_CATEGORY_MULTIHOP_ACTION = 14,
	WLAN_CATEGORY_SELF_PROTECTED = 15,
	WLAN_CATEGORY_DMG = 16,
	WLAN_CATEGORY_WMM = 17,
	WLAN_CATEGORY_FST = 18,
	WLAN_CATEGORY_UNPROT_DMG = 20,
	WLAN_CATEGORY_VHT = 21,
	WLAN_CATEGORY_S1G = 22,
	WLAN_CATEGORY_PROTECTED_EHT = 37,
	WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126,
	WLAN_CATEGORY_VENDOR_SPECIFIC = 127,
};

/* SPECTRUM_MGMT action code */
enum ieee80211_spectrum_mgmt_actioncode {
	WLAN_ACTION_SPCT_MSR_REQ = 0,
	WLAN_ACTION_SPCT_MSR_RPRT = 1,
	WLAN_ACTION_SPCT_TPC_REQ = 2,
	WLAN_ACTION_SPCT_TPC_RPRT = 3,
	WLAN_ACTION_SPCT_CHL_SWITCH = 4,
};

/* HT action codes */
enum ieee80211_ht_actioncode {
	WLAN_HT_ACTION_NOTIFY_CHANWIDTH = 0,
	WLAN_HT_ACTION_SMPS = 1,
	WLAN_HT_ACTION_PSMP = 2,
	WLAN_HT_ACTION_PCO_PHASE = 3,
	WLAN_HT_ACTION_CSI = 4,
	WLAN_HT_ACTION_NONCOMPRESSED_BF = 5,
	WLAN_HT_ACTION_COMPRESSED_BF = 6,
	WLAN_HT_ACTION_ASEL_IDX_FEEDBACK = 7,
};

/* VHT action codes */
enum ieee80211_vht_actioncode {
	WLAN_VHT_ACTION_COMPRESSED_BF = 0,
	WLAN_VHT_ACTION_GROUPID_MGMT = 1,
	WLAN_VHT_ACTION_OPMODE_NOTIF = 2,
};

/* Self Protected Action codes */
enum ieee80211_self_protected_actioncode {
	WLAN_SP_RESERVED = 0,
	WLAN_SP_MESH_PEERING_OPEN = 1,
	WLAN_SP_MESH_PEERING_CONFIRM = 2,
	WLAN_SP_MESH_PEERING_CLOSE = 3,
	WLAN_SP_MGK_INFORM = 4,
	WLAN_SP_MGK_ACK = 5,
};

/* Mesh action codes (implicit values, counting up from 0) */
enum ieee80211_mesh_actioncode {
	WLAN_MESH_ACTION_LINK_METRIC_REPORT,
	WLAN_MESH_ACTION_HWMP_PATH_SELECTION,
	WLAN_MESH_ACTION_GATE_ANNOUNCEMENT,
	WLAN_MESH_ACTION_CONGESTION_CONTROL_NOTIFICATION,
	WLAN_MESH_ACTION_MCCA_SETUP_REQUEST,
	WLAN_MESH_ACTION_MCCA_SETUP_REPLY,
	WLAN_MESH_ACTION_MCCA_ADVERTISEMENT_REQUEST,
	WLAN_MESH_ACTION_MCCA_ADVERTISEMENT,
	WLAN_MESH_ACTION_MCCA_TEARDOWN,
	WLAN_MESH_ACTION_TBTT_ADJUSTMENT_REQUEST,
	WLAN_MESH_ACTION_TBTT_ADJUSTMENT_RESPONSE,
};

/* Unprotected WNM action codes */
enum ieee80211_unprotected_wnm_actioncode {
	WLAN_UNPROTECTED_WNM_ACTION_TIM = 0,
	WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE = 1,
};

/* Protected EHT action codes */
enum ieee80211_protected_eht_actioncode {
	WLAN_PROTECTED_EHT_ACTION_TTLM_REQ = 0,
	WLAN_PROTECTED_EHT_ACTION_TTLM_RES = 1,
	WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN = 2,
	WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_REQ = 3,
	WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_RESP = 4,
	WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_TEARDOWN = 5,
	WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF = 6,
	WLAN_PROTECTED_EHT_ACTION_LINK_RECOMMEND = 7,
	WLAN_PROTECTED_EHT_ACTION_ML_OP_UPDATE_REQ = 8,
	WLAN_PROTECTED_EHT_ACTION_ML_OP_UPDATE_RESP = 9,
	WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_NOTIF = 10,
	WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_REQ = 11,
	WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_RESP = 12,
};

/* Security key length (in octets: e.g. WEP40 is 40 bits = 5) */
enum ieee80211_key_len {
	WLAN_KEY_LEN_WEP40 = 5,
	WLAN_KEY_LEN_WEP104 = 13,
	WLAN_KEY_LEN_CCMP = 16,
	WLAN_KEY_LEN_CCMP_256 = 32,
	WLAN_KEY_LEN_TKIP = 32,
	WLAN_KEY_LEN_AES_CMAC = 16,
	WLAN_KEY_LEN_SMS4 = 32,
	WLAN_KEY_LEN_GCMP = 16,
	WLAN_KEY_LEN_GCMP_256 = 32,
	WLAN_KEY_LEN_BIP_CMAC_256 = 32,
	WLAN_KEY_LEN_BIP_GMAC_128 = 16,
	WLAN_KEY_LEN_BIP_GMAC_256 = 32,
};

/*
 * S1G action codes: implicit values 0-9, then an explicit 11 (no
 * enumerator is defined for 10)
 */
enum ieee80211_s1g_actioncode {
	WLAN_S1G_AID_SWITCH_REQUEST,
	WLAN_S1G_AID_SWITCH_RESPONSE,
	WLAN_S1G_SYNC_CONTROL,
	WLAN_S1G_STA_INFO_ANNOUNCE,
	WLAN_S1G_EDCA_PARAM_SET,
	WLAN_S1G_EL_OPERATION,
	WLAN_S1G_TWT_SETUP,
	WLAN_S1G_TWT_TEARDOWN,
	WLAN_S1G_SECT_GROUP_ID_LIST,
	WLAN_S1G_SECT_ID_FEEDBACK,
	WLAN_S1G_TWT_INFORMATION = 11,
};

/* Radio measurement action codes as defined in IEEE 802.11-2024 - Table 9-470 */
enum ieee80211_radio_measurement_actioncode {
	WLAN_RM_ACTION_RADIO_MEASUREMENT_REQUEST = 0,
	WLAN_RM_ACTION_RADIO_MEASUREMENT_REPORT = 1,
	WLAN_RM_ACTION_LINK_MEASUREMENT_REQUEST = 2,
	WLAN_RM_ACTION_LINK_MEASUREMENT_REPORT = 3,
	WLAN_RM_ACTION_NEIGHBOR_REPORT_REQUEST = 4,
	WLAN_RM_ACTION_NEIGHBOR_REPORT_RESPONSE = 5,
};

/*
 * Per-cipher header/IV, MIC/ICV and packet number (PN) lengths -
 * presumably in octets; confirm against the users of these constants
 */
#define IEEE80211_WEP_IV_LEN	4
#define IEEE80211_WEP_ICV_LEN	4
#define IEEE80211_CCMP_HDR_LEN	8
#define IEEE80211_CCMP_MIC_LEN	8
#define IEEE80211_CCMP_PN_LEN	6
#define IEEE80211_CCMP_256_HDR_LEN	8
#define IEEE80211_CCMP_256_MIC_LEN	16
#define IEEE80211_CCMP_256_PN_LEN	6
#define IEEE80211_TKIP_IV_LEN	8
#define IEEE80211_TKIP_ICV_LEN	4
#define IEEE80211_CMAC_PN_LEN	6
#define IEEE80211_GMAC_PN_LEN	6
#define IEEE80211_GCMP_HDR_LEN	8
#define IEEE80211_GCMP_MIC_LEN	16
#define IEEE80211_GCMP_PN_LEN	6

#define FILS_NONCE_LEN	16
#define FILS_MAX_KEK_LEN	64

#define FILS_ERP_MAX_USERNAME_LEN	16
#define FILS_ERP_MAX_REALM_LEN	253
#define FILS_ERP_MAX_RRK_LEN	64

#define PMK_MAX_LEN	64
#define SAE_PASSWORD_MAX_LEN	128

/* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */
enum ieee80211_pub_actioncode {
	WLAN_PUB_ACTION_20_40_BSS_COEX = 0,
	WLAN_PUB_ACTION_DSE_ENABLEMENT = 1,
	WLAN_PUB_ACTION_DSE_DEENABLEMENT = 2,
	WLAN_PUB_ACTION_DSE_REG_LOC_ANN = 3,
	WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4,
	WLAN_PUB_ACTION_DSE_MSMT_REQ = 5,
	WLAN_PUB_ACTION_DSE_MSMT_RESP = 6,
	WLAN_PUB_ACTION_MSMT_PILOT = 7,
	WLAN_PUB_ACTION_DSE_PC = 8,
	WLAN_PUB_ACTION_VENDOR_SPECIFIC = 9,
	WLAN_PUB_ACTION_GAS_INITIAL_REQ = 10,
	WLAN_PUB_ACTION_GAS_INITIAL_RESP = 11,
	WLAN_PUB_ACTION_GAS_COMEBACK_REQ = 12,
	WLAN_PUB_ACTION_GAS_COMEBACK_RESP = 13,
	WLAN_PUB_ACTION_TDLS_DISCOVER_RES = 14,
	WLAN_PUB_ACTION_LOC_TRACK_NOTI = 15,
	WLAN_PUB_ACTION_QAB_REQUEST_FRAME = 16,
	WLAN_PUB_ACTION_QAB_RESPONSE_FRAME = 17,
	WLAN_PUB_ACTION_QMF_POLICY = 18,
	WLAN_PUB_ACTION_QMF_POLICY_CHANGE = 19,
	WLAN_PUB_ACTION_QLOAD_REQUEST = 20,
	WLAN_PUB_ACTION_QLOAD_REPORT = 21,
	WLAN_PUB_ACTION_HCCA_TXOP_ADVERT = 22,
	WLAN_PUB_ACTION_HCCA_TXOP_RESPONSE = 23,
	WLAN_PUB_ACTION_PUBLIC_KEY = 24,
	WLAN_PUB_ACTION_CHANNEL_AVAIL_QUERY = 25,
	WLAN_PUB_ACTION_CHANNEL_SCHEDULE_MGMT = 26,
	WLAN_PUB_ACTION_CONTACT_VERI_SIGNAL = 27,
	WLAN_PUB_ACTION_GDD_ENABLEMENT_REQ = 28,
	WLAN_PUB_ACTION_GDD_ENABLEMENT_RESP = 29,
	WLAN_PUB_ACTION_NETWORK_CHANNEL_CONTROL = 30,
	WLAN_PUB_ACTION_WHITE_SPACE_MAP_ANN = 31,
	WLAN_PUB_ACTION_FTM_REQUEST = 32,
	WLAN_PUB_ACTION_FTM_RESPONSE = 33,
	WLAN_PUB_ACTION_FILS_DISCOVERY = 34,
};

/* TDLS action codes */
enum ieee80211_tdls_actioncode {
	WLAN_TDLS_SETUP_REQUEST = 0,
	WLAN_TDLS_SETUP_RESPONSE = 1,
	WLAN_TDLS_SETUP_CONFIRM = 2,
	WLAN_TDLS_TEARDOWN = 3,
	WLAN_TDLS_PEER_TRAFFIC_INDICATION = 4,
	WLAN_TDLS_CHANNEL_SWITCH_REQUEST = 5,
	WLAN_TDLS_CHANNEL_SWITCH_RESPONSE = 6,
	WLAN_TDLS_PEER_PSM_REQUEST = 7,
	WLAN_TDLS_PEER_PSM_RESPONSE = 8,
	WLAN_TDLS_PEER_TRAFFIC_RESPONSE = 9,
	WLAN_TDLS_DISCOVERY_REQUEST = 10,
};

/* Extended Channel Switching capability to be set in the 1st byte of
 * the @WLAN_EID_EXT_CAPABILITY information element
 */
#define WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING	BIT(2)

/* Multiple BSSID capability is set in the 6th bit of 3rd byte of the
 * @WLAN_EID_EXT_CAPABILITY information element
 */
#define WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT	BIT(6)

/* Timing Measurement protocol for time sync is set in the 7th bit of 3rd byte
 * of the @WLAN_EID_EXT_CAPABILITY information element
 */
#define WLAN_EXT_CAPA3_TIMING_MEASUREMENT_SUPPORT	BIT(7)

/* TDLS capabilities in the 4th byte of @WLAN_EID_EXT_CAPABILITY */
#define WLAN_EXT_CAPA4_TDLS_BUFFER_STA	BIT(4)
#define WLAN_EXT_CAPA4_TDLS_PEER_PSM	BIT(5)
#define WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH	BIT(6)

/* Interworking capabilities are set in 7th bit of 4th byte of the
 * @WLAN_EID_EXT_CAPABILITY information element
 */
#define WLAN_EXT_CAPA4_INTERWORKING_ENABLED	BIT(7)

/*
 * TDLS capabilities to be enabled in the 5th byte of the
 * @WLAN_EID_EXT_CAPABILITY information element
 */
#define WLAN_EXT_CAPA5_TDLS_ENABLED	BIT(5)
#define WLAN_EXT_CAPA5_TDLS_PROHIBITED	BIT(6)
#define WLAN_EXT_CAPA5_TDLS_CH_SW_PROHIBITED	BIT(7)

#define
WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED BIT(5) #define WLAN_EXT_CAPA8_OPMODE_NOTIF BIT(6) /* Defines the maximal number of MSDUs in an A-MSDU. */ #define WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB BIT(7) #define WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB BIT(0) /* * Fine Timing Measurement Initiator - bit 71 of @WLAN_EID_EXT_CAPABILITY * information element */ #define WLAN_EXT_CAPA9_FTM_INITIATOR BIT(7) /* Defines support for TWT Requester and TWT Responder */ #define WLAN_EXT_CAPA10_TWT_REQUESTER_SUPPORT BIT(5) #define WLAN_EXT_CAPA10_TWT_RESPONDER_SUPPORT BIT(6) /* * When set, indicates that the AP is able to tolerate 26-tone RU UL * OFDMA transmissions using HE TB PPDU from OBSS (not falsely classify the * 26-tone RU UL OFDMA transmissions as radar pulses). */ #define WLAN_EXT_CAPA10_OBSS_NARROW_BW_RU_TOLERANCE_SUPPORT BIT(7) /* Defines support for enhanced multi-bssid advertisement*/ #define WLAN_EXT_CAPA11_EMA_SUPPORT BIT(3) /* Enable Beacon Protection */ #define WLAN_EXT_CAPA11_BCN_PROTECT BIT(4) /* TDLS specific payload type in the LLC/SNAP header */ #define WLAN_TDLS_SNAP_RFTYPE 0x2 /* BSS Coex IE information field bits */ #define WLAN_BSS_COEX_INFORMATION_REQUEST BIT(0) /** * enum ieee80211_mesh_sync_method - mesh synchronization method identifier * * @IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET: the default synchronization method * @IEEE80211_SYNC_METHOD_VENDOR: a vendor specific synchronization method * that will be specified in a vendor specific information element */ enum ieee80211_mesh_sync_method { IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET = 1, IEEE80211_SYNC_METHOD_VENDOR = 255, }; /** * enum ieee80211_mesh_path_protocol - mesh path selection protocol identifier * * @IEEE80211_PATH_PROTOCOL_HWMP: the default path selection protocol * @IEEE80211_PATH_PROTOCOL_VENDOR: a vendor specific protocol that will * be specified in a vendor specific information element */ enum ieee80211_mesh_path_protocol { IEEE80211_PATH_PROTOCOL_HWMP = 1, IEEE80211_PATH_PROTOCOL_VENDOR = 255, }; /** * 
enum ieee80211_mesh_path_metric - mesh path selection metric identifier
 *
 * @IEEE80211_PATH_METRIC_AIRTIME: the default path selection metric
 * @IEEE80211_PATH_METRIC_VENDOR: a vendor specific metric that will be
 *	specified in a vendor specific information element
 */
enum ieee80211_mesh_path_metric {
	IEEE80211_PATH_METRIC_AIRTIME = 1,
	IEEE80211_PATH_METRIC_VENDOR = 255,
};

/**
 * enum ieee80211_root_mode_identifier - root mesh STA mode identifier
 *
 * These attributes are used by dot11MeshHWMPRootMode to set root mesh STA mode
 *
 * @IEEE80211_ROOTMODE_NO_ROOT: the mesh STA is not a root mesh STA (default)
 * @IEEE80211_ROOTMODE_ROOT: the mesh STA is a root mesh STA if greater than
 *	this value
 * @IEEE80211_PROACTIVE_PREQ_NO_PREP: the mesh STA is a root mesh STA that
 *	supports the proactive PREQ with proactive PREP subfield set to 0
 * @IEEE80211_PROACTIVE_PREQ_WITH_PREP: the mesh STA is a root mesh STA that
 *	supports the proactive PREQ with proactive PREP subfield set to 1
 * @IEEE80211_PROACTIVE_RANN: the mesh STA is a root mesh STA that supports
 *	the proactive RANN
 */
enum ieee80211_root_mode_identifier {
	IEEE80211_ROOTMODE_NO_ROOT = 0,
	IEEE80211_ROOTMODE_ROOT = 1,
	IEEE80211_PROACTIVE_PREQ_NO_PREP = 2,
	IEEE80211_PROACTIVE_PREQ_WITH_PREP = 3,
	IEEE80211_PROACTIVE_RANN = 4,
};

/*
 * IEEE 802.11-2007 7.3.2.9 Country information element
 *
 * Minimum length is 8 octets, ie len must be evenly
 * divisible by 2
 */

/* Although the spec says 8 I'm seeing 6 in practice */
#define IEEE80211_COUNTRY_IE_MIN_LEN	6

/* The Country String field of the element shall be 3 octets in length */
#define IEEE80211_COUNTRY_STRING_LEN	3

/*
 * For regulatory extension stuff see IEEE 802.11-2007
 * Annex I (page 1141) and Annex J (page 1147). Also
 * review 7.3.2.9.
 *
 * When dot11RegulatoryClassesRequired is true and the
 * first_channel/reg_extension_id is >= 201 then the IE
 * is composed of the 'ext' struct represented below:
 *
 *  - Regulatory extension ID - when generating IE this just needs
 *    to be monotonically increasing for each triplet passed in
 *    the IE
 *  - Regulatory class - index into set of rules
 *  - Coverage class - index into air propagation time (Table 7-27),
 *    in microseconds, you can compute the air propagation time from
 *    the index by multiplying by 3, so index 10 yields a propagation
 *    of 30 us. Valid values are 0-31, values 32-255 are not defined
 *    yet. A value of 0 indicates air propagation of <= 1 us.
 *
 *  See also Table I.2 for Emission limit sets and table
 *  I.3 for Behavior limit sets. Table J.1 indicates how to map
 *  a reg_class to an emission limit set and behavior limit set.
 */
#define IEEE80211_COUNTRY_EXTENSION_ID 201

/*
 *  Channel numbers in the IE must be monotonically increasing
 *  if dot11RegulatoryClassesRequired is not true.
 *
 *  If dot11RegulatoryClassesRequired is true consecutive
 *  subband triplets following a regulatory triplet shall
 *  have monotonically increasing first_channel number fields.
 *
 *  Channel numbers shall not overlap.
 *
 *  Note that max_power is signed.
*/
struct ieee80211_country_ie_triplet {
	union {
		/* channel triplet: first channel, channel count, signed power */
		struct {
			u8 first_channel;
			u8 num_channels;
			s8 max_power;
		} __packed chans;
		/* extension triplet: extension ID, regulatory and coverage class */
		struct {
			u8 reg_extension_id;
			u8 reg_class;
			u8 coverage_class;
		} __packed ext;
	};
} __packed;

/*
 * Timeout interval type values; the trailing comment on each value names
 * the amendment it is tagged with.
 */
enum ieee80211_timeout_interval_type {
	WLAN_TIMEOUT_REASSOC_DEADLINE = 1 /* 802.11r */,
	WLAN_TIMEOUT_KEY_LIFETIME = 2 /* 802.11r */,
	WLAN_TIMEOUT_ASSOC_COMEBACK = 3 /* 802.11w */,
};

/**
 * struct ieee80211_timeout_interval_ie - Timeout Interval element
 * @type: type, see &enum ieee80211_timeout_interval_type
 * @value: timeout interval value
 */
struct ieee80211_timeout_interval_ie {
	u8 type;
	__le32 value;
} __packed;

/**
 * enum ieee80211_idle_options - BSS idle options
 * @WLAN_IDLE_OPTIONS_PROTECTED_KEEP_ALIVE: the station should send an RSN
 *	protected frame to the AP to reset the idle timer at the AP for
 *	the station.
 */
enum ieee80211_idle_options {
	WLAN_IDLE_OPTIONS_PROTECTED_KEEP_ALIVE = BIT(0),
};

/**
 * struct ieee80211_bss_max_idle_period_ie - BSS max idle period element struct
 *
 * This structure refers to "BSS Max idle period element"
 *
 * @max_idle_period: indicates the time period during which a station can
 *	refrain from transmitting frames to its associated AP without being
 *	disassociated. In units of 1000 TUs.
 * @idle_options: indicates the options associated with the BSS idle capability
 *	as specified in &enum ieee80211_idle_options.
*/
struct ieee80211_bss_max_idle_period_ie {
	__le16 max_idle_period;
	u8 idle_options;
} __packed;

/* BACK action code */
enum ieee80211_back_actioncode {
	WLAN_ACTION_ADDBA_REQ = 0,
	WLAN_ACTION_ADDBA_RESP = 1,
	WLAN_ACTION_DELBA = 2,
};

/* BACK (block-ack) parties */
enum ieee80211_back_parties {
	WLAN_BACK_RECIPIENT = 0,
	WLAN_BACK_INITIATOR = 1,
};

/* SA Query action */
enum ieee80211_sa_query_action {
	WLAN_ACTION_SA_QUERY_REQUEST = 0,
	WLAN_ACTION_SA_QUERY_RESPONSE = 1,
};

/**
 * struct ieee80211_bssid_index - multiple BSSID index element structure
 *
 * This structure refers to "Multiple BSSID-index element"
 *
 * @bssid_index: BSSID index
 * @dtim_period: optional, overrides transmitted BSS dtim period
 * @dtim_count: optional, overrides transmitted BSS dtim count
 */
struct ieee80211_bssid_index {
	u8 bssid_index;
	u8 dtim_period;
	u8 dtim_count;
};

/**
 * struct ieee80211_multiple_bssid_configuration - multiple BSSID configuration
 *	element structure
 *
 * This structure refers to "Multiple BSSID Configuration element"
 *
 * @bssid_count: total number of active BSSIDs in the set
 * @profile_periodicity: the least number of beacon frames that need to be
 *	received in order to discover all the nontransmitted BSSIDs in the set.
*/
struct ieee80211_multiple_bssid_configuration {
	u8 bssid_count;
	u8 profile_periodicity;
};

/*
 * Build a suite selector value: @oui shifted left by 8 bits, OR-ed with
 * @id in the low bits.
 */
#define SUITE(oui, id)	(((oui) << 8) | (id))

/* cipher suite selectors */
#define WLAN_CIPHER_SUITE_USE_GROUP	SUITE(0x000FAC, 0)
#define WLAN_CIPHER_SUITE_WEP40		SUITE(0x000FAC, 1)
#define WLAN_CIPHER_SUITE_TKIP		SUITE(0x000FAC, 2)
/* reserved: 				SUITE(0x000FAC, 3) */
#define WLAN_CIPHER_SUITE_CCMP		SUITE(0x000FAC, 4)
#define WLAN_CIPHER_SUITE_WEP104	SUITE(0x000FAC, 5)
#define WLAN_CIPHER_SUITE_AES_CMAC	SUITE(0x000FAC, 6)
#define WLAN_CIPHER_SUITE_GCMP		SUITE(0x000FAC, 8)
#define WLAN_CIPHER_SUITE_GCMP_256	SUITE(0x000FAC, 9)
#define WLAN_CIPHER_SUITE_CCMP_256	SUITE(0x000FAC, 10)
#define WLAN_CIPHER_SUITE_BIP_GMAC_128	SUITE(0x000FAC, 11)
#define WLAN_CIPHER_SUITE_BIP_GMAC_256	SUITE(0x000FAC, 12)
#define WLAN_CIPHER_SUITE_BIP_CMAC_256	SUITE(0x000FAC, 13)

#define WLAN_CIPHER_SUITE_SMS4		SUITE(0x001472, 1)

/* AKM suite selectors */
#define WLAN_AKM_SUITE_8021X			SUITE(0x000FAC, 1)
#define WLAN_AKM_SUITE_PSK			SUITE(0x000FAC, 2)
#define WLAN_AKM_SUITE_FT_8021X			SUITE(0x000FAC, 3)
#define WLAN_AKM_SUITE_FT_PSK			SUITE(0x000FAC, 4)
#define WLAN_AKM_SUITE_8021X_SHA256		SUITE(0x000FAC, 5)
#define WLAN_AKM_SUITE_PSK_SHA256		SUITE(0x000FAC, 6)
#define WLAN_AKM_SUITE_TDLS			SUITE(0x000FAC, 7)
#define WLAN_AKM_SUITE_SAE			SUITE(0x000FAC, 8)
#define WLAN_AKM_SUITE_FT_OVER_SAE		SUITE(0x000FAC, 9)
#define WLAN_AKM_SUITE_AP_PEER_KEY		SUITE(0x000FAC, 10)
#define WLAN_AKM_SUITE_8021X_SUITE_B		SUITE(0x000FAC, 11)
#define WLAN_AKM_SUITE_8021X_SUITE_B_192	SUITE(0x000FAC, 12)
#define WLAN_AKM_SUITE_FT_8021X_SHA384		SUITE(0x000FAC, 13)
#define WLAN_AKM_SUITE_FILS_SHA256		SUITE(0x000FAC, 14)
#define WLAN_AKM_SUITE_FILS_SHA384		SUITE(0x000FAC, 15)
#define WLAN_AKM_SUITE_FT_FILS_SHA256		SUITE(0x000FAC, 16)
#define WLAN_AKM_SUITE_FT_FILS_SHA384		SUITE(0x000FAC, 17)
#define WLAN_AKM_SUITE_OWE			SUITE(0x000FAC, 18)
#define WLAN_AKM_SUITE_FT_PSK_SHA384		SUITE(0x000FAC, 19)
#define WLAN_AKM_SUITE_PSK_SHA384		SUITE(0x000FAC, 20)

/* WLAN_OUI_WFA is defined further down; it is only expanded at the point of use */
#define WLAN_AKM_SUITE_WFA_DPP			SUITE(WLAN_OUI_WFA, 2)

/* Key, PMK name, PMKID and PMK length constants */
#define WLAN_MAX_KEY_LEN		32

#define WLAN_PMK_NAME_LEN		16
#define WLAN_PMKID_LEN			16
#define WLAN_PMK_LEN_EAP_LEAP		16
#define WLAN_PMK_LEN			32
#define WLAN_PMK_LEN_SUITE_B_192	48

/* OUI values and the OUI type values defined under them */
#define WLAN_OUI_WFA			0x506f9a
#define WLAN_OUI_TYPE_WFA_P2P		9
#define WLAN_OUI_TYPE_WFA_DPP		0x1A
#define WLAN_OUI_MICROSOFT		0x0050f2
#define WLAN_OUI_TYPE_MICROSOFT_WPA	1
#define WLAN_OUI_TYPE_MICROSOFT_WMM	2
#define WLAN_OUI_TYPE_MICROSOFT_WPS	4
#define WLAN_OUI_TYPE_MICROSOFT_TPC	8

/*
 * WMM/802.11e Tspec Element
 */
#define IEEE80211_WMM_IE_TSPEC_TID_MASK		0x0F
#define IEEE80211_WMM_IE_TSPEC_TID_SHIFT	1

enum ieee80211_tspec_status_code {
	IEEE80211_TSPEC_STATUS_ADMISS_ACCEPTED = 0,
	IEEE80211_TSPEC_STATUS_ADDTS_INVAL_PARAMS = 0x1,
};

/* Packed layout of the Tspec element; multi-octet fields are little-endian */
struct ieee80211_tspec_ie {
	u8 element_id;
	u8 len;
	u8 oui[3];
	u8 oui_type;
	u8 oui_subtype;
	u8 version;
	__le16 tsinfo;
	u8 tsinfo_resvd;
	__le16 nominal_msdu;
	__le16 max_msdu;
	__le32 min_service_int;
	__le32 max_service_int;
	__le32 inactivity_int;
	__le32 suspension_int;
	__le32 service_start_time;
	__le32 min_data_rate;
	__le32 mean_data_rate;
	__le32 peak_data_rate;
	__le32 max_burst_size;
	__le32 delay_bound;
	__le32 min_phy_rate;
	__le16 sba;
	__le16 medium_time;
} __packed;

struct ieee80211_he_6ghz_capa {
	/* uses IEEE80211_HE_6GHZ_CAP_* below */
	__le16 capa;
} __packed;

/* HE 6 GHz band capabilities */
/* uses enum ieee80211_min_mpdu_spacing values */
#define IEEE80211_HE_6GHZ_CAP_MIN_MPDU_START	0x0007
/* uses enum ieee80211_vht_max_ampdu_length_exp values */
#define IEEE80211_HE_6GHZ_CAP_MAX_AMPDU_LEN_EXP	0x0038
/* uses IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_* values */
#define IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN	0x00c0
/* WLAN_HT_CAP_SM_PS_* values */
#define IEEE80211_HE_6GHZ_CAP_SM_PS		0x0600
#define IEEE80211_HE_6GHZ_CAP_RD_RESPONDER	0x0800
#define IEEE80211_HE_6GHZ_CAP_RX_ANTPAT_CONS	0x1000
#define IEEE80211_HE_6GHZ_CAP_TX_ANTPAT_CONS	0x2000

/**
 * ieee80211_get_qos_ctl - get pointer to qos control bytes
 * @hdr: the frame
 *
Return: a pointer to the QoS control field in the frame header * * The qos ctrl bytes come after the frame_control, duration, seq_num * and 3 or 4 addresses of length ETH_ALEN. Checks frame_control to choose * between struct ieee80211_qos_hdr_4addr and struct ieee80211_qos_hdr. */ static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr) { union { struct ieee80211_qos_hdr addr3; struct ieee80211_qos_hdr_4addr addr4; } *qos; qos = (void *)hdr; if (ieee80211_has_a4(qos->addr3.frame_control)) return (u8 *)&qos->addr4.qos_ctrl; else return (u8 *)&qos->addr3.qos_ctrl; } /** * ieee80211_get_tid - get qos TID * @hdr: the frame * Return: the TID from the QoS control field */ static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr) { u8 *qc = ieee80211_get_qos_ctl(hdr); return qc[0] & IEEE80211_QOS_CTL_TID_MASK; } /** * ieee80211_get_SA - get pointer to SA * @hdr: the frame * Return: a pointer to the source address (SA) * * Given an 802.11 frame, this function returns the offset * to the source address (SA). It does not verify that the * header is long enough to contain the address, and the * header must be long enough to contain the frame control * field. */ static inline u8 *ieee80211_get_SA(struct ieee80211_hdr *hdr) { if (ieee80211_has_a4(hdr->frame_control)) return hdr->addr4; if (ieee80211_has_fromds(hdr->frame_control)) return hdr->addr3; return hdr->addr2; } /** * ieee80211_get_DA - get pointer to DA * @hdr: the frame * Return: a pointer to the destination address (DA) * * Given an 802.11 frame, this function returns the offset * to the destination address (DA). It does not verify that * the header is long enough to contain the address, and the * header must be long enough to contain the frame control * field. 
*/ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr) { if (ieee80211_has_tods(hdr->frame_control)) return hdr->addr3; else return hdr->addr1; } /** * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU * @skb: the skb to check, starting with the 802.11 header * Return: whether or not the MMPDU is bufferable */ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; __le16 fc = mgmt->frame_control; /* * IEEE 802.11 REVme D2.0 definition of bufferable MMPDU; * note that this ignores the IBSS special case. */ if (!ieee80211_is_mgmt(fc)) return false; if (ieee80211_is_disassoc(fc) || ieee80211_is_deauth(fc)) return true; if (!ieee80211_is_action(fc)) return false; if (skb->len < offsetofend(typeof(*mgmt), u.action.u.ftm.action_code)) return true; /* action frame - additionally check for non-bufferable FTM */ if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC && mgmt->u.action.category != WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION) return true; if (mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_REQUEST || mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_RESPONSE) return false; return true; } /** * _ieee80211_is_robust_mgmt_frame - check if frame is a robust management frame * @hdr: the frame (buffer must include at least the first octet of payload) * Return: whether or not the frame is a robust management frame */ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) { if (ieee80211_is_disassoc(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control)) return true; if (ieee80211_is_action(hdr->frame_control)) { u8 *category; /* * Action frames, excluding Public Action frames, are Robust * Management Frames. However, if we are looking at a Protected * frame, skip the check since the data may be encrypted and * the frame has already been found to be a Robust Management * Frame (by the other end). 
*/ if (ieee80211_has_protected(hdr->frame_control)) return true; category = ((u8 *) hdr) + 24; return *category != WLAN_CATEGORY_PUBLIC && *category != WLAN_CATEGORY_HT && *category != WLAN_CATEGORY_WNM_UNPROTECTED && *category != WLAN_CATEGORY_SELF_PROTECTED && *category != WLAN_CATEGORY_UNPROT_DMG && *category != WLAN_CATEGORY_VHT && *category != WLAN_CATEGORY_S1G && *category != WLAN_CATEGORY_VENDOR_SPECIFIC; } return false; } /** * ieee80211_is_robust_mgmt_frame - check if skb contains a robust mgmt frame * @skb: the skb containing the frame, length will be checked * Return: whether or not the frame is a robust management frame */ static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb) { if (skb->len < IEEE80211_MIN_ACTION_SIZE) return false; return _ieee80211_is_robust_mgmt_frame((void *)skb->data); } /** * ieee80211_is_public_action - check if frame is a public action frame * @hdr: the frame * @len: length of the frame * Return: whether or not the frame is a public action frame */ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, size_t len) { struct ieee80211_mgmt *mgmt = (void *)hdr; if (len < IEEE80211_MIN_ACTION_SIZE) return false; if (!ieee80211_is_action(hdr->frame_control)) return false; return mgmt->u.action.category == WLAN_CATEGORY_PUBLIC; } /** * ieee80211_is_protected_dual_of_public_action - check if skb contains a * protected dual of public action management frame * @skb: the skb containing the frame, length will be checked * * Return: true if the skb contains a protected dual of public action * management frame, false otherwise. 
*/ static inline bool ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) { u8 action; if (!ieee80211_is_public_action((void *)skb->data, skb->len) || skb->len < IEEE80211_MIN_ACTION_SIZE + 1) return false; action = *(u8 *)(skb->data + IEEE80211_MIN_ACTION_SIZE); return action != WLAN_PUB_ACTION_20_40_BSS_COEX && action != WLAN_PUB_ACTION_DSE_REG_LOC_ANN && action != WLAN_PUB_ACTION_MSMT_PILOT && action != WLAN_PUB_ACTION_TDLS_DISCOVER_RES && action != WLAN_PUB_ACTION_LOC_TRACK_NOTI && action != WLAN_PUB_ACTION_FTM_REQUEST && action != WLAN_PUB_ACTION_FTM_RESPONSE && action != WLAN_PUB_ACTION_FILS_DISCOVERY && action != WLAN_PUB_ACTION_VENDOR_SPECIFIC; } /** * _ieee80211_is_group_privacy_action - check if frame is a group addressed * privacy action frame * @hdr: the frame * Return: whether or not the frame is a group addressed privacy action frame */ static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr) { struct ieee80211_mgmt *mgmt = (void *)hdr; if (!ieee80211_is_action(hdr->frame_control) || !is_multicast_ether_addr(hdr->addr1)) return false; return mgmt->u.action.category == WLAN_CATEGORY_MESH_ACTION || mgmt->u.action.category == WLAN_CATEGORY_MULTIHOP_ACTION; } /** * ieee80211_is_group_privacy_action - check if frame is a group addressed * privacy action frame * @skb: the skb containing the frame, length will be checked * Return: whether or not the frame is a group addressed privacy action frame */ static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb) { if (skb->len < IEEE80211_MIN_ACTION_SIZE) return false; return _ieee80211_is_group_privacy_action((void *)skb->data); } /** * ieee80211_tu_to_usec - convert time units (TU) to microseconds * @tu: the TUs * Return: the time value converted to microseconds */ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) { return 1024 * tu; } static inline bool __ieee80211_check_tim(const struct ieee80211_tim_ie *tim, u8 tim_len, u16 aid) { u8 mask; 
u8 index, indexn1, indexn2; if (unlikely(!tim || tim_len < sizeof(*tim))) return false; aid &= 0x3fff; index = aid / 8; mask = 1 << (aid & 7); indexn1 = tim->bitmap_ctrl & 0xfe; indexn2 = tim_len + indexn1 - 4; if (index < indexn1 || index > indexn2) return false; index -= indexn1; return !!(tim->virtual_map[index] & mask); } struct s1g_tim_aid { u16 aid; u8 target_blk; /* Target block index */ u8 target_subblk; /* Target subblock index */ u8 target_subblk_bit; /* Target subblock bit */ }; struct s1g_tim_enc_block { u8 enc_mode; bool inverse; const u8 *ptr; u8 len; /* * For an OLB encoded block that spans multiple blocks, this * is the offset into the span described by that encoded block. */ u8 olb_blk_offset; }; /* * Helper routines to quickly extract the length of an encoded block. Validation * is also performed to ensure the length extracted lies within the TIM. */ static inline int ieee80211_s1g_len_bitmap(const u8 *ptr, const u8 *end) { u8 blkmap; u8 n_subblks; if (ptr >= end) return -EINVAL; blkmap = *ptr; n_subblks = hweight8(blkmap); if (ptr + 1 + n_subblks > end) return -EINVAL; return 1 + n_subblks; } static inline int ieee80211_s1g_len_single(const u8 *ptr, const u8 *end) { return (ptr + 1 > end) ? -EINVAL : 1; } static inline int ieee80211_s1g_len_olb(const u8 *ptr, const u8 *end) { if (ptr >= end) return -EINVAL; return (ptr + 1 + *ptr > end) ? -EINVAL : 1 + *ptr; } /* * Enumerate all encoded blocks until we find the encoded block that describes * our target AID. OLB is a special case as a single encoded block can describe * multiple blocks as a single encoded block. 
*/ static inline int ieee80211_s1g_find_target_block(struct s1g_tim_enc_block *enc, const struct s1g_tim_aid *aid, const u8 *ptr, const u8 *end) { /* need at least block-control octet */ while (ptr + 1 <= end) { u8 ctrl = *ptr++; u8 mode = ctrl & 0x03; bool contains, inverse = ctrl & BIT(2); u8 span, blk_off = ctrl >> 3; int len; switch (mode) { case IEEE80211_S1G_TIM_ENC_MODE_BLOCK: len = ieee80211_s1g_len_bitmap(ptr, end); contains = blk_off == aid->target_blk; break; case IEEE80211_S1G_TIM_ENC_MODE_SINGLE: len = ieee80211_s1g_len_single(ptr, end); contains = blk_off == aid->target_blk; break; case IEEE80211_S1G_TIM_ENC_MODE_OLB: len = ieee80211_s1g_len_olb(ptr, end); /* * An OLB encoded block can describe more then one * block, meaning an encoded OLB block can span more * then a single block. */ if (len > 0) { /* Minus one for the length octet */ span = DIV_ROUND_UP(len - 1, 8); /* * Check if our target block lies within the * block span described by this encoded block. */ contains = (aid->target_blk >= blk_off) && (aid->target_blk < blk_off + span); } break; default: return -EOPNOTSUPP; } if (len < 0) return len; if (contains) { enc->enc_mode = mode; enc->inverse = inverse; enc->ptr = ptr; enc->len = (u8)len; enc->olb_blk_offset = blk_off; return 0; } ptr += len; } return -ENOENT; } static inline bool ieee80211_s1g_parse_bitmap(struct s1g_tim_enc_block *enc, struct s1g_tim_aid *aid) { const u8 *ptr = enc->ptr; u8 blkmap = *ptr++; /* * If our block bitmap does not contain a set bit that corresponds * to our AID, it could mean a variety of things depending on if * the encoding mode is inverted or not. * * 1. If inverted, it means the entire subblock is present and hence * our AID has been set. * 2. If not inverted, it means our subblock is not present and hence * it is all zero meaning our AID is not set. */ if (!(blkmap & BIT(aid->target_subblk))) return enc->inverse; /* * Increment ptr by the number of set subblocks that appear before our * target subblock. 
If our target subblock is 0, do nothing as ptr * already points to our target subblock. */ if (aid->target_subblk) ptr += hweight8(blkmap & GENMASK(aid->target_subblk - 1, 0)); return !!(*ptr & BIT(aid->target_subblk_bit)) ^ enc->inverse; } static inline bool ieee80211_s1g_parse_single(struct s1g_tim_enc_block *enc, struct s1g_tim_aid *aid) { /* * Single AID mode describes, as the name suggests, a single AID * within the block described by the encoded block. The octet * contains the 6 LSBs of the AID described in the block. The other * 2 bits are reserved. When inversed, every single AID described * by the current block have buffered traffic except for the AID * described in the single AID octet. */ return ((*enc->ptr & 0x3f) == (aid->aid & 0x3f)) ^ enc->inverse; } static inline bool ieee80211_s1g_parse_olb(struct s1g_tim_enc_block *enc, struct s1g_tim_aid *aid) { const u8 *ptr = enc->ptr; u8 blk_len = *ptr++; /* * Given an OLB encoded block that describes multiple blocks, * calculate the offset into the span. Then calculate the * subblock location normally. */ u16 span_offset = aid->target_blk - enc->olb_blk_offset; u16 subblk_idx = span_offset * 8 + aid->target_subblk; if (subblk_idx >= blk_len) return enc->inverse; return !!(ptr[subblk_idx] & BIT(aid->target_subblk_bit)) ^ enc->inverse; } /* * An S1G PVB has 3 non optional encoding types, each that can be inverted. * An S1G PVB is constructed with zero or more encoded block subfields. Each * encoded block represents a single "block" of AIDs (64), and each encoded * block can contain one of the 3 encoding types alongside a single bit for * whether the bits should be inverted. * * As the standard makes no guarantee about the ordering of encoded blocks, * we must parse every encoded block in the worst case scenario given an * AID that lies within the last block. 
*/ static inline bool ieee80211_s1g_check_tim(const struct ieee80211_tim_ie *tim, u8 tim_len, u16 aid) { int err; struct s1g_tim_aid target_aid; struct s1g_tim_enc_block enc_blk; if (tim_len < 3) return false; target_aid.aid = aid; target_aid.target_blk = (aid >> 6) & 0x1f; target_aid.target_subblk = (aid >> 3) & 0x7; target_aid.target_subblk_bit = aid & 0x7; /* * Find our AIDs target encoded block and fill &enc_blk with the * encoded blocks information. If no entry is found or an error * occurs return false. */ err = ieee80211_s1g_find_target_block(&enc_blk, &target_aid, tim->virtual_map, (const u8 *)tim + tim_len + 2); if (err) return false; switch (enc_blk.enc_mode) { case IEEE80211_S1G_TIM_ENC_MODE_BLOCK: return ieee80211_s1g_parse_bitmap(&enc_blk, &target_aid); case IEEE80211_S1G_TIM_ENC_MODE_SINGLE: return ieee80211_s1g_parse_single(&enc_blk, &target_aid); case IEEE80211_S1G_TIM_ENC_MODE_OLB: return ieee80211_s1g_parse_olb(&enc_blk, &target_aid); default: return false; } } /** * ieee80211_check_tim - check if AID bit is set in TIM * @tim: the TIM IE * @tim_len: length of the TIM IE * @aid: the AID to look for * @s1g: whether the TIM is from an S1G PPDU * Return: whether or not traffic is indicated in the TIM for the given AID */ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, u8 tim_len, u16 aid, bool s1g) { return s1g ? ieee80211_s1g_check_tim(tim, tim_len, aid) : __ieee80211_check_tim(tim, tim_len, aid); } /** * ieee80211_get_tdls_action - get TDLS action code * @skb: the skb containing the frame, length will not be checked * Return: the TDLS action code, or -1 if it's not an encapsulated TDLS action * frame * * This function assumes the frame is a data frame, and that the network header * is in the correct place. 
*/ static inline int ieee80211_get_tdls_action(struct sk_buff *skb) { if (!skb_is_nonlinear(skb) && skb->len > (skb_network_offset(skb) + 2)) { /* Point to where the indication of TDLS should start */ const u8 *tdls_data = skb_network_header(skb) - 2; if (get_unaligned_be16(tdls_data) == ETH_P_TDLS && tdls_data[2] == WLAN_TDLS_SNAP_RFTYPE && tdls_data[3] == WLAN_CATEGORY_TDLS) return tdls_data[4]; } return -1; } /* convert time units */ #define TU_TO_JIFFIES(x) (usecs_to_jiffies((x) * 1024)) #define TU_TO_EXP_TIME(x) (jiffies + TU_TO_JIFFIES(x)) /* convert frequencies */ #define MHZ_TO_KHZ(freq) ((freq) * 1000) #define KHZ_TO_MHZ(freq) ((freq) / 1000) #define PR_KHZ(f) KHZ_TO_MHZ(f), f % 1000 #define KHZ_F "%d.%03d" /* convert powers */ #define DBI_TO_MBI(gain) ((gain) * 100) #define MBI_TO_DBI(gain) ((gain) / 100) #define DBM_TO_MBM(gain) ((gain) * 100) #define MBM_TO_DBM(gain) ((gain) / 100) /** * ieee80211_action_contains_tpc - checks if the frame contains TPC element * @skb: the skb containing the frame, length will be checked * Return: %true if the frame contains a TPC element, %false otherwise * * This function checks if it's either TPC report action frame or Link * Measurement report action frame as defined in IEEE Std. 802.11-2012 8.5.2.5 * and 8.5.7.5 accordingly. */ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; if (!ieee80211_is_action(mgmt->frame_control)) return false; if (skb->len < IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.tpc_report)) return false; /* * TPC report - check that: * category = 0 (Spectrum Management) or 5 (Radio Measurement) * spectrum management action = 3 (TPC/Link Measurement report) * TPC report EID = 35 * TPC report element length = 2 * * The spectrum management's tpc_report struct is used here both for * parsing tpc_report and radio measurement's link measurement report * frame, since the relevant part is identical in both frames. 
*/ if (mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT && mgmt->u.action.category != WLAN_CATEGORY_RADIO_MEASUREMENT) return false; /* both spectrum mgmt and link measurement have same action code */ if (mgmt->u.action.u.tpc_report.action_code != WLAN_ACTION_SPCT_TPC_RPRT) return false; if (mgmt->u.action.u.tpc_report.tpc_elem_id != WLAN_EID_TPC_REPORT || mgmt->u.action.u.tpc_report.tpc_elem_length != sizeof(struct ieee80211_tpc_report_ie)) return false; return true; } /** * ieee80211_is_timing_measurement - check if frame is timing measurement response * @skb: the SKB to check * Return: whether or not the frame is a valid timing measurement response */ static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; if (skb->len < IEEE80211_MIN_ACTION_SIZE) return false; if (!ieee80211_is_action(mgmt->frame_control)) return false; if (mgmt->u.action.category == WLAN_CATEGORY_WNM_UNPROTECTED && mgmt->u.action.u.wnm_timing_msr.action_code == WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE && skb->len >= offsetofend(typeof(*mgmt), u.action.u.wnm_timing_msr)) return true; return false; } /** * ieee80211_is_ftm - check if frame is FTM response * @skb: the SKB to check * Return: whether or not the frame is a valid FTM response action frame */ static inline bool ieee80211_is_ftm(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; if (!ieee80211_is_public_action((void *)mgmt, skb->len)) return false; if (mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_RESPONSE && skb->len >= offsetofend(typeof(*mgmt), u.action.u.ftm)) return true; return false; } /** * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon * @fc: frame control bytes in little-endian byteorder * @variable: pointer to the beacon frame elements * @variable_len: length of the frame elements * Return: whether or not the frame is an S1G short beacon. 
As per
 * IEEE80211-2024 11.1.3.10.1, The S1G beacon compatibility element shall
 * always be present as the first element in beacon frames generated at a
 * TBTT (Target Beacon Transmission Time), so any frame not containing
 * this element must have been generated at a TSBTT (Target Short Beacon
 * Transmission Time) that is not a TBTT. Additionally, short beacons are
 * prohibited from containing the S1G beacon compatibility element as per
 * IEEE80211-2024 9.3.4.3 Table 9-76, so if we have an S1G beacon with
 * either no elements or the first element is not the beacon compatibility
 * element, we have a short beacon.
 */
static inline bool ieee80211_is_s1g_short_beacon(__le16 fc, const u8 *variable,
						 size_t variable_len)
{
	if (!ieee80211_is_s1g_beacon(fc))
		return false;

	/*
	 * If the frame does not contain at least 1 element (this is perfectly
	 * valid in a short beacon) and is an S1G beacon, we have a short
	 * beacon.
	 */
	if (variable_len < 2)
		return true;

	/* variable[0] is the ID octet of the first element */
	return variable[0] != WLAN_EID_S1G_BCN_COMPAT;
}

/*
 * Generic element header: an ID octet, a length octet and @datalen octets
 * of payload (flexible array, so sizeof() covers the two header octets only).
 */
struct element {
	u8 id;
	u8 datalen;
	u8 data[];
} __packed;

/* element iteration helpers */

/*
 * Walk the elements in [_data, _data + _datalen). The loop continues only
 * while the remaining bytes hold a full element header and, in addition,
 * the header plus the payload length it announces; it therefore stops at
 * the first truncated element. Note that _data and _datalen are evaluated
 * on every iteration.
 */
#define for_each_element(_elem, _data, _datalen)			\
	for (_elem = (const struct element *)(_data);			\
	     (const u8 *)(_data) + (_datalen) - (const u8 *)_elem >=	\
		(int)sizeof(*_elem) &&					\
	     (const u8 *)(_data) + (_datalen) - (const u8 *)_elem >=	\
		(int)sizeof(*_elem) + _elem->datalen;			\
	     _elem = (const struct element *)(_elem->data + _elem->datalen))

/* same walk, but the loop body only runs for elements whose id is _id */
#define for_each_element_id(element, _id, data, datalen)		\
	for_each_element(element, data, datalen)			\
		if (element->id == (_id))

/*
 * same walk, but the loop body only runs for extension elements that have
 * at least one payload octet and whose first payload octet equals extid
 */
#define for_each_element_extid(element, extid, _data, _datalen)		\
	for_each_element(element, _data, _datalen)			\
		if (element->id == WLAN_EID_EXTENSION &&		\
		    element->datalen > 0 &&				\
		    element->data[0] == (extid))

/* the sub-element variants iterate over the payload of an outer element */
#define for_each_subelement(sub, element)				\
	for_each_element(sub, (element)->data, (element)->datalen)

#define for_each_subelement_id(sub, id, element)			\
	for_each_element_id(sub, id, (element)->data,			\
			    (element)->datalen)

#define for_each_subelement_extid(sub, extid, element)			\
	for_each_element_extid(sub, extid, (element)->data, (element)->datalen)

/**
 * for_each_element_completed - determine if element parsing consumed all data
 * @element: element pointer after for_each_element() or friends
 * @data: same data pointer as passed to for_each_element() or friends
 * @datalen: same data length as passed to for_each_element() or friends
 * Return: %true if all elements were iterated, %false otherwise; see notes
 *
 * This function returns %true if all the data was parsed or considered
 * while walking the elements. Only use this if your for_each_element()
 * loop cannot be broken out of, otherwise it always returns %false.
 *
 * If some data was malformed, this returns %false since the last parsed
 * element will not fill the whole remaining data.
 */
static inline bool for_each_element_completed(const struct element *element,
					      const void *data, size_t datalen)
{
	/* complete means the cursor stopped exactly one past the last byte */
	return (const u8 *)element == (const u8 *)data + datalen;
}

/*
 * RSNX Capabilities:
 * bits 0-3: Field length (n-1)
 */
#define WLAN_RSNX_CAPA_PROTECTED_TWT BIT(4)
#define WLAN_RSNX_CAPA_SAE_H2E BIT(5)

/*
 * reduced neighbor report, based on Draft P802.11ax_D6.1,
 * section 9.4.2.170 and accepted contributions.
*/
/*
 * NOTE(review): the TBTT_HDR values below look like bit masks over
 * ieee80211_neighbor_ap_info::tbtt_info_hdr - confirm against the users.
 */
#define IEEE80211_AP_INFO_TBTT_HDR_TYPE				0x03
#define IEEE80211_AP_INFO_TBTT_HDR_FILTERED			0x04
#define IEEE80211_AP_INFO_TBTT_HDR_COLOC			0x08
#define IEEE80211_AP_INFO_TBTT_HDR_COUNT			0xF0
#define IEEE80211_TBTT_INFO_TYPE_TBTT				0
#define IEEE80211_TBTT_INFO_TYPE_MLD				1

/* single-bit flags, presumably for the bss_params octet - TODO confirm */
#define IEEE80211_RNR_TBTT_PARAMS_OCT_RECOMMENDED		0x01
#define IEEE80211_RNR_TBTT_PARAMS_SAME_SSID			0x02
#define IEEE80211_RNR_TBTT_PARAMS_MULTI_BSSID			0x04
#define IEEE80211_RNR_TBTT_PARAMS_TRANSMITTED_BSSID		0x08
#define IEEE80211_RNR_TBTT_PARAMS_COLOC_ESS			0x10
#define IEEE80211_RNR_TBTT_PARAMS_PROBE_ACTIVE			0x20
#define IEEE80211_RNR_TBTT_PARAMS_COLOC_AP			0x40

/* the two extreme values of a signed 8-bit quantity */
#define IEEE80211_RNR_TBTT_PARAMS_PSD_NO_LIMIT			127
#define IEEE80211_RNR_TBTT_PARAMS_PSD_RESERVED			-128

/* four-octet, packed neighbor AP information header */
struct ieee80211_neighbor_ap_info {
	u8 tbtt_info_hdr;
	u8 tbtt_info_len;
	u8 op_class;
	u8 channel;
} __packed;

/* enumerators take the values 0..3 in declaration order */
enum ieee80211_range_params_max_total_ltf {
	IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_4 = 0,
	IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_8,
	IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_16,
	IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_UNSPECIFIED,
};

/*
 * reduced neighbor report, based on Draft P802.11be_D3.0,
 * section 9.4.2.170.2.
*/
/* packed, three octets: MLD ID followed by a little-endian parameter word */
struct ieee80211_rnr_mld_params {
	u8 mld_id;
	__le16 params;
} __packed;

/* non-overlapping bit fields of ieee80211_rnr_mld_params::params (by name) */
#define IEEE80211_RNR_MLD_PARAMS_LINK_ID			0x000F
#define IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT		0x0FF0
#define IEEE80211_RNR_MLD_PARAMS_UPDATES_INCLUDED		0x1000
#define IEEE80211_RNR_MLD_PARAMS_DISABLED_LINK			0x2000

/* Format of the TBTT information element if it has 7, 8 or 9 bytes */
struct ieee80211_tbtt_info_7_8_9 {
	u8 tbtt_offset;
	u8 bssid[ETH_ALEN];

	/* The following element is optional, structure may not grow */
	u8 bss_params;
	s8 psd_20;
} __packed;

/* Format of the TBTT information element if it has >= 11 bytes */
struct ieee80211_tbtt_info_ge_11 {
	u8 tbtt_offset;
	u8 bssid[ETH_ALEN];
	__le32 short_ssid;

	/* The following elements are optional, structure may grow */
	u8 bss_params;
	s8 psd_20;
	struct ieee80211_rnr_mld_params mld_params;
} __packed;

/* multi-link device */
#define IEEE80211_MLD_MAX_NUM_LINKS	15

/* mask of the type field in the control word, followed by its values */
#define IEEE80211_ML_CONTROL_TYPE			0x0007
#define IEEE80211_ML_CONTROL_TYPE_BASIC			0
#define IEEE80211_ML_CONTROL_TYPE_PREQ			1
#define IEEE80211_ML_CONTROL_TYPE_RECONF		2
#define IEEE80211_ML_CONTROL_TYPE_TDLS			3
#define IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS		4
#define IEEE80211_ML_CONTROL_PRESENCE_MASK		0xfff0

/*
 * Start of a multi-link element: a little-endian control word followed by
 * variable-length data (the ieee80211_mle_*() helpers in this file treat
 * variable[0] as the common info length octet).
 */
struct ieee80211_multi_link_elem {
	__le16 control;
	u8 variable[];
} __packed;

/* presence bits in the control word of a BASIC multi-link element */
#define IEEE80211_MLC_BASIC_PRES_LINK_ID		0x0010
#define IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT	0x0020
#define IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY		0x0040
#define IEEE80211_MLC_BASIC_PRES_EML_CAPA		0x0080
#define IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP		0x0100
#define IEEE80211_MLC_BASIC_PRES_MLD_ID			0x0200
#define IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP	0x0400

/* bit fields of the 16-bit medium synchronization delay value */
#define IEEE80211_MED_SYNC_DELAY_DURATION		0x00ff
#define IEEE80211_MED_SYNC_DELAY_SYNC_OFDM_ED_THRESH	0x0f00
#define IEEE80211_MED_SYNC_DELAY_SYNC_MAX_NUM_TXOPS	0xf000

/*
 * Described in P802.11be_D3.0
 * dot11MSDTimerDuration should default to 5484 (i.e. 171.375)
 * dot11MSDOFDMEDthreshold defaults to -72 (i.e.
0)
 * dot11MSDTXOPMAX defaults to 1
 */
/* 0x10ac: bits 0-7 = 0xac, bits 8-11 = 0, bits 12-15 = 1 */
#define IEEE80211_MED_SYNC_DELAY_DEFAULT		0x10ac

/*
 * EML capabilities: each 0x.... value is a field mask, the small integers
 * that follow a mask are the encodings of that field.
 */
#define IEEE80211_EML_CAP_EMLSR_SUPP			0x0001
#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY		0x000e
#define  IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_0US		0
#define  IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_32US		1
#define  IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_64US		2
#define  IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_128US		3
#define  IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_256US		4
#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY	0x0070
#define  IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_0US		0
#define  IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_16US		1
#define  IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_32US		2
#define  IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_64US		3
#define  IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_128US		4
#define  IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_256US		5
#define IEEE80211_EML_CAP_EMLMR_SUPPORT			0x0080
#define IEEE80211_EML_CAP_EMLMR_DELAY			0x0700
#define  IEEE80211_EML_CAP_EMLMR_DELAY_0US			0
#define  IEEE80211_EML_CAP_EMLMR_DELAY_32US			1
#define  IEEE80211_EML_CAP_EMLMR_DELAY_64US			2
#define  IEEE80211_EML_CAP_EMLMR_DELAY_128US			3
#define  IEEE80211_EML_CAP_EMLMR_DELAY_256US			4
#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT		0x7800
#define  IEEE80211_EML_CAP_TRANSITION_TIMEOUT_0			0
#define  IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128US		1
#define  IEEE80211_EML_CAP_TRANSITION_TIMEOUT_256US		2
#define  IEEE80211_EML_CAP_TRANSITION_TIMEOUT_512US		3
#define  IEEE80211_EML_CAP_TRANSITION_TIMEOUT_1TU		4
#define  IEEE80211_EML_CAP_TRANSITION_TIMEOUT_2TU		5
#define  IEEE80211_EML_CAP_TRANSITION_TIMEOUT_4TU		6
#define  IEEE80211_EML_CAP_TRANSITION_TIMEOUT_8TU		7
#define  IEEE80211_EML_CAP_TRANSITION_TIMEOUT_16TU		8
#define  IEEE80211_EML_CAP_TRANSITION_TIMEOUT_32TU		9
#define  IEEE80211_EML_CAP_TRANSITION_TIMEOUT_64TU		10
#define  IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128TU		11

/* MLD capabilities and operations: field masks */
#define IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS		0x000f
#define IEEE80211_MLD_CAP_OP_SRS_SUPPORT		0x0010
#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP	0x0060
#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_NO_SUPP 0 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_SAME 1 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_RESERVED 2 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_DIFF 3 #define IEEE80211_MLD_CAP_OP_FREQ_SEP_TYPE_IND 0x0f80 #define IEEE80211_MLD_CAP_OP_AAR_SUPPORT 0x1000 #define IEEE80211_MLD_CAP_OP_LINK_RECONF_SUPPORT 0x2000 #define IEEE80211_MLD_CAP_OP_ALIGNED_TWT_SUPPORT 0x4000 struct ieee80211_mle_basic_common_info { u8 len; u8 mld_mac_addr[ETH_ALEN]; u8 variable[]; } __packed; #define IEEE80211_MLC_PREQ_PRES_MLD_ID 0x0010 struct ieee80211_mle_preq_common_info { u8 len; u8 variable[]; } __packed; #define IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR 0x0010 #define IEEE80211_MLC_RECONF_PRES_EML_CAPA 0x0020 #define IEEE80211_MLC_RECONF_PRES_MLD_CAPA_OP 0x0040 #define IEEE80211_MLC_RECONF_PRES_EXT_MLD_CAPA_OP 0x0080 /* no fixed fields in RECONF */ struct ieee80211_mle_tdls_common_info { u8 len; u8 ap_mld_mac_addr[ETH_ALEN]; } __packed; #define IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR 0x0010 /* no fixed fields in PRIO_ACCESS */ /** * ieee80211_mle_common_size - check multi-link element common size * @data: multi-link element, must already be checked for size using * ieee80211_mle_size_ok() * Return: the size of the multi-link element's "common" subfield */ static inline u8 ieee80211_mle_common_size(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); switch (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: case IEEE80211_ML_CONTROL_TYPE_PREQ: case IEEE80211_ML_CONTROL_TYPE_TDLS: case IEEE80211_ML_CONTROL_TYPE_RECONF: case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: /* * The length is the first octet pointed by mle->variable so no * need to add anything */ break; default: WARN_ON(1); return 0; } return sizeof(*mle) + mle->variable[0]; } /** * ieee80211_mle_get_link_id - returns the link ID * 
@data: the basic multi link element * Return: the link ID, or -1 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). */ static inline int ieee80211_mle_get_link_id(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; /* common points now at the beginning of ieee80211_mle_basic_common_info */ common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_LINK_ID)) return -1; return *common; } /** * ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count * @data: pointer to the basic multi link element * Return: the BSS Parameter Change Count field value, or -1 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). */ static inline int ieee80211_mle_get_bss_param_ch_cnt(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; /* common points now at the beginning of ieee80211_mle_basic_common_info */ common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT)) return -1; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; return *common; } /** * ieee80211_mle_get_eml_med_sync_delay - returns the medium sync delay * @data: pointer to the multi-link element * Return: the medium synchronization delay field value from the multi-link * element, or the default value (%IEEE80211_MED_SYNC_DELAY_DEFAULT) * if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). 
*/ static inline u16 ieee80211_mle_get_eml_med_sync_delay(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; /* common points now at the beginning of ieee80211_mle_basic_common_info */ common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY)) return IEEE80211_MED_SYNC_DELAY_DEFAULT; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) common += 1; return get_unaligned_le16(common); } /** * ieee80211_mle_get_eml_cap - returns the EML capability * @data: pointer to the multi-link element * Return: the EML capability field value from the multi-link element, * or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). */ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; /* common points now at the beginning of ieee80211_mle_basic_common_info */ common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_EML_CAPA)) return 0; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) common += 2; return get_unaligned_le16(common); } /** * ieee80211_mle_get_mld_capa_op - returns the MLD capabilities and operations. * @data: pointer to the multi-link element * Return: the MLD capabilities and operations field value from the multi-link * element, or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). 
*/ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; /* * common points now at the beginning of * ieee80211_mle_basic_common_info */ common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP)) return 0; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) common += 2; return get_unaligned_le16(common); } /* Defined in Figure 9-1074t in P802.11be_D7.0 */ #define IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_PARAM_UPDATE 0x0001 #define IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_RECO_MAX_LINKS_MASK 0x001e #define IEEE80211_EHT_ML_EXT_MLD_CAPA_NSTR_UPDATE 0x0020 #define IEEE80211_EHT_ML_EXT_MLD_CAPA_EMLSR_ENA_ON_ONE_LINK 0x0040 #define IEEE80211_EHT_ML_EXT_MLD_CAPA_BTM_MLD_RECO_MULTI_AP 0x0080 /** * ieee80211_mle_get_ext_mld_capa_op - returns the extended MLD capabilities * and operations. * @data: pointer to the multi-link element * Return: the extended MLD capabilities and operations field value from * the multi-link element, or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). 
*/
static inline u16 ieee80211_mle_get_ext_mld_capa_op(const u8 *data)
{
	const struct ieee80211_multi_link_elem *elem = (const void *)data;
	u16 ctrl = le16_to_cpu(elem->control);
	const u8 *pos;

	if (!(ctrl & IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP))
		return 0;

	/* start right behind the fixed ieee80211_mle_basic_common_info ... */
	pos = elem->variable + sizeof(struct ieee80211_mle_basic_common_info);

	/* ... and step over each optional field that comes first */
	pos += (ctrl & IEEE80211_MLC_BASIC_PRES_LINK_ID) ? 1 : 0;
	pos += (ctrl & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) ? 1 : 0;
	pos += (ctrl & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) ? 2 : 0;
	pos += (ctrl & IEEE80211_MLC_BASIC_PRES_EML_CAPA) ? 2 : 0;
	pos += (ctrl & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) ? 2 : 0;
	pos += (ctrl & IEEE80211_MLC_BASIC_PRES_MLD_ID) ? 1 : 0;

	return get_unaligned_le16(pos);
}

/**
 * ieee80211_mle_get_mld_id - returns the MLD ID
 * @data: pointer to the multi-link element
 * Return: The MLD ID in the given multi-link element, or 0 if not present
 *
 * The element is assumed to be of the correct type (BASIC) and big enough,
 * this must be checked using ieee80211_mle_type_ok().
*/ static inline u8 ieee80211_mle_get_mld_id(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; /* * common points now at the beginning of * ieee80211_mle_basic_common_info */ common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_ID)) return 0; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) common += 2; return *common; } /** * ieee80211_mle_size_ok - validate multi-link element size * @data: pointer to the element data * @len: length of the containing element * Return: whether or not the multi-link element size is OK */ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u8 fixed = sizeof(*mle); u8 common = 0; bool check_common_len = false; u16 control; if (!data || len < fixed) return false; control = le16_to_cpu(mle->control); switch (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: common += sizeof(struct ieee80211_mle_basic_common_info); check_common_len = true; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_MLD_ID) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP) common += 2; break; case IEEE80211_ML_CONTROL_TYPE_PREQ: common += sizeof(struct ieee80211_mle_preq_common_info); if (control & 
IEEE80211_MLC_PREQ_PRES_MLD_ID) common += 1; check_common_len = true; break; case IEEE80211_ML_CONTROL_TYPE_RECONF: if (control & IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR) common += ETH_ALEN; if (control & IEEE80211_MLC_RECONF_PRES_EML_CAPA) common += 2; if (control & IEEE80211_MLC_RECONF_PRES_MLD_CAPA_OP) common += 2; if (control & IEEE80211_MLC_RECONF_PRES_EXT_MLD_CAPA_OP) common += 2; break; case IEEE80211_ML_CONTROL_TYPE_TDLS: common += sizeof(struct ieee80211_mle_tdls_common_info); check_common_len = true; break; case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: common = ETH_ALEN + 1; break; default: /* we don't know this type */ return true; } if (len < fixed + common) return false; if (!check_common_len) return true; /* if present, common length is the first octet there */ return mle->variable[0] >= common; } /** * ieee80211_mle_type_ok - validate multi-link element type and size * @data: pointer to the element data * @type: expected type of the element * @len: length of the containing element * Return: whether or not the multi-link element type matches and size is OK */ static inline bool ieee80211_mle_type_ok(const u8 *data, u8 type, size_t len) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control; if (!ieee80211_mle_size_ok(data, len)) return false; control = le16_to_cpu(mle->control); if (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE) == type) return true; return false; } enum ieee80211_mle_subelems { IEEE80211_MLE_SUBELEM_PER_STA_PROFILE = 0, IEEE80211_MLE_SUBELEM_FRAGMENT = 254, }; #define IEEE80211_MLE_STA_CONTROL_LINK_ID 0x000f #define IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE 0x0010 #define IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT 0x0020 #define IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT 0x0040 #define IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT 0x0080 #define IEEE80211_MLE_STA_CONTROL_DTIM_INFO_PRESENT 0x0100 #define IEEE80211_MLE_STA_CONTROL_NSTR_LINK_PAIR_PRESENT 0x0200 #define 
IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE 0x0400 #define IEEE80211_MLE_STA_CONTROL_BSS_PARAM_CHANGE_CNT_PRESENT 0x0800 struct ieee80211_mle_per_sta_profile { __le16 control; u8 sta_info_len; u8 variable[]; } __packed; /** * ieee80211_mle_basic_sta_prof_size_ok - validate basic multi-link element sta * profile size * @data: pointer to the sub element data * @len: length of the containing sub element * Return: %true if the STA profile is large enough, %false otherwise */ static inline bool ieee80211_mle_basic_sta_prof_size_ok(const u8 *data, size_t len) { const struct ieee80211_mle_per_sta_profile *prof = (const void *)data; u16 control; u8 fixed = sizeof(*prof); u8 info_len = 1; if (len < fixed) return false; control = le16_to_cpu(prof->control); if (control & IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT) info_len += 6; if (control & IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT) info_len += 2; if (control & IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT) info_len += 8; if (control & IEEE80211_MLE_STA_CONTROL_DTIM_INFO_PRESENT) info_len += 2; if (control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE && control & IEEE80211_MLE_STA_CONTROL_NSTR_LINK_PAIR_PRESENT) { if (control & IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE) info_len += 2; else info_len += 1; } if (control & IEEE80211_MLE_STA_CONTROL_BSS_PARAM_CHANGE_CNT_PRESENT) info_len += 1; return prof->sta_info_len >= info_len && fixed + prof->sta_info_len - 1 <= len; } /** * ieee80211_mle_basic_sta_prof_bss_param_ch_cnt - get per-STA profile BSS * parameter change count * @prof: the per-STA profile, having been checked with * ieee80211_mle_basic_sta_prof_size_ok() for the correct length * * Return: The BSS parameter change count value if present, 0 otherwise. 
*/ static inline u8 ieee80211_mle_basic_sta_prof_bss_param_ch_cnt(const struct ieee80211_mle_per_sta_profile *prof) { u16 control = le16_to_cpu(prof->control); const u8 *pos = prof->variable; if (!(control & IEEE80211_MLE_STA_CONTROL_BSS_PARAM_CHANGE_CNT_PRESENT)) return 0; if (control & IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT) pos += 6; if (control & IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT) pos += 2; if (control & IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT) pos += 8; if (control & IEEE80211_MLE_STA_CONTROL_DTIM_INFO_PRESENT) pos += 2; if (control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE && control & IEEE80211_MLE_STA_CONTROL_NSTR_LINK_PAIR_PRESENT) { if (control & IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE) pos += 2; else pos += 1; } return *pos; } #define IEEE80211_MLE_STA_RECONF_CONTROL_LINK_ID 0x000f #define IEEE80211_MLE_STA_RECONF_CONTROL_COMPLETE_PROFILE 0x0010 #define IEEE80211_MLE_STA_RECONF_CONTROL_STA_MAC_ADDR_PRESENT 0x0020 #define IEEE80211_MLE_STA_RECONF_CONTROL_AP_REM_TIMER_PRESENT 0x0040 #define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE 0x0780 #define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_AP_REM 0 #define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_OP_PARAM_UPDATE 1 #define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_ADD_LINK 2 #define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_DEL_LINK 3 #define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_NSTR_STATUS 4 #define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_PARAMS_PRESENT 0x0800 /** * ieee80211_mle_reconf_sta_prof_size_ok - validate reconfiguration multi-link * element sta profile size. 
* @data: pointer to the sub element data * @len: length of the containing sub element * Return: %true if the STA profile is large enough, %false otherwise */ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data, size_t len) { const struct ieee80211_mle_per_sta_profile *prof = (const void *)data; u16 control; u8 fixed = sizeof(*prof); u8 info_len = 1; if (len < fixed) return false; control = le16_to_cpu(prof->control); if (control & IEEE80211_MLE_STA_RECONF_CONTROL_STA_MAC_ADDR_PRESENT) info_len += ETH_ALEN; if (control & IEEE80211_MLE_STA_RECONF_CONTROL_AP_REM_TIMER_PRESENT) info_len += 2; if (control & IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_PARAMS_PRESENT) info_len += 2; return prof->sta_info_len >= info_len && fixed + prof->sta_info_len - 1 <= len; } #define IEEE80211_MLE_STA_EPCS_CONTROL_LINK_ID 0x000f #define IEEE80211_EPCS_ENA_RESP_BODY_LEN 3 static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len) { const struct ieee80211_ttlm_elem *t2l = (const void *)data; u8 control, fixed = sizeof(*t2l), elem_len = 0; if (len < fixed) return false; control = t2l->control; if (control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT) elem_len += 2; if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) elem_len += 3; if (!(control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP)) { u8 bm_size; elem_len += 1; if (len < fixed + elem_len) return false; if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) bm_size = 1; else bm_size = 2; elem_len += hweight8(t2l->optional[0]) * bm_size; } return len >= fixed + elem_len; } /** * ieee80211_emlsr_pad_delay_in_us - Fetch the EMLSR Padding delay * in microseconds * @eml_cap: EML capabilities field value from common info field of * the Multi-link element * Return: the EMLSR Padding delay (in microseconds) encoded in the * EML Capabilities field */ static inline u32 ieee80211_emlsr_pad_delay_in_us(u16 eml_cap) { /* IEEE Std 802.11be-2024 Table 9-417i—Encoding of the EMLSR * Padding Delay subfield. 
*/ u32 pad_delay = u16_get_bits(eml_cap, IEEE80211_EML_CAP_EMLSR_PADDING_DELAY); if (!pad_delay || pad_delay > IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_256US) return 0; return 32 * (1 << (pad_delay - 1)); } /** * ieee80211_emlsr_trans_delay_in_us - Fetch the EMLSR Transition * delay in microseconds * @eml_cap: EML capabilities field value from common info field of * the Multi-link element * Return: the EMLSR Transition delay (in microseconds) encoded in the * EML Capabilities field */ static inline u32 ieee80211_emlsr_trans_delay_in_us(u16 eml_cap) { /* IEEE Std 802.11be-2024 Table 9-417j—Encoding of the EMLSR * Transition Delay subfield. */ u32 trans_delay = u16_get_bits(eml_cap, IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY); /* invalid values also just use 0 */ if (!trans_delay || trans_delay > IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_256US) return 0; return 16 * (1 << (trans_delay - 1)); } /** * ieee80211_eml_trans_timeout_in_us - Fetch the EMLSR Transition * timeout value in microseconds * @eml_cap: EML capabilities field value from common info field of * the Multi-link element * Return: the EMLSR Transition timeout (in microseconds) encoded in * the EML Capabilities field */ static inline u32 ieee80211_eml_trans_timeout_in_us(u16 eml_cap) { /* IEEE Std 802.11be-2024 Table 9-417m—Encoding of the * Transition Timeout subfield. 
*/ u8 timeout = u16_get_bits(eml_cap, IEEE80211_EML_CAP_TRANSITION_TIMEOUT); /* invalid values also just use 0 */ if (!timeout || timeout > IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128TU) return 0; return 128 * (1 << (timeout - 1)); } #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ _data + ieee80211_mle_common_size(_data),\ _len - ieee80211_mle_common_size(_data)) /* NAN operation mode, as defined in Wi-Fi Aware (TM) specification Table 81 */ #define NAN_OP_MODE_PHY_MODE_VHT 0x01 #define NAN_OP_MODE_PHY_MODE_HE 0x10 #define NAN_OP_MODE_PHY_MODE_MASK 0x11 #define NAN_OP_MODE_80P80MHZ 0x02 #define NAN_OP_MODE_160MHZ 0x04 #define NAN_OP_MODE_PNDL_SUPPRTED 0x08 /* NAN Device capabilities, as defined in Wi-Fi Aware (TM) specification * Table 79 */ #define NAN_DEV_CAPA_DFS_OWNER 0x01 #define NAN_DEV_CAPA_EXT_KEY_ID_SUPPORTED 0x02 #define NAN_DEV_CAPA_SIM_NDP_RX_SUPPORTED 0x04 #define NAN_DEV_CAPA_NDPE_SUPPORTED 0x08 #define NAN_DEV_CAPA_S3_SUPPORTED 0x10 #endif /* LINUX_IEEE80211_H */ |
| 13 2 1 2 2 55 11 11 11 11 53 2 2 2 2 2 1 2 2 1 1 1 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 50 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 13 53 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 
430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 
930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 
1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 
1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010-2013 Felix Fietkau <nbd@openwrt.org> * Copyright (C) 2019-2022 Intel Corporation */ #include <linux/netdevice.h> #include <linux/types.h> #include <linux/skbuff.h> #include <linux/debugfs.h> #include <linux/random.h> #include <linux/moduleparam.h> #include <linux/ieee80211.h> #include <linux/minmax.h> #include <net/mac80211.h> #include "rate.h" #include "sta_info.h" #include "rc80211_minstrel_ht.h" #define AVG_AMPDU_SIZE 16 
/*
 * Define group sort order: HT40 -> SGI -> #streams
 *
 * Every argument and the whole expansion are parenthesized (as in
 * VHT_GROUP_IDX) so that compound argument expressions and uses inside a
 * larger expression are not regrouped by operator precedence.
 */
#define GROUP_IDX(_streams, _sgi, _ht40)	\
	(MINSTREL_HT_GROUP_0 +			\
	 MINSTREL_MAX_STREAMS * 2 * (_ht40) +	\
	 MINSTREL_MAX_STREAMS * (_sgi) +	\
	 (_streams) - 1)
/*
 * Select one of three values by bandwidth index: r3 for BW_80, r2 for BW_40
 * and r1 for any other value. _bw may be evaluated twice, so it must not
 * have side effects. Arguments are parenthesized so that compound
 * expressions are compared as a whole.
 */
#define BW2VBPS(_bw, r3, r2, r1)					\
	((_bw) == BW_80 ? (r3) : (_bw) == BW_40 ? (r2) : (r1))
/*
 * Duration of an average sized frame (AVG_PKT_SIZE plus 4 bytes) sent at a
 * legacy OFDM rate: the fixed per-frame terms annotated below plus 4 time
 * units per data symbol, the whole sum scaled by 1000.
 * NOTE(review): _bitrate looks like it is in units of 100 kbit/s (compare
 * minstrel_ofdm_bitrates) and the result in nanoseconds - confirm.
 */
#define OFDM_DURATION(_bitrate)				\
	(1000 * (16 /* SIFS + signal ext */ +		\
		 16 /* T_PREAMBLE */ +			\
		 4 /* T_SIGNAL */ +			\
		 4 * (((16 + 80 * (AVG_PKT_SIZE + 4) + 6) /	\
		      ((_bitrate) * 4)))))
/*
 * Legacy bitrate tables, indexed through mp->cck_rates[] and
 * mp->ofdm_rates[][] and compared against rate_info.legacy when a cfg80211
 * rate status is mapped back to a minstrel rate.
 * NOTE(review): the values look like 100 kbit/s units (1..11 Mbit/s for CCK,
 * 6..54 Mbit/s for OFDM) - confirm against the rate_info documentation.
 */
const s16 minstrel_cck_bitrates[4] = { 10, 20, 55, 110 };
const s16 minstrel_ofdm_bitrates[8] = { 60, 90, 120, 180, 240, 360, 480, 540 };
BW_80) { if (nss == 3 || nss == 7) mask = BIT(6); else if (nss == 6) mask = BIT(9); } else { WARN_ON(bw != BW_40); } switch ((le16_to_cpu(mcs_map) >> (2 * (nss - 1))) & 3) { case IEEE80211_VHT_MCS_SUPPORT_0_7: mask |= 0x300; break; case IEEE80211_VHT_MCS_SUPPORT_0_8: mask |= 0x200; break; case IEEE80211_VHT_MCS_SUPPORT_0_9: break; default: mask = 0x3ff; } return 0x3ff & ~mask; } static bool minstrel_ht_is_legacy_group(int group) { return group == MINSTREL_CCK_GROUP || group == MINSTREL_OFDM_GROUP; } /* * Look up an MCS group index based on mac80211 rate information */ static int minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) { return GROUP_IDX((rate->idx / 8) + 1, !!(rate->flags & IEEE80211_TX_RC_SHORT_GI), !!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)); } /* * Look up an MCS group index based on new cfg80211 rate_info. */ static int minstrel_ht_ri_get_group_idx(struct rate_info *rate) { return GROUP_IDX((rate->mcs / 8) + 1, !!(rate->flags & RATE_INFO_FLAGS_SHORT_GI), !!(rate->bw & RATE_INFO_BW_40)); } static int minstrel_vht_get_group_idx(struct ieee80211_tx_rate *rate) { return VHT_GROUP_IDX(ieee80211_rate_get_vht_nss(rate), !!(rate->flags & IEEE80211_TX_RC_SHORT_GI), !!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + 2*!!(rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH)); } /* * Look up an MCS group index based on new cfg80211 rate_info. 
*/ static int minstrel_vht_ri_get_group_idx(struct rate_info *rate) { return VHT_GROUP_IDX(rate->nss, !!(rate->flags & RATE_INFO_FLAGS_SHORT_GI), !!(rate->bw & RATE_INFO_BW_40) + 2*!!(rate->bw & RATE_INFO_BW_80)); } static struct minstrel_rate_stats * minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_tx_rate *rate) { int group, idx; if (rate->flags & IEEE80211_TX_RC_MCS) { group = minstrel_ht_get_group_idx(rate); idx = rate->idx % 8; goto out; } if (rate->flags & IEEE80211_TX_RC_VHT_MCS) { group = minstrel_vht_get_group_idx(rate); idx = ieee80211_rate_get_vht_mcs(rate); goto out; } group = MINSTREL_CCK_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) { if (!(mi->supported[group] & BIT(idx))) continue; if (rate->idx != mp->cck_rates[idx]) continue; /* short preamble */ if ((mi->supported[group] & BIT(idx + 4)) && (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)) idx += 4; goto out; } group = MINSTREL_OFDM_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->ofdm_rates[0]); idx++) if (rate->idx == mp->ofdm_rates[mi->band][idx]) goto out; idx = 0; out: return &mi->groups[group].rates[idx]; } /* * Get the minstrel rate statistics for specified STA and rate info. 
*/ static struct minstrel_rate_stats * minstrel_ht_ri_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_rate_status *rate_status) { int group, idx; struct rate_info *rate = &rate_status->rate_idx; if (rate->flags & RATE_INFO_FLAGS_MCS) { group = minstrel_ht_ri_get_group_idx(rate); idx = rate->mcs % 8; goto out; } if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) { group = minstrel_vht_ri_get_group_idx(rate); idx = rate->mcs; goto out; } group = MINSTREL_CCK_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) { if (rate->legacy != minstrel_cck_bitrates[ mp->cck_rates[idx] ]) continue; /* short preamble */ if ((mi->supported[group] & BIT(idx + 4)) && mi->use_short_preamble) idx += 4; goto out; } group = MINSTREL_OFDM_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->ofdm_rates[0]); idx++) if (rate->legacy == minstrel_ofdm_bitrates[ mp->ofdm_rates[mi->band][idx] ]) goto out; idx = 0; out: return &mi->groups[group].rates[idx]; } static inline struct minstrel_rate_stats * minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) { return &mi->groups[MI_RATE_GROUP(index)].rates[MI_RATE_IDX(index)]; } static inline int minstrel_get_duration(int index) { const struct mcs_group *group = &minstrel_mcs_groups[MI_RATE_GROUP(index)]; unsigned int duration = group->duration[MI_RATE_IDX(index)]; return duration << group->shift; } static unsigned int minstrel_ht_avg_ampdu_len(struct minstrel_ht_sta *mi) { int duration; if (mi->avg_ampdu_len) return MINSTREL_TRUNC(mi->avg_ampdu_len); if (minstrel_ht_is_legacy_group(MI_RATE_GROUP(mi->max_tp_rate[0]))) return 1; duration = minstrel_get_duration(mi->max_tp_rate[0]); if (duration > 400 * 1000) return 2; if (duration > 250 * 1000) return 4; if (duration > 150 * 1000) return 8; return 16; } /* * Return current throughput based on the average A-MPDU length, taking into * account the expected number of retransmissions and their expected length */ int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, 
int rate, int prob_avg) { unsigned int nsecs = 0, overhead = mi->overhead; unsigned int ampdu_len = 1; /* do not account throughput if success prob is below 10% */ if (prob_avg < MINSTREL_FRAC(10, 100)) return 0; if (minstrel_ht_is_legacy_group(group)) overhead = mi->overhead_legacy; else ampdu_len = minstrel_ht_avg_ampdu_len(mi); nsecs = 1000 * overhead / ampdu_len; nsecs += minstrel_mcs_groups[group].duration[rate] << minstrel_mcs_groups[group].shift; /* * For the throughput calculation, limit the probability value to 90% to * account for collision related packet error rate fluctuation * (prob is scaled - see MINSTREL_FRAC above) */ if (prob_avg > MINSTREL_FRAC(90, 100)) prob_avg = MINSTREL_FRAC(90, 100); return MINSTREL_TRUNC(100 * ((prob_avg * 1000000) / nsecs)); } /* * Find & sort topmost throughput rates * * If multiple rates provide equal throughput the sorting is based on their * current success probability. Higher success probability is preferred among * MCS groups, CCK rates do not provide aggregation and are therefore at last. 
/*
 * Find & sort topmost throughput rates
 *
 * Insert @index into @tp_list (MAX_THR_RATES entries, best first) at the
 * position given by its average throughput. On equal throughput the rate
 * with the higher success probability wins; an entry that is not strictly
 * better than the last list element is not inserted at all.
 */
static void
minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index,
			       u16 *tp_list)
{
	int new_group = MI_RATE_GROUP(index);
	int new_idx = MI_RATE_IDX(index);
	int new_prob = mi->groups[new_group].rates[new_idx].prob_avg;
	int new_tp = minstrel_ht_get_tp_avg(mi, new_group, new_idx, new_prob);
	int pos;

	/* walk up from the last slot while the new rate beats the occupant */
	for (pos = MAX_THR_RATES; pos > 0; pos--) {
		int old_group = MI_RATE_GROUP(tp_list[pos - 1]);
		int old_idx = MI_RATE_IDX(tp_list[pos - 1]);
		int old_prob = mi->groups[old_group].rates[old_idx].prob_avg;
		int old_tp = minstrel_ht_get_tp_avg(mi, old_group, old_idx,
						    old_prob);

		if (new_tp < old_tp)
			break;
		if (new_tp == old_tp && new_prob <= old_prob)
			break;
	}

	/* not better than any listed rate: leave the list alone */
	if (pos >= MAX_THR_RATES)
		return;

	/* make room unless the new rate lands in the very last slot */
	if (pos < MAX_THR_RATES - 1)
		memmove(&tp_list[pos + 1], &tp_list[pos],
			sizeof(*tp_list) * (MAX_THR_RATES - (pos + 1)));

	tp_list[pos] = index;
}
!minstrel_ht_is_legacy_group(max_tp_group)) return; /* skip rates faster than max tp rate with lower prob */ if (minstrel_get_duration(mi->max_tp_rate[0]) > minstrel_get_duration(index) && mrs->prob_avg < max_tp_prob) return; max_gpr_group = MI_RATE_GROUP(mg->max_group_prob_rate); max_gpr_idx = MI_RATE_IDX(mg->max_group_prob_rate); max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_avg; if (mrs->prob_avg > MINSTREL_FRAC(75, 100)) { cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, mrs->prob_avg); if (cur_tp_avg > tmp_tp_avg) *dest = index; max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group, max_gpr_idx, max_gpr_prob); if (cur_tp_avg > max_gpr_tp_avg) mg->max_group_prob_rate = index; } else { if (mrs->prob_avg > tmp_prob) *dest = index; if (mrs->prob_avg > max_gpr_prob) mg->max_group_prob_rate = index; } } /* * Assign new rate set per sta and use CCK rates only if the fastest * rate (max_tp_rate[0]) is from CCK group. This prohibits such sorted * rate sets where MCS and CCK rates are mixed, because CCK rates can * not use aggregation. */ static void minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi, u16 tmp_mcs_tp_rate[MAX_THR_RATES], u16 tmp_legacy_tp_rate[MAX_THR_RATES]) { unsigned int tmp_group, tmp_idx, tmp_cck_tp, tmp_mcs_tp, tmp_prob; int i; tmp_group = MI_RATE_GROUP(tmp_legacy_tp_rate[0]); tmp_idx = MI_RATE_IDX(tmp_legacy_tp_rate[0]); tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_cck_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); tmp_group = MI_RATE_GROUP(tmp_mcs_tp_rate[0]); tmp_idx = MI_RATE_IDX(tmp_mcs_tp_rate[0]); tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); if (tmp_cck_tp > tmp_mcs_tp) { for(i = 0; i < MAX_THR_RATES; i++) { minstrel_ht_sort_best_tp_rates(mi, tmp_legacy_tp_rate[i], tmp_mcs_tp_rate); } } } /* * Try to increase robustness of max_prob rate by decrease number of * streams if possible. 
*/ static inline void minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) { struct minstrel_mcs_group_data *mg; int tmp_max_streams, group, tmp_idx, tmp_prob; int tmp_tp = 0; if (!mi->sta->deflink.ht_cap.ht_supported) return; group = MI_RATE_GROUP(mi->max_tp_rate[0]); tmp_max_streams = minstrel_mcs_groups[group].streams; for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { mg = &mi->groups[group]; if (!mi->supported[group] || group == MINSTREL_CCK_GROUP) continue; tmp_idx = MI_RATE_IDX(mg->max_group_prob_rate); tmp_prob = mi->groups[group].rates[tmp_idx].prob_avg; if (tmp_tp < minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob) && (minstrel_mcs_groups[group].streams < tmp_max_streams)) { mi->max_prob_rate = mg->max_group_prob_rate; tmp_tp = minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob); } } } static u16 __minstrel_ht_get_sample_rate(struct minstrel_ht_sta *mi, enum minstrel_sample_type type) { u16 *rates = mi->sample[type].sample_rates; u16 cur; int i; for (i = 0; i < MINSTREL_SAMPLE_RATES; i++) { if (!rates[i]) continue; cur = rates[i]; rates[i] = 0; return cur; } return 0; } static inline int minstrel_ewma(int old, int new, int weight) { int diff, incr; diff = new - old; incr = (EWMA_DIV - weight) * diff / EWMA_DIV; return old + incr; } static inline int minstrel_filter_avg_add(u16 *prev_1, u16 *prev_2, s32 in) { s32 out_1 = *prev_1; s32 out_2 = *prev_2; s32 val; if (!in) in += 1; if (!out_1) { val = out_1 = in; goto out; } val = MINSTREL_AVG_COEFF1 * in; val += MINSTREL_AVG_COEFF2 * out_1; val += MINSTREL_AVG_COEFF3 * out_2; val >>= MINSTREL_SCALE; if (val > 1 << MINSTREL_SCALE) val = 1 << MINSTREL_SCALE; if (val < 0) val = 1; out: *prev_2 = out_1; *prev_1 = val; return val; } /* * Recalculate statistics and counters of a given rate */ static void minstrel_ht_calc_rate_stats(struct minstrel_priv *mp, struct minstrel_rate_stats *mrs) { unsigned int cur_prob; if (unlikely(mrs->attempts > 0)) { cur_prob = 
MINSTREL_FRAC(mrs->success, mrs->attempts); minstrel_filter_avg_add(&mrs->prob_avg, &mrs->prob_avg_1, cur_prob); mrs->att_hist += mrs->attempts; mrs->succ_hist += mrs->success; } mrs->last_success = mrs->success; mrs->last_attempts = mrs->attempts; mrs->success = 0; mrs->attempts = 0; } static bool minstrel_ht_find_sample_rate(struct minstrel_ht_sta *mi, int type, int idx) { int i; for (i = 0; i < MINSTREL_SAMPLE_RATES; i++) { u16 cur = mi->sample[type].sample_rates[i]; if (cur == idx) return true; if (!cur) break; } return false; } static int minstrel_ht_move_sample_rates(struct minstrel_ht_sta *mi, int type, u32 fast_rate_dur, u32 slow_rate_dur) { u16 *rates = mi->sample[type].sample_rates; int i, j; for (i = 0, j = 0; i < MINSTREL_SAMPLE_RATES; i++) { u32 duration; bool valid = false; u16 cur; cur = rates[i]; if (!cur) continue; duration = minstrel_get_duration(cur); switch (type) { case MINSTREL_SAMPLE_TYPE_SLOW: valid = duration > fast_rate_dur && duration < slow_rate_dur; break; case MINSTREL_SAMPLE_TYPE_INC: case MINSTREL_SAMPLE_TYPE_JUMP: valid = duration < fast_rate_dur; break; default: valid = false; break; } if (!valid) { rates[i] = 0; continue; } if (i == j) continue; rates[j++] = cur; rates[i] = 0; } return j; } static int minstrel_ht_group_min_rate_offset(struct minstrel_ht_sta *mi, int group, u32 max_duration) { u16 supported = mi->supported[group]; int i; for (i = 0; i < MCS_GROUP_RATES && supported; i++, supported >>= 1) { if (!(supported & BIT(0))) continue; if (minstrel_get_duration(MI_RATE(group, i)) >= max_duration) continue; return i; } return -1; } /* * Incremental update rates: * Flip through groups and pick the first group rate that is faster than the * highest currently selected rate */ static u16 minstrel_ht_next_inc_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur) { u8 type = MINSTREL_SAMPLE_TYPE_INC; int i, index = 0; u8 group; group = mi->sample[type].sample_group; for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) { group = 
(group + 1) % ARRAY_SIZE(minstrel_mcs_groups); index = minstrel_ht_group_min_rate_offset(mi, group, fast_rate_dur); if (index < 0) continue; index = MI_RATE(group, index & 0xf); if (!minstrel_ht_find_sample_rate(mi, type, index)) goto out; } index = 0; out: mi->sample[type].sample_group = group; return index; } static int minstrel_ht_next_group_sample_rate(struct minstrel_ht_sta *mi, int group, u16 supported, int offset) { struct minstrel_mcs_group_data *mg = &mi->groups[group]; u16 idx; int i; for (i = 0; i < MCS_GROUP_RATES; i++) { idx = sample_table[mg->column][mg->index]; if (++mg->index >= MCS_GROUP_RATES) { mg->index = 0; if (++mg->column >= ARRAY_SIZE(sample_table)) mg->column = 0; } if (idx < offset) continue; if (!(supported & BIT(idx))) continue; return MI_RATE(group, idx); } return -1; } /* * Jump rates: * Sample random rates, use those that are faster than the highest * currently selected rate. Rates between the fastest and the slowest * get sorted into the slow sample bucket, but only if it has room */ static u16 minstrel_ht_next_jump_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur, u32 slow_rate_dur, int *slow_rate_ofs) { struct minstrel_rate_stats *mrs; u32 max_duration = slow_rate_dur; int i, index, offset; u16 *slow_rates; u16 supported; u32 duration; u8 group; if (*slow_rate_ofs >= MINSTREL_SAMPLE_RATES) max_duration = fast_rate_dur; slow_rates = mi->sample[MINSTREL_SAMPLE_TYPE_SLOW].sample_rates; group = mi->sample[MINSTREL_SAMPLE_TYPE_JUMP].sample_group; for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) { u8 type; group = (group + 1) % ARRAY_SIZE(minstrel_mcs_groups); supported = mi->supported[group]; if (!supported) continue; offset = minstrel_ht_group_min_rate_offset(mi, group, max_duration); if (offset < 0) continue; index = minstrel_ht_next_group_sample_rate(mi, group, supported, offset); if (index < 0) continue; duration = minstrel_get_duration(index); if (duration < fast_rate_dur) type = MINSTREL_SAMPLE_TYPE_JUMP; else type = 
MINSTREL_SAMPLE_TYPE_SLOW; if (minstrel_ht_find_sample_rate(mi, type, index)) continue; if (type == MINSTREL_SAMPLE_TYPE_JUMP) goto found; if (*slow_rate_ofs >= MINSTREL_SAMPLE_RATES) continue; if (duration >= slow_rate_dur) continue; /* skip slow rates with high success probability */ mrs = minstrel_get_ratestats(mi, index); if (mrs->prob_avg > MINSTREL_FRAC(95, 100)) continue; slow_rates[(*slow_rate_ofs)++] = index; if (*slow_rate_ofs >= MINSTREL_SAMPLE_RATES) max_duration = fast_rate_dur; } index = 0; found: mi->sample[MINSTREL_SAMPLE_TYPE_JUMP].sample_group = group; return index; } static void minstrel_ht_refill_sample_rates(struct minstrel_ht_sta *mi) { u32 prob_dur = minstrel_get_duration(mi->max_prob_rate); u32 tp_dur = minstrel_get_duration(mi->max_tp_rate[0]); u32 tp2_dur = minstrel_get_duration(mi->max_tp_rate[1]); u32 fast_rate_dur = min(min(tp_dur, tp2_dur), prob_dur); u32 slow_rate_dur = max(max(tp_dur, tp2_dur), prob_dur); u16 *rates; int i, j; rates = mi->sample[MINSTREL_SAMPLE_TYPE_INC].sample_rates; i = minstrel_ht_move_sample_rates(mi, MINSTREL_SAMPLE_TYPE_INC, fast_rate_dur, slow_rate_dur); while (i < MINSTREL_SAMPLE_RATES) { rates[i] = minstrel_ht_next_inc_rate(mi, tp_dur); if (!rates[i]) break; i++; } rates = mi->sample[MINSTREL_SAMPLE_TYPE_JUMP].sample_rates; i = minstrel_ht_move_sample_rates(mi, MINSTREL_SAMPLE_TYPE_JUMP, fast_rate_dur, slow_rate_dur); j = minstrel_ht_move_sample_rates(mi, MINSTREL_SAMPLE_TYPE_SLOW, fast_rate_dur, slow_rate_dur); while (i < MINSTREL_SAMPLE_RATES) { rates[i] = minstrel_ht_next_jump_rate(mi, fast_rate_dur, slow_rate_dur, &j); if (!rates[i]) break; i++; } for (i = 0; i < ARRAY_SIZE(mi->sample); i++) memcpy(mi->sample[i].cur_sample_rates, mi->sample[i].sample_rates, sizeof(mi->sample[i].cur_sample_rates)); } /* * Update rate statistics and select new primary rates * * Rules for rate selection: * - max_prob_rate must use only one stream, as a tradeoff between delivery * probability and throughput during strong 
fluctuations * - as long as the max prob rate has a probability of more than 75%, pick * higher throughput rates, even if the probability is a bit lower */ static void minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct minstrel_mcs_group_data *mg; struct minstrel_rate_stats *mrs; int group, i, j, cur_prob; u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES]; u16 tmp_legacy_tp_rate[MAX_THR_RATES], tmp_max_prob_rate; u16 index; bool ht_supported = mi->sta->deflink.ht_cap.ht_supported; if (mi->ampdu_packets > 0) { if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN)) mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len, MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets), EWMA_LEVEL); else mi->avg_ampdu_len = 0; mi->ampdu_len = 0; mi->ampdu_packets = 0; } if (mi->supported[MINSTREL_CCK_GROUP]) group = MINSTREL_CCK_GROUP; else if (mi->supported[MINSTREL_OFDM_GROUP]) group = MINSTREL_OFDM_GROUP; else group = 0; index = MI_RATE(group, 0); for (j = 0; j < ARRAY_SIZE(tmp_legacy_tp_rate); j++) tmp_legacy_tp_rate[j] = index; if (mi->supported[MINSTREL_VHT_GROUP_0]) group = MINSTREL_VHT_GROUP_0; else if (ht_supported) group = MINSTREL_HT_GROUP_0; else if (mi->supported[MINSTREL_CCK_GROUP]) group = MINSTREL_CCK_GROUP; else group = MINSTREL_OFDM_GROUP; index = MI_RATE(group, 0); tmp_max_prob_rate = index; for (j = 0; j < ARRAY_SIZE(tmp_mcs_tp_rate); j++) tmp_mcs_tp_rate[j] = index; /* Find best rate sets within all MCS groups*/ for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { u16 *tp_rate = tmp_mcs_tp_rate; u16 last_prob = 0; mg = &mi->groups[group]; if (!mi->supported[group]) continue; /* (re)Initialize group rate indexes */ for(j = 0; j < MAX_THR_RATES; j++) tmp_group_tp_rate[j] = MI_RATE(group, 0); if (group == MINSTREL_CCK_GROUP && ht_supported) tp_rate = tmp_legacy_tp_rate; for (i = MCS_GROUP_RATES - 1; i >= 0; i--) { if (!(mi->supported[group] & BIT(i))) continue; index = MI_RATE(group, i); mrs = 
&mg->rates[i]; mrs->retry_updated = false; minstrel_ht_calc_rate_stats(mp, mrs); if (mrs->att_hist) last_prob = max(last_prob, mrs->prob_avg); else mrs->prob_avg = max(last_prob, mrs->prob_avg); cur_prob = mrs->prob_avg; if (minstrel_ht_get_tp_avg(mi, group, i, cur_prob) == 0) continue; /* Find max throughput rate set */ minstrel_ht_sort_best_tp_rates(mi, index, tp_rate); /* Find max throughput rate set within a group */ minstrel_ht_sort_best_tp_rates(mi, index, tmp_group_tp_rate); } memcpy(mg->max_group_tp_rate, tmp_group_tp_rate, sizeof(mg->max_group_tp_rate)); } /* Assign new rate set per sta */ minstrel_ht_assign_best_tp_rates(mi, tmp_mcs_tp_rate, tmp_legacy_tp_rate); memcpy(mi->max_tp_rate, tmp_mcs_tp_rate, sizeof(mi->max_tp_rate)); for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { if (!mi->supported[group]) continue; mg = &mi->groups[group]; mg->max_group_prob_rate = MI_RATE(group, 0); for (i = 0; i < MCS_GROUP_RATES; i++) { if (!(mi->supported[group] & BIT(i))) continue; index = MI_RATE(group, i); /* Find max probability rate per group and global */ minstrel_ht_set_best_prob_rate(mi, &tmp_max_prob_rate, index); } } mi->max_prob_rate = tmp_max_prob_rate; /* Try to increase robustness of max_prob_rate*/ minstrel_ht_prob_rate_reduce_streams(mi); minstrel_ht_refill_sample_rates(mi); #ifdef CONFIG_MAC80211_DEBUGFS /* use fixed index if set */ if (mp->fixed_rate_idx != -1) { for (i = 0; i < 4; i++) mi->max_tp_rate[i] = mp->fixed_rate_idx; mi->max_prob_rate = mp->fixed_rate_idx; } #endif /* Reset update timer */ mi->last_stats_update = jiffies; mi->sample_time = jiffies; } static bool minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_tx_rate *rate) { int i; if (rate->idx < 0) return false; if (!rate->count) return false; if (rate->flags & IEEE80211_TX_RC_MCS || rate->flags & IEEE80211_TX_RC_VHT_MCS) return true; for (i = 0; i < ARRAY_SIZE(mp->cck_rates); i++) if (rate->idx == mp->cck_rates[i]) return 
true; for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates[0]); i++) if (rate->idx == mp->ofdm_rates[mi->band][i]) return true; return false; } /* * Check whether rate_status contains valid information. */ static bool minstrel_ht_ri_txstat_valid(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_rate_status *rate_status) { int i; if (!rate_status) return false; if (!rate_status->try_count) return false; if (rate_status->rate_idx.flags & RATE_INFO_FLAGS_MCS || rate_status->rate_idx.flags & RATE_INFO_FLAGS_VHT_MCS) return true; for (i = 0; i < ARRAY_SIZE(mp->cck_rates); i++) { if (rate_status->rate_idx.legacy == minstrel_cck_bitrates[ mp->cck_rates[i] ]) return true; } for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates); i++) { if (rate_status->rate_idx.legacy == minstrel_ofdm_bitrates[ mp->ofdm_rates[mi->band][i] ]) return true; } return false; } static void minstrel_downgrade_rate(struct minstrel_ht_sta *mi, u16 *idx, bool primary) { int group, orig_group; orig_group = group = MI_RATE_GROUP(*idx); while (group > 0) { group--; if (!mi->supported[group]) continue; if (minstrel_mcs_groups[group].streams > minstrel_mcs_groups[orig_group].streams) continue; if (primary) *idx = mi->groups[group].max_group_tp_rate[0]; else *idx = mi->groups[group].max_group_tp_rate[1]; break; } } static void minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, void *priv_sta, struct ieee80211_tx_status *st) { struct ieee80211_tx_info *info = st->info; struct minstrel_ht_sta *mi = priv_sta; struct ieee80211_tx_rate *ar = info->status.rates; struct minstrel_rate_stats *rate, *rate2; struct minstrel_priv *mp = priv; u32 update_interval = mp->update_interval; bool last, update = false; int i; /* Ignore packet that was sent with noAck flag */ if (info->flags & IEEE80211_TX_CTL_NO_ACK) return; /* This packet was aggregated but doesn't carry status info */ if ((info->flags & IEEE80211_TX_CTL_AMPDU) && !(info->flags & IEEE80211_TX_STAT_AMPDU)) return; if (!(info->flags & 
IEEE80211_TX_STAT_AMPDU)) { info->status.ampdu_ack_len = (info->flags & IEEE80211_TX_STAT_ACK ? 1 : 0); info->status.ampdu_len = 1; } /* wraparound */ if (mi->total_packets >= ~0 - info->status.ampdu_len) { mi->total_packets = 0; mi->sample_packets = 0; } mi->total_packets += info->status.ampdu_len; if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) mi->sample_packets += info->status.ampdu_len; mi->ampdu_packets++; mi->ampdu_len += info->status.ampdu_len; if (st->rates && st->n_rates) { last = !minstrel_ht_ri_txstat_valid(mp, mi, &(st->rates[0])); for (i = 0; !last; i++) { last = (i == st->n_rates - 1) || !minstrel_ht_ri_txstat_valid(mp, mi, &(st->rates[i + 1])); rate = minstrel_ht_ri_get_stats(mp, mi, &(st->rates[i])); if (last) rate->success += info->status.ampdu_ack_len; rate->attempts += st->rates[i].try_count * info->status.ampdu_len; } } else { last = !minstrel_ht_txstat_valid(mp, mi, &ar[0]); for (i = 0; !last; i++) { last = (i == IEEE80211_TX_MAX_RATES - 1) || !minstrel_ht_txstat_valid(mp, mi, &ar[i + 1]); rate = minstrel_ht_get_stats(mp, mi, &ar[i]); if (last) rate->success += info->status.ampdu_ack_len; rate->attempts += ar[i].count * info->status.ampdu_len; } } if (mp->hw->max_rates > 1) { /* * check for sudden death of spatial multiplexing, * downgrade to a lower number of streams if necessary. 
*/ rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]); if (rate->attempts > 30 && rate->success < rate->attempts / 4) { minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true); update = true; } rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]); if (rate2->attempts > 30 && rate2->success < rate2->attempts / 4) { minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false); update = true; } } if (time_after(jiffies, mi->last_stats_update + update_interval)) { update = true; minstrel_ht_update_stats(mp, mi); } if (update) minstrel_ht_update_rates(mp, mi); } static void minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, int index) { struct minstrel_rate_stats *mrs; unsigned int tx_time, tx_time_rtscts, tx_time_data; unsigned int cw = mp->cw_min; unsigned int ctime = 0; unsigned int t_slot = 9; /* FIXME */ unsigned int ampdu_len = minstrel_ht_avg_ampdu_len(mi); unsigned int overhead = 0, overhead_rtscts = 0; mrs = minstrel_get_ratestats(mi, index); if (mrs->prob_avg < MINSTREL_FRAC(1, 10)) { mrs->retry_count = 1; mrs->retry_count_rtscts = 1; return; } mrs->retry_count = 2; mrs->retry_count_rtscts = 2; mrs->retry_updated = true; tx_time_data = minstrel_get_duration(index) * ampdu_len / 1000; /* Contention time for first 2 tries */ ctime = (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); ctime += (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); if (minstrel_ht_is_legacy_group(MI_RATE_GROUP(index))) { overhead = mi->overhead_legacy; overhead_rtscts = mi->overhead_legacy_rtscts; } else { overhead = mi->overhead; overhead_rtscts = mi->overhead_rtscts; } /* Total TX time for data and Contention after first 2 tries */ tx_time = ctime + 2 * (overhead + tx_time_data); tx_time_rtscts = ctime + 2 * (overhead_rtscts + tx_time_data); /* See how many more tries we can fit inside segment size */ do { /* Contention time for this try */ ctime = (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); /* Total TX time after this try */ 
tx_time += ctime + overhead + tx_time_data; tx_time_rtscts += ctime + overhead_rtscts + tx_time_data; if (tx_time_rtscts < mp->segment_size) mrs->retry_count_rtscts++; } while ((tx_time < mp->segment_size) && (++mrs->retry_count < mp->max_retry)); } static void minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_sta_rates *ratetbl, int offset, int index) { int group_idx = MI_RATE_GROUP(index); const struct mcs_group *group = &minstrel_mcs_groups[group_idx]; struct minstrel_rate_stats *mrs; u8 idx; u16 flags = group->flags; mrs = minstrel_get_ratestats(mi, index); if (!mrs->retry_updated) minstrel_calc_retransmit(mp, mi, index); if (mrs->prob_avg < MINSTREL_FRAC(20, 100) || !mrs->retry_count) { ratetbl->rate[offset].count = 2; ratetbl->rate[offset].count_rts = 2; ratetbl->rate[offset].count_cts = 2; } else { ratetbl->rate[offset].count = mrs->retry_count; ratetbl->rate[offset].count_cts = mrs->retry_count; ratetbl->rate[offset].count_rts = mrs->retry_count_rtscts; } index = MI_RATE_IDX(index); if (group_idx == MINSTREL_CCK_GROUP) idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)]; else if (group_idx == MINSTREL_OFDM_GROUP) idx = mp->ofdm_rates[mi->band][index % ARRAY_SIZE(mp->ofdm_rates[0])]; else if (flags & IEEE80211_TX_RC_VHT_MCS) idx = ((group->streams - 1) << 4) | (index & 0xF); else idx = index + (group->streams - 1) * 8; /* enable RTS/CTS if needed: * - if station is in dynamic SMPS (and streams > 1) * - for fallback rates, to increase chances of getting through */ if (offset > 0 || (mi->sta->deflink.smps_mode == IEEE80211_SMPS_DYNAMIC && group->streams > 1)) { ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts; flags |= IEEE80211_TX_RC_USE_RTS_CTS; } ratetbl->rate[offset].idx = idx; ratetbl->rate[offset].flags = flags; } static inline int minstrel_ht_get_prob_avg(struct minstrel_ht_sta *mi, int rate) { int group = MI_RATE_GROUP(rate); rate = MI_RATE_IDX(rate); return 
mi->groups[group].rates[rate].prob_avg; } static int minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi) { int group = MI_RATE_GROUP(mi->max_prob_rate); const struct mcs_group *g = &minstrel_mcs_groups[group]; int rate = MI_RATE_IDX(mi->max_prob_rate); unsigned int duration; /* Disable A-MSDU if max_prob_rate is bad */ if (mi->groups[group].rates[rate].prob_avg < MINSTREL_FRAC(50, 100)) return 1; duration = g->duration[rate]; duration <<= g->shift; /* If the rate is slower than single-stream MCS1, make A-MSDU limit small */ if (duration > MCS_DURATION(1, 0, 52)) return 500; /* * If the rate is slower than single-stream MCS4, limit A-MSDU to usual * data packet size */ if (duration > MCS_DURATION(1, 0, 104)) return 1600; /* * If the rate is slower than single-stream MCS7, or if the max throughput * rate success probability is less than 75%, limit A-MSDU to twice the usual * data packet size */ if (duration > MCS_DURATION(1, 0, 260) || (minstrel_ht_get_prob_avg(mi, mi->max_tp_rate[0]) < MINSTREL_FRAC(75, 100))) return 3200; /* * HT A-MPDU limits maximum MPDU size under BA agreement to 4095 bytes. * Since aggregation sessions are started/stopped without txq flush, use * the limit here to avoid the complexity of having to de-aggregate * packets in the queue. 
*/ if (!mi->sta->deflink.vht_cap.vht_supported) return IEEE80211_MAX_MPDU_LEN_HT_BA; /* unlimited */ return 0; } static void minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct ieee80211_sta_rates *rates; int i = 0; int max_rates = min_t(int, mp->hw->max_rates, IEEE80211_TX_RATE_TABLE_SIZE); rates = kzalloc(sizeof(*rates), GFP_ATOMIC); if (!rates) return; /* Start with max_tp_rate[0] */ minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]); /* Fill up remaining, keep one entry for max_probe_rate */ for (; i < (max_rates - 1); i++) minstrel_ht_set_rate(mp, mi, rates, i, mi->max_tp_rate[i]); if (i < max_rates) minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate); if (i < IEEE80211_TX_RATE_TABLE_SIZE) rates->rate[i].idx = -1; mi->sta->deflink.agg.max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi); ieee80211_sta_recalc_aggregates(mi->sta); rate_control_set_rates(mp->hw, mi->sta, rates); } static u16 minstrel_ht_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { u8 seq; if (mp->hw->max_rates > 1) { seq = mi->sample_seq; mi->sample_seq = (seq + 1) % ARRAY_SIZE(minstrel_sample_seq); seq = minstrel_sample_seq[seq]; } else { seq = MINSTREL_SAMPLE_TYPE_INC; } return __minstrel_ht_get_sample_rate(mi, seq); } static void minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) { const struct mcs_group *sample_group; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); struct ieee80211_tx_rate *rate = &info->status.rates[0]; struct minstrel_ht_sta *mi = priv_sta; struct minstrel_priv *mp = priv; u16 sample_idx; info->flags |= mi->tx_flags; #ifdef CONFIG_MAC80211_DEBUGFS if (mp->fixed_rate_idx != -1) return; #endif /* Don't use EAPOL frames for sampling on non-mrr hw */ if (mp->hw->max_rates == 1 && (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO)) return; if (time_is_after_jiffies(mi->sample_time)) return; mi->sample_time = 
jiffies + MINSTREL_SAMPLE_INTERVAL; sample_idx = minstrel_ht_get_sample_rate(mp, mi); if (!sample_idx) return; sample_group = &minstrel_mcs_groups[MI_RATE_GROUP(sample_idx)]; sample_idx = MI_RATE_IDX(sample_idx); if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP] && (sample_idx >= 4) != txrc->short_preamble) return; info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; rate->count = 1; if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP]) { int idx = sample_idx % ARRAY_SIZE(mp->cck_rates); rate->idx = mp->cck_rates[idx]; } else if (sample_group == &minstrel_mcs_groups[MINSTREL_OFDM_GROUP]) { int idx = sample_idx % ARRAY_SIZE(mp->ofdm_rates[0]); rate->idx = mp->ofdm_rates[mi->band][idx]; } else if (sample_group->flags & IEEE80211_TX_RC_VHT_MCS) { ieee80211_rate_set_vht(rate, MI_RATE_IDX(sample_idx), sample_group->streams); } else { rate->idx = sample_idx + (sample_group->streams - 1) * 8; } rate->flags = sample_group->flags; } static void minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta) { int i; if (sband->band != NL80211_BAND_2GHZ) return; if (sta->deflink.ht_cap.ht_supported && !ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES)) return; for (i = 0; i < 4; i++) { if (mp->cck_rates[i] == 0xff || !rate_supported(sta, sband->band, mp->cck_rates[i])) continue; mi->supported[MINSTREL_CCK_GROUP] |= BIT(i); if (sband->bitrates[i].flags & IEEE80211_RATE_SHORT_PREAMBLE) mi->supported[MINSTREL_CCK_GROUP] |= BIT(i + 4); } } static void minstrel_ht_update_ofdm(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta) { const u8 *rates; int i; if (sta->deflink.ht_cap.ht_supported) return; rates = mp->ofdm_rates[sband->band]; for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates[0]); i++) { if (rates[i] == 0xff || !rate_supported(sta, sband->band, rates[i])) continue; mi->supported[MINSTREL_OFDM_GROUP] |= BIT(i); } } 
static void minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, struct cfg80211_chan_def *chandef, struct ieee80211_sta *sta, void *priv_sta) { struct minstrel_priv *mp = priv; struct minstrel_ht_sta *mi = priv_sta; struct ieee80211_mcs_info *mcs = &sta->deflink.ht_cap.mcs; u16 ht_cap = sta->deflink.ht_cap.cap; struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap; const struct ieee80211_rate *ctl_rate; struct sta_info *sta_info; bool ldpc, erp; int use_vht; int ack_dur; int stbc; int i; BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != MINSTREL_GROUPS_NB); if (vht_cap->vht_supported) use_vht = vht_cap->vht_mcs.tx_mcs_map != cpu_to_le16(~0); else use_vht = 0; memset(mi, 0, sizeof(*mi)); mi->sta = sta; mi->band = sband->band; mi->last_stats_update = jiffies; ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1); mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1); mi->overhead += ack_dur; mi->overhead_rtscts = mi->overhead + 2 * ack_dur; ctl_rate = &sband->bitrates[rate_lowest_index(sband, sta)]; erp = ctl_rate->flags & IEEE80211_RATE_ERP_G; ack_dur = ieee80211_frame_duration(sband->band, 10, ctl_rate->bitrate, erp, 1); mi->overhead_legacy = ack_dur; mi->overhead_legacy_rtscts = mi->overhead_legacy + 2 * ack_dur; mi->avg_ampdu_len = MINSTREL_FRAC(1, 1); if (!use_vht) { stbc = (ht_cap & IEEE80211_HT_CAP_RX_STBC) >> IEEE80211_HT_CAP_RX_STBC_SHIFT; ldpc = ht_cap & IEEE80211_HT_CAP_LDPC_CODING; } else { stbc = (vht_cap->cap & IEEE80211_VHT_CAP_RXSTBC_MASK) >> IEEE80211_VHT_CAP_RXSTBC_SHIFT; ldpc = vht_cap->cap & IEEE80211_VHT_CAP_RXLDPC; } mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT; if (ldpc) mi->tx_flags |= IEEE80211_TX_CTL_LDPC; for (i = 0; i < ARRAY_SIZE(mi->groups); i++) { u32 gflags = minstrel_mcs_groups[i].flags; int bw, nss; mi->supported[i] = 0; if (minstrel_ht_is_legacy_group(i)) continue; if (gflags & IEEE80211_TX_RC_SHORT_GI) { if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) { if (!(ht_cap & 
IEEE80211_HT_CAP_SGI_40)) continue; } else { if (!(ht_cap & IEEE80211_HT_CAP_SGI_20)) continue; } } if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH && sta->deflink.bandwidth < IEEE80211_STA_RX_BW_40) continue; nss = minstrel_mcs_groups[i].streams; /* Mark MCS > 7 as unsupported if STA is in static SMPS mode */ if (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC && nss > 1) continue; /* HT rate */ if (gflags & IEEE80211_TX_RC_MCS) { if (use_vht && minstrel_vht_only) continue; mi->supported[i] = mcs->rx_mask[nss - 1]; continue; } /* VHT rate */ if (!vht_cap->vht_supported || WARN_ON(!(gflags & IEEE80211_TX_RC_VHT_MCS)) || WARN_ON(gflags & IEEE80211_TX_RC_160_MHZ_WIDTH)) continue; if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) { if (sta->deflink.bandwidth < IEEE80211_STA_RX_BW_80 || ((gflags & IEEE80211_TX_RC_SHORT_GI) && !(vht_cap->cap & IEEE80211_VHT_CAP_SHORT_GI_80))) { continue; } } if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) bw = BW_40; else if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) bw = BW_80; else bw = BW_20; mi->supported[i] = minstrel_get_valid_vht_rates(bw, nss, vht_cap->vht_mcs.tx_mcs_map); } sta_info = container_of(sta, struct sta_info, sta); mi->use_short_preamble = test_sta_flag(sta_info, WLAN_STA_SHORT_PREAMBLE) && sta_info->sdata->vif.bss_conf.use_short_preamble; minstrel_ht_update_cck(mp, mi, sband, sta); minstrel_ht_update_ofdm(mp, mi, sband, sta); /* create an initial rate table with the lowest supported rates */ minstrel_ht_update_stats(mp, mi); minstrel_ht_update_rates(mp, mi); } static void minstrel_ht_rate_init(void *priv, struct ieee80211_supported_band *sband, struct cfg80211_chan_def *chandef, struct ieee80211_sta *sta, void *priv_sta) { minstrel_ht_update_caps(priv, sband, chandef, sta, priv_sta); } static void minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband, struct cfg80211_chan_def *chandef, struct ieee80211_sta *sta, void *priv_sta, u32 changed) { minstrel_ht_update_caps(priv, sband, chandef, sta, priv_sta); } static void 
* minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) { struct ieee80211_supported_band *sband; struct minstrel_ht_sta *mi; struct minstrel_priv *mp = priv; struct ieee80211_hw *hw = mp->hw; int max_rates = 0; int i; for (i = 0; i < NUM_NL80211_BANDS; i++) { sband = hw->wiphy->bands[i]; if (sband && sband->n_bitrates > max_rates) max_rates = sband->n_bitrates; } return kzalloc(sizeof(*mi), gfp); } static void minstrel_ht_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) { kfree(priv_sta); } static void minstrel_ht_fill_rate_array(u8 *dest, struct ieee80211_supported_band *sband, const s16 *bitrates, int n_rates) { int i, j; for (i = 0; i < sband->n_bitrates; i++) { struct ieee80211_rate *rate = &sband->bitrates[i]; for (j = 0; j < n_rates; j++) { if (rate->bitrate != bitrates[j]) continue; dest[j] = i; break; } } } static void minstrel_ht_init_cck_rates(struct minstrel_priv *mp) { static const s16 bitrates[4] = { 10, 20, 55, 110 }; struct ieee80211_supported_band *sband; memset(mp->cck_rates, 0xff, sizeof(mp->cck_rates)); sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ]; if (!sband) return; BUILD_BUG_ON(ARRAY_SIZE(mp->cck_rates) != ARRAY_SIZE(bitrates)); minstrel_ht_fill_rate_array(mp->cck_rates, sband, minstrel_cck_bitrates, ARRAY_SIZE(minstrel_cck_bitrates)); } static void minstrel_ht_init_ofdm_rates(struct minstrel_priv *mp, enum nl80211_band band) { static const s16 bitrates[8] = { 60, 90, 120, 180, 240, 360, 480, 540 }; struct ieee80211_supported_band *sband; memset(mp->ofdm_rates[band], 0xff, sizeof(mp->ofdm_rates[band])); sband = mp->hw->wiphy->bands[band]; if (!sband) return; BUILD_BUG_ON(ARRAY_SIZE(mp->ofdm_rates[band]) != ARRAY_SIZE(bitrates)); minstrel_ht_fill_rate_array(mp->ofdm_rates[band], sband, minstrel_ofdm_bitrates, ARRAY_SIZE(minstrel_ofdm_bitrates)); } static void * minstrel_ht_alloc(struct ieee80211_hw *hw) { struct minstrel_priv *mp; int i; mp = kzalloc(sizeof(struct minstrel_priv), GFP_ATOMIC); if (!mp) return 
NULL; /* contention window settings * Just an approximation. Using the per-queue values would complicate * the calculations and is probably unnecessary */ mp->cw_min = 15; mp->cw_max = 1023; /* maximum time that the hw is allowed to stay in one MRR segment */ mp->segment_size = 6000; if (hw->max_rate_tries > 0) mp->max_retry = hw->max_rate_tries; else /* safe default, does not necessarily have to match hw properties */ mp->max_retry = 7; mp->hw = hw; mp->update_interval = HZ / 20; minstrel_ht_init_cck_rates(mp); for (i = 0; i < ARRAY_SIZE(mp->hw->wiphy->bands); i++) minstrel_ht_init_ofdm_rates(mp, i); return mp; } #ifdef CONFIG_MAC80211_DEBUGFS static void minstrel_ht_add_debugfs(struct ieee80211_hw *hw, void *priv, struct dentry *debugfsdir) { struct minstrel_priv *mp = priv; mp->fixed_rate_idx = (u32) -1; debugfs_create_u32("fixed_rate_idx", S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx); } #endif static void minstrel_ht_free(void *priv) { kfree(priv); } static u32 minstrel_ht_get_expected_throughput(void *priv_sta) { struct minstrel_ht_sta *mi = priv_sta; int i, j, prob, tp_avg; i = MI_RATE_GROUP(mi->max_tp_rate[0]); j = MI_RATE_IDX(mi->max_tp_rate[0]); prob = mi->groups[i].rates[j].prob_avg; /* convert tp_avg from pkt per second in kbps */ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10; tp_avg = tp_avg * AVG_PKT_SIZE * 8 / 1024; return tp_avg; } static const struct rate_control_ops mac80211_minstrel_ht = { .name = "minstrel_ht", .capa = RATE_CTRL_CAPA_AMPDU_TRIGGER, .tx_status_ext = minstrel_ht_tx_status, .get_rate = minstrel_ht_get_rate, .rate_init = minstrel_ht_rate_init, .rate_update = minstrel_ht_rate_update, .alloc_sta = minstrel_ht_alloc_sta, .free_sta = minstrel_ht_free_sta, .alloc = minstrel_ht_alloc, .free = minstrel_ht_free, #ifdef CONFIG_MAC80211_DEBUGFS .add_debugfs = minstrel_ht_add_debugfs, .add_sta_debugfs = minstrel_ht_add_sta_debugfs, #endif .get_expected_throughput = minstrel_ht_get_expected_throughput, }; static void __init 
init_sample_table(void) { int col, i, new_idx; u8 rnd[MCS_GROUP_RATES]; memset(sample_table, 0xff, sizeof(sample_table)); for (col = 0; col < SAMPLE_COLUMNS; col++) { get_random_bytes(rnd, sizeof(rnd)); for (i = 0; i < MCS_GROUP_RATES; i++) { new_idx = (i + rnd[i]) % MCS_GROUP_RATES; while (sample_table[col][new_idx] != 0xff) new_idx = (new_idx + 1) % MCS_GROUP_RATES; sample_table[col][new_idx] = i; } } } int __init rc80211_minstrel_init(void) { init_sample_table(); return ieee80211_rate_control_register(&mac80211_minstrel_ht); } void rc80211_minstrel_exit(void) { ieee80211_rate_control_unregister(&mac80211_minstrel_ht); } |
| 35 34 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 | // SPDX-License-Identifier: GPL-2.0 /* * PCI Express I/O Virtualization (IOV) support * Single Root IOV 1.0 * Address 
Translation Service 1.0 * * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com> */ #include <linux/bitfield.h> #include <linux/bits.h> #include <linux/log2.h> #include <linux/pci.h> #include <linux/sizes.h> #include <linux/slab.h> #include <linux/export.h> #include <linux/string.h> #include <linux/delay.h> #include <asm/div64.h> #include "pci.h" #define VIRTFN_ID_LEN 17 /* "virtfn%u\0" for 2^32 - 1 */ int pci_iov_virtfn_bus(struct pci_dev *dev, int vf_id) { if (!dev->is_physfn) return -EINVAL; return dev->bus->number + ((dev->devfn + dev->sriov->offset + dev->sriov->stride * vf_id) >> 8); } int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id) { if (!dev->is_physfn) return -EINVAL; return (dev->devfn + dev->sriov->offset + dev->sriov->stride * vf_id) & 0xff; } EXPORT_SYMBOL_GPL(pci_iov_virtfn_devfn); int pci_iov_vf_id(struct pci_dev *dev) { struct pci_dev *pf; if (!dev->is_virtfn) return -EINVAL; pf = pci_physfn(dev); return (pci_dev_id(dev) - (pci_dev_id(pf) + pf->sriov->offset)) / pf->sriov->stride; } EXPORT_SYMBOL_GPL(pci_iov_vf_id); /** * pci_iov_get_pf_drvdata - Return the drvdata of a PF * @dev: VF pci_dev * @pf_driver: Device driver required to own the PF * * This must be called from a context that ensures that a VF driver is attached. * The value returned is invalid once the VF driver completes its remove() * callback. * * Locking is achieved by the driver core. A VF driver cannot be probed until * pci_enable_sriov() is called and pci_disable_sriov() does not return until * all VF drivers have completed their remove(). * * The PF driver must call pci_disable_sriov() before it begins to destroy the * drvdata. 
*/ void *pci_iov_get_pf_drvdata(struct pci_dev *dev, struct pci_driver *pf_driver) { struct pci_dev *pf_dev; if (!dev->is_virtfn) return ERR_PTR(-EINVAL); pf_dev = dev->physfn; if (pf_dev->driver != pf_driver) return ERR_PTR(-EINVAL); return pci_get_drvdata(pf_dev); } EXPORT_SYMBOL_GPL(pci_iov_get_pf_drvdata); /* * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may * change when NumVFs changes. * * Update iov->offset and iov->stride when NumVFs is written. */ static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) { struct pci_sriov *iov = dev->sriov; pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset); pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride); } /* * The PF consumes one bus number. NumVFs, First VF Offset, and VF Stride * determine how many additional bus numbers will be consumed by VFs. * * Iterate over all valid NumVFs, validate offset and stride, and calculate * the maximum number of bus numbers that could ever be required. 
*/
static int compute_max_vf_buses(struct pci_dev *dev)
{
	struct pci_sriov *iov = dev->sriov;
	int nr_virtfn, busnr, rc = 0;

	/* Try every NumVFs from total_VFs down to 1. */
	for (nr_virtfn = iov->total_VFs; nr_virtfn; nr_virtfn--) {
		pci_iov_set_numvfs(dev, nr_virtfn);
		/* A zero offset, or a zero stride with more than one VF, is invalid. */
		if (!iov->offset || (nr_virtfn > 1 && !iov->stride)) {
			rc = -EIO;
			goto out;
		}

		/* Bus number of the last VF for this NumVFs setting. */
		busnr = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
		if (busnr > iov->max_VF_buses)
			iov->max_VF_buses = busnr;
	}

out:
	/* Always leave NumVFs at 0, on success and on failure. */
	pci_iov_set_numvfs(dev, 0);
	return rc;
}

/*
 * Return the bus numbered @busnr for a VF: @bus itself if the number
 * matches, an already existing bus in the same domain, or a newly added
 * child of @bus.  Returns NULL if a new bus cannot be added.
 */
static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
{
	struct pci_bus *child;

	if (bus->number == busnr)
		return bus;

	child = pci_find_bus(pci_domain_nr(bus), busnr);
	if (child)
		return child;

	child = pci_add_new_bus(bus, NULL, busnr);
	if (!child)
		return NULL;

	pci_bus_insert_busn_res(child, busnr, busnr);

	return child;
}

/*
 * Remove @virtbus once it has no devices left, unless it is the PF's own
 * bus @physbus.
 */
static void virtfn_remove_bus(struct pci_bus *physbus, struct pci_bus *virtbus)
{
	if (physbus != virtbus && list_empty(&virtbus->devices))
		pci_remove_bus(virtbus);
}

/*
 * Return the cached per-VF size of the VF BAR behind resource @resno, or 0
 * if @dev is not a PF.
 */
resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
{
	if (!dev->is_physfn)
		return 0;

	return dev->sriov->barsz[pci_resource_num_to_vf_bar(resno)];
}

/*
 * Record @size as the per-VF size of the VF BAR behind resource @resno.
 * Warns and does nothing if @resno is not an IOV resource.
 */
void pci_iov_resource_set_size(struct pci_dev *dev, int resno,
			       resource_size_t size)
{
	if (!pci_resource_is_iov(resno)) {
		pci_warn(dev, "%s is not an IOV resource\n",
			 pci_resource_name(dev, resno));
		return;
	}

	dev->sriov->barsz[pci_resource_num_to_vf_bar(resno)] = size;
}

/*
 * Read the SR-IOV Control register and report whether the MSE bit is set.
 * Dereferences dev->sriov without checking is_physfn.
 */
bool pci_iov_is_memory_decoding_enabled(struct pci_dev *dev)
{
	u16 cmd;

	pci_read_config_word(dev, dev->sriov->pos + PCI_SRIOV_CTRL, &cmd);

	return cmd & PCI_SRIOV_CTRL_MSE;
}

/* Cache the VF config registers that are assumed identical for all VFs. */
static void pci_read_vf_config_common(struct pci_dev *virtfn)
{
	struct pci_dev *physfn = virtfn->physfn;

	/*
	 * Some config registers are the same across all associated VFs.
	 * Read them once from VF0 so we can skip reading them from the
	 * other VFs.
	 *
	 * PCIe r4.0, sec 9.3.4.1, technically doesn't require all VFs to
	 * have the same Revision ID and Subsystem ID, but we assume they
	 * do.
	 */
	pci_read_config_dword(virtfn, PCI_CLASS_REVISION,
			      &physfn->sriov->class);
	pci_read_config_byte(virtfn, PCI_HEADER_TYPE,
			     &physfn->sriov->hdr_type);
	pci_read_config_word(virtfn, PCI_SUBSYSTEM_VENDOR_ID,
			     &physfn->sriov->subsystem_vendor);
	pci_read_config_word(virtfn, PCI_SUBSYSTEM_ID,
			     &physfn->sriov->subsystem_device);
}

/*
 * Create the "virtfn<id>" link under the PF and the "physfn" link under
 * the VF, then emit a KOBJ_CHANGE uevent for the VF.  If the second link
 * fails, the first one is removed again.
 *
 * Returns 0 on success or the sysfs_create_link() error.
 */
int pci_iov_sysfs_link(struct pci_dev *dev,
		struct pci_dev *virtfn, int id)
{
	char buf[VIRTFN_ID_LEN];
	int rc;

	sprintf(buf, "virtfn%u", id);
	rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
	if (rc)
		goto failed;
	rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
	if (rc)
		goto failed1;

	kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);

	return 0;

failed1:
	sysfs_remove_link(&dev->dev.kobj, buf);
failed:
	return rc;
}

#ifdef CONFIG_PCI_MSI
/*
 * sysfs show: value returned by the PF driver's sriov_get_vf_total_msix()
 * callback, or 0 when no driver is bound or the callback is missing.
 */
static ssize_t sriov_vf_total_msix_show(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	u32 vf_total_msix = 0;

	/* Hold the device lock while pdev->driver is inspected and called. */
	device_lock(dev);
	if (!pdev->driver || !pdev->driver->sriov_get_vf_total_msix)
		goto unlock;

	vf_total_msix = pdev->driver->sriov_get_vf_total_msix(pdev);
unlock:
	device_unlock(dev);
	return sysfs_emit(buf, "%u\n", vf_total_msix);
}
static DEVICE_ATTR_RO(sriov_vf_total_msix);

/*
 * sysfs store on a VF: parse a non-negative integer and pass it to the PF
 * driver's sriov_set_msix_vec_count() callback.
 *
 * Returns @count on success, -EINVAL for unparsable or negative input,
 * -EOPNOTSUPP if the PF has no driver or no callback, -EBUSY if the VF
 * already has a driver, or the callback's own non-zero return value.
 */
static ssize_t sriov_vf_msix_count_store(struct device *dev,
					 struct device_attribute *attr,
					 const char *buf, size_t count)
{
	struct pci_dev *vf_dev = to_pci_dev(dev);
	struct pci_dev *pdev = pci_physfn(vf_dev);
	int val, ret = 0;

	if (kstrtoint(buf, 0, &val) < 0)
		return -EINVAL;

	if (val < 0)
		return -EINVAL;

	/* Lock order: PF first, then VF; released in reverse below. */
	device_lock(&pdev->dev);
	if (!pdev->driver || !pdev->driver->sriov_set_msix_vec_count) {
		ret = -EOPNOTSUPP;
		goto err_pdev;
	}

	device_lock(&vf_dev->dev);
	if (vf_dev->driver) {
		/*
		 * A driver is already attached to this VF and has configured
		 * itself based on the current MSI-X vector count. Changing
		 * the vector size could mess up the driver, so block it.
		 */
		ret = -EBUSY;
		goto err_dev;
	}

	ret = pdev->driver->sriov_set_msix_vec_count(vf_dev, val);

err_dev:
	device_unlock(&vf_dev->dev);
err_pdev:
	device_unlock(&pdev->dev);
	/* GNU "?:" shorthand: a non-zero ret is returned, otherwise count. */
	return ret ? : count;
}
static DEVICE_ATTR_WO(sriov_vf_msix_count);
#endif

/* Attributes exposed on VF devices. */
static struct attribute *sriov_vf_dev_attrs[] = {
#ifdef CONFIG_PCI_MSI
	&dev_attr_sriov_vf_msix_count.attr,
#endif
	NULL,
};

/* Show the VF attribute group only on devices flagged is_virtfn. */
static umode_t sriov_vf_attrs_are_visible(struct kobject *kobj,
					  struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct pci_dev *pdev = to_pci_dev(dev);

	if (!pdev->is_virtfn)
		return 0;

	return a->mode;
}

const struct attribute_group sriov_vf_dev_attr_group = {
	.attrs = sriov_vf_dev_attrs,
	.is_visible = sriov_vf_attrs_are_visible,
};

/*
 * Allocate and set up the pci_dev for VF @id of PF @dev on @bus.
 *
 * The new device holds a reference on the PF via pci_dev_get().  Returns
 * the VF, or an ERR_PTR() after undoing the allocation and references.
 */
static struct pci_dev *pci_iov_scan_device(struct pci_dev *dev, int id,
					   struct pci_bus *bus)
{
	struct pci_sriov *iov = dev->sriov;
	struct pci_dev *virtfn;
	int rc;

	virtfn = pci_alloc_dev(bus);
	if (!virtfn)
		return ERR_PTR(-ENOMEM);

	virtfn->devfn = pci_iov_virtfn_devfn(dev, id);
	virtfn->vendor = dev->vendor;
	virtfn->device = iov->vf_device;
	virtfn->is_virtfn = 1;
	virtfn->physfn = pci_dev_get(dev);
	virtfn->no_command_memory = 1;

	/* The shared config values are read once, from VF 0. */
	if (id == 0)
		pci_read_vf_config_common(virtfn);

	rc = pci_setup_device(virtfn);
	if (rc) {
		/* Drop the PF reference taken above, then the bus ref and the VF. */
		pci_dev_put(dev);
		pci_bus_put(virtfn->bus);
		kfree(virtfn);
		return ERR_PTR(rc);
	}

	return virtfn;
}

/*
 * Create VF @id of PF @dev: find or add its bus, set up the device, claim
 * a slice of each assigned PF IOV resource for it, add it to the bus and
 * create the sysfs links.
 *
 * Returns 0 on success or a negative errno after unwinding.
 */
int pci_iov_add_virtfn(struct pci_dev *dev, int id)
{
	struct pci_bus *bus;
	struct pci_dev *virtfn;
	struct resource *res;
	int rc, i;
	u64 size;

	bus = virtfn_add_bus(dev->bus, pci_iov_virtfn_bus(dev, id));
	if (!bus) {
		rc = -ENOMEM;
		goto failed;
	}

	virtfn = pci_iov_scan_device(dev, id, bus);
	if (IS_ERR(virtfn)) {
		rc = PTR_ERR(virtfn);
		goto failed0;
	}

	virtfn->dev.parent = dev->dev.parent;
	virtfn->multifunction = 0;

	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		int idx = pci_resource_num_from_vf_bar(i);

		res = &dev->resource[idx];
		/* Skip IOV resources that have no parent resource. */
		if (!res->parent)
			continue;
		virtfn->resource[i].name = pci_name(virtfn);
		virtfn->resource[i].flags = res->flags;
		size = pci_iov_resource_size(dev, idx);
		/* VF @id gets the id-th slice of the PF's IOV resource. */
		resource_set_range(&virtfn->resource[i],
				   res->start + size * id, size);
		rc = request_resource(res, &virtfn->resource[i]);
		BUG_ON(rc);
	}

	pci_device_add(virtfn, virtfn->bus);
	rc = pci_iov_sysfs_link(dev, virtfn, id);
	if (rc)
		goto failed1;

	pci_bus_add_device(virtfn);

	return 0;

failed1:
	pci_stop_and_remove_bus_device(virtfn);
	pci_dev_put(dev);
failed0:
	virtfn_remove_bus(dev->bus, bus);
failed:

	return rc;
}

/*
 * Tear down VF @id of PF @dev: remove the sysfs links, stop and remove the
 * device, remove its bus if now empty, and drop the references.  Does
 * nothing if the VF cannot be found.
 */
void pci_iov_remove_virtfn(struct pci_dev *dev, int id)
{
	char buf[VIRTFN_ID_LEN];
	struct pci_dev *virtfn;

	virtfn = pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus),
					     pci_iov_virtfn_bus(dev, id),
					     pci_iov_virtfn_devfn(dev, id));
	if (!virtfn)
		return;

	sprintf(buf, "virtfn%u", id);
	sysfs_remove_link(&dev->dev.kobj, buf);
	/*
	 * pci_stop_dev() could have been called for this virtfn already,
	 * so the directory for the virtfn may have been removed before.
	 * Double check to avoid spurious sysfs warnings.
	 */
	if (virtfn->dev.kobj.sd)
		sysfs_remove_link(&virtfn->dev.kobj, "physfn");

	pci_stop_and_remove_bus_device(virtfn);
	virtfn_remove_bus(dev->bus, virtfn->bus);

	/* balance pci_get_domain_bus_and_slot() */
	pci_dev_put(virtfn);
	pci_dev_put(dev);
}

/* sysfs show: the value of pci_sriov_get_totalvfs() for this device. */
static ssize_t sriov_totalvfs_show(struct device *dev,
				   struct device_attribute *attr,
				   char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	return sysfs_emit(buf, "%u\n", pci_sriov_get_totalvfs(pdev));
}

/* sysfs show: the current num_VFs, sampled under the device lock. */
static ssize_t sriov_numvfs_show(struct device *dev,
				 struct device_attribute *attr,
				 char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	u16 num_vfs;

	/* Serialize vs sriov_numvfs_store() so readers see valid num_VFs */
	device_lock(&pdev->dev);
	num_vfs = pdev->sriov->num_VFs;
	device_unlock(&pdev->dev);

	return sysfs_emit(buf, "%u\n", num_vfs);
}

/*
 * num_vfs > 0; number of VFs to enable
 * num_vfs = 0; disable all VFs
 *
 * Note: SRIOV spec does not allow partial VF
 *	 disable, so it's all or none.
*/
static ssize_t sriov_numvfs_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	int ret = 0;
	u16 num_vfs;

	if (kstrtou16(buf, 0, &num_vfs) < 0)
		return -EINVAL;

	if (num_vfs > pci_sriov_get_totalvfs(pdev))
		return -ERANGE;

	device_lock(&pdev->dev);

	/* Nothing to do if the requested count is already in effect. */
	if (num_vfs == pdev->sriov->num_VFs)
		goto exit;

	/* is PF driver loaded */
	if (!pdev->driver) {
		pci_info(pdev, "no driver bound to device; cannot configure SR-IOV\n");
		ret = -ENOENT;
		goto exit;
	}

	/* is PF driver loaded w/callback */
	if (!pdev->driver->sriov_configure) {
		pci_info(pdev, "driver does not support SR-IOV configuration via sysfs\n");
		ret = -ENOENT;
		goto exit;
	}

	if (num_vfs == 0) {
		/* disable VFs */
		ret = pdev->driver->sriov_configure(pdev, 0);
		goto exit;
	}

	/* enable VFs */
	if (pdev->sriov->num_VFs) {
		pci_warn(pdev, "%d VFs already enabled. Disable before enabling %d VFs\n",
			 pdev->sriov->num_VFs, num_vfs);
		ret = -EBUSY;
		goto exit;
	}

	ret = pdev->driver->sriov_configure(pdev, num_vfs);
	if (ret < 0)
		goto exit;

	/* A non-negative return is compared with the requested count. */
	if (ret != num_vfs)
		pci_warn(pdev, "%d VFs requested; only %d enabled\n",
			 num_vfs, ret);

exit:
	device_unlock(&pdev->dev);

	if (ret < 0)
		return ret;

	return count;
}

/* sysfs show: cached First VF Offset. */
static ssize_t sriov_offset_show(struct device *dev,
				 struct device_attribute *attr,
				 char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	return sysfs_emit(buf, "%u\n", pdev->sriov->offset);
}

/* sysfs show: cached VF Stride. */
static ssize_t sriov_stride_show(struct device *dev,
				 struct device_attribute *attr,
				 char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	return sysfs_emit(buf, "%u\n", pdev->sriov->stride);
}

/* sysfs show: cached VF Device ID, printed in hex. */
static ssize_t sriov_vf_device_show(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	return sysfs_emit(buf, "%x\n", pdev->sriov->vf_device);
}

/* sysfs show: current drivers_autoprobe flag. */
static ssize_t sriov_drivers_autoprobe_show(struct device *dev,
					    struct device_attribute *attr,
					    char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	return sysfs_emit(buf, "%u\n", pdev->sriov->drivers_autoprobe);
}

/*
 * sysfs store: parse a boolean and store it in drivers_autoprobe.
 * Returns @count, or -EINVAL if the input is not a valid boolean.
 */
static ssize_t sriov_drivers_autoprobe_store(struct device *dev,
					     struct device_attribute *attr,
					     const char *buf, size_t count)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	bool drivers_autoprobe;

	if (kstrtobool(buf, &drivers_autoprobe) < 0)
		return -EINVAL;

	pdev->sriov->drivers_autoprobe = drivers_autoprobe;

	return count;
}

static DEVICE_ATTR_RO(sriov_totalvfs);
static DEVICE_ATTR_RW(sriov_numvfs);
static DEVICE_ATTR_RO(sriov_offset);
static DEVICE_ATTR_RO(sriov_stride);
static DEVICE_ATTR_RO(sriov_vf_device);
static DEVICE_ATTR_RW(sriov_drivers_autoprobe);

/* Attributes exposed on PF devices. */
static struct attribute *sriov_pf_dev_attrs[] = {
	&dev_attr_sriov_totalvfs.attr,
	&dev_attr_sriov_numvfs.attr,
	&dev_attr_sriov_offset.attr,
	&dev_attr_sriov_stride.attr,
	&dev_attr_sriov_vf_device.attr,
	&dev_attr_sriov_drivers_autoprobe.attr,
#ifdef CONFIG_PCI_MSI
	&dev_attr_sriov_vf_total_msix.attr,
#endif
	NULL,
};

/* Show the PF attribute group only on devices for which dev_is_pf() holds. */
static umode_t sriov_pf_attrs_are_visible(struct kobject *kobj,
					  struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);

	if (!dev_is_pf(dev))
		return 0;

	return a->mode;
}

const struct attribute_group sriov_pf_dev_attr_group = {
	.attrs = sriov_pf_dev_attrs,
	.is_visible = sriov_pf_attrs_are_visible,
};

/* Weak default for the arch SR-IOV enable hook; does nothing. */
int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
	return 0;
}

/* Weak default for the arch SR-IOV disable hook; does nothing. */
int __weak pcibios_sriov_disable(struct pci_dev *pdev)
{
	return 0;
}

/*
 * Add VFs 0..num_vfs-1 under the rescan/remove lock.  On failure, the VFs
 * added so far are removed again in reverse order.  A no-op returning 0
 * when dev->no_vf_scan is set.
 */
static int sriov_add_vfs(struct pci_dev *dev, u16 num_vfs)
{
	unsigned int i;
	int rc;

	if (dev->no_vf_scan)
		return 0;

	pci_lock_rescan_remove();
	for (i = 0; i < num_vfs; i++) {
		rc = pci_iov_add_virtfn(dev, i);
		if (rc)
			goto failed;
	}
	pci_unlock_rescan_remove();
	return 0;
failed:
	while (i--)
		pci_iov_remove_virtfn(dev, i);
	pci_unlock_rescan_remove();

	return rc;
}

/*
 * Enable @nr_virtfn VFs on PF @dev.
 *
 * Validates InitialVFs and the request, checks that every IOV resource is
 * large enough and that the last VF's bus number is in range, enables the
 * IOV BARs, creates the "dep_link" link when the dependency link points at
 * another function, then sets NumVFs, turns on VF Enable and VF MSE and
 * adds the VF devices.
 *
 * Returns 0 on success (also when @nr_virtfn is 0) or a negative errno.
 */
static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
{
	int rc;
	int i;
	int nres;
	u16 initial;
	struct resource *res;
	struct pci_dev *pdev;
	struct pci_sriov *iov = dev->sriov;
	int bars = 0;
	int bus;

	if (!nr_virtfn)
		return 0;

	if (iov->num_VFs)
		return -EINVAL;

	pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
	/* Without the VFM capability bit, InitialVFs must equal TotalVFs. */
	if (initial > iov->total_VFs ||
	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total_VFs)))
		return -EIO;

	if (nr_virtfn < 0 || nr_virtfn > iov->total_VFs ||
	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
		return -EINVAL;

	/* Count assigned IOV resources that can hold nr_virtfn VF BARs. */
	nres = 0;
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		int idx = pci_resource_num_from_vf_bar(i);
		resource_size_t vf_bar_sz = pci_iov_resource_size(dev, idx);

		bars |= (1 << idx);
		res = &dev->resource[idx];
		if (vf_bar_sz * nr_virtfn > resource_size(res))
			continue;
		if (res->parent)
			nres++;
	}
	if (nres != iov->nres) {
		pci_err(dev, "not enough MMIO resources for SR-IOV\n");
		return -ENOMEM;
	}

	bus = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
	if (bus > dev->bus->busn_res.end) {
		pci_err(dev, "can't enable %d VFs (bus %02x out of range of %pR)\n",
			nr_virtfn, bus, &dev->bus->busn_res);
		return -ENOMEM;
	}

	if (pci_enable_resources(dev, bars)) {
		pci_err(dev, "SR-IOV: IOV BARS not allocated\n");
		return -ENOMEM;
	}

	if (iov->link != dev->devfn) {
		pdev = pci_get_slot(dev->bus, iov->link);
		if (!pdev)
			return -ENODEV;

		if (!pdev->is_physfn) {
			pci_dev_put(pdev);
			return -ENOSYS;
		}

		rc = sysfs_create_link(&dev->dev.kobj,
					&pdev->dev.kobj, "dep_link");
		pci_dev_put(pdev);
		if (rc)
			return rc;
	}

	iov->initial_VFs = initial;
	/* From here on "initial" is the number of VFs actually created. */
	if (nr_virtfn < initial)
		initial = nr_virtfn;

	rc = pcibios_sriov_enable(dev, initial);
	if (rc) {
		pci_err(dev, "failure %d from pcibios_sriov_enable()\n", rc);
		goto err_pcibios;
	}

	pci_iov_set_numvfs(dev, nr_virtfn);
	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
	pci_cfg_access_lock(dev);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	msleep(100);
	pci_cfg_access_unlock(dev);

	rc = sriov_add_vfs(dev, initial);
	if (rc)
		goto err_pcibios;

	kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
	iov->num_VFs = nr_virtfn;

	return 0;

err_pcibios:
	/* Clear VF Enable and VF MSE, then undo the remaining setup. */
	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
	pci_cfg_access_lock(dev);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	ssleep(1);
	pci_cfg_access_unlock(dev);

	pcibios_sriov_disable(dev);

	if (iov->link != dev->devfn)
		sysfs_remove_link(&dev->dev.kobj, "dep_link");

	pci_iov_set_numvfs(dev, 0);
	return rc;
}

/* Remove VFs 0..num_VFs-1 of @dev under the rescan/remove lock. */
static void sriov_del_vfs(struct pci_dev *dev)
{
	struct pci_sriov *iov = dev->sriov;
	int i;

	pci_lock_rescan_remove();
	for (i = 0; i < iov->num_VFs; i++)
		pci_iov_remove_virtfn(dev, i);
	pci_unlock_rescan_remove();
}

/*
 * Disable SR-IOV on PF @dev: remove the VF devices, clear VF Enable and
 * VF MSE, call the arch disable hook, remove "dep_link" if it was created,
 * and reset NumVFs to 0.  A no-op when no VFs are enabled.
 */
static void sriov_disable(struct pci_dev *dev)
{
	struct pci_sriov *iov = dev->sriov;

	if (!iov->num_VFs)
		return;

	sriov_del_vfs(dev);
	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
	pci_cfg_access_lock(dev);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	ssleep(1);
	pci_cfg_access_unlock(dev);

	pcibios_sriov_disable(dev);

	if (iov->link != dev->devfn)
		sysfs_remove_link(&dev->dev.kobj, "dep_link");

	iov->num_VFs = 0;
	pci_iov_set_numvfs(dev, 0);
}

/*
 * Set up the SR-IOV state of @dev from the capability at @pos.
 *
 * Clears VF Enable if it is set, programs the ARI bit and the system page
 * size, sizes the VF BARs, allocates and fills struct pci_sriov, and
 * computes the maximum VF bus number.
 *
 * Returns 0 on success (also when TotalVFs is 0, in which case nothing is
 * allocated) or a negative errno.
 */
static int sriov_init(struct pci_dev *dev, int pos)
{
	int i, bar64;
	int rc;
	int nres;
	u32 pgsz;
	u16 ctrl, total;
	struct pci_sriov *iov;
	struct resource *res;
	const char *res_name;
	struct pci_dev *pdev;
	u32 sriovbars[PCI_SRIOV_NUM_BARS];

	pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
	if (ctrl & PCI_SRIOV_CTRL_VFE) {
		pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
		ssleep(1);
	}

	ctrl = 0;
	/*
	 * If a PF is already on this bus, remember it in pdev and leave the
	 * ARI bit clear; otherwise set it when ARI is enabled on the bus.
	 */
	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
		if (pdev->is_physfn)
			goto found;

	pdev = NULL;
	if (pci_ari_enabled(dev->bus))
		ctrl |= PCI_SRIOV_CTRL_ARI;

found:
	pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);

	pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
	if (!total)
		return 0;

	pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
	/* Mask off the low (PAGE_SHIFT - 12) bits of the supported sizes. */
	i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
	pgsz &= ~((1 << i) - 1);
	if (!pgsz)
		return -EIO;

	/* Keep only the lowest remaining set bit. */
	pgsz &= ~(pgsz - 1);
	pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);

	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
	if (!iov)
		return -ENOMEM;

	/* Sizing SR-IOV BARs with VF Enable cleared - no decode */
	__pci_size_stdbars(dev, PCI_SRIOV_NUM_BARS,
			   pos + PCI_SRIOV_BAR, sriovbars);

	nres = 0;
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		int idx = pci_resource_num_from_vf_bar(i);

		res = &dev->resource[idx];
		res_name = pci_resource_name(dev, idx);

		/*
		 * If it is already FIXED, don't change it, something
		 * (perhaps EA or header fixups) wants it this way.
		 */
		if (res->flags & IORESOURCE_PCI_FIXED)
			bar64 = (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
		else
			bar64 = __pci_read_base(dev, pci_bar_unknown, res,
						pos + PCI_SRIOV_BAR + i * 4,
						&sriovbars[i]);
		if (!res->flags)
			continue;
		/* Each VF BAR must be a multiple of PAGE_SIZE. */
		if (resource_size(res) & (PAGE_SIZE - 1)) {
			rc = -EIO;
			goto failed;
		}
		/* Remember the per-VF size, then grow the resource to cover all VFs. */
		iov->barsz[i] = resource_size(res);
		resource_set_size(res, resource_size(res) * total);
		pci_info(dev, "%s %pR: contains BAR %d for %d VFs\n",
			 res_name, res, i, total);
		/* Skip the next slot when bar64 is non-zero. */
		i += bar64;
		nres++;
	}

	iov->pos = pos;
	iov->nres = nres;
	iov->ctrl = ctrl;
	iov->total_VFs = total;
	iov->driver_max_VFs = total;
	pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &iov->vf_device);
	iov->pgsz = pgsz;
	iov->self = dev;
	iov->drivers_autoprobe = true;
	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
		iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link);
	iov->vf_rebar_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VF_REBAR);

	/* iov->dev is the PF found on the bus above (referenced), else dev itself. */
	if (pdev)
		iov->dev = pci_dev_get(pdev);
	else
		iov->dev = dev;

	dev->sriov = iov;
	dev->is_physfn = 1;
	rc = compute_max_vf_buses(dev);
	if (rc)
		goto fail_max_buses;

	return 0;

fail_max_buses:
	dev->sriov = NULL;
	dev->is_physfn = 0;
failed:
	/* Clear the flags of every IOV resource. */
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = &dev->resource[pci_resource_num_from_vf_bar(i)];
		res->flags = 0;
	}

	kfree(iov);
	return rc;
}

/*
 * Free the SR-IOV state of @dev.  BUGs if VFs are still enabled.  Drops
 * the reference on sriov->dev when it is not @dev itself.
 */
static void sriov_release(struct pci_dev *dev)
{
	BUG_ON(dev->sriov->num_VFs);

	if (dev != dev->sriov->dev)
		pci_dev_put(dev->sriov->dev);

	kfree(dev->sriov);
	dev->sriov = NULL;
}

/*
 * Rewrite the BAR Size field of each VF Resizable BAR control register
 * from the cached per-VF BAR sizes.  Does nothing if
 * pci_iov_vf_rebar_cap() returns 0.
 */
static void sriov_restore_vf_rebar_state(struct pci_dev *dev)
{
	unsigned int pos, nbars, i;
	u32 ctrl;

	pos = pci_iov_vf_rebar_cap(dev);
	if (!pos)
		return;

	pci_read_config_dword(dev, pos + PCI_VF_REBAR_CTRL, &ctrl);
	nbars = FIELD_GET(PCI_VF_REBAR_CTRL_NBAR_MASK, ctrl);

	/* Control registers are read at 8-byte intervals. */
	for (i = 0; i < nbars; i++, pos += 8) {
		int bar_idx, size;

		pci_read_config_dword(dev, pos + PCI_VF_REBAR_CTRL, &ctrl);
		bar_idx = FIELD_GET(PCI_VF_REBAR_CTRL_BAR_IDX, ctrl);
		size = pci_rebar_bytes_to_size(dev->sriov->barsz[bar_idx]);
		ctrl &= ~PCI_VF_REBAR_CTRL_BAR_SIZE;
		ctrl |= FIELD_PREP(PCI_VF_REBAR_CTRL_BAR_SIZE, size);
		pci_write_config_dword(dev, pos + PCI_VF_REBAR_CTRL, ctrl);
	}
}

/*
 * Reprogram the SR-IOV capability from the cached state: ARI bit, VF BARs,
 * system page size, NumVFs and finally the control register.  Returns
 * early if VF Enable is already set in hardware.
 */
static void sriov_restore_state(struct pci_dev *dev)
{
	int i;
	u16 ctrl;
	struct pci_sriov *iov = dev->sriov;

	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
	if (ctrl & PCI_SRIOV_CTRL_VFE)
		return;

	/*
	 * Restore PCI_SRIOV_CTRL_ARI before pci_iov_set_numvfs() because
	 * it reads offset & stride, which depend on PCI_SRIOV_CTRL_ARI.
	 */
	ctrl &= ~PCI_SRIOV_CTRL_ARI;
	ctrl |= iov->ctrl & PCI_SRIOV_CTRL_ARI;
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, ctrl);

	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
		pci_update_resource(dev, pci_resource_num_from_vf_bar(i));

	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
	pci_iov_set_numvfs(dev, iov->num_VFs);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	/* Wait after the write only when it turned VF Enable on. */
	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
		msleep(100);
}

/**
 * pci_iov_init - initialize the IOV capability
 * @dev: the PCI device
 *
 * Returns 0 on success, or negative on failure.
*/ int pci_iov_init(struct pci_dev *dev) { int pos; if (!pci_is_pcie(dev)) return -ENODEV; pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); if (pos) return sriov_init(dev, pos); return -ENODEV; } /** * pci_iov_release - release resources used by the IOV capability * @dev: the PCI device */ void pci_iov_release(struct pci_dev *dev) { if (dev->is_physfn) sriov_release(dev); } /** * pci_iov_remove - clean up SR-IOV state after PF driver is detached * @dev: the PCI device */ void pci_iov_remove(struct pci_dev *dev) { struct pci_sriov *iov = dev->sriov; if (!dev->is_physfn) return; iov->driver_max_VFs = iov->total_VFs; if (iov->num_VFs) pci_warn(dev, "driver left SR-IOV enabled after remove\n"); } /** * pci_iov_update_resource - update a VF BAR * @dev: the PCI device * @resno: the resource number * * Update a VF BAR in the SR-IOV capability of a PF. */ void pci_iov_update_resource(struct pci_dev *dev, int resno) { struct pci_sriov *iov = dev->is_physfn ? dev->sriov : NULL; struct resource *res = pci_resource_n(dev, resno); int vf_bar = pci_resource_num_to_vf_bar(resno); struct pci_bus_region region; u16 cmd; u32 new; int reg; /* * The generic pci_restore_bars() path calls this for all devices, * including VFs and non-SR-IOV devices. If this is not a PF, we * have nothing to do. */ if (!iov) return; pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &cmd); if ((cmd & PCI_SRIOV_CTRL_VFE) && (cmd & PCI_SRIOV_CTRL_MSE)) { dev_WARN(&dev->dev, "can't update enabled VF BAR%d %pR\n", vf_bar, res); return; } /* * Ignore unimplemented BARs, unused resource slots for 64-bit * BARs, and non-movable resources, e.g., those described via * Enhanced Allocation. 
*/ if (!res->flags) return; if (res->flags & IORESOURCE_UNSET) return; if (res->flags & IORESOURCE_PCI_FIXED) return; pcibios_resource_to_bus(dev->bus, ®ion, res); new = region.start; new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK; reg = iov->pos + PCI_SRIOV_BAR + 4 * vf_bar; pci_write_config_dword(dev, reg, new); if (res->flags & IORESOURCE_MEM_64) { new = region.start >> 16 >> 16; pci_write_config_dword(dev, reg + 4, new); } } resource_size_t __weak pcibios_iov_resource_alignment(struct pci_dev *dev, int resno) { return pci_iov_resource_size(dev, resno); } /** * pci_sriov_resource_alignment - get resource alignment for VF BAR * @dev: the PCI device * @resno: the resource number * * Returns the alignment of the VF BAR found in the SR-IOV capability. * This is not the same as the resource size which is defined as * the VF BAR size multiplied by the number of VFs. The alignment * is just the VF BAR size. */ resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno) { return pcibios_iov_resource_alignment(dev, resno); } /** * pci_restore_iov_state - restore the state of the IOV capability * @dev: the PCI device */ void pci_restore_iov_state(struct pci_dev *dev) { if (dev->is_physfn) { sriov_restore_vf_rebar_state(dev); sriov_restore_state(dev); } } /** * pci_vf_drivers_autoprobe - set PF property drivers_autoprobe for VFs * @dev: the PCI device * @auto_probe: set VF drivers auto probe flag */ void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool auto_probe) { if (dev->is_physfn) dev->sriov->drivers_autoprobe = auto_probe; } /** * pci_iov_bus_range - find bus range used by Virtual Function * @bus: the PCI bus * * Returns max number of buses (exclude current one) used by Virtual * Functions. */ int pci_iov_bus_range(struct pci_bus *bus) { int max = 0; struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { if (!dev->is_physfn) continue; if (dev->sriov->max_VF_buses > max) max = dev->sriov->max_VF_buses; } return max ? 
max - bus->number : 0; } /** * pci_enable_sriov - enable the SR-IOV capability * @dev: the PCI device * @nr_virtfn: number of virtual functions to enable * * Returns 0 on success, or negative on failure. */ int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { might_sleep(); if (!dev->is_physfn) return -ENOSYS; return sriov_enable(dev, nr_virtfn); } EXPORT_SYMBOL_GPL(pci_enable_sriov); /** * pci_disable_sriov - disable the SR-IOV capability * @dev: the PCI device */ void pci_disable_sriov(struct pci_dev *dev) { might_sleep(); if (!dev->is_physfn) return; sriov_disable(dev); } EXPORT_SYMBOL_GPL(pci_disable_sriov); /** * pci_num_vf - return number of VFs associated with a PF device_release_driver * @dev: the PCI device * * Returns number of VFs, or 0 if SR-IOV is not enabled. */ int pci_num_vf(struct pci_dev *dev) { if (!dev->is_physfn) return 0; return dev->sriov->num_VFs; } EXPORT_SYMBOL_GPL(pci_num_vf); /** * pci_vfs_assigned - returns number of VFs are assigned to a guest * @dev: the PCI device * * Returns number of VFs belonging to this device that are assigned to a guest. * If device is not a physical function returns 0. 
*/ int pci_vfs_assigned(struct pci_dev *dev) { struct pci_dev *vfdev; unsigned int vfs_assigned = 0; unsigned short dev_id; /* only search if we are a PF */ if (!dev->is_physfn) return 0; /* * determine the device ID for the VFs, the vendor ID will be the * same as the PF so there is no need to check for that one */ dev_id = dev->sriov->vf_device; /* loop through all the VFs to see if we own any that are assigned */ vfdev = pci_get_device(dev->vendor, dev_id, NULL); while (vfdev) { /* * It is considered assigned if it is a virtual function with * our dev as the physical function and the assigned bit is set */ if (vfdev->is_virtfn && (vfdev->physfn == dev) && pci_is_dev_assigned(vfdev)) vfs_assigned++; vfdev = pci_get_device(dev->vendor, dev_id, vfdev); } return vfs_assigned; } EXPORT_SYMBOL_GPL(pci_vfs_assigned); /** * pci_sriov_set_totalvfs -- reduce the TotalVFs available * @dev: the PCI PF device * @numvfs: number that should be used for TotalVFs supported * * Should be called from PF driver's probe routine with * device's mutex held. * * Returns 0 if PF is an SRIOV-capable device and * value of numvfs valid. If not a PF return -ENOSYS; * if numvfs is invalid return -EINVAL; * if VFs already enabled, return -EBUSY. */ int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs) { if (!dev->is_physfn) return -ENOSYS; if (numvfs > dev->sriov->total_VFs) return -EINVAL; /* Shouldn't change if VFs already enabled */ if (dev->sriov->ctrl & PCI_SRIOV_CTRL_VFE) return -EBUSY; dev->sriov->driver_max_VFs = numvfs; return 0; } EXPORT_SYMBOL_GPL(pci_sriov_set_totalvfs); /** * pci_sriov_get_totalvfs -- get total VFs supported on this device * @dev: the PCI PF device * * For a PCIe device with SRIOV support, return the PCIe * SRIOV capability value of TotalVFs or the value of driver_max_VFs * if the driver reduced it. Otherwise 0. 
*/ int pci_sriov_get_totalvfs(struct pci_dev *dev) { if (!dev->is_physfn) return 0; return dev->sriov->driver_max_VFs; } EXPORT_SYMBOL_GPL(pci_sriov_get_totalvfs); /** * pci_sriov_configure_simple - helper to configure SR-IOV * @dev: the PCI device * @nr_virtfn: number of virtual functions to enable, 0 to disable * * Enable or disable SR-IOV for devices that don't require any PF setup * before enabling SR-IOV. Return value is negative on error, or number of * VFs allocated on success. */ int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn) { int rc; might_sleep(); if (!dev->is_physfn) return -ENODEV; if (pci_vfs_assigned(dev)) { pci_warn(dev, "Cannot modify SR-IOV while VFs are assigned\n"); return -EPERM; } if (nr_virtfn == 0) { sriov_disable(dev); return 0; } rc = sriov_enable(dev, nr_virtfn); if (rc < 0) return rc; return nr_virtfn; } EXPORT_SYMBOL_GPL(pci_sriov_configure_simple); /** * pci_iov_vf_bar_set_size - set a new size for a VF BAR * @dev: the PCI device * @resno: the resource number * @size: new size as defined in the spec (0=1MB, 31=128TB) * * Set the new size of a VF BAR that supports VF resizable BAR capability. * Unlike pci_resize_resource(), this does not cause the resource that * reserves the MMIO space (originally up to total_VFs) to be resized, which * means that following calls to pci_enable_sriov() can fail if the resources * no longer fit. * * Return: 0 on success, or negative on failure. 
*/ int pci_iov_vf_bar_set_size(struct pci_dev *dev, int resno, int size) { u32 sizes; int ret; if (!pci_resource_is_iov(resno)) return -EINVAL; if (pci_iov_is_memory_decoding_enabled(dev)) return -EBUSY; sizes = pci_rebar_get_possible_sizes(dev, resno); if (!sizes) return -ENOTSUPP; if (!(sizes & BIT(size))) return -EINVAL; ret = pci_rebar_set_size(dev, resno, size); if (ret) return ret; pci_iov_resource_set_size(dev, resno, pci_rebar_size_to_bytes(size)); return 0; } EXPORT_SYMBOL_GPL(pci_iov_vf_bar_set_size); /** * pci_iov_vf_bar_get_sizes - get VF BAR sizes allowing to create up to num_vfs * @dev: the PCI device * @resno: the resource number * @num_vfs: number of VFs * * Get the sizes of a VF resizable BAR that can accommodate @num_vfs within * the currently assigned size of the resource @resno. * * Return: A bitmask of sizes in format defined in the spec (bit 0=1MB, * bit 31=128TB). */ u32 pci_iov_vf_bar_get_sizes(struct pci_dev *dev, int resno, int num_vfs) { u64 vf_len = pci_resource_len(dev, resno); u32 sizes; if (!num_vfs) return 0; do_div(vf_len, num_vfs); sizes = (roundup_pow_of_two(vf_len + 1) - 1) >> ilog2(SZ_1M); return sizes & pci_rebar_get_possible_sizes(dev, resno); } EXPORT_SYMBOL_GPL(pci_iov_vf_bar_get_sizes); |
| 2 2 31 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 25 6 2 3 1 1 4 2 3 25 25 25 25 25 25 25 25 25 25 25 3 25 25 25 25 29 29 29 29 29 23 7 3 25 1 1 1 1 1 1 1 1 1 1 2 2 2 1 2 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 5 5 4 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 
448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 
948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 
1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 |
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 */

#include <linux/device.h>
#include <net/genetlink.h>
#include <net/sock.h>
#include "devl_internal.h"

/* An output message plus an optional per-version callback and its argument. */
struct devlink_info_req {
	struct sk_buff *msg;
	void (*version_cb)(const char *version_name,
			   enum devlink_info_version_type version_type,
			   void *version_cb_priv);
	void *version_cb_priv;
};

/* One (reload action, reload limit) pair. */
struct devlink_reload_combination {
	enum devlink_reload_action action;
	enum devlink_reload_limit limit;
};

/* Action/limit pairs that are never valid together. */
static const struct devlink_reload_combination devlink_reload_invalid_combinations[] = {
	{
		/* can't reinitialize driver with no down time */
		.action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
		.limit = DEVLINK_RELOAD_LIMIT_NO_RESET,
	},
};

/* Return true if (@action, @limit) is listed in the invalid-combination table. */
static bool
devlink_reload_combination_is_invalid(enum devlink_reload_action action,
				      enum devlink_reload_limit limit)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++)
		if (devlink_reload_invalid_combinations[i].action == action &&
		    devlink_reload_invalid_combinations[i].limit == limit)
			return true;
	return false;
}

/* Test the bit for @action in the driver's ops->reload_actions mask. */
static bool
devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action)
{
	return test_bit(action, &devlink->ops->reload_actions);
}

/* Test the bit for @limit in the driver's ops->reload_limits mask. */
static bool
devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_limit limit)
{
	return test_bit(limit, &devlink->ops->reload_limits);
}

/*
 * Emit one DEVLINK_ATTR_RELOAD_STATS_ENTRY nest holding @limit and @value.
 * Returns 0, or -EMSGSIZE after cancelling the nest.
 */
static int devlink_reload_stat_put(struct sk_buff *msg,
				   enum devlink_reload_limit limit, u32 value)
{
	struct nlattr *reload_stats_entry;

	reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
	if (!reload_stats_entry)
		return -EMSGSIZE;

	if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
	    nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
		goto nla_put_failure;
	nla_nest_end(msg, reload_stats_entry);
	return 0;

nla_put_failure:
	nla_nest_cancel(msg, reload_stats_entry);
	return -EMSGSIZE;
}

/*
 * Emit the local (@is_remote false) or remote (@is_remote true) reload
 * statistics: one ACTION_INFO nest per action, each holding an
 * ACTION_STATS nest with one entry per limit.
 *
 * Returns 0, or -EMSGSIZE after cancelling every nest opened so far.
 */
static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
{
	struct nlattr *reload_stats_attr, *act_info, *act_stats;
	int i, j, stat_idx;
	u32 value;

	if (!is_remote)
		reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
	else
		reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_REMOTE_RELOAD_STATS);

	if (!reload_stats_attr)
		return -EMSGSIZE;

	for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
		/* Local stats skip unsupported actions; UNSPEC is always skipped. */
		if ((!is_remote &&
		     !devlink_reload_action_is_supported(devlink, i)) ||
		    i == DEVLINK_RELOAD_ACTION_UNSPEC)
			continue;
		act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
		if (!act_info)
			goto nla_put_failure;

		if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
			goto action_info_nest_cancel;
		act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
		if (!act_stats)
			goto action_info_nest_cancel;

		for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
			/* Remote stats are shown even if not locally supported.
			 * Stats of actions with unspecified limit are shown
			 * though drivers don't need to register unspecified
			 * limit.
			 */
			if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
			     !devlink_reload_limit_is_supported(devlink, j)) ||
			    devlink_reload_combination_is_invalid(i, j))
				continue;

			/* The stats arrays are indexed by limit, then action. */
			stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
			if (!is_remote)
				value = devlink->stats.reload_stats[stat_idx];
			else
				value = devlink->stats.remote_reload_stats[stat_idx];
			if (devlink_reload_stat_put(msg, j, value))
				goto action_stats_nest_cancel;
		}
		nla_nest_end(msg, act_stats);
		nla_nest_end(msg, act_info);
	}
	nla_nest_end(msg, reload_stats_attr);
	return 0;

action_stats_nest_cancel:
	nla_nest_cancel(msg, act_stats);
action_info_nest_cancel:
	nla_nest_cancel(msg, act_info);
nla_put_failure:
	nla_nest_cancel(msg, reload_stats_attr);
	return -EMSGSIZE;
}

/*
 * Emit a DEVLINK_ATTR_NESTED_DEVLINK handle for every entry in
 * devlink->nested_rels.  Returns 0 or the first error from the put helper.
 */
static int devlink_nl_nested_fill(struct sk_buff *msg, struct devlink *devlink)
{
	unsigned long rel_index;
	void *unused;
	int err;

	xa_for_each(&devlink->nested_rels, rel_index, unused) {
		err = devlink_rel_devlink_handle_put(msg, devlink,
						     rel_index,
						     DEVLINK_ATTR_NESTED_DEVLINK,
						     NULL);
		if (err)
			return err;
	}
	return 0;
}

/*
 * Build one devlink message for @cmd: the handle, the reload_failed flag,
 * a DEV_STATS nest with local and remote reload stats, and the nested
 * devlink handles.
 *
 * Returns 0, or -EMSGSIZE after cancelling the message.
 */
static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
			   enum devlink_command cmd, u32 portid,
			   u32 seq, int flags)
{
	struct nlattr *dev_stats;
	void *hdr;

	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
	if (!hdr)
		return -EMSGSIZE;

	if (devlink_nl_put_handle(msg, devlink))
		goto nla_put_failure;
	if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
		goto nla_put_failure;

	dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);
	if (!dev_stats)
		goto nla_put_failure;

	if (devlink_reload_stats_put(msg, devlink, false))
		goto dev_stats_nest_cancel;
	if (devlink_reload_stats_put(msg, devlink, true))
		goto dev_stats_nest_cancel;

	nla_nest_end(msg, dev_stats);

	if (devlink_nl_nested_fill(msg, devlink))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);
	return 0;

dev_stats_nest_cancel:
	nla_nest_cancel(msg, dev_stats);
nla_put_failure:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

static void devlink_notify(struct devlink
*devlink, enum devlink_command cmd) { struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL); WARN_ON(!devl_is_registered(devlink)); if (!devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0); if (err) { nlmsg_free(msg); return; } devlink_nl_notify_send(devlink, msg); } int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW, info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int devlink_nl_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { return devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); } int devlink_nl_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { return devlink_nl_dumpit(msg, cb, devlink_nl_get_dump_one); } static void devlink_rel_notify_cb(struct devlink *devlink, u32 obj_index) { devlink_notify(devlink, DEVLINK_CMD_NEW); } static void devlink_rel_cleanup_cb(struct devlink *devlink, u32 obj_index, u32 rel_index) { xa_erase(&devlink->nested_rels, rel_index); } int devl_nested_devlink_set(struct devlink *devlink, struct devlink *nested_devlink) { u32 rel_index; int err; err = devlink_rel_nested_in_add(&rel_index, devlink->index, 0, devlink_rel_notify_cb, devlink_rel_cleanup_cb, nested_devlink); if (err) return err; return xa_insert(&devlink->nested_rels, rel_index, xa_mk_value(0), GFP_KERNEL); } EXPORT_SYMBOL_GPL(devl_nested_devlink_set); void devlink_notify_register(struct devlink *devlink) { devlink_notify(devlink, DEVLINK_CMD_NEW); devlink_linecards_notify_register(devlink); devlink_ports_notify_register(devlink); 
devlink_trap_policers_notify_register(devlink); devlink_trap_groups_notify_register(devlink); devlink_traps_notify_register(devlink); devlink_rates_notify_register(devlink); devlink_regions_notify_register(devlink); devlink_params_notify_register(devlink); } void devlink_notify_unregister(struct devlink *devlink) { devlink_params_notify_unregister(devlink); devlink_regions_notify_unregister(devlink); devlink_rates_notify_unregister(devlink); devlink_traps_notify_unregister(devlink); devlink_trap_groups_notify_unregister(devlink); devlink_trap_policers_notify_unregister(devlink); devlink_ports_notify_unregister(devlink); devlink_linecards_notify_unregister(devlink); devlink_notify(devlink, DEVLINK_CMD_DEL); } static void devlink_reload_failed_set(struct devlink *devlink, bool reload_failed) { if (devlink->reload_failed == reload_failed) return; devlink->reload_failed = reload_failed; devlink_notify(devlink, DEVLINK_CMD_NEW); } bool devlink_is_reload_failed(const struct devlink *devlink) { return devlink->reload_failed; } EXPORT_SYMBOL_GPL(devlink_is_reload_failed); static void __devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats, enum devlink_reload_limit limit, u32 actions_performed) { unsigned long actions = actions_performed; int stat_idx; int action; for_each_set_bit(action, &actions, __DEVLINK_RELOAD_ACTION_MAX) { stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action; reload_stats[stat_idx]++; } devlink_notify(devlink, DEVLINK_CMD_NEW); } static void devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit, u32 actions_performed) { __devlink_reload_stats_update(devlink, devlink->stats.reload_stats, limit, actions_performed); } /** * devlink_remote_reload_actions_performed - Update devlink on reload actions * performed which are not a direct result of devlink reload call. * * This should be called by a driver after performing reload actions in case it was not * a result of devlink reload call. 
For example fw_activate was performed as a result * of devlink reload triggered fw_activate on another host. * The motivation for this function is to keep data on reload actions performed on this * function whether it was done due to direct devlink reload call or not. * * @devlink: devlink * @limit: reload limit * @actions_performed: bitmask of actions performed */ void devlink_remote_reload_actions_performed(struct devlink *devlink, enum devlink_reload_limit limit, u32 actions_performed) { if (WARN_ON(!actions_performed || actions_performed & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) || actions_performed >= BIT(__DEVLINK_RELOAD_ACTION_MAX) || limit > DEVLINK_RELOAD_LIMIT_MAX)) return; __devlink_reload_stats_update(devlink, devlink->stats.remote_reload_stats, limit, actions_performed); } EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed); static struct net *devlink_netns_get(struct sk_buff *skb, struct genl_info *info) { struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID]; struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD]; struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID]; struct net *net; if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) { NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified"); return ERR_PTR(-EINVAL); } if (netns_pid_attr) { net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr)); } else if (netns_fd_attr) { net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr)); } else if (netns_id_attr) { net = get_net_ns_by_id(sock_net(skb->sk), nla_get_u32(netns_id_attr)); if (!net) net = ERR_PTR(-EINVAL); } else { WARN_ON(1); net = ERR_PTR(-EINVAL); } if (IS_ERR(net)) { NL_SET_ERR_MSG(info->extack, "Unknown network namespace"); return ERR_PTR(-EINVAL); } if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { put_net(net); return ERR_PTR(-EPERM); } return net; } static void devlink_reload_netns_change(struct devlink *devlink, struct net *curr_net, struct net *dest_net) { /* 
Userspace needs to be notified about devlink objects * removed from original and entering new network namespace. * The rest of the devlink objects are re-created during * reload process so the notifications are generated separatelly. */ devlink_notify_unregister(devlink); write_pnet(&devlink->_net, dest_net); devlink_notify_register(devlink); devlink_rel_nested_in_notify(devlink); } static void devlink_reload_reinit_sanity_check(struct devlink *devlink) { WARN_ON(!list_empty(&devlink->trap_policer_list)); WARN_ON(!list_empty(&devlink->trap_group_list)); WARN_ON(!list_empty(&devlink->trap_list)); WARN_ON(!list_empty(&devlink->dpipe_table_list)); WARN_ON(!list_empty(&devlink->sb_list)); WARN_ON(!list_empty(&devlink->rate_list)); WARN_ON(!list_empty(&devlink->linecard_list)); WARN_ON(!xa_empty(&devlink->ports)); } int devlink_reload(struct devlink *devlink, struct net *dest_net, enum devlink_reload_action action, enum devlink_reload_limit limit, u32 *actions_performed, struct netlink_ext_ack *extack) { u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; struct net *curr_net; int err; /* Make sure the reload operations are invoked with the device lock * held to allow drivers to trigger functionality that expects it * (e.g., PCI reset) and to close possible races between these * operations and probe/remove. 
*/ device_lock_assert(devlink->dev); memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats)); err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack); if (err) return err; curr_net = devlink_net(devlink); if (dest_net && !net_eq(dest_net, curr_net)) devlink_reload_netns_change(devlink, curr_net, dest_net); if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT) { devlink_params_driverinit_load_new(devlink); devlink_reload_reinit_sanity_check(devlink); } err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack); devlink_reload_failed_set(devlink, !!err); if (err) return err; WARN_ON(!(*actions_performed & BIT(action))); /* Catch driver on updating the remote action within devlink reload */ WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats))); devlink_reload_stats_update(devlink, limit, *actions_performed); return 0; } static int devlink_nl_reload_actions_performed_snd(struct devlink *devlink, u32 actions_performed, enum devlink_command cmd, struct genl_info *info) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, cmd); if (!hdr) goto free_msg; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (nla_put_bitfield32(msg, DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, actions_performed, actions_performed)) goto nla_put_failure; genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: genlmsg_cancel(msg, hdr); free_msg: nlmsg_free(msg); return -EMSGSIZE; } int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; enum devlink_reload_action action; enum devlink_reload_limit limit; struct net *dest_net = NULL; u32 actions_performed; int err; err = devlink_resources_validate(devlink, NULL, info); if (err) { 
NL_SET_ERR_MSG(info->extack, "resources size validation failed"); return err; } action = nla_get_u8_default(info->attrs[DEVLINK_ATTR_RELOAD_ACTION], DEVLINK_RELOAD_ACTION_DRIVER_REINIT); if (!devlink_reload_action_is_supported(devlink, action)) { NL_SET_ERR_MSG(info->extack, "Requested reload action is not supported by the driver"); return -EOPNOTSUPP; } limit = DEVLINK_RELOAD_LIMIT_UNSPEC; if (info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) { struct nla_bitfield32 limits; u32 limits_selected; limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]); limits_selected = limits.value & limits.selector; if (!limits_selected) { NL_SET_ERR_MSG(info->extack, "Invalid limit selected"); return -EINVAL; } for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++) if (limits_selected & BIT(limit)) break; /* UAPI enables multiselection, but currently it is not used */ if (limits_selected != BIT(limit)) { NL_SET_ERR_MSG(info->extack, "Multiselection of limit is not supported"); return -EOPNOTSUPP; } if (!devlink_reload_limit_is_supported(devlink, limit)) { NL_SET_ERR_MSG(info->extack, "Requested limit is not supported by the driver"); return -EOPNOTSUPP; } if (devlink_reload_combination_is_invalid(action, limit)) { NL_SET_ERR_MSG(info->extack, "Requested limit is invalid for this action"); return -EINVAL; } } if (info->attrs[DEVLINK_ATTR_NETNS_PID] || info->attrs[DEVLINK_ATTR_NETNS_FD] || info->attrs[DEVLINK_ATTR_NETNS_ID]) { dest_net = devlink_netns_get(skb, info); if (IS_ERR(dest_net)) return PTR_ERR(dest_net); if (!net_eq(dest_net, devlink_net(devlink)) && action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) { NL_SET_ERR_MSG_MOD(info->extack, "Changing namespace is only supported for reinit action"); return -EOPNOTSUPP; } } err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack); if (dest_net) put_net(dest_net); if (err) return err; /* For backward compatibility generate reply only if attributes used by user */ if 
(!info->attrs[DEVLINK_ATTR_RELOAD_ACTION] && !info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) return 0; return devlink_nl_reload_actions_performed_snd(devlink, actions_performed, DEVLINK_CMD_RELOAD, info); } bool devlink_reload_actions_valid(const struct devlink_ops *ops) { const struct devlink_reload_combination *comb; int i; if (!devlink_reload_supported(ops)) { if (WARN_ON(ops->reload_actions)) return false; return true; } if (WARN_ON(!ops->reload_actions || ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) || ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX))) return false; if (WARN_ON(ops->reload_limits & BIT(DEVLINK_RELOAD_LIMIT_UNSPEC) || ops->reload_limits >= BIT(__DEVLINK_RELOAD_LIMIT_MAX))) return false; for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) { comb = &devlink_reload_invalid_combinations[i]; if (ops->reload_actions == BIT(comb->action) && ops->reload_limits == BIT(comb->limit)) return false; } return true; } static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags) { const struct devlink_ops *ops = devlink->ops; enum devlink_eswitch_encap_mode encap_mode; u8 inline_mode; void *hdr; int err = 0; u16 mode; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; err = devlink_nl_put_handle(msg, devlink); if (err) goto nla_put_failure; if (ops->eswitch_mode_get) { err = ops->eswitch_mode_get(devlink, &mode); if (err) goto nla_put_failure; err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode); if (err) goto nla_put_failure; } if (ops->eswitch_inline_mode_get) { err = ops->eswitch_inline_mode_get(devlink, &inline_mode); if (err) goto nla_put_failure; err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE, inline_mode); if (err) goto nla_put_failure; } if (ops->eswitch_encap_mode_get) { err = ops->eswitch_encap_mode_get(devlink, &encap_mode); if (err) goto nla_put_failure; err = nla_put_u8(msg, 
DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode); if (err) goto nla_put_failure; } genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return err; } int devlink_nl_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_GET, info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; enum devlink_eswitch_encap_mode encap_mode; u8 inline_mode; int err = 0; u16 mode; if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) { if (!ops->eswitch_mode_set) return -EOPNOTSUPP; mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); err = devlink_rate_nodes_check(devlink, mode, info->extack); if (err) return err; err = ops->eswitch_mode_set(devlink, mode, info->extack); if (err) return err; } if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) { if (!ops->eswitch_inline_mode_set) return -EOPNOTSUPP; inline_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]); err = ops->eswitch_inline_mode_set(devlink, inline_mode, info->extack); if (err) return err; } if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) { if (!ops->eswitch_encap_mode_set) return -EOPNOTSUPP; encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]); err = ops->eswitch_encap_mode_set(devlink, encap_mode, info->extack); if (err) return err; } return 0; } int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) { if (!req->msg) return 0; return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn); } EXPORT_SYMBOL_GPL(devlink_info_serial_number_put); int devlink_info_board_serial_number_put(struct 
devlink_info_req *req, const char *bsn) { if (!req->msg) return 0; return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, bsn); } EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put); static int devlink_info_version_put(struct devlink_info_req *req, int attr, const char *version_name, const char *version_value, enum devlink_info_version_type version_type) { struct nlattr *nest; int err; if (req->version_cb) req->version_cb(version_name, version_type, req->version_cb_priv); if (!req->msg || !*version_value) return 0; nest = nla_nest_start_noflag(req->msg, attr); if (!nest) return -EMSGSIZE; err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME, version_name); if (err) goto nla_put_failure; err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE, version_value); if (err) goto nla_put_failure; nla_nest_end(req->msg, nest); return 0; nla_put_failure: nla_nest_cancel(req->msg, nest); return err; } int devlink_info_version_fixed_put(struct devlink_info_req *req, const char *version_name, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED, version_name, version_value, DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put); int devlink_info_version_stored_put(struct devlink_info_req *req, const char *version_name, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED, version_name, version_value, DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_stored_put); int devlink_info_version_stored_put_ext(struct devlink_info_req *req, const char *version_name, const char *version_value, enum devlink_info_version_type version_type) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED, version_name, version_value, version_type); } EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext); int devlink_info_version_running_put(struct devlink_info_req *req, const char *version_name, const char 
*version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING, version_name, version_value, DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_running_put); int devlink_info_version_running_put_ext(struct devlink_info_req *req, const char *version_name, const char *version_value, enum devlink_info_version_type version_type) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING, version_name, version_value, version_type); } EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext); static int devlink_nl_driver_info_get(struct device_driver *drv, struct devlink_info_req *req) { if (!drv) return 0; if (drv->name[0]) return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, drv->name); return 0; } static int devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { struct device *dev = devlink_to_dev(devlink); struct devlink_info_req req = {}; void *hdr; int err; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; err = -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto err_cancel_msg; req.msg = msg; if (devlink->ops->info_get) { err = devlink->ops->info_get(devlink, &req, extack); if (err) goto err_cancel_msg; } err = devlink_nl_driver_info_get(dev->driver, &req); if (err) goto err_cancel_msg; genlmsg_end(msg, hdr); return 0; err_cancel_msg: genlmsg_cancel(msg, hdr); return err; } int devlink_nl_info_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, info->snd_portid, info->snd_seq, 0, info->extack); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int devlink_nl_info_get_dump_one(struct sk_buff 
*msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { int err; err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, cb->extack); if (err == -EOPNOTSUPP) err = 0; return err; } int devlink_nl_info_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { return devlink_nl_dumpit(msg, cb, devlink_nl_info_get_dump_one); } static int devlink_nl_flash_update_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, struct devlink_flash_notify *params) { void *hdr; hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd); if (!hdr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS) goto out; if (params->status_msg && nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG, params->status_msg)) goto nla_put_failure; if (params->component && nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT, params->component)) goto nla_put_failure; if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE, params->done)) goto nla_put_failure; if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL, params->total)) goto nla_put_failure; if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT, params->timeout)) goto nla_put_failure; out: genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static void __devlink_flash_update_notify(struct devlink *devlink, enum devlink_command cmd, struct devlink_flash_notify *params) { struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE && cmd != DEVLINK_CMD_FLASH_UPDATE_END && cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS); if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; err = devlink_nl_flash_update_fill(msg, devlink, cmd, params); if (err) goto out_free_msg; devlink_nl_notify_send(devlink, 
msg); return; out_free_msg: nlmsg_free(msg); } static void devlink_flash_update_begin_notify(struct devlink *devlink) { struct devlink_flash_notify params = {}; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE, ¶ms); } static void devlink_flash_update_end_notify(struct devlink *devlink) { struct devlink_flash_notify params = {}; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE_END, ¶ms); } void devlink_flash_update_status_notify(struct devlink *devlink, const char *status_msg, const char *component, unsigned long done, unsigned long total) { struct devlink_flash_notify params = { .status_msg = status_msg, .component = component, .done = done, .total = total, }; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE_STATUS, ¶ms); } EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify); void devlink_flash_update_timeout_notify(struct devlink *devlink, const char *status_msg, const char *component, unsigned long timeout) { struct devlink_flash_notify params = { .status_msg = status_msg, .component = component, .timeout = timeout, }; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE_STATUS, ¶ms); } EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify); struct devlink_flash_component_lookup_ctx { const char *lookup_name; bool lookup_name_found; }; static void devlink_flash_component_lookup_cb(const char *version_name, enum devlink_info_version_type version_type, void *version_cb_priv) { struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv; if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT || lookup_ctx->lookup_name_found) return; lookup_ctx->lookup_name_found = !strcmp(lookup_ctx->lookup_name, version_name); } static int devlink_flash_component_get(struct devlink *devlink, struct nlattr *nla_component, const char **p_component, struct netlink_ext_ack *extack) { struct devlink_flash_component_lookup_ctx lookup_ctx = {}; struct devlink_info_req req = {}; const char *component; int ret; if 
(!nla_component) return 0; component = nla_data(nla_component); if (!devlink->ops->info_get) { NL_SET_ERR_MSG_ATTR(extack, nla_component, "component update is not supported by this device"); return -EOPNOTSUPP; } lookup_ctx.lookup_name = component; req.version_cb = devlink_flash_component_lookup_cb; req.version_cb_priv = &lookup_ctx; ret = devlink->ops->info_get(devlink, &req, NULL); if (ret) return ret; if (!lookup_ctx.lookup_name_found) { NL_SET_ERR_MSG_ATTR(extack, nla_component, "selected component is not supported by this device"); return -EINVAL; } *p_component = component; return 0; } int devlink_nl_flash_update_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *nla_overwrite_mask, *nla_file_name; struct devlink_flash_update_params params = {}; struct devlink *devlink = info->user_ptr[0]; const char *file_name; u32 supported_params; int ret; if (!devlink->ops->flash_update) return -EOPNOTSUPP; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME)) return -EINVAL; ret = devlink_flash_component_get(devlink, info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT], ¶ms.component, info->extack); if (ret) return ret; supported_params = devlink->ops->supported_flash_update_params; nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK]; if (nla_overwrite_mask) { struct nla_bitfield32 sections; if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK)) { NL_SET_ERR_MSG_ATTR(info->extack, nla_overwrite_mask, "overwrite settings are not supported by this device"); return -EOPNOTSUPP; } sections = nla_get_bitfield32(nla_overwrite_mask); params.overwrite_mask = sections.value & sections.selector; } nla_file_name = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]; file_name = nla_data(nla_file_name); ret = request_firmware(¶ms.fw, file_name, devlink->dev); if (ret) { NL_SET_ERR_MSG_ATTR(info->extack, nla_file_name, "failed to locate the requested firmware file"); return ret; } 
devlink_flash_update_begin_notify(devlink); ret = devlink->ops->flash_update(devlink, ¶ms, info->extack); devlink_flash_update_end_notify(devlink); release_firmware(params.fw); return ret; } static void __devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) { struct devlink_info_req req = {}; const struct nlattr *nlattr; struct sk_buff *msg; int rem, err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; req.msg = msg; err = devlink->ops->info_get(devlink, &req, NULL); if (err) goto free_msg; nla_for_each_attr_type(nlattr, DEVLINK_ATTR_INFO_VERSION_RUNNING, (void *)msg->data, msg->len, rem) { const struct nlattr *kv; int rem_kv; nla_for_each_nested_type(kv, DEVLINK_ATTR_INFO_VERSION_VALUE, nlattr, rem_kv) { strlcat(buf, nla_data(kv), len); strlcat(buf, " ", len); } } free_msg: nlmsg_consume(msg); } void devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) { if (!devlink->ops->info_get) return; devl_lock(devlink); if (devl_is_registered(devlink)) __devlink_compat_running_version(devlink, buf, len); devl_unlock(devlink); } int devlink_compat_flash_update(struct devlink *devlink, const char *file_name) { struct devlink_flash_update_params params = {}; int ret; devl_lock(devlink); if (!devl_is_registered(devlink)) { ret = -ENODEV; goto out_unlock; } if (!devlink->ops->flash_update) { ret = -EOPNOTSUPP; goto out_unlock; } ret = request_firmware(¶ms.fw, file_name, devlink->dev); if (ret) goto out_unlock; devlink_flash_update_begin_notify(devlink); ret = devlink->ops->flash_update(devlink, ¶ms, NULL); devlink_flash_update_end_notify(devlink); release_firmware(params.fw); out_unlock: devl_unlock(devlink); return ret; } static int devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { struct nlattr *selftests; void *hdr; int err; int i; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, 
DEVLINK_CMD_SELFTESTS_GET); if (!hdr) return -EMSGSIZE; err = -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto err_cancel_msg; selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS); if (!selftests) goto err_cancel_msg; for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1; i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) { if (devlink->ops->selftest_check(devlink, i, extack)) { err = nla_put_flag(msg, i); if (err) goto err_cancel_msg; } } nla_nest_end(msg, selftests); genlmsg_end(msg, hdr); return 0; err_cancel_msg: genlmsg_cancel(msg, hdr); return err; } int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; int err; if (!devlink->ops->selftest_check) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid, info->snd_seq, 0, info->extack); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int devlink_nl_selftests_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { if (!devlink->ops->selftest_check) return 0; return devlink_nl_selftests_fill(msg, devlink, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, cb->extack); } int devlink_nl_selftests_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return devlink_nl_dumpit(skb, cb, devlink_nl_selftests_get_dump_one); } static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id, enum devlink_selftest_status test_status) { struct nlattr *result_attr; result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT); if (!result_attr) return -EMSGSIZE; if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) || nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS, test_status)) goto nla_put_failure; nla_nest_end(skb, result_attr); return 0; nla_put_failure: nla_nest_cancel(skb, result_attr); return -EMSGSIZE; } static const 
struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = { [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG }, }; int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1]; struct devlink *devlink = info->user_ptr[0]; struct nlattr *attrs, *selftests; struct sk_buff *msg; void *hdr; int err; int i; if (!devlink->ops->selftest_run || !devlink->ops->selftest_check) return -EOPNOTSUPP; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SELFTESTS)) return -EINVAL; attrs = info->attrs[DEVLINK_ATTR_SELFTESTS]; err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs, devlink_selftest_nl_policy, info->extack); if (err < 0) return err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = -EMSGSIZE; hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN); if (!hdr) goto free_msg; if (devlink_nl_put_handle(msg, devlink)) goto genlmsg_cancel; selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS); if (!selftests) goto genlmsg_cancel; for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1; i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) { enum devlink_selftest_status test_status; if (nla_get_flag(tb[i])) { if (!devlink->ops->selftest_check(devlink, i, info->extack)) { if (devlink_selftest_result_put(msg, i, DEVLINK_SELFTEST_STATUS_SKIP)) goto selftests_nest_cancel; continue; } test_status = devlink->ops->selftest_run(devlink, i, info->extack); if (devlink_selftest_result_put(msg, i, test_status)) goto selftests_nest_cancel; } } nla_nest_end(msg, selftests); genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); selftests_nest_cancel: nla_nest_cancel(msg, selftests); genlmsg_cancel: genlmsg_cancel(msg, hdr); free_msg: nlmsg_free(msg); return err; } |
| 3 6 2 5 3 3 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 | // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/namei.h> #include <linux/io_uring.h> #include <linux/splice.h> #include <uapi/linux/io_uring.h> #include "filetable.h" #include "io_uring.h" #include "splice.h" struct io_splice { struct file *file_out; loff_t off_out; loff_t off_in; u64 len; int splice_fd_in; unsigned int flags; struct io_rsrc_node *rsrc_node; }; static int __io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL; sp->len = READ_ONCE(sqe->len); sp->flags = READ_ONCE(sqe->splice_flags); if (unlikely(sp->flags & ~valid_flags)) return -EINVAL; sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in); sp->rsrc_node = NULL; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_tee_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off)) return -EINVAL; return __io_splice_prep(req, sqe); } void io_splice_cleanup(struct io_kiocb *req) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); if (sp->rsrc_node) io_put_rsrc_node(req->ctx, sp->rsrc_node); } static struct file *io_splice_get_file(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); struct 
io_ring_ctx *ctx = req->ctx; struct io_rsrc_node *node; struct file *file = NULL; if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) return io_file_get_normal(req, sp->splice_fd_in); io_ring_submit_lock(ctx, issue_flags); node = io_rsrc_node_lookup(&ctx->file_table.data, sp->splice_fd_in); if (node) { node->refs++; sp->rsrc_node = node; file = io_slot_file(node); req->flags |= REQ_F_NEED_CLEANUP; } io_ring_submit_unlock(ctx, issue_flags); return file; } int io_tee(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; struct file *in; ssize_t ret = 0; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); in = io_splice_get_file(req, issue_flags); if (!in) { ret = -EBADF; goto done; } if (sp->len) ret = do_tee(in, out, sp->len, flags); if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) fput(in); done: if (ret != sp->len) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_COMPLETE; } int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); sp->off_in = READ_ONCE(sqe->splice_off_in); sp->off_out = READ_ONCE(sqe->off); return __io_splice_prep(req, sqe); } int io_splice(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; loff_t *poff_in, *poff_out; struct file *in; ssize_t ret = 0; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); in = io_splice_get_file(req, issue_flags); if (!in) { ret = -EBADF; goto done; } poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; poff_out = (sp->off_out == -1) ? 
NULL : &sp->off_out; if (sp->len) ret = do_splice(in, poff_in, out, poff_out, sp->len, flags); if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) fput(in); done: if (ret != sp->len) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_COMPLETE; } |
| 5 5 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 | // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/slab.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/spinlock.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/list.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/init.h> static LIST_HEAD(ax25_dev_list); DEFINE_SPINLOCK(ax25_dev_lock); ax25_dev *ax25_addr_ax25dev(ax25_address *addr) { ax25_dev *ax25_dev, *res = NULL; spin_lock_bh(&ax25_dev_lock); list_for_each_entry(ax25_dev, &ax25_dev_list, list) if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; ax25_dev_hold(ax25_dev); break; } spin_unlock_bh(&ax25_dev_lock); return res; } /* * This is called when an interface is brought up. These are * reasonable defaults. 
*/ void ax25_dev_device_up(struct net_device *dev) { ax25_dev *ax25_dev; ax25_dev = kzalloc(sizeof(*ax25_dev), GFP_KERNEL); if (!ax25_dev) { printk(KERN_ERR "AX.25: ax25_dev_device_up - out of memory\n"); return; } refcount_set(&ax25_dev->refcount, 1); ax25_dev->dev = dev; netdev_hold(dev, &ax25_dev->dev_tracker, GFP_KERNEL); ax25_dev->forward = NULL; ax25_dev->device_up = true; ax25_dev->values[AX25_VALUES_IPDEFMODE] = AX25_DEF_IPDEFMODE; ax25_dev->values[AX25_VALUES_AXDEFMODE] = AX25_DEF_AXDEFMODE; ax25_dev->values[AX25_VALUES_BACKOFF] = AX25_DEF_BACKOFF; ax25_dev->values[AX25_VALUES_CONMODE] = AX25_DEF_CONMODE; ax25_dev->values[AX25_VALUES_WINDOW] = AX25_DEF_WINDOW; ax25_dev->values[AX25_VALUES_EWINDOW] = AX25_DEF_EWINDOW; ax25_dev->values[AX25_VALUES_T1] = AX25_DEF_T1; ax25_dev->values[AX25_VALUES_T2] = AX25_DEF_T2; ax25_dev->values[AX25_VALUES_T3] = AX25_DEF_T3; ax25_dev->values[AX25_VALUES_IDLE] = AX25_DEF_IDLE; ax25_dev->values[AX25_VALUES_N2] = AX25_DEF_N2; ax25_dev->values[AX25_VALUES_PACLEN] = AX25_DEF_PACLEN; ax25_dev->values[AX25_VALUES_PROTOCOL] = AX25_DEF_PROTOCOL; #ifdef CONFIG_AX25_DAMA_SLAVE ax25_dev->values[AX25_VALUES_DS_TIMEOUT]= AX25_DEF_DS_TIMEOUT; #endif #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) ax25_ds_setup_timer(ax25_dev); #endif spin_lock_bh(&ax25_dev_lock); list_add(&ax25_dev->list, &ax25_dev_list); rcu_assign_pointer(dev->ax25_ptr, ax25_dev); spin_unlock_bh(&ax25_dev_lock); ax25_register_dev_sysctl(ax25_dev); } void ax25_dev_device_down(struct net_device *dev) { ax25_dev *s, *ax25_dev; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; ax25_unregister_dev_sysctl(ax25_dev); spin_lock_bh(&ax25_dev_lock); #ifdef CONFIG_AX25_DAMA_SLAVE timer_shutdown_sync(&ax25_dev->dama.slave_timer); #endif /* * Remove any packet forwarding that points to this device. 
*/ list_for_each_entry(s, &ax25_dev_list, list) if (s->forward == dev) s->forward = NULL; list_for_each_entry(s, &ax25_dev_list, list) { if (s == ax25_dev) { list_del(&s->list); break; } } RCU_INIT_POINTER(dev->ax25_ptr, NULL); spin_unlock_bh(&ax25_dev_lock); netdev_put(dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) { ax25_dev *ax25_dev, *fwd_dev; if ((ax25_dev = ax25_addr_ax25dev(&fwd->port_from)) == NULL) return -EINVAL; switch (cmd) { case SIOCAX25ADDFWD: fwd_dev = ax25_addr_ax25dev(&fwd->port_to); if (!fwd_dev) { ax25_dev_put(ax25_dev); return -EINVAL; } if (ax25_dev->forward) { ax25_dev_put(fwd_dev); ax25_dev_put(ax25_dev); return -EINVAL; } ax25_dev->forward = fwd_dev->dev; ax25_dev_put(fwd_dev); ax25_dev_put(ax25_dev); break; case SIOCAX25DELFWD: if (!ax25_dev->forward) { ax25_dev_put(ax25_dev); return -EINVAL; } ax25_dev->forward = NULL; ax25_dev_put(ax25_dev); break; default: ax25_dev_put(ax25_dev); return -EINVAL; } return 0; } struct net_device *ax25_fwd_dev(struct net_device *dev) { ax25_dev *ax25_dev; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return dev; if (ax25_dev->forward == NULL) return dev; return ax25_dev->forward; } /* * Free all memory associated with device structures. */ void __exit ax25_dev_free(void) { ax25_dev *s, *n; spin_lock_bh(&ax25_dev_lock); list_for_each_entry_safe(s, n, &ax25_dev_list, list) { netdev_put(s->dev, &s->dev_tracker); list_del(&s->list); ax25_dev_put(s); } spin_unlock_bh(&ax25_dev_lock); } |
| 2 3 3 3 3 18 3 3 3 18 18 18 18 13 18 18 13 13 18 10 10 10 9 10 10 10 10 10 10 10 10 10 10 10 9 1 8 1 7 1 6 6 6 6 6 3 3 3 6 3 3 6 2 6 10 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 14 13 12 2 10 10 9 2 14 10 10 10 10 10 10 10 10 10 10 10 10 10 10 1 10 10 10 10 10 1 10 16 8 16 3 16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 
458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 | // 
SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_gred.c Generic Random Early Detection queue. * * Authors: J Hadi Salim (hadi@cyberus.ca) 1998-2002 * * 991129: - Bug fix with grio mode * - a better sing. AvgQ mode with Grio(WRED) * - A finer grained VQ dequeue based on suggestion * from Ren Liu * - More error checks * * For all the glorious comments look at include/net/red.h */ #include <linux/slab.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <net/pkt_cls.h> #include <net/pkt_sched.h> #include <net/red.h> #define GRED_DEF_PRIO (MAX_DPs / 2) #define GRED_VQ_MASK (MAX_DPs - 1) #define GRED_VQ_RED_FLAGS (TC_RED_ECN | TC_RED_HARDDROP) struct gred_sched_data; struct gred_sched; struct gred_sched_data { u32 limit; /* HARD maximal queue length */ u32 DP; /* the drop parameters */ u32 red_flags; /* virtualQ version of red_flags */ u64 bytesin; /* bytes seen on virtualQ so far*/ u32 packetsin; /* packets seen on virtualQ so far*/ u32 backlog; /* bytes on the virtualQ */ u8 prio; /* the prio of this vq */ struct red_parms parms; struct red_vars vars; struct red_stats stats; }; enum { GRED_WRED_MODE = 1, GRED_RIO_MODE, }; struct gred_sched { struct gred_sched_data *tab[MAX_DPs]; unsigned long flags; u32 red_flags; u32 DPs; u32 def; struct red_vars wred_set; struct tc_gred_qopt_offload *opt; }; static inline int gred_wred_mode(struct gred_sched *table) { return test_bit(GRED_WRED_MODE, &table->flags); } static inline void gred_enable_wred_mode(struct gred_sched *table) { __set_bit(GRED_WRED_MODE, &table->flags); } static inline void gred_disable_wred_mode(struct gred_sched *table) { __clear_bit(GRED_WRED_MODE, &table->flags); } static inline int gred_rio_mode(struct gred_sched *table) { return test_bit(GRED_RIO_MODE, &table->flags); } static inline void gred_enable_rio_mode(struct gred_sched *table) { __set_bit(GRED_RIO_MODE, &table->flags); } static inline void gred_disable_rio_mode(struct 
gred_sched *table) { __clear_bit(GRED_RIO_MODE, &table->flags); } static inline int gred_wred_mode_check(struct Qdisc *sch) { struct gred_sched *table = qdisc_priv(sch); int i; /* Really ugly O(n^2) but shouldn't be necessary too frequent. */ for (i = 0; i < table->DPs; i++) { struct gred_sched_data *q = table->tab[i]; int n; if (q == NULL) continue; for (n = i + 1; n < table->DPs; n++) if (table->tab[n] && table->tab[n]->prio == q->prio) return 1; } return 0; } static inline unsigned int gred_backlog(struct gred_sched *table, struct gred_sched_data *q, struct Qdisc *sch) { if (gred_wred_mode(table)) return sch->qstats.backlog; else return q->backlog; } static inline u16 tc_index_to_dp(struct sk_buff *skb) { return skb->tc_index & GRED_VQ_MASK; } static inline void gred_load_wred_set(const struct gred_sched *table, struct gred_sched_data *q) { q->vars.qavg = table->wred_set.qavg; q->vars.qidlestart = table->wred_set.qidlestart; } static inline void gred_store_wred_set(struct gred_sched *table, struct gred_sched_data *q) { table->wred_set.qavg = q->vars.qavg; table->wred_set.qidlestart = q->vars.qidlestart; } static int gred_use_ecn(struct gred_sched_data *q) { return q->red_flags & TC_RED_ECN; } static int gred_use_harddrop(struct gred_sched_data *q) { return q->red_flags & TC_RED_HARDDROP; } static bool gred_per_vq_red_flags_used(struct gred_sched *table) { unsigned int i; /* Local per-vq flags couldn't have been set unless global are 0 */ if (table->red_flags) return false; for (i = 0; i < MAX_DPs; i++) if (table->tab[i] && table->tab[i]->red_flags) return true; return false; } static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct gred_sched_data *q = NULL; struct gred_sched *t = qdisc_priv(sch); unsigned long qavg = 0; u16 dp = tc_index_to_dp(skb); if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { dp = t->def; q = t->tab[dp]; if (!q) { /* Pass through packets not assigned to a DP * if no default DP has been configured. 
This * allows for DP flows to be left untouched. */ if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit)) return qdisc_enqueue_tail(skb, sch); else goto drop; } /* fix tc_index? --could be controversial but needed for requeueing */ skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp; } /* sum up all the qaves of prios < ours to get the new qave */ if (!gred_wred_mode(t) && gred_rio_mode(t)) { int i; for (i = 0; i < t->DPs; i++) { if (t->tab[i] && t->tab[i]->prio < q->prio && !red_is_idling(&t->tab[i]->vars)) qavg += t->tab[i]->vars.qavg; } } q->packetsin++; q->bytesin += qdisc_pkt_len(skb); if (gred_wred_mode(t)) gred_load_wred_set(t, q); q->vars.qavg = red_calc_qavg(&q->parms, &q->vars, gred_backlog(t, q, sch)); if (red_is_idling(&q->vars)) red_end_of_idle_period(&q->vars); if (gred_wred_mode(t)) gred_store_wred_set(t, q); switch (red_action(&q->parms, &q->vars, q->vars.qavg + qavg)) { case RED_DONT_MARK: break; case RED_PROB_MARK: qdisc_qstats_overlimit(sch); if (!gred_use_ecn(q) || !INET_ECN_set_ce(skb)) { q->stats.prob_drop++; goto congestion_drop; } q->stats.prob_mark++; break; case RED_HARD_MARK: qdisc_qstats_overlimit(sch); if (gred_use_harddrop(q) || !gred_use_ecn(q) || !INET_ECN_set_ce(skb)) { q->stats.forced_drop++; goto congestion_drop; } q->stats.forced_mark++; break; } if (gred_backlog(t, q, sch) + qdisc_pkt_len(skb) <= q->limit) { q->backlog += qdisc_pkt_len(skb); return qdisc_enqueue_tail(skb, sch); } q->stats.pdrop++; drop: return qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); congestion_drop: qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_CONGESTED); return NET_XMIT_CN; } static struct sk_buff *gred_dequeue(struct Qdisc *sch) { struct sk_buff *skb; struct gred_sched *t = qdisc_priv(sch); skb = qdisc_dequeue_head(sch); if (skb) { struct gred_sched_data *q; u16 dp = tc_index_to_dp(skb); if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x after 
dequeue, screwing up backlog\n", tc_index_to_dp(skb)); } else { q->backlog -= qdisc_pkt_len(skb); if (gred_wred_mode(t)) { if (!sch->qstats.backlog) red_start_of_idle_period(&t->wred_set); } else { if (!q->backlog) red_start_of_idle_period(&q->vars); } } return skb; } return NULL; } static void gred_reset(struct Qdisc *sch) { int i; struct gred_sched *t = qdisc_priv(sch); qdisc_reset_queue(sch); for (i = 0; i < t->DPs; i++) { struct gred_sched_data *q = t->tab[i]; if (!q) continue; red_restart(&q->vars); q->backlog = 0; } } static void gred_offload(struct Qdisc *sch, enum tc_gred_command command) { struct gred_sched *table = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_gred_qopt_offload *opt = table->opt; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; memset(opt, 0, sizeof(*opt)); opt->command = command; opt->handle = sch->handle; opt->parent = sch->parent; if (command == TC_GRED_REPLACE) { unsigned int i; opt->set.grio_on = gred_rio_mode(table); opt->set.wred_on = gred_wred_mode(table); opt->set.dp_cnt = table->DPs; opt->set.dp_def = table->def; for (i = 0; i < table->DPs; i++) { struct gred_sched_data *q = table->tab[i]; if (!q) continue; opt->set.tab[i].present = true; opt->set.tab[i].limit = q->limit; opt->set.tab[i].prio = q->prio; opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog; opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog; opt->set.tab[i].is_ecn = gred_use_ecn(q); opt->set.tab[i].is_harddrop = gred_use_harddrop(q); opt->set.tab[i].probability = q->parms.max_P; opt->set.tab[i].backlog = &q->backlog; } opt->set.qstats = &sch->qstats; } dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt); } static int gred_offload_dump_stats(struct Qdisc *sch) { struct gred_sched *table = qdisc_priv(sch); struct tc_gred_qopt_offload *hw_stats; u64 bytes = 0, packets = 0; unsigned int i; int ret; hw_stats = kzalloc(sizeof(*hw_stats), GFP_KERNEL); if (!hw_stats) return -ENOMEM; hw_stats->command = 
TC_GRED_STATS; hw_stats->handle = sch->handle; hw_stats->parent = sch->parent; for (i = 0; i < MAX_DPs; i++) { gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]); if (table->tab[i]) hw_stats->stats.xstats[i] = &table->tab[i]->stats; } ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats); /* Even if driver returns failure adjust the stats - in case offload * ended but driver still wants to adjust the values. */ sch_tree_lock(sch); for (i = 0; i < MAX_DPs; i++) { if (!table->tab[i]) continue; table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets); table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes); table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog; bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes); packets += u64_stats_read(&hw_stats->stats.bstats[i].packets); sch->qstats.qlen += hw_stats->stats.qstats[i].qlen; sch->qstats.backlog += hw_stats->stats.qstats[i].backlog; sch->qstats.drops += hw_stats->stats.qstats[i].drops; sch->qstats.requeues += hw_stats->stats.qstats[i].requeues; sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits; } _bstats_update(&sch->bstats, bytes, packets); sch_tree_unlock(sch); kfree(hw_stats); return ret; } static inline void gred_destroy_vq(struct gred_sched_data *q) { kfree(q); } static int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps, struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct tc_gred_sopt *sopt; bool red_flags_changed; int i; if (!dps) return -EINVAL; sopt = nla_data(dps); if (sopt->DPs > MAX_DPs) { NL_SET_ERR_MSG_MOD(extack, "number of virtual queues too high"); return -EINVAL; } if (sopt->DPs == 0) { NL_SET_ERR_MSG_MOD(extack, "number of virtual queues can't be 0"); return -EINVAL; } if (sopt->def_DP >= sopt->DPs) { NL_SET_ERR_MSG_MOD(extack, "default virtual queue above virtual queue count"); return -EINVAL; } if (sopt->flags && gred_per_vq_red_flags_used(table)) { 
NL_SET_ERR_MSG_MOD(extack, "can't set per-Qdisc RED flags when per-virtual queue flags are used"); return -EINVAL; } sch_tree_lock(sch); table->DPs = sopt->DPs; table->def = sopt->def_DP; red_flags_changed = table->red_flags != sopt->flags; table->red_flags = sopt->flags; /* * Every entry point to GRED is synchronized with the above code * and the DP is checked against DPs, i.e. shadowed VQs can no * longer be found so we can unlock right here. */ sch_tree_unlock(sch); if (sopt->grio) { gred_enable_rio_mode(table); gred_disable_wred_mode(table); if (gred_wred_mode_check(sch)) gred_enable_wred_mode(table); } else { gred_disable_rio_mode(table); gred_disable_wred_mode(table); } if (red_flags_changed) for (i = 0; i < table->DPs; i++) if (table->tab[i]) table->tab[i]->red_flags = table->red_flags & GRED_VQ_RED_FLAGS; for (i = table->DPs; i < MAX_DPs; i++) { if (table->tab[i]) { pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n", i); gred_destroy_vq(table->tab[i]); table->tab[i] = NULL; } } gred_offload(sch, TC_GRED_REPLACE); return 0; } static inline int gred_change_vq(struct Qdisc *sch, int dp, struct tc_gred_qopt *ctl, int prio, u8 *stab, u32 max_P, struct gred_sched_data **prealloc, struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct gred_sched_data *q = table->tab[dp]; if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab)) { NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters"); return -EINVAL; } if (!q) { table->tab[dp] = q = *prealloc; *prealloc = NULL; if (!q) return -ENOMEM; q->red_flags = table->red_flags & GRED_VQ_RED_FLAGS; } q->DP = dp; q->prio = prio; if (ctl->limit > sch->limit) q->limit = sch->limit; else q->limit = ctl->limit; if (q->backlog == 0) red_end_of_idle_period(&q->vars); red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, ctl->Scell_log, stab, max_P); red_set_vars(&q->vars); return 0; } static const struct nla_policy gred_vq_policy[TCA_GRED_VQ_MAX + 1] = { 
[TCA_GRED_VQ_DP] = { .type = NLA_U32 }, [TCA_GRED_VQ_FLAGS] = { .type = NLA_U32 }, }; static const struct nla_policy gred_vqe_policy[TCA_GRED_VQ_ENTRY_MAX + 1] = { [TCA_GRED_VQ_ENTRY] = { .type = NLA_NESTED }, }; static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = { [TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) }, [TCA_GRED_STAB] = { .len = 256 }, [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) }, [TCA_GRED_MAX_P] = { .type = NLA_U32 }, [TCA_GRED_LIMIT] = { .type = NLA_U32 }, [TCA_GRED_VQ_LIST] = { .type = NLA_NESTED }, }; static void gred_vq_apply(struct gred_sched *table, const struct nlattr *entry) { struct nlattr *tb[TCA_GRED_VQ_MAX + 1]; u32 dp; nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, NULL); dp = nla_get_u32(tb[TCA_GRED_VQ_DP]); if (tb[TCA_GRED_VQ_FLAGS]) table->tab[dp]->red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]); } static void gred_vqs_apply(struct gred_sched *table, struct nlattr *vqs) { const struct nlattr *attr; int rem; nla_for_each_nested(attr, vqs, rem) { switch (nla_type(attr)) { case TCA_GRED_VQ_ENTRY: gred_vq_apply(table, attr); break; } } } static int gred_vq_validate(struct gred_sched *table, u32 cdp, const struct nlattr *entry, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_GRED_VQ_MAX + 1]; int err; u32 dp; err = nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, extack); if (err < 0) return err; if (!tb[TCA_GRED_VQ_DP]) { NL_SET_ERR_MSG_MOD(extack, "Virtual queue with no index specified"); return -EINVAL; } dp = nla_get_u32(tb[TCA_GRED_VQ_DP]); if (dp >= table->DPs) { NL_SET_ERR_MSG_MOD(extack, "Virtual queue with index out of bounds"); return -EINVAL; } if (dp != cdp && !table->tab[dp]) { NL_SET_ERR_MSG_MOD(extack, "Virtual queue not yet instantiated"); return -EINVAL; } if (tb[TCA_GRED_VQ_FLAGS]) { u32 red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]); if (table->red_flags && table->red_flags != red_flags) { NL_SET_ERR_MSG_MOD(extack, "can't change 
per-virtual queue RED flags when per-Qdisc flags are used"); return -EINVAL; } if (red_flags & ~GRED_VQ_RED_FLAGS) { NL_SET_ERR_MSG_MOD(extack, "invalid RED flags specified"); return -EINVAL; } } return 0; } static int gred_vqs_validate(struct gred_sched *table, u32 cdp, struct nlattr *vqs, struct netlink_ext_ack *extack) { const struct nlattr *attr; int rem, err; err = nla_validate_nested_deprecated(vqs, TCA_GRED_VQ_ENTRY_MAX, gred_vqe_policy, extack); if (err < 0) return err; nla_for_each_nested(attr, vqs, rem) { switch (nla_type(attr)) { case TCA_GRED_VQ_ENTRY: err = gred_vq_validate(table, cdp, attr, extack); if (err) return err; break; default: NL_SET_ERR_MSG_MOD(extack, "GRED_VQ_LIST can contain only entry attributes"); return -EINVAL; } } if (rem > 0) { NL_SET_ERR_MSG_MOD(extack, "Trailing data after parsing virtual queue list"); return -EINVAL; } return 0; } static int gred_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct tc_gred_qopt *ctl; struct nlattr *tb[TCA_GRED_MAX + 1]; int err, prio = GRED_DEF_PRIO; u8 *stab; u32 max_P; struct gred_sched_data *prealloc; err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy, extack); if (err < 0) return err; if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { if (tb[TCA_GRED_LIMIT] != NULL) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack); } if (tb[TCA_GRED_PARMS] == NULL || tb[TCA_GRED_STAB] == NULL || tb[TCA_GRED_LIMIT] != NULL) { NL_SET_ERR_MSG_MOD(extack, "can't configure Qdisc and virtual queue at the same time"); return -EINVAL; } max_P = nla_get_u32_default(tb[TCA_GRED_MAX_P], 0); ctl = nla_data(tb[TCA_GRED_PARMS]); stab = nla_data(tb[TCA_GRED_STAB]); if (ctl->DP >= table->DPs) { NL_SET_ERR_MSG_MOD(extack, "virtual queue index above virtual queue count"); return -EINVAL; } if (tb[TCA_GRED_VQ_LIST]) { err = gred_vqs_validate(table, ctl->DP, 
tb[TCA_GRED_VQ_LIST], extack); if (err) return err; } if (gred_rio_mode(table)) { if (ctl->prio == 0) { int def_prio = GRED_DEF_PRIO; if (table->tab[table->def]) def_prio = table->tab[table->def]->prio; printk(KERN_DEBUG "GRED: DP %u does not have a prio " "setting default to %d\n", ctl->DP, def_prio); prio = def_prio; } else prio = ctl->prio; } prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); sch_tree_lock(sch); err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc, extack); if (err < 0) goto err_unlock_free; if (tb[TCA_GRED_VQ_LIST]) gred_vqs_apply(table, tb[TCA_GRED_VQ_LIST]); if (gred_rio_mode(table)) { gred_disable_wred_mode(table); if (gred_wred_mode_check(sch)) gred_enable_wred_mode(table); } sch_tree_unlock(sch); kfree(prealloc); gred_offload(sch, TC_GRED_REPLACE); return 0; err_unlock_free: sch_tree_unlock(sch); kfree(prealloc); return err; } static int gred_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct nlattr *tb[TCA_GRED_MAX + 1]; int err; if (!opt) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy, extack); if (err < 0) return err; if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) { NL_SET_ERR_MSG_MOD(extack, "virtual queue configuration can't be specified at initialization time"); return -EINVAL; } if (tb[TCA_GRED_LIMIT]) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); else sch->limit = qdisc_dev(sch)->tx_queue_len * psched_mtu(qdisc_dev(sch)); if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) { table->opt = kzalloc(sizeof(*table->opt), GFP_KERNEL); if (!table->opt) return -ENOMEM; } return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack); } static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) { struct gred_sched *table = qdisc_priv(sch); struct nlattr *parms, *vqs, *opts = NULL; int i; u32 max_p[MAX_DPs]; struct tc_gred_sopt sopt = { .DPs = table->DPs, .def_DP = table->def, .grio = gred_rio_mode(table), .flags = 
table->red_flags, }; if (gred_offload_dump_stats(sch)) goto nla_put_failure; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt)) goto nla_put_failure; for (i = 0; i < MAX_DPs; i++) { struct gred_sched_data *q = table->tab[i]; max_p[i] = q ? q->parms.max_P : 0; } if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit)) goto nla_put_failure; /* Old style all-in-one dump of VQs */ parms = nla_nest_start_noflag(skb, TCA_GRED_PARMS); if (parms == NULL) goto nla_put_failure; for (i = 0; i < MAX_DPs; i++) { struct gred_sched_data *q = table->tab[i]; struct tc_gred_qopt opt; unsigned long qavg; memset(&opt, 0, sizeof(opt)); if (!q) { /* hack -- fix at some point with proper message This is how we indicate to tc that there is no VQ at this DP */ opt.DP = MAX_DPs + i; goto append_opt; } opt.limit = q->limit; opt.DP = q->DP; opt.backlog = gred_backlog(table, q, sch); opt.prio = q->prio; opt.qth_min = q->parms.qth_min >> q->parms.Wlog; opt.qth_max = q->parms.qth_max >> q->parms.Wlog; opt.Wlog = q->parms.Wlog; opt.Plog = q->parms.Plog; opt.Scell_log = q->parms.Scell_log; opt.early = q->stats.prob_drop; opt.forced = q->stats.forced_drop; opt.pdrop = q->stats.pdrop; opt.packets = q->packetsin; opt.bytesin = q->bytesin; if (gred_wred_mode(table)) gred_load_wred_set(table, q); qavg = red_calc_qavg(&q->parms, &q->vars, q->vars.qavg >> q->parms.Wlog); opt.qave = qavg >> q->parms.Wlog; append_opt: if (nla_append(skb, sizeof(opt), &opt) < 0) goto nla_put_failure; } nla_nest_end(skb, parms); /* Dump the VQs again, in more structured way */ vqs = nla_nest_start_noflag(skb, TCA_GRED_VQ_LIST); if (!vqs) goto nla_put_failure; for (i = 0; i < MAX_DPs; i++) { struct gred_sched_data *q = table->tab[i]; struct nlattr *vq; if (!q) continue; vq = nla_nest_start_noflag(skb, TCA_GRED_VQ_ENTRY); if (!vq) goto nla_put_failure; if 
(nla_put_u32(skb, TCA_GRED_VQ_DP, q->DP)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_FLAGS, q->red_flags)) goto nla_put_failure; /* Stats */ if (nla_put_u64_64bit(skb, TCA_GRED_VQ_STAT_BYTES, q->bytesin, TCA_GRED_VQ_PAD)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PACKETS, q->packetsin)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_BACKLOG, gred_backlog(table, q, sch))) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_DROP, q->stats.prob_drop)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_MARK, q->stats.prob_mark)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_DROP, q->stats.forced_drop)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_MARK, q->stats.forced_mark)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PDROP, q->stats.pdrop)) goto nla_put_failure; nla_nest_end(skb, vq); } nla_nest_end(skb, vqs); return nla_nest_end(skb, opts); nla_put_failure: nla_nest_cancel(skb, opts); return -EMSGSIZE; } static void gred_destroy(struct Qdisc *sch) { struct gred_sched *table = qdisc_priv(sch); int i; for (i = 0; i < table->DPs; i++) gred_destroy_vq(table->tab[i]); if (table->opt) gred_offload(sch, TC_GRED_DESTROY); kfree(table->opt); } static struct Qdisc_ops gred_qdisc_ops __read_mostly = { .id = "gred", .priv_size = sizeof(struct gred_sched), .enqueue = gred_enqueue, .dequeue = gred_dequeue, .peek = qdisc_peek_head, .init = gred_init, .reset = gred_reset, .destroy = gred_destroy, .change = gred_change, .dump = gred_dump, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("gred"); static int __init gred_module_init(void) { return register_qdisc(&gred_qdisc_ops); } static void __exit gred_module_exit(void) { unregister_qdisc(&gred_qdisc_ops); } module_init(gred_module_init) module_exit(gred_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Generic Random Early Detection qdisc"); |
| 12 54 162 1754 6908 1714 150 251 10 46 54 58 57 15 56 56 54 14 16 93 7 7 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 | /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/userfaultfd_k.h * * Copyright (C) 2015 
Red Hat, Inc. * */ #ifndef _LINUX_USERFAULTFD_K_H #define _LINUX_USERFAULTFD_K_H #ifdef CONFIG_USERFAULTFD #include <linux/userfaultfd.h> /* linux/include/uapi/linux/userfaultfd.h */ #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/swapops.h> #include <asm-generic/pgtable_uffd.h> #include <linux/hugetlb_inline.h> /* The set of all possible UFFD-related VM flags. */ #define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR) /* * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining * new flags, since they might collide with O_* ones. We want * to re-use O_* flags that couldn't possibly have a meaning * from userfaultfd, in order to leave a free define-space for * shared O_* flags. */ #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) /* * Start with fault_pending_wqh and fault_wqh so they're more likely * to be in the same cacheline. * * Locking order: * fd_wqh.lock * fault_pending_wqh.lock * fault_wqh.lock * event_wqh.lock * * To avoid deadlocks, IRQs must be disabled when taking any of the above locks, * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's * also taken in IRQ context. */ struct userfaultfd_ctx { /* waitqueue head for the pending (i.e. not read) userfaults */ wait_queue_head_t fault_pending_wqh; /* waitqueue head for the userfaults */ wait_queue_head_t fault_wqh; /* waitqueue head for the pseudo fd to wakeup poll/read */ wait_queue_head_t fd_wqh; /* waitqueue head for events */ wait_queue_head_t event_wqh; /* a refile sequence protected by fault_pending_wqh lock */ seqcount_spinlock_t refile_seq; /* pseudo fd refcounting */ refcount_t refcount; /* userfaultfd syscall flags */ unsigned int flags; /* features requested from the userspace */ unsigned int features; /* released */ bool released; /* * Prevents userfaultfd operations (fill/move/wp) from happening while * some non-cooperative event(s) is taking place. Increments are done * in write-mode. 
Whereas, userfaultfd operations, which includes * reading mmap_changing, is done under read-mode. */ struct rw_semaphore map_changing_lock; /* memory mappings are changing because of non-cooperative event */ atomic_t mmap_changing; /* mm with one or more vmas attached to this userfaultfd_ctx */ struct mm_struct *mm; }; extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); /* A combined operation mode + behavior flags. */ typedef unsigned int __bitwise uffd_flags_t; /* Mutually exclusive modes of operation. */ enum mfill_atomic_mode { MFILL_ATOMIC_COPY, MFILL_ATOMIC_ZEROPAGE, MFILL_ATOMIC_CONTINUE, MFILL_ATOMIC_POISON, NR_MFILL_ATOMIC_MODES, }; #define MFILL_ATOMIC_MODE_BITS (const_ilog2(NR_MFILL_ATOMIC_MODES - 1) + 1) #define MFILL_ATOMIC_BIT(nr) BIT(MFILL_ATOMIC_MODE_BITS + (nr)) #define MFILL_ATOMIC_FLAG(nr) ((__force uffd_flags_t) MFILL_ATOMIC_BIT(nr)) #define MFILL_ATOMIC_MODE_MASK ((__force uffd_flags_t) (MFILL_ATOMIC_BIT(0) - 1)) static inline bool uffd_flags_mode_is(uffd_flags_t flags, enum mfill_atomic_mode expected) { return (flags & MFILL_ATOMIC_MODE_MASK) == ((__force uffd_flags_t) expected); } static inline uffd_flags_t uffd_flags_set_mode(uffd_flags_t flags, enum mfill_atomic_mode mode) { flags &= ~MFILL_ATOMIC_MODE_MASK; return flags | ((__force uffd_flags_t) mode); } /* Flags controlling behavior. These behavior changes are mode-independent. 
*/ #define MFILL_ATOMIC_WP MFILL_ATOMIC_FLAG(0) extern int mfill_atomic_install_pte(pmd_t *dst_pmd, struct vm_area_struct *dst_vma, unsigned long dst_addr, struct page *page, bool newly_allocated, uffd_flags_t flags); extern ssize_t mfill_atomic_copy(struct userfaultfd_ctx *ctx, unsigned long dst_start, unsigned long src_start, unsigned long len, uffd_flags_t flags); extern ssize_t mfill_atomic_zeropage(struct userfaultfd_ctx *ctx, unsigned long dst_start, unsigned long len); extern ssize_t mfill_atomic_continue(struct userfaultfd_ctx *ctx, unsigned long dst_start, unsigned long len, uffd_flags_t flags); extern ssize_t mfill_atomic_poison(struct userfaultfd_ctx *ctx, unsigned long start, unsigned long len, uffd_flags_t flags); extern int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start, unsigned long len, bool enable_wp); extern long uffd_wp_range(struct vm_area_struct *vma, unsigned long start, unsigned long len, bool enable_wp); /* move_pages */ void double_pt_lock(spinlock_t *ptl1, spinlock_t *ptl2); void double_pt_unlock(spinlock_t *ptl1, spinlock_t *ptl2); ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, unsigned long src_start, unsigned long len, __u64 flags); int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pmd_t dst_pmdval, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, unsigned long dst_addr, unsigned long src_addr); /* mm helpers */ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, struct vm_userfaultfd_ctx vm_ctx) { return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx; } /* * Never enable huge pmd sharing on some uffd registered vmas: * * - VM_UFFD_WP VMAs, because write protect information is per pgtable entry. * * - VM_UFFD_MINOR VMAs, because otherwise we would never get minor faults for * VMAs which share huge pmds. 
(If you have two mappings to the same * underlying pages, and fault in the non-UFFD-registered one with a write, * with huge pmd sharing this would *also* setup the second UFFD-registered * mapping, and we'd not get minor faults.) */ static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma) { return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); } /* * Don't do fault around for either WP or MINOR registered uffd range. For * MINOR registered range, fault around will be a total disaster and ptes can * be installed without notifications; for WP it should mostly be fine as long * as the fault around checks for pte_none() before the installation, however * to be super safe we just forbid it. */ static inline bool uffd_disable_fault_around(struct vm_area_struct *vma) { return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); } static inline bool userfaultfd_missing(struct vm_area_struct *vma) { return vma->vm_flags & VM_UFFD_MISSING; } static inline bool userfaultfd_wp(struct vm_area_struct *vma) { return vma->vm_flags & VM_UFFD_WP; } static inline bool userfaultfd_minor(struct vm_area_struct *vma) { return vma->vm_flags & VM_UFFD_MINOR; } static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, pte_t pte) { return userfaultfd_wp(vma) && pte_uffd_wp(pte); } static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, pmd_t pmd) { return userfaultfd_wp(vma) && pmd_uffd_wp(pmd); } static inline bool userfaultfd_armed(struct vm_area_struct *vma) { return vma->vm_flags & __VM_UFFD_FLAGS; } static inline bool vma_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags, bool wp_async) { vm_flags &= __VM_UFFD_FLAGS; if (vma->vm_flags & VM_DROPPABLE) return false; if ((vm_flags & VM_UFFD_MINOR) && (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) return false; /* * If wp async enabled, and WP is the only mode enabled, allow any * memory type. 
*/ if (wp_async && (vm_flags == VM_UFFD_WP)) return true; #ifndef CONFIG_PTE_MARKER_UFFD_WP /* * If user requested uffd-wp but not enabled pte markers for * uffd-wp, then shmem & hugetlbfs are not supported but only * anonymous. */ if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) return false; #endif /* By default, allow any of anon|shmem|hugetlb */ return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) || vma_is_shmem(vma); } static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) { struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx; return uffd_ctx && (uffd_ctx->features & UFFD_FEATURE_EVENT_REMAP) == 0; } extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); extern void dup_userfaultfd_complete(struct list_head *); void dup_userfaultfd_fail(struct list_head *); extern void mremap_userfaultfd_prep(struct vm_area_struct *, struct vm_userfaultfd_ctx *); extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *); extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *uf); extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); extern bool userfaultfd_wp_async(struct vm_area_struct *vma); void userfaultfd_reset_ctx(struct vm_area_struct *vma); struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *vma, unsigned long start, unsigned long end); int userfaultfd_register_range(struct userfaultfd_ctx *ctx, struct vm_area_struct *vma, vm_flags_t vm_flags, unsigned long start, unsigned long end, bool wp_async); void userfaultfd_release_new(struct userfaultfd_ctx 
*ctx); void userfaultfd_release_all(struct mm_struct *mm, struct userfaultfd_ctx *ctx); #else /* CONFIG_USERFAULTFD */ /* mm helpers */ static inline vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) { return VM_FAULT_SIGBUS; } static inline long uffd_wp_range(struct vm_area_struct *vma, unsigned long start, unsigned long len, bool enable_wp) { return false; } static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, struct vm_userfaultfd_ctx vm_ctx) { return true; } static inline bool userfaultfd_missing(struct vm_area_struct *vma) { return false; } static inline bool userfaultfd_wp(struct vm_area_struct *vma) { return false; } static inline bool userfaultfd_minor(struct vm_area_struct *vma) { return false; } static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, pte_t pte) { return false; } static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, pmd_t pmd) { return false; } static inline bool userfaultfd_armed(struct vm_area_struct *vma) { return false; } static inline int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *l) { return 0; } static inline void dup_userfaultfd_complete(struct list_head *l) { } static inline void dup_userfaultfd_fail(struct list_head *l) { } static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma, struct vm_userfaultfd_ctx *ctx) { } static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, unsigned long from, unsigned long to, unsigned long len) { } static inline void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *ctx) { } static inline bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { return true; } static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *uf) { return 0; } static inline void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf) { } static inline bool 
uffd_disable_fault_around(struct vm_area_struct *vma) { return false; } static inline bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma) { return false; } static inline bool userfaultfd_wp_async(struct vm_area_struct *vma) { return false; } static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) { return false; } #endif /* CONFIG_USERFAULTFD */ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) { /* Only wr-protect mode uses pte markers */ if (!userfaultfd_wp(vma)) return false; /* File-based uffd-wp always need markers */ if (!vma_is_anonymous(vma)) return true; /* * Anonymous uffd-wp only needs the markers if WP_UNPOPULATED * enabled (to apply markers on zero pages). */ return userfaultfd_wp_unpopulated(vma); } static inline bool pte_marker_entry_uffd_wp(swp_entry_t entry) { #ifdef CONFIG_PTE_MARKER_UFFD_WP return is_pte_marker_entry(entry) && (pte_marker_get(entry) & PTE_MARKER_UFFD_WP); #else return false; #endif } static inline bool pte_marker_uffd_wp(pte_t pte) { #ifdef CONFIG_PTE_MARKER_UFFD_WP swp_entry_t entry; if (!is_swap_pte(pte)) return false; entry = pte_to_swp_entry(pte); return pte_marker_entry_uffd_wp(entry); #else return false; #endif } /* * Returns true if this is a swap pte and was uffd-wp wr-protected in either * forms (pte marker or a normal swap pte), false otherwise. */ static inline bool pte_swp_uffd_wp_any(pte_t pte) { #ifdef CONFIG_PTE_MARKER_UFFD_WP if (!is_swap_pte(pte)) return false; if (pte_swp_uffd_wp(pte)) return true; if (pte_marker_uffd_wp(pte)) return true; #endif return false; } #endif /* _LINUX_USERFAULTFD_K_H */ |
| 7 7 6 6 7 7 7 7 1 7 7 7 7 7 6 7 7 7 1 1 6 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 | // SPDX-License-Identifier: GPL-2.0-or-later /* * lib/ts_bm.c Boyer-Moore text search implementation * * Authors: Pablo Neira Ayuso <pablo@eurodev.net> * * ========================================================================== * * Implements Boyer-Moore string matching algorithm: * * [1] A Fast String Searching Algorithm, R.S. Boyer and Moore. * Communications of the Association for Computing Machinery, * 20(10), 1977, pp. 762-772. * https://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf * * [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004 * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf * * Note: Since Boyer-Moore (BM) performs searches for matchings from right * to left, it's still possible that a matching could be spread over * multiple blocks, in that case this algorithm won't find any coincidence. * * If you're willing to ensure that such thing won't ever happen, use the * Knuth-Morris-Pratt (KMP) implementation instead. In conclusion, choose * the proper string search algorithm depending on your setting. 
* * Say you're using the textsearch infrastructure for filtering, NIDS or * any similar security focused purpose, then go KMP. Otherwise, if you * really care about performance, say you're classifying packets to apply * Quality of Service (QoS) policies, and you don't mind about possible * matchings spread over multiple fragments, then go BM. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> #include <linux/string.h> #include <linux/ctype.h> #include <linux/textsearch.h> /* Alphabet size, use ASCII */ #define ASIZE 256 #if 0 #define DEBUGP printk #else #define DEBUGP(args, format...) #endif struct ts_bm { u8 * pattern; unsigned int patlen; unsigned int bad_shift[ASIZE]; unsigned int good_shift[]; }; static unsigned int matchpat(const u8 *pattern, unsigned int patlen, const u8 *text, bool icase) { unsigned int i; for (i = 0; i < patlen; i++) { u8 t = *(text-i); if (icase) t = toupper(t); if (t != *(pattern-i)) break; } return i; } static unsigned int bm_find(struct ts_config *conf, struct ts_state *state) { struct ts_bm *bm = ts_config_priv(conf); unsigned int i, text_len, consumed = state->offset; const u8 *text; int bs; const u8 icase = conf->flags & TS_IGNORECASE; for (;;) { int shift = bm->patlen - 1; text_len = conf->get_next_block(consumed, &text, conf, state); if (unlikely(text_len == 0)) break; while (shift < text_len) { DEBUGP("Searching in position %d (%c)\n", shift, text[shift]); i = matchpat(&bm->pattern[bm->patlen-1], bm->patlen, &text[shift], icase); if (i == bm->patlen) { /* London calling... */ DEBUGP("found!\n"); return consumed + (shift-(bm->patlen-1)); } bs = bm->bad_shift[text[shift-i]]; /* Now jumping to... 
*/ shift = max_t(int, shift-i+bs, shift+bm->good_shift[i]); } consumed += text_len; } return UINT_MAX; } static int subpattern(u8 *pattern, int i, int j, int g) { int x = i+g-1, y = j+g-1, ret = 0; while(pattern[x--] == pattern[y--]) { if (y < 0) { ret = 1; break; } if (--g == 0) { ret = pattern[i-1] != pattern[j-1]; break; } } return ret; } static void compute_prefix_tbl(struct ts_bm *bm, int flags) { int i, j, g; for (i = 0; i < ASIZE; i++) bm->bad_shift[i] = bm->patlen; for (i = 0; i < bm->patlen - 1; i++) { bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i; if (flags & TS_IGNORECASE) bm->bad_shift[tolower(bm->pattern[i])] = bm->patlen - 1 - i; } /* Compute the good shift array, used to match recurrences * of a subpattern */ bm->good_shift[0] = 1; for (i = 1; i < bm->patlen; i++) bm->good_shift[i] = bm->patlen; for (i = bm->patlen-1, g = 1; i > 0; g++, i--) { for (j = i-1; j >= 1-g ; j--) if (subpattern(bm->pattern, i, j, g)) { bm->good_shift[g] = bm->patlen-j-g; break; } } } static struct ts_config *bm_init(const void *pattern, unsigned int len, gfp_t gfp_mask, int flags) { struct ts_config *conf; struct ts_bm *bm; int i; unsigned int prefix_tbl_len = len * sizeof(unsigned int); size_t priv_size = sizeof(*bm) + len + prefix_tbl_len; conf = alloc_ts_config(priv_size, gfp_mask); if (IS_ERR(conf)) return conf; conf->flags = flags; bm = ts_config_priv(conf); bm->patlen = len; bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len; if (flags & TS_IGNORECASE) for (i = 0; i < len; i++) bm->pattern[i] = toupper(((u8 *)pattern)[i]); else memcpy(bm->pattern, pattern, len); compute_prefix_tbl(bm, flags); return conf; } static void *bm_get_pattern(struct ts_config *conf) { struct ts_bm *bm = ts_config_priv(conf); return bm->pattern; } static unsigned int bm_get_pattern_len(struct ts_config *conf) { struct ts_bm *bm = ts_config_priv(conf); return bm->patlen; } static struct ts_ops bm_ops = { .name = "bm", .find = bm_find, .init = bm_init, .get_pattern = bm_get_pattern, 
.get_pattern_len = bm_get_pattern_len, .owner = THIS_MODULE, .list = LIST_HEAD_INIT(bm_ops.list) }; static int __init init_bm(void) { return textsearch_register(&bm_ops); } static void __exit exit_bm(void) { textsearch_unregister(&bm_ops); } MODULE_DESCRIPTION("Boyer-Moore text search implementation"); MODULE_LICENSE("GPL"); module_init(init_bm); module_exit(exit_bm); |
| 3 2 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* I2C message transfer tracepoints * * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #undef TRACE_SYSTEM #define TRACE_SYSTEM i2c #if !defined(_TRACE_I2C_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_I2C_H #include <linux/i2c.h> #include <linux/tracepoint.h> /* * drivers/i2c/i2c-core-base.c */ extern int i2c_transfer_trace_reg(void); extern void i2c_transfer_trace_unreg(void); /* * __i2c_transfer() write request */ TRACE_EVENT_FN(i2c_write, TP_PROTO(const struct i2c_adapter *adap, const struct i2c_msg *msg, int num), TP_ARGS(adap, msg, num), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, msg_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u16, len ) __dynamic_array(__u8, buf, msg->len) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->msg_nr = num; __entry->addr = msg->addr; __entry->flags = msg->flags; __entry->len = msg->len; memcpy(__get_dynamic_array(buf), msg->buf, msg->len); ), TP_printk("i2c-%d #%u a=%03x f=%04x l=%u [%*phD]", __entry->adapter_nr, __entry->msg_nr, __entry->addr, __entry->flags, __entry->len, __entry->len, __get_dynamic_array(buf) ), i2c_transfer_trace_reg, i2c_transfer_trace_unreg); /* * __i2c_transfer() read request */ TRACE_EVENT_FN(i2c_read, TP_PROTO(const struct i2c_adapter *adap, const struct i2c_msg *msg, int num), TP_ARGS(adap, msg, num), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, msg_nr ) 
__field(__u16, addr ) __field(__u16, flags ) __field(__u16, len ) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->msg_nr = num; __entry->addr = msg->addr; __entry->flags = msg->flags; __entry->len = msg->len; ), TP_printk("i2c-%d #%u a=%03x f=%04x l=%u", __entry->adapter_nr, __entry->msg_nr, __entry->addr, __entry->flags, __entry->len ), i2c_transfer_trace_reg, i2c_transfer_trace_unreg); /* * __i2c_transfer() read reply */ TRACE_EVENT_FN(i2c_reply, TP_PROTO(const struct i2c_adapter *adap, const struct i2c_msg *msg, int num), TP_ARGS(adap, msg, num), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, msg_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u16, len ) __dynamic_array(__u8, buf, msg->len) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->msg_nr = num; __entry->addr = msg->addr; __entry->flags = msg->flags; __entry->len = msg->len; memcpy(__get_dynamic_array(buf), msg->buf, msg->len); ), TP_printk("i2c-%d #%u a=%03x f=%04x l=%u [%*phD]", __entry->adapter_nr, __entry->msg_nr, __entry->addr, __entry->flags, __entry->len, __entry->len, __get_dynamic_array(buf) ), i2c_transfer_trace_reg, i2c_transfer_trace_unreg); /* * __i2c_transfer() result */ TRACE_EVENT_FN(i2c_result, TP_PROTO(const struct i2c_adapter *adap, int num, int ret), TP_ARGS(adap, num, ret), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, nr_msgs ) __field(__s16, ret ) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->nr_msgs = num; __entry->ret = ret; ), TP_printk("i2c-%d n=%u ret=%d", __entry->adapter_nr, __entry->nr_msgs, __entry->ret ), i2c_transfer_trace_reg, i2c_transfer_trace_unreg); #endif /* _TRACE_I2C_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
| 1297 1276 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | // SPDX-License-Identifier: GPL-2.0 /* * Dynamic byte queue limits. See include/linux/dynamic_queue_limits.h * * Copyright (c) 2011, Tom Herbert <therbert@google.com> */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/dynamic_queue_limits.h> #include <linux/compiler.h> #include <linux/export.h> #include <trace/events/napi.h> #define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0) #define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0) static void dql_check_stall(struct dql *dql, unsigned short stall_thrs) { unsigned long now; if (!stall_thrs) return; now = jiffies; /* Check for a potential stall */ if (time_after_eq(now, dql->last_reap + stall_thrs)) { unsigned long hist_head, t, start, end; /* We are trying to detect a period of at least @stall_thrs * jiffies without any Tx completions, but during first half * of which some Tx was posted. */ dqs_again: hist_head = READ_ONCE(dql->history_head); /* pairs with smp_wmb() in dql_queued() */ smp_rmb(); /* Get the previous entry in the ring buffer, which is the * oldest sample. 
*/ start = (hist_head - DQL_HIST_LEN + 1) * BITS_PER_LONG; /* Advance start to continue from the last reap time */ if (time_before(start, dql->last_reap + 1)) start = dql->last_reap + 1; /* Newest sample we should have already seen a completion for */ end = hist_head * BITS_PER_LONG + (BITS_PER_LONG - 1); /* Shrink the search space to [start, (now - stall_thrs/2)] if * `end` is beyond the stall zone */ if (time_before(now, end + stall_thrs / 2)) end = now - stall_thrs / 2; /* Search for the queued time in [t, end] */ for (t = start; time_before_eq(t, end); t++) if (test_bit(t % (DQL_HIST_LEN * BITS_PER_LONG), dql->history)) break; /* Variable t contains the time of the queue */ if (!time_before_eq(t, end)) goto no_stall; /* The ring buffer was modified in the meantime, retry */ if (hist_head != READ_ONCE(dql->history_head)) goto dqs_again; dql->stall_cnt++; dql->stall_max = max_t(unsigned short, dql->stall_max, now - t); trace_dql_stall_detected(dql->stall_thrs, now - t, dql->last_reap, dql->history_head, now, dql->history); } no_stall: dql->last_reap = now; } /* Records completed count and recalculates the queue limit */ void dql_completed(struct dql *dql, unsigned int count) { unsigned int inprogress, prev_inprogress, limit; unsigned int ovlimit, completed, num_queued; unsigned short stall_thrs; bool all_prev_completed; num_queued = READ_ONCE(dql->num_queued); /* Read stall_thrs in advance since it belongs to the same (first) * cache line as ->num_queued. This way, dql_check_stall() does not * need to touch the first cache line again later, reducing the window * of possible false sharing. 
*/ stall_thrs = READ_ONCE(dql->stall_thrs); /* Can't complete more than what's in queue */ BUG_ON(count > num_queued - dql->num_completed); completed = dql->num_completed + count; limit = dql->limit; ovlimit = POSDIFF(num_queued - dql->num_completed, limit); inprogress = num_queued - completed; prev_inprogress = dql->prev_num_queued - dql->num_completed; all_prev_completed = AFTER_EQ(completed, dql->prev_num_queued); if ((ovlimit && !inprogress) || (dql->prev_ovlimit && all_prev_completed)) { /* * Queue considered starved if: * - The queue was over-limit in the last interval, * and there is no more data in the queue. * OR * - The queue was over-limit in the previous interval and * when enqueuing it was possible that all queued data * had been consumed. This covers the case when queue * may have become starved between completion processing * running and next time enqueue was scheduled. * * When queue is starved increase the limit by the amount * of bytes both sent and completed in the last interval, * plus any previous over-limit. */ limit += POSDIFF(completed, dql->prev_num_queued) + dql->prev_ovlimit; dql->slack_start_time = jiffies; dql->lowest_slack = UINT_MAX; } else if (inprogress && prev_inprogress && !all_prev_completed) { /* * Queue was not starved, check if the limit can be decreased. * A decrease is only considered if the queue has been busy in * the whole interval (the check above). * * If there is slack, the amount of excess data queued above * the amount needed to prevent starvation, the queue limit * can be decreased. To avoid hysteresis we consider the * minimum amount of slack found over several iterations of the * completion routine. */ unsigned int slack, slack_last_objs; /* * Slack is the maximum of * - The queue limit plus previous over-limit minus twice * the number of objects completed. Note that two times * number of completed bytes is a basis for an upper bound * of the limit. 
* - Portion of objects in the last queuing operation that * was not part of non-zero previous over-limit. That is * "round down" by non-overlimit portion of the last * queueing operation. */ slack = POSDIFF(limit + dql->prev_ovlimit, 2 * (completed - dql->num_completed)); slack_last_objs = dql->prev_ovlimit ? POSDIFF(dql->prev_last_obj_cnt, dql->prev_ovlimit) : 0; slack = max(slack, slack_last_objs); if (slack < dql->lowest_slack) dql->lowest_slack = slack; if (time_after(jiffies, dql->slack_start_time + dql->slack_hold_time)) { limit = POSDIFF(limit, dql->lowest_slack); dql->slack_start_time = jiffies; dql->lowest_slack = UINT_MAX; } } /* Enforce bounds on limit */ limit = clamp(limit, dql->min_limit, dql->max_limit); if (limit != dql->limit) { dql->limit = limit; ovlimit = 0; } dql->adj_limit = limit + completed; dql->prev_ovlimit = ovlimit; dql->prev_last_obj_cnt = READ_ONCE(dql->last_obj_cnt); dql->num_completed = completed; dql->prev_num_queued = num_queued; dql_check_stall(dql, stall_thrs); } EXPORT_SYMBOL(dql_completed); void dql_reset(struct dql *dql) { /* Reset all dynamic values */ dql->limit = dql->min_limit; dql->num_queued = 0; dql->num_completed = 0; dql->last_obj_cnt = 0; dql->prev_num_queued = 0; dql->prev_last_obj_cnt = 0; dql->prev_ovlimit = 0; dql->lowest_slack = UINT_MAX; dql->slack_start_time = jiffies; dql->last_reap = jiffies; dql->history_head = jiffies / BITS_PER_LONG; memset(dql->history, 0, sizeof(dql->history)); } EXPORT_SYMBOL(dql_reset); void dql_init(struct dql *dql, unsigned int hold_time) { dql->max_limit = DQL_MAX_LIMIT; dql->min_limit = 0; dql->slack_hold_time = hold_time; dql->stall_thrs = 0; dql_reset(dql); } EXPORT_SYMBOL(dql_init); |
| 1667 1007 1006 1008 48 47 5 51 12 12 9 849 46 81 14 23 1007 7 53 53 11 11 46 38 46 53 130 1006 319 321 776 777 160 160 159 1358 1006 63 22 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 | // SPDX-License-Identifier: GPL-2.0-only /* tnum: tracked (or tristate) numbers * * A tnum tracks knowledge about the bits of a value. Each bit can be either * known (0 or 1), or unknown (x). Arithmetic operations on tnums will * propagate the unknown bits such that the tnum result represents all the * possible results for possible values of the operands. */ #include <linux/kernel.h> #include <linux/tnum.h> #define TNUM(_v, _m) (struct tnum){.value = _v, .mask = _m} /* A completely unknown value */ const struct tnum tnum_unknown = { .value = 0, .mask = -1 }; struct tnum tnum_const(u64 value) { return TNUM(value, 0); } struct tnum tnum_range(u64 min, u64 max) { u64 chi = min ^ max, delta; u8 bits = fls64(chi); /* special case, needed because 1ULL << 64 is undefined */ if (bits > 63) return tnum_unknown; /* e.g. if chi = 4, bits = 3, delta = (1<<3) - 1 = 7. 
* if chi = 0, bits = 0, delta = (1<<0) - 1 = 0, so we return * constant min (since min == max). */ delta = (1ULL << bits) - 1; return TNUM(min & ~delta, delta); } struct tnum tnum_lshift(struct tnum a, u8 shift) { return TNUM(a.value << shift, a.mask << shift); } struct tnum tnum_rshift(struct tnum a, u8 shift) { return TNUM(a.value >> shift, a.mask >> shift); } struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness) { /* if a.value is negative, arithmetic shifting by minimum shift * will have larger negative offset compared to more shifting. * If a.value is nonnegative, arithmetic shifting by minimum shift * will have larger positive offset compare to more shifting. */ if (insn_bitness == 32) return TNUM((u32)(((s32)a.value) >> min_shift), (u32)(((s32)a.mask) >> min_shift)); else return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift); } struct tnum tnum_add(struct tnum a, struct tnum b) { u64 sm, sv, sigma, chi, mu; sm = a.mask + b.mask; sv = a.value + b.value; sigma = sm + sv; chi = sigma ^ sv; mu = chi | a.mask | b.mask; return TNUM(sv & ~mu, mu); } struct tnum tnum_sub(struct tnum a, struct tnum b) { u64 dv, alpha, beta, chi, mu; dv = a.value - b.value; alpha = dv + a.mask; beta = dv - b.mask; chi = alpha ^ beta; mu = chi | a.mask | b.mask; return TNUM(dv & ~mu, mu); } struct tnum tnum_neg(struct tnum a) { return tnum_sub(TNUM(0, 0), a); } struct tnum tnum_and(struct tnum a, struct tnum b) { u64 alpha, beta, v; alpha = a.value | a.mask; beta = b.value | b.mask; v = a.value & b.value; return TNUM(v, alpha & beta & ~v); } struct tnum tnum_or(struct tnum a, struct tnum b) { u64 v, mu; v = a.value | b.value; mu = a.mask | b.mask; return TNUM(v, mu & ~v); } struct tnum tnum_xor(struct tnum a, struct tnum b) { u64 v, mu; v = a.value ^ b.value; mu = a.mask | b.mask; return TNUM(v & ~mu, mu); } /* Perform long multiplication, iterating through the bits in a using rshift: * - if LSB(a) is a known 0, keep current accumulator * - if LSB(a) is a 
known 1, add b to current accumulator * - if LSB(a) is unknown, take a union of the above cases. * * For example: * * acc_0: acc_1: * * 11 * -> 11 * -> 11 * -> union(0011, 1001) == x0x1 * x1 01 11 * ------ ------ ------ * 11 11 11 * xx 00 11 * ------ ------ ------ * ???? 0011 1001 */ struct tnum tnum_mul(struct tnum a, struct tnum b) { struct tnum acc = TNUM(0, 0); while (a.value || a.mask) { /* LSB of tnum a is a certain 1 */ if (a.value & 1) acc = tnum_add(acc, b); /* LSB of tnum a is uncertain */ else if (a.mask & 1) { /* acc = tnum_union(acc_0, acc_1), where acc_0 and * acc_1 are partial accumulators for cases * LSB(a) = certain 0 and LSB(a) = certain 1. * acc_0 = acc + 0 * b = acc. * acc_1 = acc + 1 * b = tnum_add(acc, b). */ acc = tnum_union(acc, tnum_add(acc, b)); } /* Note: no case for LSB is certain 0 */ a = tnum_rshift(a, 1); b = tnum_lshift(b, 1); } return acc; } bool tnum_overlap(struct tnum a, struct tnum b) { u64 mu; mu = ~a.mask & ~b.mask; return (a.value & mu) == (b.value & mu); } /* Note that if a and b disagree - i.e. one has a 'known 1' where the other has * a 'known 0' - this will return a 'known 1' for that bit. */ struct tnum tnum_intersect(struct tnum a, struct tnum b) { u64 v, mu; v = a.value | b.value; mu = a.mask & b.mask; return TNUM(v & ~mu, mu); } /* Returns a tnum with the uncertainty from both a and b, and in addition, new * uncertainty at any position that a and b disagree. This represents a * superset of the union of the concrete sets of both a and b. Despite the * overapproximation, it is optimal. 
*/ struct tnum tnum_union(struct tnum a, struct tnum b) { u64 v = a.value & b.value; u64 mu = (a.value ^ b.value) | a.mask | b.mask; return TNUM(v & ~mu, mu); } struct tnum tnum_cast(struct tnum a, u8 size) { a.value &= (1ULL << (size * 8)) - 1; a.mask &= (1ULL << (size * 8)) - 1; return a; } bool tnum_is_aligned(struct tnum a, u64 size) { if (!size) return true; return !((a.value | a.mask) & (size - 1)); } bool tnum_in(struct tnum a, struct tnum b) { if (b.mask & ~a.mask) return false; b.value &= ~a.mask; return a.value == b.value; } int tnum_sbin(char *str, size_t size, struct tnum a) { size_t n; for (n = 64; n; n--) { if (n < size) { if (a.mask & 1) str[n - 1] = 'x'; else if (a.value & 1) str[n - 1] = '1'; else str[n - 1] = '0'; } a.mask >>= 1; a.value >>= 1; } str[min(size - 1, (size_t)64)] = 0; return 64; } struct tnum tnum_subreg(struct tnum a) { return tnum_cast(a, 4); } struct tnum tnum_clear_subreg(struct tnum a) { return tnum_lshift(tnum_rshift(a, 32), 32); } struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg) { return tnum_or(tnum_clear_subreg(reg), tnum_subreg(subreg)); } struct tnum tnum_const_subreg(struct tnum a, u32 value) { return tnum_with_subreg(a, tnum_const(value)); } |
// SPDX-License-Identifier: MIT

#include <uapi/linux/sched/types.h>
#include <linux/export.h>

#include <drm/drm_print.h>
#include <drm/drm_vblank.h>
#include <drm/drm_vblank_work.h>
#include <drm/drm_crtc.h>

#include "drm_internal.h"

/**
 * DOC: vblank works
 *
 * Many DRM drivers need to program hardware in a time-sensitive manner, many
 * times with a deadline of starting and finishing within a certain region of
 * the scanout. Most of the time the safest way to accomplish this is to
 * simply do said time-sensitive programming in the driver's IRQ handler,
 * which allows drivers to avoid being preempted during these critical
 * regions. Or even better, the hardware may even handle applying such
 * time-critical programming independently of the CPU.
 *
 * While there's a decent amount of hardware that's designed so that the CPU
 * doesn't need to be concerned with extremely time-sensitive programming,
 * there's a few situations where it can't be helped. Some unforgiving
 * hardware may require that certain time-sensitive programming be handled
 * completely by the CPU, and said programming may even take too long to
 * handle in an IRQ handler. Another such situation would be where the driver
 * needs to perform a task that needs to complete within a specific scanout
 * period, but might possibly block and thus cannot be handled in an IRQ
 * context. Both of these situations can't be solved perfectly in Linux since
 * we're not a realtime kernel, and thus the scheduler may cause us to miss
 * our deadline if it decides to preempt us. But for some drivers, it's good
 * enough if we can lower our chance of being preempted to an absolute
 * minimum.
 *
 * This is where &drm_vblank_work comes in. &drm_vblank_work provides a simple
 * generic delayed work implementation which delays work execution until a
 * particular vblank has passed, and then executes the work at realtime
 * priority. This provides the best possible chance at performing
 * time-sensitive hardware programming on time, even when the system is under
 * heavy load. &drm_vblank_work also supports rescheduling, so that self
 * re-arming work items can be easily implemented.
 */

/* Queue every pending work item whose target vblank count has passed.
 *
 * Each such item is removed from the pending list, its vblank reference is
 * dropped and it is handed to the crtc's kthread worker. Waiters on
 * work_wait_queue are woken only if at least one item was queued. The caller
 * must hold dev->event_lock.
 */
void drm_handle_vblank_works(struct drm_vblank_crtc *vblank)
{
	struct drm_vblank_work *work, *next;
	u64 count = atomic64_read(&vblank->count);
	bool wake = false;

	assert_spin_locked(&vblank->dev->event_lock);

	list_for_each_entry_safe(work, next, &vblank->pending_work, node) {
		/* Target vblank not reached yet, leave it pending */
		if (!drm_vblank_passed(count, work->count))
			continue;

		list_del_init(&work->node);
		drm_vblank_put(vblank->dev, vblank->pipe);
		kthread_queue_work(vblank->worker, &work->base);
		wake = true;
	}
	if (wake)
		wake_up_all(&vblank->work_wait_queue);
}

/* Handle cancelling any pending vblank work items and drop respective vblank
 * references in response to vblank interrupts being disabled.
 */
void drm_vblank_cancel_pending_works(struct drm_vblank_crtc *vblank)
{
	struct drm_vblank_work *work, *next;

	assert_spin_locked(&vblank->dev->event_lock);

	/* Warn (once) if there is anything left to cancel */
	drm_WARN_ONCE(vblank->dev, !list_empty(&vblank->pending_work),
		      "Cancelling pending vblank works!\n");

	list_for_each_entry_safe(work, next, &vblank->pending_work, node) {
		list_del_init(&work->node);
		drm_vblank_put(vblank->dev, vblank->pipe);
	}

	/* Unconditional wake-up, unlike drm_handle_vblank_works() */
	wake_up_all(&vblank->work_wait_queue);
}

/**
 * drm_vblank_work_schedule - schedule a vblank work
 * @work: vblank work to schedule
 * @count: target vblank count
 * @nextonmiss: defer until the next vblank if target vblank was missed
 *
 * Schedule @work for execution once the crtc vblank count reaches @count.
 *
 * If the crtc vblank count has already reached @count and @nextonmiss is
 * %false the work starts to execute immediately.
 *
 * If the crtc vblank count has already reached @count and @nextonmiss is
 * %true the work is deferred until the next vblank (as if @count has been
 * specified as crtc vblank count + 1).
 *
 * If @work is already scheduled, this function will reschedule said work
 * using the new @count. This can be used for self-rearming work items.
 *
 * Returns:
 * %1 if @work was successfully (re)scheduled, %0 if it was either already
 * scheduled or cancelled, or a negative error code on failure.
 */
int drm_vblank_work_schedule(struct drm_vblank_work *work,
			     u64 count, bool nextonmiss)
{
	struct drm_vblank_crtc *vblank = work->vblank;
	struct drm_device *dev = vblank->dev;
	u64 cur_vbl;
	unsigned long irqflags;
	bool passed, inmodeset, rescheduling = false, wake = false;
	int ret = 0;

	spin_lock_irqsave(&dev->event_lock, irqflags);
	/* A cancel is in progress: refuse to (re)arm, return 0 */
	if (work->cancelling)
		goto out;

	/* inmodeset is sampled under vbl_lock, nested inside event_lock */
	spin_lock(&dev->vbl_lock);
	inmodeset = vblank->inmodeset;
	spin_unlock(&dev->vbl_lock);
	if (inmodeset)
		goto out;

	if (list_empty(&work->node)) {
		/* Not pending yet: take a vblank reference for it */
		ret = drm_vblank_get(dev, vblank->pipe);
		if (ret < 0)
			goto out;
	} else if (work->count == count) {
		/* Already scheduled w/ same vbl count */
		goto out;
	} else {
		/* Already pending: the reference taken earlier is reused */
		rescheduling = true;
	}

	work->count = count;
	cur_vbl = drm_vblank_count(dev, vblank->pipe);
	passed = drm_vblank_passed(cur_vbl, count);
	if (passed)
		drm_dbg_core(dev,
			     "crtc %d vblank %llu already passed (current %llu)\n",
			     vblank->pipe, count, cur_vbl);

	if (!nextonmiss && passed) {
		/* Run now: drop the reference and queue to the worker */
		drm_vblank_put(dev, vblank->pipe);
		ret = kthread_queue_work(vblank->worker, &work->base);

		if (rescheduling) {
			/* No longer pending; wake anyone waiting on that */
			list_del_init(&work->node);
			wake = true;
		}
	} else {
		if (!rescheduling)
			list_add_tail(&work->node, &vblank->pending_work);
		ret = true;
	}

out:
	spin_unlock_irqrestore(&dev->event_lock, irqflags);
	/* The wake-up is issued after event_lock has been released */
	if (wake)
		wake_up_all(&vblank->work_wait_queue);
	return ret;
}
EXPORT_SYMBOL(drm_vblank_work_schedule);

/**
 * drm_vblank_work_cancel_sync - cancel a vblank work and wait for it to
 * finish executing
 * @work: vblank work to cancel
 *
 * Cancel an already scheduled vblank work and wait for its
 * execution to finish.
 *
 * On return, @work is guaranteed to no longer be scheduled or running, even
 * if it's self-arming.
 *
 * Returns:
 * %True if the work was cancelled before it started to execute, %false
 * otherwise.
 */
bool drm_vblank_work_cancel_sync(struct drm_vblank_work *work)
{
	struct drm_vblank_crtc *vblank = work->vblank;
	struct drm_device *dev = vblank->dev;
	bool ret = false;

	spin_lock_irq(&dev->event_lock);
	if (!list_empty(&work->node)) {
		/* Still pending: unlink it and drop its vblank reference */
		list_del_init(&work->node);
		drm_vblank_put(vblank->dev, vblank->pipe);
		ret = true;
	}

	/* While non-zero, drm_vblank_work_schedule() refuses to arm @work */
	work->cancelling++;
	spin_unlock_irq(&dev->event_lock);

	wake_up_all(&vblank->work_wait_queue);

	if (kthread_cancel_work_sync(&work->base))
		ret = true;

	spin_lock_irq(&dev->event_lock);
	work->cancelling--;
	spin_unlock_irq(&dev->event_lock);

	return ret;
}
EXPORT_SYMBOL(drm_vblank_work_cancel_sync);

/**
 * drm_vblank_work_flush - wait for a scheduled vblank work to finish
 * executing
 * @work: vblank work to flush
 *
 * Wait until @work has finished executing once.
 */
void drm_vblank_work_flush(struct drm_vblank_work *work)
{
	struct drm_vblank_crtc *vblank = work->vblank;
	struct drm_device *dev = vblank->dev;

	/* First wait until @work is no longer on the pending list... */
	spin_lock_irq(&dev->event_lock);
	wait_event_lock_irq(vblank->work_wait_queue, list_empty(&work->node),
			    dev->event_lock);
	spin_unlock_irq(&dev->event_lock);

	/* ...then flush it through the kthread worker */
	kthread_flush_work(&work->base);
}
EXPORT_SYMBOL(drm_vblank_work_flush);

/**
 * drm_vblank_work_flush_all - flush all currently pending vblank work on crtc.
 * @crtc: crtc for which vblank work to flush
 *
 * Wait until all currently queued vblank work on @crtc
 * has finished executing once.
 */
void drm_vblank_work_flush_all(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	struct drm_vblank_crtc *vblank = &dev->vblank[drm_crtc_index(crtc)];

	/* Wait for the pending list to drain, then flush the whole worker */
	spin_lock_irq(&dev->event_lock);
	wait_event_lock_irq(vblank->work_wait_queue,
			    list_empty(&vblank->pending_work),
			    dev->event_lock);
	spin_unlock_irq(&dev->event_lock);

	kthread_flush_worker(vblank->worker);
}
EXPORT_SYMBOL(drm_vblank_work_flush_all);

/**
 * drm_vblank_work_init - initialize a vblank work item
 * @work: vblank work item
 * @crtc: CRTC whose vblank will trigger the work execution
 * @func: work function to be executed
 *
 * Initialize a vblank work item for a specific crtc.
 */
void drm_vblank_work_init(struct drm_vblank_work *work, struct drm_crtc *crtc,
			  void (*func)(struct kthread_work *work))
{
	kthread_init_work(&work->base, func);
	INIT_LIST_HEAD(&work->node);
	work->vblank = drm_crtc_vblank_crtc(crtc);
}
EXPORT_SYMBOL(drm_vblank_work_init);

/* Set up the vblank work state of @vblank: initialise the pending list and
 * the wait queue, start a kthread worker named "card%d-crtc%d" and switch
 * its task to FIFO scheduling via sched_set_fifo().
 *
 * Returns 0 on success or the error encoded in the pointer returned by
 * kthread_run_worker().
 */
int drm_vblank_worker_init(struct drm_vblank_crtc *vblank)
{
	struct kthread_worker *worker;

	INIT_LIST_HEAD(&vblank->pending_work);
	init_waitqueue_head(&vblank->work_wait_queue);
	worker = kthread_run_worker(0, "card%d-crtc%d",
				       vblank->dev->primary->index,
				       vblank->pipe);
	if (IS_ERR(worker))
		return PTR_ERR(worker);

	vblank->worker = worker;

	sched_set_fifo(worker->task);
	return 0;
}
| 39 25 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 | // SPDX-License-Identifier: GPL-2.0 /* * The base64 encode/decode code was copied from fscrypt: * Copyright (C) 2015, Google, Inc. * Copyright (C) 2015, Motorola Mobility * Written by Uday Savagaonkar, 2014. * Modified by Jaegeuk Kim, 2015. */ #include <linux/ceph/ceph_debug.h> #include <linux/xattr.h> #include <linux/fscrypt.h> #include <linux/ceph/striper.h> #include "super.h" #include "mds_client.h" #include "crypto.h" /* * The base64url encoding used by fscrypt includes the '_' character, which may * cause problems in snapshot names (which can not start with '_'). Thus, we * used the base64 encoding defined for IMAP mailbox names (RFC 3501) instead, * which replaces '-' and '_' by '+' and ','. 
*/ static const char base64_table[65] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; int ceph_base64_encode(const u8 *src, int srclen, char *dst) { u32 ac = 0; int bits = 0; int i; char *cp = dst; for (i = 0; i < srclen; i++) { ac = (ac << 8) | src[i]; bits += 8; do { bits -= 6; *cp++ = base64_table[(ac >> bits) & 0x3f]; } while (bits >= 6); } if (bits) *cp++ = base64_table[(ac << (6 - bits)) & 0x3f]; return cp - dst; } int ceph_base64_decode(const char *src, int srclen, u8 *dst) { u32 ac = 0; int bits = 0; int i; u8 *bp = dst; for (i = 0; i < srclen; i++) { const char *p = strchr(base64_table, src[i]); if (p == NULL || src[i] == 0) return -1; ac = (ac << 6) | (p - base64_table); bits += 6; if (bits >= 8) { bits -= 8; *bp++ = (u8)(ac >> bits); } } if (ac & ((1 << bits) - 1)) return -1; return bp - dst; } static int ceph_crypt_get_context(struct inode *inode, void *ctx, size_t len) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fscrypt_auth *cfa = (struct ceph_fscrypt_auth *)ci->fscrypt_auth; u32 ctxlen; /* Non existent or too short? */ if (!cfa || (ci->fscrypt_auth_len < (offsetof(struct ceph_fscrypt_auth, cfa_blob) + 1))) return -ENOBUFS; /* Some format we don't recognize? 
*/ if (le32_to_cpu(cfa->cfa_version) != CEPH_FSCRYPT_AUTH_VERSION) return -ENOBUFS; ctxlen = le32_to_cpu(cfa->cfa_blob_len); if (len < ctxlen) return -ERANGE; memcpy(ctx, cfa->cfa_blob, ctxlen); return ctxlen; } static int ceph_crypt_set_context(struct inode *inode, const void *ctx, size_t len, void *fs_data) { int ret; struct iattr attr = { }; struct ceph_iattr cia = { }; struct ceph_fscrypt_auth *cfa; WARN_ON_ONCE(fs_data); if (len > FSCRYPT_SET_CONTEXT_MAX_SIZE) return -EINVAL; cfa = kzalloc(sizeof(*cfa), GFP_KERNEL); if (!cfa) return -ENOMEM; cfa->cfa_version = cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION); cfa->cfa_blob_len = cpu_to_le32(len); memcpy(cfa->cfa_blob, ctx, len); cia.fscrypt_auth = cfa; ret = __ceph_setattr(&nop_mnt_idmap, inode, &attr, &cia); if (ret == 0) inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED); kfree(cia.fscrypt_auth); return ret; } static bool ceph_crypt_empty_dir(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); return ci->i_rsubdirs + ci->i_rfiles == 1; } static const union fscrypt_policy *ceph_get_dummy_policy(struct super_block *sb) { return ceph_sb_to_fs_client(sb)->fsc_dummy_enc_policy.policy; } static struct fscrypt_operations ceph_fscrypt_ops = { .inode_info_offs = (int)offsetof(struct ceph_inode_info, i_crypt_info) - (int)offsetof(struct ceph_inode_info, netfs.inode), .needs_bounce_pages = 1, .get_context = ceph_crypt_get_context, .set_context = ceph_crypt_set_context, .get_dummy_policy = ceph_get_dummy_policy, .empty_dir = ceph_crypt_empty_dir, }; void ceph_fscrypt_set_ops(struct super_block *sb) { fscrypt_set_ops(sb, &ceph_fscrypt_ops); } void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc) { fscrypt_free_dummy_policy(&fsc->fsc_dummy_enc_policy); } int ceph_fscrypt_prepare_context(struct inode *dir, struct inode *inode, struct ceph_acl_sec_ctx *as) { int ret, ctxsize; bool encrypted = false; struct ceph_inode_info *ci = ceph_inode(inode); ret = fscrypt_prepare_new_inode(dir, inode, &encrypted); if 
(ret) return ret; if (!encrypted) return 0; as->fscrypt_auth = kzalloc(sizeof(*as->fscrypt_auth), GFP_KERNEL); if (!as->fscrypt_auth) return -ENOMEM; ctxsize = fscrypt_context_for_new_inode(as->fscrypt_auth->cfa_blob, inode); if (ctxsize < 0) return ctxsize; as->fscrypt_auth->cfa_version = cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION); as->fscrypt_auth->cfa_blob_len = cpu_to_le32(ctxsize); WARN_ON_ONCE(ci->fscrypt_auth); kfree(ci->fscrypt_auth); ci->fscrypt_auth_len = ceph_fscrypt_auth_len(as->fscrypt_auth); ci->fscrypt_auth = kmemdup(as->fscrypt_auth, ci->fscrypt_auth_len, GFP_KERNEL); if (!ci->fscrypt_auth) return -ENOMEM; inode->i_flags |= S_ENCRYPTED; return 0; } void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req, struct ceph_acl_sec_ctx *as) { swap(req->r_fscrypt_auth, as->fscrypt_auth); } /* * User-created snapshots can't start with '_'. Snapshots that start with this * character are special (hint: there aren't real snapshots) and use the * following format: * * _<SNAPSHOT-NAME>_<INODE-NUMBER> * * where: * - <SNAPSHOT-NAME> - the real snapshot name that may need to be decrypted, * - <INODE-NUMBER> - the inode number (in decimal) for the actual snapshot * * This function parses these snapshot names and returns the inode * <INODE-NUMBER>. 'name_len' will also bet set with the <SNAPSHOT-NAME> * length. 
*/ static struct inode *parse_longname(const struct inode *parent, const char *name, int *name_len) { struct ceph_client *cl = ceph_inode_to_client(parent); struct inode *dir = NULL; struct ceph_vino vino = { .snap = CEPH_NOSNAP }; char *name_end, *inode_number; int ret = -EIO; /* NUL-terminate */ char *str __free(kfree) = kmemdup_nul(name, *name_len, GFP_KERNEL); if (!str) return ERR_PTR(-ENOMEM); /* Skip initial '_' */ str++; name_end = strrchr(str, '_'); if (!name_end) { doutc(cl, "failed to parse long snapshot name: %s\n", str); return ERR_PTR(-EIO); } *name_len = (name_end - str); if (*name_len <= 0) { pr_err_client(cl, "failed to parse long snapshot name\n"); return ERR_PTR(-EIO); } /* Get the inode number */ inode_number = name_end + 1; ret = kstrtou64(inode_number, 10, &vino.ino); if (ret) { doutc(cl, "failed to parse inode number: %s\n", str); return ERR_PTR(ret); } /* And finally the inode */ dir = ceph_find_inode(parent->i_sb, vino); if (!dir) { /* This can happen if we're not mounting cephfs on the root */ dir = ceph_get_inode(parent->i_sb, vino, NULL); if (IS_ERR(dir)) doutc(cl, "can't find inode %s (%s)\n", inode_number, name); } return dir; } int ceph_encode_encrypted_dname(struct inode *parent, char *buf, int elen) { struct ceph_client *cl = ceph_inode_to_client(parent); struct inode *dir = parent; char *p = buf; u32 len; int name_len = elen; int ret; u8 *cryptbuf = NULL; /* Handle the special case of snapshot names that start with '_' */ if (ceph_snap(dir) == CEPH_SNAPDIR && *p == '_') { dir = parse_longname(parent, p, &name_len); if (IS_ERR(dir)) return PTR_ERR(dir); p++; /* skip initial '_' */ } if (!fscrypt_has_encryption_key(dir)) goto out; /* * Convert cleartext d_name to ciphertext. 
If result is longer than * CEPH_NOHASH_NAME_MAX, sha256 the remaining bytes * * See: fscrypt_setup_filename */ if (!fscrypt_fname_encrypted_size(dir, name_len, NAME_MAX, &len)) { elen = -ENAMETOOLONG; goto out; } /* Allocate a buffer appropriate to hold the result */ cryptbuf = kmalloc(len > CEPH_NOHASH_NAME_MAX ? NAME_MAX : len, GFP_KERNEL); if (!cryptbuf) { elen = -ENOMEM; goto out; } ret = fscrypt_fname_encrypt(dir, &(struct qstr)QSTR_INIT(p, name_len), cryptbuf, len); if (ret) { elen = ret; goto out; } /* hash the end if the name is long enough */ if (len > CEPH_NOHASH_NAME_MAX) { u8 hash[SHA256_DIGEST_SIZE]; u8 *extra = cryptbuf + CEPH_NOHASH_NAME_MAX; /* * hash the extra bytes and overwrite crypttext beyond that * point with it */ sha256(extra, len - CEPH_NOHASH_NAME_MAX, hash); memcpy(extra, hash, SHA256_DIGEST_SIZE); len = CEPH_NOHASH_NAME_MAX + SHA256_DIGEST_SIZE; } /* base64 encode the encrypted name */ elen = ceph_base64_encode(cryptbuf, len, p); doutc(cl, "base64-encoded ciphertext name = %.*s\n", elen, p); /* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */ WARN_ON(elen > 240); if (dir != parent) // leading _ is already there; append _<inum> elen += 1 + sprintf(p + elen, "_%ld", dir->i_ino); out: kfree(cryptbuf); if (dir != parent) { if ((dir->i_state & I_NEW)) discard_new_inode(dir); else iput(dir); } return elen; } /** * ceph_fname_to_usr - convert a filename for userland presentation * @fname: ceph_fname to be converted * @tname: temporary name buffer to use for conversion (may be NULL) * @oname: where converted name should be placed * @is_nokey: set to true if key wasn't available during conversion (may be NULL) * * Given a filename (usually from the MDS), format it for presentation to * userland. If @parent is not encrypted, just pass it back as-is. * * Otherwise, base64 decode the string, and then ask fscrypt to format it * for userland presentation. * * Returns 0 on success or negative error code on error. 
*/ int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname, struct fscrypt_str *oname, bool *is_nokey) { struct inode *dir = fname->dir; struct fscrypt_str _tname = FSTR_INIT(NULL, 0); struct fscrypt_str iname; char *name = fname->name; int name_len = fname->name_len; int ret; /* Sanity check that the resulting name will fit in the buffer */ if (fname->name_len > NAME_MAX || fname->ctext_len > NAME_MAX) return -EIO; /* Handle the special case of snapshot names that start with '_' */ if ((ceph_snap(dir) == CEPH_SNAPDIR) && (name_len > 0) && (name[0] == '_')) { dir = parse_longname(dir, name, &name_len); if (IS_ERR(dir)) return PTR_ERR(dir); name++; /* skip initial '_' */ } if (!IS_ENCRYPTED(dir)) { oname->name = fname->name; oname->len = fname->name_len; ret = 0; goto out_inode; } ret = ceph_fscrypt_prepare_readdir(dir); if (ret) goto out_inode; /* * Use the raw dentry name as sent by the MDS instead of * generating a nokey name via fscrypt. */ if (!fscrypt_has_encryption_key(dir)) { if (fname->no_copy) oname->name = fname->name; else memcpy(oname->name, fname->name, fname->name_len); oname->len = fname->name_len; if (is_nokey) *is_nokey = true; ret = 0; goto out_inode; } if (fname->ctext_len == 0) { int declen; if (!tname) { ret = fscrypt_fname_alloc_buffer(NAME_MAX, &_tname); if (ret) goto out_inode; tname = &_tname; } declen = ceph_base64_decode(name, name_len, tname->name); if (declen <= 0) { ret = -EIO; goto out; } iname.name = tname->name; iname.len = declen; } else { iname.name = fname->ctext; iname.len = fname->ctext_len; } ret = fscrypt_fname_disk_to_usr(dir, 0, 0, &iname, oname); if (!ret && (dir != fname->dir)) { char tmp_buf[CEPH_BASE64_CHARS(NAME_MAX)]; name_len = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%ld", oname->len, oname->name, dir->i_ino); memcpy(oname->name, tmp_buf, name_len); oname->len = name_len; } out: fscrypt_fname_free_buffer(&_tname); out_inode: if (dir != fname->dir) { if ((dir->i_state & I_NEW)) 
discard_new_inode(dir); else iput(dir); } return ret; } /** * ceph_fscrypt_prepare_readdir - simple __fscrypt_prepare_readdir() wrapper * @dir: directory inode for readdir prep * * Simple wrapper around __fscrypt_prepare_readdir() that will mark directory as * non-complete if this call results in having the directory unlocked. * * Returns: * 1 - if directory was locked and key is now loaded (i.e. dir is unlocked) * 0 - if directory is still locked * < 0 - if __fscrypt_prepare_readdir() fails */ int ceph_fscrypt_prepare_readdir(struct inode *dir) { bool had_key = fscrypt_has_encryption_key(dir); int err; if (!IS_ENCRYPTED(dir)) return 0; err = __fscrypt_prepare_readdir(dir); if (err) return err; if (!had_key && fscrypt_has_encryption_key(dir)) { /* directory just got unlocked, mark it as not complete */ ceph_dir_clear_complete(dir); return 1; } return 0; } int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num) { struct ceph_client *cl = ceph_inode_to_client(inode); doutc(cl, "%p %llx.%llx len %u offs %u blk %llu\n", inode, ceph_vinop(inode), len, offs, lblk_num); return fscrypt_decrypt_block_inplace(inode, page, len, offs, lblk_num); } int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num) { struct ceph_client *cl = ceph_inode_to_client(inode); doutc(cl, "%p %llx.%llx len %u offs %u blk %llu\n", inode, ceph_vinop(inode), len, offs, lblk_num); return fscrypt_encrypt_block_inplace(inode, page, len, offs, lblk_num); } /** * ceph_fscrypt_decrypt_pages - decrypt an array of pages * @inode: pointer to inode associated with these pages * @page: pointer to page array * @off: offset into the file that the read data starts * @len: max length to decrypt * * Decrypt an array of fscrypt'ed pages and return the amount of * data decrypted. 
Any data in the page prior to the start of the
 * first complete block in the read is ignored. Any incomplete
 * crypto blocks at the end of the array are ignored (and should
 * probably be zeroed by the caller).
 *
 * Returns the length of the decrypted data or a negative errno.
 */
int ceph_fscrypt_decrypt_pages(struct inode *inode, struct page **page,
			       u64 off, int len)
{
	const u64 first_blk = off >> CEPH_FSCRYPT_BLOCK_SHIFT;
	/* Only whole crypto blocks can be handled: drop a trailing partial one. */
	const int nr_blks = ceph_fscrypt_blocks(off,
						len & CEPH_FSCRYPT_BLOCK_MASK);
	int decrypted = 0;
	int blk;

	for (blk = 0; blk < nr_blks; blk++) {
		/* Byte offset of this block from the start of the page array. */
		const int byte_off = blk << CEPH_FSCRYPT_BLOCK_SHIFT;
		const int err = ceph_fscrypt_decrypt_block_inplace(inode,
						page[byte_off >> PAGE_SHIFT],
						CEPH_FSCRYPT_BLOCK_SIZE,
						offset_in_page(byte_off),
						first_blk + blk);

		/* The errno is reported only when nothing was decrypted yet. */
		if (err < 0)
			return decrypted ? decrypted : err;
		decrypted += CEPH_FSCRYPT_BLOCK_SIZE;
	}
	return decrypted;
}

/**
 * ceph_fscrypt_decrypt_extents: decrypt received extents in given buffer
 * @inode: inode associated with pages being decrypted
 * @page: pointer to page array
 * @off: offset into the file that the data in page[0] starts
 * @map: pointer to extent array
 * @ext_cnt: length of extent array
 *
 * Given an extent map and a page array, decrypt the received data in-place,
 * skipping holes. Returns the offset into buffer of end of last decrypted
 * block.
*/
int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page,
				 u64 off, struct ceph_sparse_extent *map,
				 u32 ext_cnt)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_client *cl = ceph_inode_to_client(inode);
	u64 objno, objoff;
	u32 xlen;
	int end = 0;
	int i;

	/* An empty extent array means there is nothing to decrypt. */
	if (!ext_cnt) {
		doutc(cl, "%p %llx.%llx empty array, ret 0\n", inode,
		      ceph_vinop(inode));
		return 0;
	}

	/* Only objoff is consumed below; objno and xlen are outputs we ignore. */
	ceph_calc_file_object_mapping(&ci->i_layout, off, map[0].len,
				      &objno, &objoff, &xlen);

	for (i = 0; i < ext_cnt; ++i) {
		struct ceph_sparse_extent *ext = map + i;
		/* Offset of this extent relative to the start of the buffer. */
		int buf_off = ext->off - objoff;
		int nbytes;

		/* Both offset and length must be crypto-block aligned. */
		if ((ext->off | ext->len) & ~CEPH_FSCRYPT_BLOCK_MASK) {
			pr_warn_client(cl,
				"%p %llx.%llx bad encrypted sparse extent "
				"idx %d off %llx len %llx\n",
				inode, ceph_vinop(inode), i, ext->off,
				ext->len);
			return -EIO;
		}

		nbytes = ceph_fscrypt_decrypt_pages(inode,
						    page + (buf_off >> PAGE_SHIFT),
						    off + buf_off, ext->len);
		doutc(cl, "%p %llx.%llx [%d] 0x%llx~0x%llx fret %d\n", inode,
		      ceph_vinop(inode), i, ext->off, ext->len, nbytes);
		if (nbytes < 0) {
			/* Keep earlier progress; report the errno only if none. */
			if (!end)
				end = nbytes;
			break;
		}
		end = buf_off + nbytes;
	}
	doutc(cl, "ret %d\n", end);
	return end;
}

/**
 * ceph_fscrypt_encrypt_pages - encrypt an array of pages
 * @inode: pointer to inode associated with these pages
 * @page: pointer to page array
 * @off: offset into the file that the data starts
 * @len: max length to encrypt
 *
 * Encrypt the cleartext in the page array one crypto block at a time,
 * in place. A trailing partial crypto block is not encrypted.
 *
 * Returns the length of the encrypted data or a negative errno.
 */
int ceph_fscrypt_encrypt_pages(struct inode *inode, struct page **page, u64 off,
			       int len)
{
	const u64 first_blk = off >> CEPH_FSCRYPT_BLOCK_SHIFT;
	/* Only whole crypto blocks can be handled: drop a trailing partial one. */
	const int nr_blks = ceph_fscrypt_blocks(off,
						len & CEPH_FSCRYPT_BLOCK_MASK);
	int encrypted = 0;
	int blk;

	for (blk = 0; blk < nr_blks; blk++) {
		/* Byte offset of this block from the start of the page array. */
		const int byte_off = blk << CEPH_FSCRYPT_BLOCK_SHIFT;
		const int err = ceph_fscrypt_encrypt_block_inplace(inode,
						page[byte_off >> PAGE_SHIFT],
						CEPH_FSCRYPT_BLOCK_SIZE,
						offset_in_page(byte_off),
						first_blk + blk);

		/* The errno is reported only when nothing was encrypted yet. */
		if (err < 0)
			return encrypted ? encrypted : err;
		encrypted += CEPH_FSCRYPT_BLOCK_SIZE;
	}
	return encrypted;
}
|
| 580 579 187 4 238 239 3 13 13 195 537 286 376 43 42 8 212 213 213 290 193 213 231 151 290 533 290 99 107 116 179 179 179 179 25 305 588 119 466 165 447 23 2 3 166 166 149 147 147 147 24 25 22 22 22 19 3 76 155 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Definitions for the 'struct ptr_ring' datastructure. * * Author: * Michael S. Tsirkin <mst@redhat.com> * * Copyright (C) 2016 Red Hat, Inc. * * This is a limited-size FIFO maintaining pointers in FIFO order, with * one CPU producing entries and another consuming entries from a FIFO. * * This implementation tries to minimize cache-contention when there is a * single producer and a single consumer CPU. 
*/ #ifndef _LINUX_PTR_RING_H #define _LINUX_PTR_RING_H 1 #ifdef __KERNEL__ #include <linux/spinlock.h> #include <linux/cache.h> #include <linux/types.h> #include <linux/compiler.h> #include <linux/slab.h> #include <linux/mm.h> #include <asm/errno.h> #endif struct ptr_ring { int producer ____cacheline_aligned_in_smp; spinlock_t producer_lock; int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */ int consumer_tail; /* next entry to invalidate */ spinlock_t consumer_lock; /* Shared consumer/producer data */ /* Read-only by both the producer and the consumer */ int size ____cacheline_aligned_in_smp; /* max entries in queue */ int batch; /* number of entries to consume in a batch */ void **queue; }; /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). * * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock: * see e.g. ptr_ring_full. */ static inline bool __ptr_ring_full(struct ptr_ring *r) { return r->queue[r->producer]; } static inline bool ptr_ring_full(struct ptr_ring *r) { bool ret; spin_lock(&r->producer_lock); ret = __ptr_ring_full(r); spin_unlock(&r->producer_lock); return ret; } static inline bool ptr_ring_full_irq(struct ptr_ring *r) { bool ret; spin_lock_irq(&r->producer_lock); ret = __ptr_ring_full(r); spin_unlock_irq(&r->producer_lock); return ret; } static inline bool ptr_ring_full_any(struct ptr_ring *r) { unsigned long flags; bool ret; spin_lock_irqsave(&r->producer_lock, flags); ret = __ptr_ring_full(r); spin_unlock_irqrestore(&r->producer_lock, flags); return ret; } static inline bool ptr_ring_full_bh(struct ptr_ring *r) { bool ret; spin_lock_bh(&r->producer_lock); ret = __ptr_ring_full(r); spin_unlock_bh(&r->producer_lock); return ret; } /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). Callers must hold producer_lock. * Callers are responsible for making sure pointer that is being queued * points to a valid data. 
*/
/*
 * Store ptr in the slot at the producer index and advance the index,
 * wrapping to 0 at r->size.
 *
 * Returns 0 on success, or -ENOSPC when the ring has zero size or the slot
 * at the producer index is still occupied.
 */
static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
	if (unlikely(!r->size) || r->queue[r->producer])
		return -ENOSPC;

	/* Make sure the pointer we are storing points to a valid data. */
	/* Pairs with the dependency ordering in __ptr_ring_consume. */
	smp_wmb();

	WRITE_ONCE(r->queue[r->producer++], ptr);
	if (unlikely(r->producer >= r->size))
		r->producer = 0;
	return 0;
}

/*
 * Note: resize (below) nests producer lock within consumer lock, so if you
 * consume in interrupt or BH context, you must disable interrupts/BH when
 * calling this.
 */
static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock(&r->producer_lock);

	return ret;
}

/* Same as ptr_ring_produce, but takes producer_lock with spin_lock_irq. */
static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock_irq(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_irq(&r->producer_lock);

	return ret;
}

/* Same as ptr_ring_produce, but takes producer_lock with spin_lock_irqsave. */
static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&r->producer_lock, flags);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_irqrestore(&r->producer_lock, flags);

	return ret;
}

/* Same as ptr_ring_produce, but takes producer_lock with spin_lock_bh. */
static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock_bh(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_bh(&r->producer_lock);

	return ret;
}

/*
 * Return the entry at consumer_head without removing it. The result is NULL
 * when the ring has zero size or when that slot holds no entry.
 */
static inline void *__ptr_ring_peek(struct ptr_ring *r)
{
	if (likely(r->size))
		return READ_ONCE(r->queue[r->consumer_head]);
	return NULL;
}

/*
 * Test ring empty status without taking any locks.
 *
 * NB: This is only safe to call if ring is never resized.
 *
 * However, if some other CPU consumes ring entries at the same time, the value
 * returned is not guaranteed to be correct.
*
 * In this case - to avoid incorrectly detecting the ring
 * as empty - the CPU consuming the ring entries is responsible
 * for either consuming all ring entries until the ring is empty,
 * or synchronizing with some other CPU and causing it to
 * re-test __ptr_ring_empty and/or consume the ring entries
 * after the synchronization point.
 *
 * Note: callers invoking this in a loop must use a compiler barrier,
 * for example cpu_relax().
 */
static inline bool __ptr_ring_empty(struct ptr_ring *r)
{
	if (likely(r->size))
		return !r->queue[READ_ONCE(r->consumer_head)];
	/* A zero-sized ring is always reported as empty. */
	return true;
}

/* Emptiness test under consumer_lock (plain spin_lock). */
static inline bool ptr_ring_empty(struct ptr_ring *r)
{
	bool ret;

	spin_lock(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock(&r->consumer_lock);

	return ret;
}

/* Emptiness test under consumer_lock, taken with spin_lock_irq. */
static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
{
	bool ret;

	spin_lock_irq(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock_irq(&r->consumer_lock);

	return ret;
}

/* Emptiness test under consumer_lock, taken with spin_lock_irqsave. */
static inline bool ptr_ring_empty_any(struct ptr_ring *r)
{
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ret = __ptr_ring_empty(r);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ret;
}

/* Emptiness test under consumer_lock, taken with spin_lock_bh. */
static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
{
	bool ret;

	spin_lock_bh(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock_bh(&r->consumer_lock);

	return ret;
}

/* Zero entries from tail to specified head.
 * NB: if consumer_head can be >= r->size need to fixup tail later.
 */
static inline void __ptr_ring_zero_tail(struct ptr_ring *r, int consumer_head)
{
	int head = consumer_head;

	/* Zero out entries in the reverse order: this way we touch the
	 * cache line that producer might currently be reading the last;
	 * producer won't make progress and touch other cache lines
	 * besides the first one until we write out all entries.
	 */
	while (likely(head > r->consumer_tail))
		r->queue[--head] = NULL;

	r->consumer_tail = consumer_head;
}

/* Must only be called after __ptr_ring_peek returned !NULL */
static inline void __ptr_ring_discard_one(struct ptr_ring *r)
{
	/* Fundamentally, what we want to do is update consumer
	 * index and zero out the entry so producer can reuse it.
	 * Doing it naively at each consume would be as simple as:
	 *       consumer = r->consumer;
	 *       r->queue[consumer++] = NULL;
	 *       if (unlikely(consumer >= r->size))
	 *               consumer = 0;
	 *       r->consumer = consumer;
	 * but that is suboptimal when the ring is full as producer is writing
	 * out new entries in the same cache line.  Defer these updates until a
	 * batch of entries has been consumed.
	 */
	/* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
	 * to work correctly.
	 */
	int consumer_head = r->consumer_head + 1;

	/* Once we have processed enough entries invalidate them in
	 * the ring all at once so producer can reuse their space in the ring.
	 * We also do this when we reach end of the ring - not mandatory
	 * but helps keep the implementation simple.
	 */
	if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
		     consumer_head >= r->size))
		__ptr_ring_zero_tail(r, consumer_head);

	/* Wrap both indices once the head has reached the end of the ring. */
	if (unlikely(consumer_head >= r->size)) {
		consumer_head = 0;
		r->consumer_tail = 0;
	}
	/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
	WRITE_ONCE(r->consumer_head, consumer_head);
}

/*
 * Remove and return the entry at consumer_head, or return NULL when
 * __ptr_ring_peek finds none.
 */
static inline void *__ptr_ring_consume(struct ptr_ring *r)
{
	void *ptr;

	/* The READ_ONCE in __ptr_ring_peek guarantees that anyone
	 * accessing data through the pointer is up to date. Pairs
	 * with smp_wmb in __ptr_ring_produce.
	 */
	ptr = __ptr_ring_peek(r);
	if (ptr)
		__ptr_ring_discard_one(r);

	return ptr;
}

/*
 * Consume up to n entries into array, stopping at the first NULL returned by
 * __ptr_ring_consume. Returns the number of entries stored.
 */
static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
					     void **array, int n)
{
	void *ptr;
	int i;

	for (i = 0; i < n; i++) {
		ptr = __ptr_ring_consume(r);
		if (!ptr)
			break;
		array[i] = ptr;
	}

	return i;
}

/*
 * Note: resize (below) nests producer lock within consumer lock, so if you
 * call this in interrupt or BH context, you must disable interrupts/BH when
 * producing.
 */
static inline void *ptr_ring_consume(struct ptr_ring *r)
{
	void *ptr;

	spin_lock(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock(&r->consumer_lock);

	return ptr;
}

/* Same as ptr_ring_consume, but takes consumer_lock with spin_lock_irq. */
static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
{
	void *ptr;

	spin_lock_irq(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock_irq(&r->consumer_lock);

	return ptr;
}

/* Same as ptr_ring_consume, but takes consumer_lock with spin_lock_irqsave. */
static inline void *ptr_ring_consume_any(struct ptr_ring *r)
{
	unsigned long flags;
	void *ptr;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ptr = __ptr_ring_consume(r);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ptr;
}

/* Same as ptr_ring_consume, but takes consumer_lock with spin_lock_bh. */
static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
{
	void *ptr;

	spin_lock_bh(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock_bh(&r->consumer_lock);

	return ptr;
}

/* __ptr_ring_consume_batched under consumer_lock (plain spin_lock). */
static inline int ptr_ring_consume_batched(struct ptr_ring *r,
					   void **array, int n)
{
	int ret;

	spin_lock(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock(&r->consumer_lock);

	return ret;
}

/* __ptr_ring_consume_batched under consumer_lock, taken with spin_lock_irq. */
static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
					       void **array, int n)
{
	int ret;

	spin_lock_irq(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_irq(&r->consumer_lock);

	return ret;
}

/* __ptr_ring_consume_batched under consumer_lock, taken with spin_lock_irqsave. */
static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
					       void **array, int n)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ret;
}

/* __ptr_ring_consume_batched under consumer_lock, taken with spin_lock_bh. */
static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
					      void **array, int n)
{
	int ret;

	spin_lock_bh(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_bh(&r->consumer_lock);

	return ret;
}

/* Cast to structure type and call a function without discarding from FIFO.
 * Function must return a value.
 * Callers must take consumer_lock.
 */
#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r)))

/*
 * The variants below take consumer_lock themselves (plain, _irq, _bh and
 * irqsave respectively) around __PTR_RING_PEEK_CALL and evaluate to the value
 * returned by f.
 */
#define PTR_RING_PEEK_CALL(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_irq(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irq(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_bh(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_bh(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	unsigned long __PTR_RING_PEEK_CALL_f;\
	\
	spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v; \
})

/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See
 * documentation for vmalloc for which of them are legal.
*/
/*
 * Allocate a zeroed array of 'size' pointers for use as a ring queue.
 * Returns NULL when size exceeds KMALLOC_MAX_SIZE / sizeof(void *), otherwise
 * whatever kvmalloc_array_noprof() returns.
 */
static inline void **__ptr_ring_init_queue_alloc_noprof(unsigned int size, gfp_t gfp)
{
	if (size > KMALLOC_MAX_SIZE / sizeof(void *))
		return NULL;
	return kvmalloc_array_noprof(size, sizeof(void *), gfp | __GFP_ZERO);
}

/* Record the ring size and derive the consumer batch size from it. */
static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
{
	r->size = size;
	r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
	/* We need to set batch at least to 1 to make logic
	 * in __ptr_ring_discard_one work correctly.
	 * Batching too much (because ring is small) would cause a lot of
	 * burstiness. Needs tuning, for now disable batching.
	 */
	if (r->batch > r->size / 2 || !r->batch)
		r->batch = 1;
}

/*
 * Allocate the queue for 'size' entries, reset all indices to 0 and
 * initialise both locks. Returns 0 on success or -ENOMEM when the queue
 * could not be allocated.
 */
static inline int ptr_ring_init_noprof(struct ptr_ring *r, int size, gfp_t gfp)
{
	r->queue = __ptr_ring_init_queue_alloc_noprof(size, gfp);
	if (!r->queue)
		return -ENOMEM;

	__ptr_ring_set_size(r, size);
	r->producer = r->consumer_head = r->consumer_tail = 0;
	spin_lock_init(&r->producer_lock);
	spin_lock_init(&r->consumer_lock);

	return 0;
}
#define ptr_ring_init(...)	alloc_hooks(ptr_ring_init_noprof(__VA_ARGS__))

/*
 * Return entries into ring. Destroy entries that don't fit.
 *
 * Note: this is expected to be a rare slow path operation.
 *
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 *
 * Note: destroy is called without a NULL check here (unlike in
 * __ptr_ring_swap_queue), so it must be a valid function whenever entries
 * may be left over.
 */
static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
				      void (*destroy)(void *))
{
	unsigned long flags;

	spin_lock_irqsave(&r->consumer_lock, flags);
	spin_lock(&r->producer_lock);

	/* Zero-sized ring: nothing can be put back, destroy the whole batch. */
	if (!r->size)
		goto done;

	/*
	 * Clean out buffered entries (for simplicity). This way following code
	 * can test entries for NULL and if not assume they are valid.
	 */
	__ptr_ring_zero_tail(r, r->consumer_head);

	/*
	 * Go over entries in batch, start moving head back and copy entries.
	 * Stop when we run into previously unconsumed entries.
	 */
	while (n) {
		int head = r->consumer_head - 1;
		if (head < 0)
			head = r->size - 1;
		if (r->queue[head]) {
			/* This batch entry will have to be destroyed. */
			goto done;
		}
		r->queue[head] = batch[--n];
		r->consumer_tail = head;
		/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
		WRITE_ONCE(r->consumer_head, head);
	}

done:
	/* Destroy all entries left in the batch. */
	while (n)
		destroy(batch[--n]);
	spin_unlock(&r->producer_lock);
	spin_unlock_irqrestore(&r->consumer_lock, flags);
}

/*
 * Drain every entry of r into 'queue' and install it as the ring's queue of
 * 'size' entries. Entries that no longer fit are passed to destroy when it is
 * non-NULL. Returns the previous queue array. The gfp argument is not used in
 * this function. No locks are taken here; the resize callers in this file
 * hold both ring locks around the call.
 */
static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
					   int size, gfp_t gfp,
					   void (*destroy)(void *))
{
	int producer = 0;
	void **old;
	void *ptr;

	while ((ptr = __ptr_ring_consume(r)))
		if (producer < size)
			queue[producer++] = ptr;
		else if (destroy)
			destroy(ptr);

	/* New queue completely filled: the producer index wraps to 0. */
	if (producer >= size)
		producer = 0;
	__ptr_ring_set_size(r, size);
	r->producer = producer;
	r->consumer_head = 0;
	r->consumer_tail = 0;
	old = r->queue;
	r->queue = queue;

	return old;
}

/*
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 *
 * The new queue is allocated before the locks are taken and the old one is
 * freed after they are dropped. Returns 0 on success or -ENOMEM when the new
 * queue could not be allocated.
 */
static inline int ptr_ring_resize_noprof(struct ptr_ring *r, int size, gfp_t gfp,
				  void (*destroy)(void *))
{
	unsigned long flags;
	void **queue = __ptr_ring_init_queue_alloc_noprof(size, gfp);
	void **old;

	if (!queue)
		return -ENOMEM;

	spin_lock_irqsave(&(r)->consumer_lock, flags);
	spin_lock(&(r)->producer_lock);

	old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);

	spin_unlock(&(r)->producer_lock);
	spin_unlock_irqrestore(&(r)->consumer_lock, flags);

	kvfree(old);

	return 0;
}
#define ptr_ring_resize(...)	alloc_hooks(ptr_ring_resize_noprof(__VA_ARGS__))

/*
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in BH context, you must
 * disable BH when doing so.
*/ static inline int ptr_ring_resize_multiple_bh_noprof(struct ptr_ring **rings, unsigned int nrings, int size, gfp_t gfp, void (*destroy)(void *)) { void ***queues; int i; queues = kmalloc_array_noprof(nrings, sizeof(*queues), gfp); if (!queues) goto noqueues; for (i = 0; i < nrings; ++i) { queues[i] = __ptr_ring_init_queue_alloc_noprof(size, gfp); if (!queues[i]) goto nomem; } for (i = 0; i < nrings; ++i) { spin_lock_bh(&(rings[i])->consumer_lock); spin_lock(&(rings[i])->producer_lock); queues[i] = __ptr_ring_swap_queue(rings[i], queues[i], size, gfp, destroy); spin_unlock(&(rings[i])->producer_lock); spin_unlock_bh(&(rings[i])->consumer_lock); } for (i = 0; i < nrings; ++i) kvfree(queues[i]); kfree(queues); return 0; nomem: while (--i >= 0) kvfree(queues[i]); kfree(queues); noqueues: return -ENOMEM; } #define ptr_ring_resize_multiple_bh(...) \ alloc_hooks(ptr_ring_resize_multiple_bh_noprof(__VA_ARGS__)) static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) { void *ptr; if (destroy) while ((ptr = ptr_ring_consume(r))) destroy(ptr); kvfree(r->queue); } #endif /* _LINUX_PTR_RING_H */ |
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 | /* 
SPDX-License-Identifier: GPL-2.0 */ #ifndef BTRFS_ACCESSORS_H #define BTRFS_ACCESSORS_H #include <linux/unaligned.h> #include <linux/stddef.h> #include <linux/types.h> #include <linux/align.h> #include <linux/build_bug.h> #include <linux/compiler.h> #include <linux/string.h> #include <linux/mm.h> #include <uapi/linux/btrfs_tree.h> #include "extent_io.h" struct extent_buffer; /* * Some macros to generate set/get functions for the struct fields. This * assumes there is a lefoo_to_cpu for every type, so lets make a simple one * for u8: */ #define le8_to_cpu(v) (v) #define cpu_to_le8(v) (v) #define __le8 u8 static inline u8 get_unaligned_le8(const void *p) { return *(const u8 *)p; } static inline void put_unaligned_le8(u8 val, void *p) { *(u8 *)p = val; } #define read_eb_member(eb, ptr, type, member, result) (\ read_extent_buffer(eb, (char *)(result), \ ((unsigned long)(ptr)) + \ offsetof(type, member), \ sizeof_field(type, member))) #define write_eb_member(eb, ptr, type, member, source) ( \ write_extent_buffer(eb, (const char *)(source), \ ((unsigned long)(ptr)) + \ offsetof(type, member), \ sizeof_field(type, member))) #define DECLARE_BTRFS_SETGET_BITS(bits) \ u##bits btrfs_get_##bits(const struct extent_buffer *eb, \ const void *ptr, unsigned long off); \ void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \ unsigned long off, u##bits val); DECLARE_BTRFS_SETGET_BITS(8) DECLARE_BTRFS_SETGET_BITS(16) DECLARE_BTRFS_SETGET_BITS(32) DECLARE_BTRFS_SETGET_BITS(64) #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(const struct extent_buffer *eb, \ const type *s) \ { \ static_assert(sizeof(u##bits) == sizeof_field(type, member)); \ return btrfs_get_##bits(eb, s, offsetof(type, member)); \ } \ static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \ u##bits val) \ { \ static_assert(sizeof(u##bits) == sizeof_field(type, member)); \ btrfs_set_##bits(eb, s, offsetof(type, member), val); \ } #define 
BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(const struct extent_buffer *eb) \ { \ const type *p = folio_address(eb->folios[0]) + \ offset_in_page(eb->start); \ return get_unaligned_le##bits(&p->member); \ } \ static inline void btrfs_set_##name(const struct extent_buffer *eb, \ u##bits val) \ { \ type *p = folio_address(eb->folios[0]) + offset_in_page(eb->start); \ put_unaligned_le##bits(val, &p->member); \ } #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(const type *s) \ { \ return get_unaligned_le##bits(&s->member); \ } \ static inline void btrfs_set_##name(type *s, u##bits val) \ { \ put_unaligned_le##bits(val, &s->member); \ } static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb, struct btrfs_dev_item *s) { static_assert(sizeof(u64) == sizeof_field(struct btrfs_dev_item, total_bytes)); return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes)); } static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb, struct btrfs_dev_item *s, u64 val) { static_assert(sizeof(u64) == sizeof_field(struct btrfs_dev_item, total_bytes)); WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize)); btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val); } BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64); BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64); BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32); BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); BTRFS_SETGET_FUNCS(device_start_offset, struct btrfs_dev_item, start_offset, 64); BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32); BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8); 
BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8); BTRFS_SETGET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64); BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64); BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item, total_bytes, 64); BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item, bytes_used, 64); BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item, io_align, 32); BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item, io_width, 32); BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item, sector_size, 32); BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64); BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item, dev_group, 32); BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item, seek_speed, 8); BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item, bandwidth, 8); BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item, generation, 64); static inline unsigned long btrfs_device_uuid(struct btrfs_dev_item *d) { return (unsigned long)d + offsetof(struct btrfs_dev_item, uuid); } static inline unsigned long btrfs_device_fsid(struct btrfs_dev_item *d) { return (unsigned long)d + offsetof(struct btrfs_dev_item, fsid); } BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32); BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32); BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32); BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64); BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct 
btrfs_chunk, sub_stripes, 16); BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s) { return (char *)s + offsetof(struct btrfs_stripe, dev_uuid); } BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64); BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64); BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk, io_align, 32); BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk, io_width, 32); BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk, sector_size, 32); BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64); BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16); BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64); BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64); static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c, int nr) { unsigned long offset = (unsigned long)c; offset += offsetof(struct btrfs_chunk, stripe); offset += nr * sizeof(struct btrfs_stripe); return (struct btrfs_stripe *)offset; } static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr) { return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr)); } static inline u64 btrfs_stripe_offset_nr(const struct extent_buffer *eb, struct btrfs_chunk *c, int nr) { return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); } static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb, struct btrfs_chunk *c, int nr, u64 val) { btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val); } static inline u64 btrfs_stripe_devid_nr(const struct extent_buffer *eb, 
struct btrfs_chunk *c, int nr) { return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); } static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb, struct btrfs_chunk *c, int nr, u64 val) { btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val); } /* struct btrfs_block_group_item */ BTRFS_SETGET_STACK_FUNCS(stack_block_group_used, struct btrfs_block_group_item, used, 64); BTRFS_SETGET_FUNCS(block_group_used, struct btrfs_block_group_item, used, 64); BTRFS_SETGET_STACK_FUNCS(stack_block_group_chunk_objectid, struct btrfs_block_group_item, chunk_objectid, 64); BTRFS_SETGET_FUNCS(block_group_chunk_objectid, struct btrfs_block_group_item, chunk_objectid, 64); BTRFS_SETGET_FUNCS(block_group_flags, struct btrfs_block_group_item, flags, 64); BTRFS_SETGET_STACK_FUNCS(stack_block_group_flags, struct btrfs_block_group_item, flags, 64); /* struct btrfs_free_space_info */ BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info, extent_count, 32); BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32); /* struct btrfs_inode_ref */ BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); BTRFS_SETGET_STACK_FUNCS(stack_inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); BTRFS_SETGET_STACK_FUNCS(stack_inode_ref_index, struct btrfs_inode_ref, index, 64); /* struct btrfs_inode_extref */ BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref, parent_objectid, 64); BTRFS_SETGET_FUNCS(inode_extref_name_len, struct btrfs_inode_extref, name_len, 16); BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64); /* struct btrfs_inode_item */ BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64); BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64); BTRFS_SETGET_FUNCS(inode_size, struct 
btrfs_inode_item, size, 64); BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64); BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64); BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item, generation, 64); BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item, sequence, 64); BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item, transid, 64); BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64); BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item, nbytes, 64); BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item, block_group, 64); BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32); BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32); BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32); BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64); BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64); BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32); BTRFS_SETGET_FUNCS(raid_stride_devid, struct btrfs_raid_stride, devid, 64); BTRFS_SETGET_FUNCS(raid_stride_physical, struct 
btrfs_raid_stride, physical, 64); BTRFS_SETGET_STACK_FUNCS(stack_raid_stride_devid, struct btrfs_raid_stride, devid, 64); BTRFS_SETGET_STACK_FUNCS(stack_raid_stride_physical, struct btrfs_raid_stride, physical, 64); /* struct btrfs_dev_extent */ BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, chunk_tree, 64); BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent, chunk_objectid, 64); BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent, chunk_offset, 64); BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); BTRFS_SETGET_STACK_FUNCS(stack_dev_extent_chunk_tree, struct btrfs_dev_extent, chunk_tree, 64); BTRFS_SETGET_STACK_FUNCS(stack_dev_extent_chunk_objectid, struct btrfs_dev_extent, chunk_objectid, 64); BTRFS_SETGET_STACK_FUNCS(stack_dev_extent_chunk_offset, struct btrfs_dev_extent, chunk_offset, 64); BTRFS_SETGET_STACK_FUNCS(stack_dev_extent_length, struct btrfs_dev_extent, length, 64); BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64); BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item, generation, 64); BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64); BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8); static inline void btrfs_tree_block_key(const struct extent_buffer *eb, struct btrfs_tree_block_info *item, struct btrfs_disk_key *key) { read_eb_member(eb, item, struct btrfs_tree_block_info, key, key); } static inline void btrfs_set_tree_block_key(const struct extent_buffer *eb, struct btrfs_tree_block_info *item, const struct btrfs_disk_key *key) { write_eb_member(eb, item, struct btrfs_tree_block_info, key, key); } BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref, root, 64); BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref, objectid, 64); BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref, offset, 64); BTRFS_SETGET_FUNCS(extent_data_ref_count, 
struct btrfs_extent_data_ref, count, 32); BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref, count, 32); BTRFS_SETGET_FUNCS(extent_owner_ref_root_id, struct btrfs_extent_owner_ref, root_id, 64); BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref, type, 8); BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref, offset, 64); static inline u32 btrfs_extent_inline_ref_size(int type) { if (type == BTRFS_TREE_BLOCK_REF_KEY || type == BTRFS_SHARED_BLOCK_REF_KEY) return sizeof(struct btrfs_extent_inline_ref); if (type == BTRFS_SHARED_DATA_REF_KEY) return sizeof(struct btrfs_shared_data_ref) + sizeof(struct btrfs_extent_inline_ref); if (type == BTRFS_EXTENT_DATA_REF_KEY) return sizeof(struct btrfs_extent_data_ref) + offsetof(struct btrfs_extent_inline_ref, offset); if (type == BTRFS_EXTENT_OWNER_REF_KEY) return sizeof(struct btrfs_extent_inline_ref); return 0; } /* struct btrfs_node */ BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); BTRFS_SETGET_STACK_FUNCS(stack_key_blockptr, struct btrfs_key_ptr, blockptr, 64); BTRFS_SETGET_STACK_FUNCS(stack_key_generation, struct btrfs_key_ptr, generation, 64); static inline u64 btrfs_node_blockptr(const struct extent_buffer *eb, int nr) { unsigned long ptr; ptr = offsetof(struct btrfs_node, ptrs) + sizeof(struct btrfs_key_ptr) * nr; return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr); } static inline void btrfs_set_node_blockptr(const struct extent_buffer *eb, int nr, u64 val) { unsigned long ptr; ptr = offsetof(struct btrfs_node, ptrs) + sizeof(struct btrfs_key_ptr) * nr; btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); } static inline u64 btrfs_node_ptr_generation(const struct extent_buffer *eb, int nr) { unsigned long ptr; ptr = offsetof(struct btrfs_node, ptrs) + sizeof(struct btrfs_key_ptr) * nr; return btrfs_key_generation(eb, (struct btrfs_key_ptr 
*)ptr); } static inline void btrfs_set_node_ptr_generation(const struct extent_buffer *eb, int nr, u64 val) { unsigned long ptr; ptr = offsetof(struct btrfs_node, ptrs) + sizeof(struct btrfs_key_ptr) * nr; btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val); } static inline unsigned long btrfs_node_key_ptr_offset(const struct extent_buffer *eb, int nr) { return offsetof(struct btrfs_node, ptrs) + sizeof(struct btrfs_key_ptr) * nr; } void btrfs_node_key(const struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr); static inline void btrfs_set_node_key(const struct extent_buffer *eb, const struct btrfs_disk_key *disk_key, int nr) { unsigned long ptr; ptr = btrfs_node_key_ptr_offset(eb, nr); write_eb_member(eb, (struct btrfs_key_ptr *)ptr, struct btrfs_key_ptr, key, disk_key); } /* struct btrfs_item */ BTRFS_SETGET_FUNCS(raw_item_offset, struct btrfs_item, offset, 32); BTRFS_SETGET_FUNCS(raw_item_size, struct btrfs_item, size, 32); BTRFS_SETGET_STACK_FUNCS(stack_item_offset, struct btrfs_item, offset, 32); BTRFS_SETGET_STACK_FUNCS(stack_item_size, struct btrfs_item, size, 32); static inline unsigned long btrfs_item_nr_offset(const struct extent_buffer *eb, int nr) { return offsetof(struct btrfs_leaf, items) + sizeof(struct btrfs_item) * nr; } static inline struct btrfs_item *btrfs_item_nr(const struct extent_buffer *eb, int nr) { return (struct btrfs_item *)btrfs_item_nr_offset(eb, nr); } #define BTRFS_ITEM_SETGET_FUNCS(member) \ static inline u32 btrfs_item_##member(const struct extent_buffer *eb, int slot) \ { \ return btrfs_raw_item_##member(eb, btrfs_item_nr(eb, slot)); \ } \ static inline void btrfs_set_item_##member(const struct extent_buffer *eb, \ int slot, u32 val) \ { \ btrfs_set_raw_item_##member(eb, btrfs_item_nr(eb, slot), val); \ } BTRFS_ITEM_SETGET_FUNCS(offset) BTRFS_ITEM_SETGET_FUNCS(size); static inline u32 btrfs_item_data_end(const struct extent_buffer *eb, int nr) { return btrfs_item_offset(eb, nr) + btrfs_item_size(eb, nr); 
} static inline void btrfs_item_key(const struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { struct btrfs_item *item = btrfs_item_nr(eb, nr); read_eb_member(eb, item, struct btrfs_item, key, disk_key); } static inline void btrfs_set_item_key(struct extent_buffer *eb, const struct btrfs_disk_key *disk_key, int nr) { struct btrfs_item *item = btrfs_item_nr(eb, nr); write_eb_member(eb, item, struct btrfs_item, key, disk_key); } BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64); /* struct btrfs_root_ref */ BTRFS_SETGET_FUNCS(root_ref_dirid, struct btrfs_root_ref, dirid, 64); BTRFS_SETGET_FUNCS(root_ref_sequence, struct btrfs_root_ref, sequence, 64); BTRFS_SETGET_FUNCS(root_ref_name_len, struct btrfs_root_ref, name_len, 16); BTRFS_SETGET_STACK_FUNCS(stack_root_ref_dirid, struct btrfs_root_ref, dirid, 64); BTRFS_SETGET_STACK_FUNCS(stack_root_ref_sequence, struct btrfs_root_ref, sequence, 64); BTRFS_SETGET_STACK_FUNCS(stack_root_ref_name_len, struct btrfs_root_ref, name_len, 16); /* struct btrfs_dir_item */ BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); BTRFS_SETGET_FUNCS(dir_flags, struct btrfs_dir_item, type, 8); BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); BTRFS_SETGET_STACK_FUNCS(stack_dir_flags, struct btrfs_dir_item, type, 8); BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item, data_len, 16); BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, name_len, 16); BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, transid, 64); static inline u8 btrfs_dir_ftype(const struct extent_buffer *eb, const struct btrfs_dir_item *item) { return btrfs_dir_flags_to_ftype(btrfs_dir_flags(eb, item)); } static inline u8 btrfs_stack_dir_ftype(const struct btrfs_dir_item *item) { return btrfs_dir_flags_to_ftype(btrfs_stack_dir_flags(item)); } static inline void btrfs_dir_item_key(const 
struct extent_buffer *eb, const struct btrfs_dir_item *item, struct btrfs_disk_key *key) { read_eb_member(eb, item, struct btrfs_dir_item, location, key); } static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, struct btrfs_dir_item *item, const struct btrfs_disk_key *key) { write_eb_member(eb, item, struct btrfs_dir_item, location, key); } BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header, num_entries, 64); BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header, num_bitmaps, 64); BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header, generation, 64); static inline void btrfs_free_space_key(const struct extent_buffer *eb, const struct btrfs_free_space_header *h, struct btrfs_disk_key *key) { read_eb_member(eb, h, struct btrfs_free_space_header, location, key); } static inline void btrfs_set_free_space_key(struct extent_buffer *eb, struct btrfs_free_space_header *h, const struct btrfs_disk_key *key) { write_eb_member(eb, h, struct btrfs_free_space_header, location, key); } /* struct btrfs_disk_key */ BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, objectid, 64); BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); #ifdef __LITTLE_ENDIAN /* * Optimized helpers for little-endian architectures where CPU and on-disk * structures have the same endianness and we can skip conversions. 
*/ static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu_key, const struct btrfs_disk_key *disk_key) { memcpy(cpu_key, disk_key, sizeof(struct btrfs_key)); } static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk_key, const struct btrfs_key *cpu_key) { memcpy(disk_key, cpu_key, sizeof(struct btrfs_key)); } static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb, struct btrfs_key *cpu_key, int nr) { struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key; btrfs_node_key(eb, disk_key, nr); } static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb, struct btrfs_key *cpu_key, int nr) { struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key; btrfs_item_key(eb, disk_key, nr); } static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb, const struct btrfs_dir_item *item, struct btrfs_key *cpu_key) { struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key; btrfs_dir_item_key(eb, item, disk_key); } #else static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, const struct btrfs_disk_key *disk) { cpu->offset = le64_to_cpu(disk->offset); cpu->type = disk->type; cpu->objectid = le64_to_cpu(disk->objectid); } static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, const struct btrfs_key *cpu) { disk->offset = cpu_to_le64(cpu->offset); disk->type = cpu->type; disk->objectid = cpu_to_le64(cpu->objectid); } static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb, struct btrfs_key *key, int nr) { struct btrfs_disk_key disk_key; btrfs_node_key(eb, &disk_key, nr); btrfs_disk_key_to_cpu(key, &disk_key); } static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb, struct btrfs_key *key, int nr) { struct btrfs_disk_key disk_key; btrfs_item_key(eb, &disk_key, nr); btrfs_disk_key_to_cpu(key, &disk_key); } static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb, const struct btrfs_dir_item *item, struct btrfs_key 
*key) { struct btrfs_disk_key disk_key; btrfs_dir_item_key(eb, item, &disk_key); btrfs_disk_key_to_cpu(key, &disk_key); } #endif /* struct btrfs_header */ BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64); BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, generation, 64); BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64); BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header, generation, 64); BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64); BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, nritems, 32); BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64); static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag) { return (btrfs_header_flags(eb) & flag) == flag; } static inline void btrfs_set_header_flag(struct extent_buffer *eb, u64 flag) { u64 flags = btrfs_header_flags(eb); btrfs_set_header_flags(eb, flags | flag); } static inline void btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag) { u64 flags = btrfs_header_flags(eb); btrfs_set_header_flags(eb, flags & ~flag); } static inline int btrfs_header_backref_rev(const struct extent_buffer *eb) { u64 flags = btrfs_header_flags(eb); return flags >> BTRFS_BACKREF_REV_SHIFT; } static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb, int rev) { u64 flags = btrfs_header_flags(eb); flags &= ~BTRFS_BACKREF_REV_MASK; flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT; btrfs_set_header_flags(eb, flags); } static inline int btrfs_is_leaf(const struct extent_buffer *eb) { return btrfs_header_level(eb) == 0; } /* struct btrfs_root_item */ BTRFS_SETGET_FUNCS(disk_root_generation, struct btrfs_root_item, generation, 64); 
/*
 * struct btrfs_root_item accessors. The disk_root_* names are declared with
 * BTRFS_SETGET_FUNCS, the plain root_* names with BTRFS_SETGET_STACK_FUNCS.
 * NOTE(review): the macros are defined elsewhere; the last argument looks
 * like the field width in bits - confirm against the macro definition.
 */
BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32);
BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64);
BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8);

BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item, generation, 64);
BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64);
BTRFS_SETGET_STACK_FUNCS(root_drop_level, struct btrfs_root_item, drop_level, 8);
BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8);
BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64);
BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32);
BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 64);
/* The next two accessor names differ from the member names they wrap. */
BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64);
BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, last_snapshot, 64);
BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item, generation_v2, 64);
BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item, ctransid, 64);
BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item, otransid, 64);
BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item, stransid, 64);
BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item, rtransid, 64);

/* struct btrfs_root_backup */
/* Each backed-up root gets three accessors: location, generation and level. */
BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, tree_root, 64);
BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup, tree_root_gen, 64);
BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup, tree_root_level, 8);
BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup, chunk_root, 64);
BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup, chunk_root_gen, 64);
BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup, chunk_root_level, 8);
/* struct btrfs_root_backup accessors for the extent, fs, dev and csum roots. */
BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup, extent_root, 64);
BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup, extent_root_gen, 64);
BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup, extent_root_level, 8);
BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup, fs_root, 64);
BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup, fs_root_gen, 64);
BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup, fs_root_level, 8);
BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup, dev_root, 64);
BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup, dev_root_gen, 64);
BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup, dev_root_level, 8);
BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup, csum_root, 64);
BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup, csum_root_gen, 64);
BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup, csum_root_level, 8);
BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup, total_bytes, 64);
BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup, bytes_used, 64);
BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup, num_devices, 64);

/* struct btrfs_balance_item */
BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);

/* Copy the data member of balance item @bi out of @eb into @ba. */
static inline void btrfs_balance_data(const struct extent_buffer *eb,
				      const struct btrfs_balance_item *bi,
				      struct btrfs_disk_balance_args *ba)
{
	read_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}

/* Copy @ba into the data member of balance item @bi inside @eb. */
static inline void btrfs_set_balance_data(struct extent_buffer *eb,
					  struct btrfs_balance_item *bi,
					  const struct btrfs_disk_balance_args *ba)
{
	write_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}

/* Copy the meta member of balance item @bi out of @eb into @ba. */
static inline void btrfs_balance_meta(const struct extent_buffer *eb,
				      const struct btrfs_balance_item *bi,
				      struct btrfs_disk_balance_args *ba)
{
	read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}

/* Copy @ba into the meta member of balance item @bi inside @eb. */
static inline void btrfs_set_balance_meta(struct extent_buffer *eb,
					  struct btrfs_balance_item *bi,
					  const struct btrfs_disk_balance_args *ba)
{
	write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}

/* Copy the sys member of balance item @bi out of @eb into @ba. */
static inline void btrfs_balance_sys(const struct extent_buffer *eb,
				     const struct btrfs_balance_item *bi,
				     struct btrfs_disk_balance_args *ba)
{
	read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}

/* Copy @ba into the sys member of balance item @bi inside @eb. */
static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
					 struct btrfs_balance_item *bi,
					 const struct btrfs_disk_balance_args *ba)
{
	write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}

/* struct btrfs_super_block */
BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, generation, 64);
BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
/* Accessor name is shortened: it wraps the sys_chunk_array_size member. */
BTRFS_SETGET_STACK_FUNCS(super_sys_array_size, struct btrfs_super_block, sys_chunk_array_size, 32);
BTRFS_SETGET_STACK_FUNCS(super_chunk_root_generation, struct btrfs_super_block, chunk_root_generation, 64);
BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, root_level, 8);
BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, chunk_root, 64);
BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, chunk_root_level, 8);
BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block, log_root, 64);
BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block, log_root_level, 8);
BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, total_bytes, 64);
BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, bytes_used, 64);
BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, sectorsize, 32);
/* struct btrfs_super_block accessors, continued. */
BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, nodesize, 32);
BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, stripesize, 32);
/* Accessor name is shortened: it wraps the root_dir_objectid member. */
BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, root_dir_objectid, 64);
BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block, num_devices, 64);
BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block, compat_flags, 64);
BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block, compat_ro_flags, 64);
BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block, incompat_flags, 64);
BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, csum_type, 16);
BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, cache_generation, 64);
BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block, uuid_tree_generation, 64);
BTRFS_SETGET_STACK_FUNCS(super_nr_global_roots, struct btrfs_super_block, nr_global_roots, 64);

/* struct btrfs_file_extent_item */
/* Only type .. compression get stack variants; encryption and other_encoding do not. */
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item, type, 8);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, struct btrfs_file_extent_item, disk_bytenr, 64);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, struct btrfs_file_extent_item, offset, 64);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, struct btrfs_file_extent_item, generation, 64);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, struct btrfs_file_extent_item, num_bytes, 64);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_ram_bytes, struct btrfs_file_extent_item, ram_bytes, 64);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_num_bytes, struct btrfs_file_extent_item, disk_num_bytes, 64);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, struct btrfs_file_extent_item, compression, 8);

BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, disk_bytenr, 64);
BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, generation, 64);
BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item, disk_num_bytes, 64);
BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, offset, 64);
BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, num_bytes, 64);
BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item, ram_bytes, 64);
BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item, compression, 8);
BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item, encryption, 8);
BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, other_encoding, 16);

/* btrfs_qgroup_status_item */
BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item, generation, 64);
BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item, version, 64);
BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item, flags, 64);
BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item, rescan, 64);
BTRFS_SETGET_FUNCS(qgroup_status_enable_gen, struct btrfs_qgroup_status_item, enable_gen, 64);

/* btrfs_qgroup_info_item */
BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item, generation, 64);
BTRFS_SETGET_FUNCS(qgroup_info_rfer, struct btrfs_qgroup_info_item, rfer, 64);
BTRFS_SETGET_FUNCS(qgroup_info_rfer_cmpr, struct btrfs_qgroup_info_item, rfer_cmpr, 64);
BTRFS_SETGET_FUNCS(qgroup_info_excl, struct btrfs_qgroup_info_item, excl, 64);
BTRFS_SETGET_FUNCS(qgroup_info_excl_cmpr, struct btrfs_qgroup_info_item, excl_cmpr, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation, struct btrfs_qgroup_info_item, generation, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer, struct btrfs_qgroup_info_item, rfer, 64);
/* struct btrfs_qgroup_info_item stack accessors, continued. */
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer_cmpr, struct btrfs_qgroup_info_item, rfer_cmpr, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl, struct btrfs_qgroup_info_item, excl, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl_cmpr, struct btrfs_qgroup_info_item, excl_cmpr, 64);

/* btrfs_qgroup_limit_item */
BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item, flags, 64);
BTRFS_SETGET_FUNCS(qgroup_limit_max_rfer, struct btrfs_qgroup_limit_item, max_rfer, 64);
BTRFS_SETGET_FUNCS(qgroup_limit_max_excl, struct btrfs_qgroup_limit_item, max_excl, 64);
BTRFS_SETGET_FUNCS(qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item, rsv_rfer, 64);
BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item, rsv_excl, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_flags, struct btrfs_qgroup_limit_item, flags, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_rfer, struct btrfs_qgroup_limit_item, max_rfer, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_excl, struct btrfs_qgroup_limit_item, max_excl, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item, rsv_rfer, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item, rsv_excl, 64);

/* btrfs_dev_replace_item */
BTRFS_SETGET_FUNCS(dev_replace_src_devid, struct btrfs_dev_replace_item, src_devid, 64);
BTRFS_SETGET_FUNCS(dev_replace_cont_reading_from_srcdev_mode, struct btrfs_dev_replace_item, cont_reading_from_srcdev_mode, 64);
BTRFS_SETGET_FUNCS(dev_replace_replace_state, struct btrfs_dev_replace_item, replace_state, 64);
BTRFS_SETGET_FUNCS(dev_replace_time_started, struct btrfs_dev_replace_item, time_started, 64);
BTRFS_SETGET_FUNCS(dev_replace_time_stopped, struct btrfs_dev_replace_item, time_stopped, 64);
BTRFS_SETGET_FUNCS(dev_replace_num_write_errors, struct btrfs_dev_replace_item, num_write_errors, 64);
BTRFS_SETGET_FUNCS(dev_replace_num_uncorrectable_read_errors, struct btrfs_dev_replace_item, num_uncorrectable_read_errors, 64);
BTRFS_SETGET_FUNCS(dev_replace_cursor_left, struct btrfs_dev_replace_item, cursor_left, 64);
BTRFS_SETGET_FUNCS(dev_replace_cursor_right, struct btrfs_dev_replace_item, cursor_right, 64);

BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_src_devid, struct btrfs_dev_replace_item, src_devid, 64);
BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cont_reading_from_srcdev_mode, struct btrfs_dev_replace_item, cont_reading_from_srcdev_mode, 64);
BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_replace_state, struct btrfs_dev_replace_item, replace_state, 64);
BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_time_started, struct btrfs_dev_replace_item, time_started, 64);
BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_time_stopped, struct btrfs_dev_replace_item, time_stopped, 64);
BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_num_write_errors, struct btrfs_dev_replace_item, num_write_errors, 64);
BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_num_uncorrectable_read_errors, struct btrfs_dev_replace_item, num_uncorrectable_read_errors, 64);
BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_left, struct btrfs_dev_replace_item, cursor_left, 64);
BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right, struct btrfs_dev_replace_item, cursor_right, 64);

/* btrfs_verity_descriptor_item */
BTRFS_SETGET_FUNCS(verity_descriptor_encryption, struct btrfs_verity_descriptor_item, encryption, 8);
BTRFS_SETGET_FUNCS(verity_descriptor_size, struct btrfs_verity_descriptor_item, size, 64);
BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_encryption, struct btrfs_verity_descriptor_item, encryption, 8);
BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_size, struct btrfs_verity_descriptor_item, size, 64);

/*
 * Cast into the data area of the leaf.
 *
 * Both macros add the offset of item header 0 (btrfs_item_nr_offset(leaf, 0))
 * to the offset field of item @slot. btrfs_item_ptr() types the sum as a
 * pointer to @type, btrfs_item_ptr_offset() as an unsigned long.
 */
#define btrfs_item_ptr(leaf, slot, type) \
	((type *)(btrfs_item_nr_offset(leaf, 0) + btrfs_item_offset(leaf, slot)))

#define btrfs_item_ptr_offset(leaf, slot) \
	((unsigned long)(btrfs_item_nr_offset(leaf, 0) + btrfs_item_offset(leaf, slot)))

/* NOTE(review): the trailing '|' below is table-dump residue, kept verbatim. */
#endif |
| 2 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 | // SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 1999 - 2018 Intel Corporation. */

#include "e1000.h"

/**
 * e1000_raise_eec_clk - Raise EEPROM clock
 * @hw: pointer to the HW structure
 * @eecd: pointer to the caller's working copy of the EECD register value;
 *        updated in place so the caller sees the new clock state
 *
 * Sets the SK (clock) bit in *@eecd, writes the result to the EECD register,
 * flushes the write and then delays for hw->nvm.delay_usec.
 **/
static void e1000_raise_eec_clk(struct e1000_hw *hw, u32 *eecd)
{
	*eecd = *eecd | E1000_EECD_SK;
	ew32(EECD, *eecd);
	e1e_flush();
	udelay(hw->nvm.delay_usec);
}

/**
 * e1000_lower_eec_clk - Lower EEPROM clock
 * @hw: pointer to the HW structure
 * @eecd: pointer to the caller's working copy of the EECD register value;
 *        updated in place so the caller sees the new clock state
 *
 * Clears the SK (clock) bit in *@eecd, writes the result to the EECD register,
 * flushes the write and then delays for hw->nvm.delay_usec.
 **/
static void e1000_lower_eec_clk(struct e1000_hw *hw, u32 *eecd)
{
	*eecd = *eecd & ~E1000_EECD_SK;
	ew32(EECD, *eecd);
	e1e_flush();
	udelay(hw->nvm.delay_usec);
}

/**
 * e1000_shift_out_eec_bits - Shift data bits out to the EEPROM
 * @hw: pointer to the HW structure
 * @data: data to send to the EEPROM
 * @count: number of bits to shift out
 *
 * We need to shift 'count' bits out to the EEPROM. So, the value in the
 * "data" parameter will be shifted out to the EEPROM one bit at a time.
 * In order to do this, "data" must be broken down into bits.
**/
static void e1000_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count)
{
	struct e1000_nvm_info *nvm = &hw->nvm;
	u32 eecd = er32(EECD);
	u32 mask;

	/* Start at the most significant of the 'count' bits.
	 * NOTE(review): count == 0 would make this shift invalid; the callers
	 * visible in this file pass 8, 16, opcode_bits or address_bits -
	 * confirm the latter two can never be 0.
	 */
	mask = BIT(count - 1);
	if (nvm->type == e1000_nvm_eeprom_spi)
		eecd |= E1000_EECD_DO;

	do {
		/* Put the current data bit on DI ... */
		eecd &= ~E1000_EECD_DI;

		if (data & mask)
			eecd |= E1000_EECD_DI;

		ew32(EECD, eecd);
		e1e_flush();

		udelay(nvm->delay_usec);

		/* ... then pulse the clock once for this bit */
		e1000_raise_eec_clk(hw, &eecd);
		e1000_lower_eec_clk(hw, &eecd);

		mask >>= 1;
	} while (mask);

	/* Leave DI cleared once all bits have been sent */
	eecd &= ~E1000_EECD_DI;
	ew32(EECD, eecd);
}

/**
 * e1000_shift_in_eec_bits - Shift data bits in from the EEPROM
 * @hw: pointer to the HW structure
 * @count: number of bits to shift in
 *
 * In order to read a register from the EEPROM, we need to shift 'count' bits
 * in from the EEPROM. Bits are "shifted in" by raising the clock input to
 * the EEPROM (setting the SK bit), and then reading the value of the data out
 * "DO" bit. During this "shifting in" process the data in "DI" bit should
 * always be clear.
 *
 * Returns the bits read, first bit received in the most significant position
 * of the 'count'-bit result.
 **/
static u16 e1000_shift_in_eec_bits(struct e1000_hw *hw, u16 count)
{
	u32 eecd;
	u32 i;
	u16 data;

	eecd = er32(EECD);
	eecd &= ~(E1000_EECD_DO | E1000_EECD_DI);
	data = 0;

	for (i = 0; i < count; i++) {
		/* Make room for the next bit before sampling it */
		data <<= 1;
		e1000_raise_eec_clk(hw, &eecd);

		/* Re-read the register while the clock is high to sample DO */
		eecd = er32(EECD);

		eecd &= ~E1000_EECD_DI;
		if (eecd & E1000_EECD_DO)
			data |= 1;

		e1000_lower_eec_clk(hw, &eecd);
	}

	return data;
}

/**
 * e1000e_poll_eerd_eewr_done - Poll for EEPROM read/write completion
 * @hw: pointer to the HW structure
 * @ee_reg: EEPROM flag for polling
 *
 * Polls the EEPROM status bit for either read or write completion based
 * upon the value of 'ee_reg'.
**/
s32 e1000e_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg)
{
	/* Up to 100000 polls, 5 usec apart, before giving up */
	u32 attempts = 100000;
	u32 i, reg = 0;

	for (i = 0; i < attempts; i++) {
		/* E1000_NVM_POLL_READ selects EERD; any other value polls EEWR */
		if (ee_reg == E1000_NVM_POLL_READ)
			reg = er32(EERD);
		else
			reg = er32(EEWR);

		if (reg & E1000_NVM_RW_REG_DONE)
			return 0;

		udelay(5);
	}

	/* DONE bit never appeared within the attempt budget */
	return -E1000_ERR_NVM;
}

/**
 * e1000e_acquire_nvm - Generic request for access to EEPROM
 * @hw: pointer to the HW structure
 *
 * Set the EEPROM access request bit and wait for EEPROM access grant bit.
 * Return successful if access grant bit set, else clear the request for
 * EEPROM access and return -E1000_ERR_NVM (-1).
 **/
s32 e1000e_acquire_nvm(struct e1000_hw *hw)
{
	u32 eecd = er32(EECD);
	s32 timeout = E1000_NVM_GRANT_ATTEMPTS;

	ew32(EECD, eecd | E1000_EECD_REQ);
	eecd = er32(EECD);

	/* Poll for the grant bit, 5 usec between reads */
	while (timeout) {
		if (eecd & E1000_EECD_GNT)
			break;
		udelay(5);
		eecd = er32(EECD);
		timeout--;
	}

	/* timeout reaching 0 means the grant was never observed */
	if (!timeout) {
		eecd &= ~E1000_EECD_REQ;
		ew32(EECD, eecd);
		e_dbg("Could not acquire NVM grant\n");
		return -E1000_ERR_NVM;
	}

	return 0;
}

/**
 * e1000_standby_nvm - Return EEPROM to standby state
 * @hw: pointer to the HW structure
 *
 * Return the EEPROM to a standby state. Only acts when the NVM type is
 * e1000_nvm_eeprom_spi; for any other type this is a no-op apart from the
 * initial EECD read.
 **/
static void e1000_standby_nvm(struct e1000_hw *hw)
{
	struct e1000_nvm_info *nvm = &hw->nvm;
	u32 eecd = er32(EECD);

	if (nvm->type == e1000_nvm_eeprom_spi) {
		/* Toggle CS to flush commands */
		eecd |= E1000_EECD_CS;
		ew32(EECD, eecd);
		e1e_flush();
		udelay(nvm->delay_usec);
		eecd &= ~E1000_EECD_CS;
		ew32(EECD, eecd);
		e1e_flush();
		udelay(nvm->delay_usec);
	}
}

/**
 * e1000_stop_nvm - Terminate EEPROM command
 * @hw: pointer to the HW structure
 *
 * Terminates the current command by inverting the EEPROM's chip select pin.
**/
static void e1000_stop_nvm(struct e1000_hw *hw)
{
	u32 eecd;

	eecd = er32(EECD);
	if (hw->nvm.type == e1000_nvm_eeprom_spi) {
		/* Pull CS high */
		eecd |= E1000_EECD_CS;
		/* The helper also writes eecd (with CS set) back to EECD */
		e1000_lower_eec_clk(hw, &eecd);
	}
}

/**
 * e1000e_release_nvm - Release exclusive access to EEPROM
 * @hw: pointer to the HW structure
 *
 * Stop any current commands to the EEPROM and clear the EEPROM request bit.
 **/
void e1000e_release_nvm(struct e1000_hw *hw)
{
	u32 eecd;

	e1000_stop_nvm(hw);

	/* Re-read EECD after the stop sequence before dropping REQ */
	eecd = er32(EECD);
	eecd &= ~E1000_EECD_REQ;
	ew32(EECD, eecd);
}

/**
 * e1000_ready_nvm_eeprom - Prepares EEPROM for read/write
 * @hw: pointer to the HW structure
 *
 * Sets up the EEPROM for reading and writing. For SPI parts this polls the
 * EEPROM status register until the ready bit clears; returns 0 on success
 * or -E1000_ERR_NVM if it does not clear within NVM_MAX_RETRY_SPI attempts.
 * For non-SPI NVM types it returns 0 without touching the device further.
 **/
static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw)
{
	struct e1000_nvm_info *nvm = &hw->nvm;
	u32 eecd = er32(EECD);
	u8 spi_stat_reg;

	if (nvm->type == e1000_nvm_eeprom_spi) {
		u16 timeout = NVM_MAX_RETRY_SPI;

		/* Clear SK and CS */
		eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
		ew32(EECD, eecd);
		e1e_flush();
		udelay(1);

		/* Read "Status Register" repeatedly until the LSB is cleared.
		 * The EEPROM will signal that the command has been completed
		 * by clearing bit 0 of the internal status register.  If it's
		 * not cleared within 'timeout', then error out.
		 */
		while (timeout) {
			e1000_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI,
						 hw->nvm.opcode_bits);
			spi_stat_reg = (u8)e1000_shift_in_eec_bits(hw, 8);
			if (!(spi_stat_reg & NVM_STATUS_RDY_SPI))
				break;

			udelay(5);
			/* Toggle CS so the next RDSR starts a fresh command */
			e1000_standby_nvm(hw);
			timeout--;
		}

		if (!timeout) {
			e_dbg("SPI NVM Status error\n");
			return -E1000_ERR_NVM;
		}
	}

	return 0;
}

/**
 * e1000e_read_nvm_eerd - Reads EEPROM using EERD register
 * @hw: pointer to the HW structure
 * @offset: offset of the first word in the EEPROM to read
 * @words: number of words to read
 * @data: buffer receiving the word(s) read from the EEPROM
 *
 * Reads @words 16 bit words from the EEPROM using the EERD register.
**/
s32 e1000e_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
{
	struct e1000_nvm_info *nvm = &hw->nvm;
	u32 i, eerd = 0;
	s32 ret_val = 0;

	/* A check for invalid values:  offset too large, too many words,
	 * too many words for the offset, and not enough words.
	 */
	if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
	    (words == 0)) {
		e_dbg("nvm parameter(s) out of bounds\n");
		return -E1000_ERR_NVM;
	}

	for (i = 0; i < words; i++) {
		/* Program the word address together with the START bit */
		eerd = ((offset + i) << E1000_NVM_RW_ADDR_SHIFT) +
		    E1000_NVM_RW_REG_START;

		ew32(EERD, eerd);
		ret_val = e1000e_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ);
		if (ret_val) {
			/* data[i] and later entries are left unwritten */
			e_dbg("NVM read error: %d\n", ret_val);
			break;
		}

		data[i] = (er32(EERD) >> E1000_NVM_RW_REG_DATA);
	}

	return ret_val;
}

/**
 * e1000e_write_nvm_spi - Write to EEPROM using SPI
 * @hw: pointer to the HW structure
 * @offset: offset within the EEPROM to be written to
 * @words: number of words to write
 * @data: 16 bit word(s) to be written to the EEPROM
 *
 * Writes data to EEPROM at offset using SPI interface.
 *
 * If e1000e_update_nvm_checksum is not called after this function, the
 * EEPROM will most likely contain an invalid checksum.
 *
 * Returns 0 on success, -E1000_ERR_NVM for out-of-bounds parameters, or the
 * error returned by the acquire or ready steps.
 **/
s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
{
	struct e1000_nvm_info *nvm = &hw->nvm;
	s32 ret_val = -E1000_ERR_NVM;
	u16 widx = 0;

	/* A check for invalid values:  offset too large, too many words,
	 * and not enough words.
	 */
	if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
	    (words == 0)) {
		e_dbg("nvm parameter(s) out of bounds\n");
		return -E1000_ERR_NVM;
	}

	/* One iteration per page write; the NVM is acquired and released
	 * around each page.
	 */
	while (widx < words) {
		u8 write_opcode = NVM_WRITE_OPCODE_SPI;

		ret_val = nvm->ops.acquire(hw);
		if (ret_val)
			return ret_val;

		ret_val = e1000_ready_nvm_eeprom(hw);
		if (ret_val) {
			nvm->ops.release(hw);
			return ret_val;
		}

		e1000_standby_nvm(hw);

		/* Send the WRITE ENABLE command (8 bit opcode) */
		e1000_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI,
					 nvm->opcode_bits);

		e1000_standby_nvm(hw);

		/* Some SPI eeproms use the 8th address bit embedded in the
		 * opcode
		 *
		 * NOTE(review): this tests the starting 'offset' only, while
		 * the address sent below uses 'offset + widx'. Confirm whether
		 * a multi-page write that crosses word 128 on an 8-address-bit
		 * part needs the test to follow the current position.
		 */
		if ((nvm->address_bits == 8) && (offset >= 128))
			write_opcode |= NVM_A8_OPCODE_SPI;

		/* Send the Write command (8-bit opcode + addr) */
		e1000_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits);
		e1000_shift_out_eec_bits(hw, (u16)((offset + widx) * 2),
					 nvm->address_bits);

		/* Loop to allow for up to whole page write of eeprom */
		while (widx < words) {
			u16 word_out = data[widx];

			/* Swap the two bytes of the word before sending it */
			word_out = (word_out >> 8) | (word_out << 8);
			e1000_shift_out_eec_bits(hw, word_out, 16);
			widx++;

			/* Stop at a page boundary; the outer loop starts the
			 * next page with a fresh write command.
			 */
			if ((((offset + widx) * 2) % nvm->page_size) == 0) {
				e1000_standby_nvm(hw);
				break;
			}
		}
		/* Sleep 10-11 ms before releasing the NVM for this page */
		usleep_range(10000, 11000);
		nvm->ops.release(hw);
	}

	return ret_val;
}

/**
 * e1000_read_pba_string_generic - Read device part number
 * @hw: pointer to the HW structure
 * @pba_num: pointer to device part number
 * @pba_num_size: size of part number buffer
 *
 * Reads the product board assembly (PBA) number from the EEPROM and stores
 * the value in pba_num.
**/ s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size) { s32 ret_val; u16 nvm_data; u16 pba_ptr; u16 offset; u16 length; if (pba_num == NULL) { e_dbg("PBA string buffer was null\n"); return -E1000_ERR_INVALID_ARGUMENT; } ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); if (ret_val) { e_dbg("NVM Read Error\n"); return ret_val; } ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr); if (ret_val) { e_dbg("NVM Read Error\n"); return ret_val; } /* if nvm_data is not ptr guard the PBA must be in legacy format which * means pba_ptr is actually our second data word for the PBA number * and we can decode it into an ascii string */ if (nvm_data != NVM_PBA_PTR_GUARD) { e_dbg("NVM PBA number is not stored as string\n"); /* make sure callers buffer is big enough to store the PBA */ if (pba_num_size < E1000_PBANUM_LENGTH) { e_dbg("PBA string buffer too small\n"); return E1000_ERR_NO_SPACE; } /* extract hex string from data and pba_ptr */ pba_num[0] = (nvm_data >> 12) & 0xF; pba_num[1] = (nvm_data >> 8) & 0xF; pba_num[2] = (nvm_data >> 4) & 0xF; pba_num[3] = nvm_data & 0xF; pba_num[4] = (pba_ptr >> 12) & 0xF; pba_num[5] = (pba_ptr >> 8) & 0xF; pba_num[6] = '-'; pba_num[7] = 0; pba_num[8] = (pba_ptr >> 4) & 0xF; pba_num[9] = pba_ptr & 0xF; /* put a null character on the end of our string */ pba_num[10] = '\0'; /* switch all the data but the '-' to hex char */ for (offset = 0; offset < 10; offset++) { if (pba_num[offset] < 0xA) pba_num[offset] += '0'; else if (pba_num[offset] < 0x10) pba_num[offset] += 'A' - 0xA; } return 0; } ret_val = e1000_read_nvm(hw, pba_ptr, 1, &length); if (ret_val) { e_dbg("NVM Read Error\n"); return ret_val; } if (length == 0xFFFF || length == 0) { e_dbg("NVM PBA number section invalid length\n"); return -E1000_ERR_NVM_PBA_SECTION; } /* check if pba_num buffer is big enough */ if (pba_num_size < (((u32)length * 2) - 1)) { e_dbg("PBA string buffer too small\n"); return -E1000_ERR_NO_SPACE; } /* trim pba 
length from start of string */ pba_ptr++; length--; for (offset = 0; offset < length; offset++) { ret_val = e1000_read_nvm(hw, pba_ptr + offset, 1, &nvm_data); if (ret_val) { e_dbg("NVM Read Error\n"); return ret_val; } pba_num[offset * 2] = (u8)(nvm_data >> 8); pba_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF); } pba_num[offset * 2] = '\0'; return 0; } /** * e1000_read_mac_addr_generic - Read device MAC address * @hw: pointer to the HW structure * * Reads the device MAC address from the EEPROM and stores the value. * Since devices with two ports use the same EEPROM, we increment the * last bit in the MAC address for the second port. **/ s32 e1000_read_mac_addr_generic(struct e1000_hw *hw) { u32 rar_high; u32 rar_low; u16 i; rar_high = er32(RAH(0)); rar_low = er32(RAL(0)); for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++) hw->mac.perm_addr[i] = (u8)(rar_low >> (i * 8)); for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++) hw->mac.perm_addr[i + 4] = (u8)(rar_high >> (i * 8)); for (i = 0; i < ETH_ALEN; i++) hw->mac.addr[i] = hw->mac.perm_addr[i]; return 0; } /** * e1000e_validate_nvm_checksum_generic - Validate EEPROM checksum * @hw: pointer to the HW structure * * Calculates the EEPROM checksum by reading/adding each word of the EEPROM * and then verifies that the sum of the EEPROM is equal to 0xBABA. 
**/ s32 e1000e_validate_nvm_checksum_generic(struct e1000_hw *hw) { s32 ret_val; u16 checksum = 0; u16 i, nvm_data; for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { ret_val = e1000_read_nvm(hw, i, 1, &nvm_data); if (ret_val) { e_dbg("NVM Read Error\n"); return ret_val; } checksum += nvm_data; } if (hw->mac.type == e1000_pch_tgp && nvm_data == NVM_CHECKSUM_UNINITIALIZED) { e_dbg("Uninitialized NVM Checksum on TGP platform - ignoring\n"); return 0; } if (checksum != NVM_SUM) { e_dbg("NVM Checksum Invalid\n"); return -E1000_ERR_NVM; } return 0; } /** * e1000e_update_nvm_checksum_generic - Update EEPROM checksum * @hw: pointer to the HW structure * * Updates the EEPROM checksum by reading/adding each word of the EEPROM * up to the checksum. Then calculates the EEPROM checksum and writes the * value to the EEPROM. **/ s32 e1000e_update_nvm_checksum_generic(struct e1000_hw *hw) { s32 ret_val; u16 checksum = 0; u16 i, nvm_data; for (i = 0; i < NVM_CHECKSUM_REG; i++) { ret_val = e1000_read_nvm(hw, i, 1, &nvm_data); if (ret_val) { e_dbg("NVM Read Error while updating checksum.\n"); return ret_val; } checksum += nvm_data; } checksum = NVM_SUM - checksum; ret_val = e1000_write_nvm(hw, NVM_CHECKSUM_REG, 1, &checksum); if (ret_val) e_dbg("NVM Write Error while updating checksum.\n"); return ret_val; } /** * e1000e_reload_nvm_generic - Reloads EEPROM * @hw: pointer to the HW structure * * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the * extended control register. **/ void e1000e_reload_nvm_generic(struct e1000_hw *hw) { u32 ctrl_ext; usleep_range(10, 20); ctrl_ext = er32(CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_EE_RST; ew32(CTRL_EXT, ctrl_ext); e1e_flush(); } |
| 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1034 1033 1033 1033 1033 482 484 482 482 83 83 83 83 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 | // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * This file implements the various access functions for the * PROC file system. This is very similar to the IPv4 version, * except it reports the sockets in the INET6 address family. * * Authors: David S. 
Miller (davem@caip.rutgers.edu)
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 */
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/export.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>

/* Largest of four values, compared as u32 */
#define MAX4(a, b, c, d) \
	MAX_T(u32, MAX_T(u32, a, b), MAX_T(u32, c, d))
/* Size used for the on-stack scratch buffers in the show helpers below */
#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \
			IPSTATS_MIB_MAX, ICMP_MIB_MAX)

/* seq_file show callback: prints the in-use socket counts for the TCPv6,
 * UDPv6, UDP-Lite v6 and RAWv6 protos plus the IPv6 fragment queue element
 * count and memory, for the netns stored in seq->private. Always returns 0.
 */
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq->private;

	seq_printf(seq, "TCP6: inuse %d\n",
		       sock_prot_inuse_get(net, &tcpv6_prot));
	seq_printf(seq, "UDP6: inuse %d\n",
		       sock_prot_inuse_get(net, &udpv6_prot));
	seq_printf(seq, "UDPLITE6: inuse %d\n",
			sock_prot_inuse_get(net, &udplitev6_prot));
	seq_printf(seq, "RAW6: inuse %d\n",
		       sock_prot_inuse_get(net, &rawv6_prot));
	seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
		   atomic_read(&net->ipv6.fqdir->rhashtable.nelems),
		   frag_mem_limit(net->ipv6.fqdir));
	return 0;
}

/* Name/index pairs for the IPv6 counters, printed in this order */
static const struct snmp_mib snmp6_ipstats_list[] = {
/* ipv6 mib according to RFC 2465 */
	SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INPKTS),
	SNMP_MIB_ITEM("Ip6InHdrErrors", IPSTATS_MIB_INHDRERRORS),
	SNMP_MIB_ITEM("Ip6InTooBigErrors", IPSTATS_MIB_INTOOBIGERRORS),
	SNMP_MIB_ITEM("Ip6InNoRoutes", IPSTATS_MIB_INNOROUTES),
	SNMP_MIB_ITEM("Ip6InAddrErrors", IPSTATS_MIB_INADDRERRORS),
	SNMP_MIB_ITEM("Ip6InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS),
	SNMP_MIB_ITEM("Ip6InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS),
	SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS),
	SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS),
	SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS),
	SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTREQUESTS),
	SNMP_MIB_ITEM("Ip6OutDiscards", IPSTATS_MIB_OUTDISCARDS),
	SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES),
	SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT),
	SNMP_MIB_ITEM("Ip6ReasmReqds", IPSTATS_MIB_REASMREQDS),
	SNMP_MIB_ITEM("Ip6ReasmOKs", IPSTATS_MIB_REASMOKS),
	SNMP_MIB_ITEM("Ip6ReasmFails", IPSTATS_MIB_REASMFAILS),
	SNMP_MIB_ITEM("Ip6FragOKs", IPSTATS_MIB_FRAGOKS),
	SNMP_MIB_ITEM("Ip6FragFails", IPSTATS_MIB_FRAGFAILS),
	SNMP_MIB_ITEM("Ip6FragCreates", IPSTATS_MIB_FRAGCREATES),
	SNMP_MIB_ITEM("Ip6InMcastPkts", IPSTATS_MIB_INMCASTPKTS),
	SNMP_MIB_ITEM("Ip6OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS),
	SNMP_MIB_ITEM("Ip6InOctets", IPSTATS_MIB_INOCTETS),
	SNMP_MIB_ITEM("Ip6OutOctets", IPSTATS_MIB_OUTOCTETS),
	SNMP_MIB_ITEM("Ip6InMcastOctets", IPSTATS_MIB_INMCASTOCTETS),
	SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
	SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
	SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
	/* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */
	SNMP_MIB_ITEM("Ip6InNoECTPkts", IPSTATS_MIB_NOECTPKTS),
	SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
	SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
	SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS),
	SNMP_MIB_ITEM("Ip6OutTransmits", IPSTATS_MIB_OUTPKTS),
};

/* Name/index pairs for the ICMPv6 counters, printed in this order */
static const struct snmp_mib snmp6_icmp6_list[] = {
/* icmpv6 mib according to RFC 2466 */
	SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS),
	SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),
	SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
	SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
	SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS),
/* ICMP6_MIB_RATELIMITHOST needs to be last, see snmp6_dev_seq_show(),
 * which passes ARRAY_SIZE() - 1 to leave this entry out.
 */
	SNMP_MIB_ITEM("Icmp6OutRateLimitHost", ICMP6_MIB_RATELIMITHOST),
};

/* Name/index pairs for the UDPv6 counters, printed in this order */
static const struct snmp_mib snmp6_udp6_list[] = {
	SNMP_MIB_ITEM("Udp6InDatagrams", UDP_MIB_INDATAGRAMS),
	SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS),
	SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS),
	SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
	SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
	SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
	SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS),
	SNMP_MIB_ITEM("Udp6IgnoredMulti", UDP_MIB_IGNOREDMULTI),
	SNMP_MIB_ITEM("Udp6MemErrors", UDP_MIB_MEMERRORS),
};

/* Name/index pairs for the UDP-Lite v6 counters; same UDP_MIB_* indices as
 * the UDPv6 list but without an IgnoredMulti entry.
 */
static const struct snmp_mib snmp6_udplite6_list[] = {
	SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS),
	SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS),
	SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS),
	SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
	SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
	SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
	SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS),
	SNMP_MIB_ITEM("UdpLite6MemErrors", UDP_MIB_MEMERRORS),
};

/* Prints the per-message-type ICMPv6 counters in @smib in two passes: first
 * every type that has a legacy name (printed even when zero), then every
 * nonzero counter by numeric type. In the counter index, bit 0x100 selects
 * "Out" versus "In" and the low 8 bits are the ICMPv6 type.
 */
static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib)
{
	char name[32];
	int i;

	/* print by name -- deprecated items */
	for (i = 0; i < ICMP6MSG_MIB_MAX; i++) {
		const char *p = NULL;
		int icmptype;

#define CASE(TYP, STR) case TYP: p = STR; break;

		icmptype = i & 0xff;
		switch (icmptype) {
/* RFC 4293 v6 ICMPMsgStatsTable; named items for RFC 2466 compatibility */
		CASE(ICMPV6_DEST_UNREACH,	"DestUnreachs")
		CASE(ICMPV6_PKT_TOOBIG,		"PktTooBigs")
		CASE(ICMPV6_TIME_EXCEED,	"TimeExcds")
		CASE(ICMPV6_PARAMPROB,		"ParmProblems")
		CASE(ICMPV6_ECHO_REQUEST,	"Echos")
		CASE(ICMPV6_ECHO_REPLY,		"EchoReplies")
		CASE(ICMPV6_MGM_QUERY,		"GroupMembQueries")
		CASE(ICMPV6_MGM_REPORT,		"GroupMembResponses")
		CASE(ICMPV6_MGM_REDUCTION,	"GroupMembReductions")
		CASE(ICMPV6_MLD2_REPORT,	"MLDv2Reports")
		CASE(NDISC_ROUTER_ADVERTISEMENT, "RouterAdvertisements")
		CASE(NDISC_ROUTER_SOLICITATION, "RouterSolicits")
		CASE(NDISC_NEIGHBOUR_ADVERTISEMENT, "NeighborAdvertisements")
		CASE(NDISC_NEIGHBOUR_SOLICITATION, "NeighborSolicits")
		CASE(NDISC_REDIRECT,		"Redirects")
		}
#undef CASE
		if (!p)	/* don't print un-named types here */
			continue;
		snprintf(name, sizeof(name), "Icmp6%s%s",
			i & 0x100 ? "Out" : "In", p);
		seq_printf(seq, "%-32s\t%lu\n", name,
			   atomic_long_read(smib + i));
	}

	/* print by number (nonzero only) - ICMPMsgStat format */
	for (i = 0; i < ICMP6MSG_MIB_MAX; i++) {
		unsigned long val;

		val = atomic_long_read(smib + i);
		if (!val)
			continue;
		snprintf(name, sizeof(name), "Icmp6%sType%u",
			i & 0x100 ?  "Out" : "In", i & 0xff);
		seq_printf(seq, "%-32s\t%lu\n", name, val);
	}
}

/* can be called either with percpu mib (pcpumib != NULL),
 * or shared one (smib != NULL)
 *
 * Prints the first @cnt entries of @itemlist as "name<TAB>value" lines.
 * NOTE(review): @cnt is not checked against SNMP_MIB_MAX, the size of the
 * on-stack buffer used on the percpu path; callers in this file pass
 * ARRAY_SIZE() of the lists above.
 */
static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib,
				atomic_long_t *smib,
				const struct snmp_mib *itemlist,
				int cnt)
{
	unsigned long buff[SNMP_MIB_MAX];
	int i;

	if (pcpumib) {
		memset(buff, 0, sizeof(unsigned long) * cnt);

		snmp_get_cpu_field_batch_cnt(buff, itemlist, cnt, pcpumib);
		for (i = 0; i < cnt; i++)
			seq_printf(seq, "%-32s\t%lu\n",
				   itemlist[i].name, buff[i]);
	} else {
		/* Shared mib: read each counter directly by its entry index */
		for (i = 0; i < cnt; i++)
			seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
				   atomic_long_read(smib + itemlist[i].entry));
	}
}

/* 64-bit variant of snmp6_seq_show_item() for a percpu mib: gathers the
 * first @cnt entries of @itemlist into a zeroed u64 buffer and prints them.
 * @syncpoff is passed through to snmp_get_cpu_field64_batch_cnt().
 */
static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
				  const struct snmp_mib *itemlist,
				  int cnt, size_t syncpoff)
{
	u64 buff64[SNMP_MIB_MAX];
	int i;

	memset(buff64, 0, sizeof(u64) * cnt);

	snmp_get_cpu_field64_batch_cnt(buff64, itemlist, cnt, mib, syncpoff);
	for (i = 0; i < cnt; i++)
		seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]);
}

/* seq_file show callback for the per-netns "snmp6" entry: prints the IPv6,
 * ICMPv6, per-type ICMPv6, UDPv6 and UDP-Lite v6 counters of the netns in
 * seq->private, in that order. Always returns 0.
 */
static int snmp6_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;

	snmp6_seq_show_item64(seq, net->mib.ipv6_statistics,
			      snmp6_ipstats_list,
			      ARRAY_SIZE(snmp6_ipstats_list),
			      offsetof(struct ipstats_mib, syncp));
	snmp6_seq_show_item(seq, net->mib.icmpv6_statistics,
			    NULL, snmp6_icmp6_list,
			    ARRAY_SIZE(snmp6_icmp6_list));
	snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs);
	snmp6_seq_show_item(seq, net->mib.udp_stats_in6,
			    NULL, snmp6_udp6_list,
			    ARRAY_SIZE(snmp6_udp6_list));
	snmp6_seq_show_item(seq, net->mib.udplite_stats_in6,
			    NULL, snmp6_udplite6_list,
			    ARRAY_SIZE(snmp6_udplite6_list));
	return 0;
}

/* seq_file show callback for a per-device entry: prints the ifindex, then
 * the IPv6, ICMPv6 and per-type ICMPv6 counters of the inet6_dev stored in
 * seq->private. Always returns 0.
 */
static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
{
	struct inet6_dev *idev = (struct inet6_dev *)seq->private;

	seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
	snmp6_seq_show_item64(seq, idev->stats.ipv6,
			      snmp6_ipstats_list,
			      ARRAY_SIZE(snmp6_ipstats_list),
			      offsetof(struct ipstats_mib, syncp));

	/* Per idev icmp stats do not have ICMP6_MIB_RATELIMITHOST */
	snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,
			    snmp6_icmp6_list, ARRAY_SIZE(snmp6_icmp6_list) - 1);

	snmp6_seq_show_icmpv6msg(seq, idev->stats.icmpv6msgdev->mibs);
	return 0;
}

/* Creates the read-only (0444) proc entry for @idev, named after its net
 * device, under the netns' proc_net_devsnmp6 directory and records it in
 * idev->stats.proc_dir_entry.
 *
 * Returns 0 on success, -EINVAL if @idev or its device is NULL, -ENOENT if
 * the directory does not exist, -ENOMEM if the entry cannot be created.
 */
int snmp6_register_dev(struct inet6_dev *idev)
{
	struct proc_dir_entry *p;
	struct net *net;

	if (!idev || !idev->dev)
		return -EINVAL;

	net = dev_net(idev->dev);
	if (!net->mib.proc_net_devsnmp6)
		return -ENOENT;

	p = proc_create_single_data(idev->dev->name, 0444,
			net->mib.proc_net_devsnmp6, snmp6_dev_seq_show, idev);
	if (!p)
		return -ENOMEM;

	idev->stats.proc_dir_entry = p;
	return 0;
}

/* Removes the proc entry recorded for @idev and clears the stored pointer.
 *
 * Returns 0 on success, -ENOENT if the netns has no proc_net_devsnmp6
 * directory, -EINVAL if no entry is recorded for @idev.
 *
 * NOTE(review): unlike snmp6_register_dev(), this dereferences idev->dev
 * without a NULL check - confirm callers always pass a valid idev.
 */
int snmp6_unregister_dev(struct inet6_dev *idev)
{
	struct net *net = dev_net(idev->dev);

	if (!net->mib.proc_net_devsnmp6)
		return -ENOENT;
	if (!idev->stats.proc_dir_entry)
		return -EINVAL;
	proc_remove(idev->stats.proc_dir_entry);
	idev->stats.proc_dir_entry = NULL;
	return 0;
}

/* Per-netns init: creates the "sockstat6" and "snmp6" entries and the
 * "dev_snmp6" directory under net->proc_net. On failure, removes whatever
 * was already created and returns -ENOMEM.
 */
static int __net_init ipv6_proc_init_net(struct net *net)
{
	if (!proc_create_net_single("sockstat6", 0444, net->proc_net,
			sockstat6_seq_show, NULL))
		return -ENOMEM;

	if (!proc_create_net_single("snmp6", 0444, net->proc_net,
			snmp6_seq_show, NULL))
		goto proc_snmp6_fail;

	net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
	if (!net->mib.proc_net_devsnmp6)
		goto proc_dev_snmp6_fail;
	return 0;

	/* Unwind in reverse order of creation */
proc_dev_snmp6_fail:
	remove_proc_entry("snmp6", net->proc_net);
proc_snmp6_fail:
	remove_proc_entry("sockstat6", net->proc_net);
	return -ENOMEM;
}

/* Per-netns exit: removes the three entries created by ipv6_proc_init_net() */
static void __net_exit ipv6_proc_exit_net(struct net *net)
{
	remove_proc_entry("sockstat6", net->proc_net);
	remove_proc_entry("dev_snmp6", net->proc_net);
	remove_proc_entry("snmp6", net->proc_net);
}

/* Hooks the init/exit pair above into network namespace setup/teardown */
static struct pernet_operations ipv6_proc_ops = {
	.init = ipv6_proc_init_net,
	.exit = ipv6_proc_exit_net,
};

/* Registers ipv6_proc_ops; returns the register_pernet_subsys() result */
int __init ipv6_misc_proc_init(void)
{
	return register_pernet_subsys(&ipv6_proc_ops);
}

/* Unregisters ipv6_proc_ops */
void ipv6_misc_proc_exit(void)
{
	unregister_pernet_subsys(&ipv6_proc_ops);
} |
| 3 3 3 3 3 8 8 8 8 8 11 7 2 1 1 1 1 2 11 7 7 7 7 7 7 8 3 3 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 5 8 4 8 5 8 8 8 8 8 8 8 4 3 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 
497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 
997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 
1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 
1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 | // SPDX-License-Identifier: GPL-2.0 /* * * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. * * * terminology * * cluster - allocation unit - 512,1K,2K,4K,...,2M * vcn - virtual cluster number - Offset inside the file in clusters. * vbo - virtual byte offset - Offset inside the file in bytes. * lcn - logical cluster number - 0 based cluster in clusters heap. * lbo - logical byte offset - Absolute position inside volume. * run - maps VCN to LCN - Stored in attributes in packed form. * attr - attribute segment - std/name/data etc records inside MFT. * mi - MFT inode - One MFT record(usually 1024 bytes or 4K), consists of attributes. * ni - NTFS inode - Extends linux inode. consists of one or more mft inodes. * index - unit inside directory - 2K, 4K, <=page size, does not depend on cluster size. * * WSL - Windows Subsystem for Linux * https://docs.microsoft.com/en-us/windows/wsl/file-permissions * It stores uid/gid/mode/dev in xattr * * ntfs allows up to 2^64 clusters per volume. * It means you should use 64 bits lcn to operate with ntfs. * Implementation of ntfs.sys uses only 32 bits lcn. * Default ntfs3 uses 32 bits lcn too. * ntfs3 built with CONFIG_NTFS3_64BIT_CLUSTER (ntfs3_64) uses 64 bits per lcn. 
*
 *
 * ntfs limits, cluster size is 4K (2^12)
 * -----------------------------------------------------------------------------
 * | Volume size   | Clusters | ntfs.sys | ntfs3  | ntfs3_64 | mkntfs | chkdsk |
 * -----------------------------------------------------------------------------
 * | < 16T, 2^44   |  < 2^32  |  yes     |  yes   |   yes    |  yes   |  yes   |
 * | > 16T, 2^44   |  > 2^32  |  no      |  no    |   yes    |  yes   |  yes   |
 * ----------------------------------------------------------|------------------
 *
 * To mount large volumes as ntfs one should use large cluster size (up to 2M)
 * The maximum volume size in this case is 2^32 * 2^21 = 2^53 = 8P
 *
 * ntfs limits, cluster size is 2M (2^21)
 * -----------------------------------------------------------------------------
 * | < 8P, 2^53    |  < 2^32  |  yes     |  yes   |   yes    |  yes   |  yes   |
 * | > 8P, 2^53    |  > 2^32  |  no      |  no    |   yes    |  yes   |  yes   |
 * ----------------------------------------------------------|------------------
 *
 */

#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/fs.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/log2.h>
#include <linux/minmax.h>
#include <linux/module.h>
#include <linux/nls.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/statfs.h>

#include "debug.h"
#include "ntfs.h"
#include "ntfs_fs.h"
#ifdef CONFIG_NTFS3_LZX_XPRESS
#include "lib/lib.h"
#endif

#ifdef CONFIG_PRINTK
/*
 * ntfs_printk - Trace warnings/notices/errors.
 *
 * @sb:  super block the message is about; sb->s_id is printed as a prefix.
 * @fmt: printk-style format; the level and the remaining format are split
 *       with printk_get_level() / printk_skip_level().
 *
 * Output is rate limited through sbi->msg_ratelimit; a suppressed message
 * is dropped silently.
 *
 * Thanks Joe Perches <joe@perches.com> for implementation
 */
void ntfs_printk(const struct super_block *sb, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;
	int level;
	struct ntfs_sb_info *sbi = sb->s_fs_info;

	/* Should we use different ratelimits for warnings/notices/errors? */
	if (!___ratelimit(&sbi->msg_ratelimit, "ntfs3"))
		return;

	va_start(args, fmt);

	level = printk_get_level(fmt);
	vaf.fmt = printk_skip_level(fmt);
	vaf.va = &args;
	/* Re-emit the level header in front of the "ntfs3(<dev>): " prefix. */
	printk("%c%cntfs3(%s): %pV\n", KERN_SOH_ASCII, level, sb->s_id, &vaf);

	va_end(args);
}

/*
 * Scratch buffer for the quoted file name printed by ntfs_inode_printk().
 * s_name_buf_cnt arbitrates ownership: only the caller whose decrement
 * brings the counter to zero may use the static buffer.
 */
static char s_name_buf[512];
static atomic_t s_name_buf_cnt = ATOMIC_INIT(1); // 1 means 'free s_name_buf'.

/*
 * ntfs_inode_printk
 *
 * Print warnings/notices/errors about inode using name or inode number.
 *
 * @inode: inode the message is about; its number is always printed and,
 *         when a dentry alias is found, the name is printed in quotes.
 * @fmt:   printk-style format, handled as in ntfs_printk().
 *
 * Output is rate limited through sbi->msg_ratelimit.
 */
void ntfs_inode_printk(struct inode *inode, const char *fmt, ...)
{
	struct super_block *sb = inode->i_sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	char *name;
	va_list args;
	struct va_format vaf;
	int level;

	if (!___ratelimit(&sbi->msg_ratelimit, "ntfs3"))
		return;

	/*
	 * Use static allocated buffer, if possible.
	 * Callers that lose the race fall back to kmalloc(); the allocation
	 * may fail, in which case 'name' is NULL and no name is printed.
	 */
	name = atomic_dec_and_test(&s_name_buf_cnt) ?
		       s_name_buf :
		       kmalloc(sizeof(s_name_buf), GFP_NOFS);

	if (name) {
		struct dentry *de = d_find_alias(inode);

		if (de) {
			int len;
			/* d_lock is held while d_name.name is read. */
			spin_lock(&de->d_lock);
			len = snprintf(name, sizeof(s_name_buf), " \"%s\"",
				       de->d_name.name);
			spin_unlock(&de->d_lock);
			if (len <= 0)
				name[0] = 0;
			else if (len >= sizeof(s_name_buf))
				name[sizeof(s_name_buf) - 1] = 0;
		} else {
			name[0] = 0;
		}
		dput(de); /* Cocci warns if placed in branch "if (de)" */
	}

	va_start(args, fmt);

	level = printk_get_level(fmt);
	vaf.fmt = printk_skip_level(fmt);
	vaf.va = &args;

	printk("%c%cntfs3(%s): ino=%lx,%s %pV\n", KERN_SOH_ASCII, level,
	       sb->s_id, inode->i_ino, name ? name : "", &vaf);

	va_end(args);

	/* Undo the decrement above regardless of which buffer was used. */
	atomic_inc(&s_name_buf_cnt);
	if (name != s_name_buf)
		kfree(name);
}
#endif

/*
 * Shared memory struct.
 *
 * On-disk ntfs's upcase table is created by ntfs formatter.
 * 'upcase' table is 128K bytes of memory.
 * We should read it into memory when mounting.
 * Several ntfs volumes likely use the same 'upcase' table.
 * It is good idea to share in-memory 'upcase' table between different volumes.
 * Unfortunately winxp/vista/win7 use different upcase tables.
*/
/* Protects every access to s_shared[]. */
static DEFINE_SPINLOCK(s_shared_lock);

/*
 * Table of shared buffers.
 * A slot with cnt == 0 is free; otherwise cnt is the number of users of
 * the buffer 'ptr' of 'len' bytes.
 */
static struct {
	void *ptr;
	u32 len;
	int cnt;
} s_shared[8];

/*
 * ntfs_set_shared - Share @ptr or reuse an identical, already shared buffer.
 *
 * @ptr:   buffer to share.
 * @bytes: size of @ptr in bytes.
 *
 * If a used slot holds a buffer with the same length and the same content,
 * its use count is incremented and the stored pointer is returned (it may
 * differ from @ptr). Otherwise @ptr is stored in a free slot, if any.
 *
 * Return:
 * * pointer to the shared buffer - If pointer was saved in shared memory
 *   or an equal buffer was found there.
 * * NULL - If pointer was not shared (no equal buffer and no free slot).
 */
void *ntfs_set_shared(void *ptr, u32 bytes)
{
	void *ret = NULL;
	int i, j = -1;

	spin_lock(&s_shared_lock);
	for (i = 0; i < ARRAY_SIZE(s_shared); i++) {
		if (!s_shared[i].cnt) {
			/* Remember a free slot; the last one seen wins. */
			j = i;
		} else if (bytes == s_shared[i].len &&
			   !memcmp(s_shared[i].ptr, ptr, bytes)) {
			s_shared[i].cnt += 1;
			ret = s_shared[i].ptr;
			break;
		}
	}

	if (!ret && j != -1) {
		s_shared[j].ptr = ptr;
		s_shared[j].len = bytes;
		s_shared[j].cnt = 1;
		ret = ptr;
	}
	spin_unlock(&s_shared_lock);

	return ret;
}

/*
 * ntfs_put_shared - Drop one reference to a shared buffer.
 *
 * @ptr: pointer previously obtained from ntfs_set_shared(), or a private
 *       buffer that was never shared.
 *
 * Return:
 * * @ptr - If pointer is not shared anymore (count dropped to zero) or it
 *   was not found in the table.
 * * NULL - If pointer is still shared.
 */
void *ntfs_put_shared(void *ptr)
{
	void *ret = ptr;
	int i;

	spin_lock(&s_shared_lock);
	for (i = 0; i < ARRAY_SIZE(s_shared); i++) {
		if (s_shared[i].cnt && s_shared[i].ptr == ptr) {
			/* Other users remain: caller must not free it. */
			if (--s_shared[i].cnt)
				ret = NULL;
			break;
		}
	}
	spin_unlock(&s_shared_lock);

	return ret;
}

/*
 * put_mount_options - Release mount options together with the iocharset
 * name and the nls table they reference.
 */
static inline void put_mount_options(struct ntfs_mount_options *options)
{
	kfree(options->nls_name);
	unload_nls(options->nls);
	kfree(options);
}

/* Mount option identifiers; dispatched on in ntfs_fs_parse_param(). */
enum Opt {
	Opt_uid,
	Opt_gid,
	Opt_umask,
	Opt_dmask,
	Opt_fmask,
	Opt_immutable,
	Opt_discard,
	Opt_force,
	Opt_sparse,
	Opt_nohidden,
	Opt_hide_dot_files,
	Opt_windows_names,
	Opt_showmeta,
	Opt_acl,
	Opt_iocharset,
	Opt_prealloc,
	Opt_nocase,
	Opt_err,
};

/* Mount option names and value types, handed to fs_parse(). */
// clang-format off
static const struct fs_parameter_spec ntfs_fs_parameters[] = {
	fsparam_uid("uid",		Opt_uid),
	fsparam_gid("gid",		Opt_gid),
	fsparam_u32oct("umask",		Opt_umask),
	fsparam_u32oct("dmask",		Opt_dmask),
	fsparam_u32oct("fmask",		Opt_fmask),
	fsparam_flag("sys_immutable",	Opt_immutable),
	fsparam_flag("discard",		Opt_discard),
	fsparam_flag("force",		Opt_force),
	fsparam_flag("sparse",		Opt_sparse),
	fsparam_flag("nohidden",	Opt_nohidden),
	fsparam_flag("hide_dot_files",	Opt_hide_dot_files),
	fsparam_flag("windows_names",	Opt_windows_names),
	fsparam_flag("showmeta",	Opt_showmeta),
	fsparam_flag("acl",		Opt_acl),
	fsparam_string("iocharset",	Opt_iocharset),
	fsparam_flag("prealloc",	Opt_prealloc),
	fsparam_flag("nocase",		Opt_nocase),
	{}
};
// clang-format on

/*
 * Load nls table or if @nls is utf8 then return NULL.
 *
 * It is good idea to use here "const char *nls".
 * But load_nls accepts "char*".
 *
 * A NULL @nls selects CONFIG_NLS_DEFAULT.
 *
 * Return: NULL for "utf8", the table from load_nls_default()/load_nls(),
 * or ERR_PTR(-EINVAL) if load_nls() could not find the charset.
 */
static struct nls_table *ntfs_load_nls(char *nls)
{
	struct nls_table *ret;

	if (!nls)
		nls = CONFIG_NLS_DEFAULT;

	if (strcmp(nls, "utf8") == 0)
		return NULL;

	if (strcmp(nls, CONFIG_NLS_DEFAULT) == 0)
		return load_nls_default();

	ret = load_nls(nls);
	if (ret)
		return ret;

	return ERR_PTR(-EINVAL);
}

/*
 * ntfs_fs_parse_param - Parse one mount parameter into fc->fs_private.
 *
 * The masks are stored inverted (fs_fmask_inv/fs_dmask_inv); the 'fmask'
 * and 'dmask' flags record that a mask was given explicitly.
 *
 * Return: 0 on success, the negative result of fs_parse() if the parameter
 * is not recognised, or the result of invalf() / -EINVAL for a bad value.
 */
static int ntfs_fs_parse_param(struct fs_context *fc,
			       struct fs_parameter *param)
{
	struct ntfs_mount_options *opts = fc->fs_private;
	struct fs_parse_result result;
	int opt;

	opt = fs_parse(fc, ntfs_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_uid:
		opts->fs_uid = result.uid;
		break;
	case Opt_gid:
		opts->fs_gid = result.gid;
		break;
	case Opt_umask:
		/* Only permission bits (including suid/sgid/sticky) allowed. */
		if (result.uint_32 & ~07777)
			return invalf(fc, "ntfs3: Invalid value for umask.");
		/* umask sets both the file and the directory mask. */
		opts->fs_fmask_inv = ~result.uint_32;
		opts->fs_dmask_inv = ~result.uint_32;
		opts->fmask = 1;
		opts->dmask = 1;
		break;
	case Opt_dmask:
		if (result.uint_32 & ~07777)
			return invalf(fc, "ntfs3: Invalid value for dmask.");
		opts->fs_dmask_inv = ~result.uint_32;
		opts->dmask = 1;
		break;
	case Opt_fmask:
		if (result.uint_32 & ~07777)
			return invalf(fc, "ntfs3: Invalid value for fmask.");
		opts->fs_fmask_inv = ~result.uint_32;
		opts->fmask = 1;
		break;
	case Opt_immutable:
		opts->sys_immutable = 1;
		break;
	case Opt_discard:
		opts->discard = 1;
		break;
	case Opt_force:
		opts->force = 1;
		break;
	case Opt_sparse:
		opts->sparse = 1;
		break;
	case Opt_nohidden:
		opts->nohidden = 1;
		break;
	case Opt_hide_dot_files:
		opts->hide_dot_files = 1;
		break;
	case Opt_windows_names:
		opts->windows_names = 1;
		break;
	case Opt_showmeta:
		opts->showmeta = 1;
		break;
	case Opt_acl:
		/*
		 * The preprocessor picks the body of the 'if': either enable
		 * POSIX ACLs or reject the option when support is compiled out.
		 */
		if (!result.negated)
#ifdef CONFIG_NTFS3_FS_POSIX_ACL
			fc->sb_flags |= SB_POSIXACL;
#else
			return invalf(
				fc, "ntfs3: Support for ACL not compiled in!");
#endif
		else
			fc->sb_flags &= ~SB_POSIXACL;
		break;
	case Opt_iocharset:
		/* Take ownership of the string so that fs core does not free it. */
		kfree(opts->nls_name);
		opts->nls_name = param->string;
		param->string = NULL;
		break;
	case Opt_prealloc:
		opts->prealloc = 1;
		break;
	case Opt_nocase:
		opts->nocase = 1;
		break;
	default:
		/* Should not be here unless we forget add case. */
		return -EINVAL;
	}
	return 0;
}

/*
 * ntfs_fs_reconfigure - Apply new mount options on remount.
 *
 * Refuses a read-only -> read-write transition when the journal has not
 * been replayed or when the volume is dirty and "force" is not given, and
 * refuses any change of iocharset. On success the filesystem is synced and
 * the option sets in sbi->options and fc->fs_private are swapped.
 *
 * Return: 0 on success, -EINVAL otherwise.
 */
static int ntfs_fs_reconfigure(struct fs_context *fc)
{
	struct super_block *sb = fc->root->d_sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	struct ntfs_mount_options *new_opts = fc->fs_private;
	int ro_rw;

	/* If ntfs3 is used as legacy ntfs enforce read-only mode. */
	if (is_legacy_ntfs(sb)) {
		fc->sb_flags |= SB_RDONLY;
		goto out;
	}

	/* True when the remount switches from read-only to read-write. */
	ro_rw = sb_rdonly(sb) && !(fc->sb_flags & SB_RDONLY);
	if (ro_rw && (sbi->flags & NTFS_FLAGS_NEED_REPLAY)) {
		errorf(fc,
		       "ntfs3: Couldn't remount rw because journal is not replayed. Please umount/remount instead\n");
		return -EINVAL;
	}

	new_opts->nls = ntfs_load_nls(new_opts->nls_name);
	if (IS_ERR(new_opts->nls)) {
		new_opts->nls = NULL;
		errorf(fc, "ntfs3: Cannot load iocharset %s",
		       new_opts->nls_name);
		return -EINVAL;
	}
	/* The tables are compared by pointer. */
	if (new_opts->nls != sbi->options->nls)
		return invalf(
			fc,
			"ntfs3: Cannot use different iocharset when remounting!");

	if (ro_rw && (sbi->volume.flags & VOLUME_FLAG_DIRTY) &&
	    !new_opts->force) {
		errorf(fc,
		       "ntfs3: Volume is dirty and \"force\" flag is not set!");
		return -EINVAL;
	}

out:
	sync_filesystem(sb);
	/* The previous options end up in fc->fs_private. */
	swap(sbi->options, fc->fs_private);

	return 0;
}

#ifdef CONFIG_PROC_FS
/* /proc/fs/ntfs3; NULL when it could not be created. */
static struct proc_dir_entry *proc_info_root;

/*
 * ntfs3_volinfo:
 *
 * The content of /proc/fs/ntfs3/<dev>/volinfo
 *
 * ntfs3.1
 * cluster size
 * number of clusters
 * total number of mft records
 * number of used mft records ~= number of files + folders
 * real state of ntfs "dirty"/"clean"
 * current state of ntfs "dirty"/"clean"
 */
static int ntfs3_volinfo(struct seq_file *m, void *o)
{
	struct super_block *sb = m->private;
	struct ntfs_sb_info *sbi = sb->s_fs_info;

	seq_printf(m, "ntfs%d.%d\n%u\n%zu\n%zu\n%zu\n%s\n%s\n",
		   sbi->volume.major_ver, sbi->volume.minor_ver,
		   sbi->cluster_size, sbi->used.bitmap.nbits,
		   sbi->mft.bitmap.nbits,
		   sbi->mft.bitmap.nbits - wnd_zeroes(&sbi->mft.bitmap),
		   sbi->volume.real_dirty ? "dirty" : "clean",
		   (sbi->volume.flags & VOLUME_FLAG_DIRTY) ? "dirty" : "clean");

	return 0;
}

/* open /proc/fs/ntfs3/<dev>/volinfo; the super block is the entry's data. */
static int ntfs3_volinfo_open(struct inode *inode, struct file *file)
{
	return single_open(file, ntfs3_volinfo, pde_data(inode));
}

/* read /proc/fs/ntfs3/<dev>/label */
static int ntfs3_label_show(struct seq_file *m, void *o)
{
	struct super_block *sb = m->private;
	struct ntfs_sb_info *sbi = sb->s_fs_info;

	seq_printf(m, "%s\n", sbi->volume.label);

	return 0;
}

/*
 * write /proc/fs/ntfs3/<dev>/label
 *
 * Copies @count bytes from user space, strips trailing '\n' characters and
 * passes the result to ntfs_set_label().
 *
 * Return: @count on success; -EROFS on a read-only mount, -ENOMEM,
 * -EFAULT, or the negative error from ntfs_set_label().
 */
static ssize_t ntfs3_label_write(struct file *file, const char __user *buffer,
				 size_t count, loff_t *ppos)
{
	int err;
	struct super_block *sb = pde_data(file_inode(file));
	ssize_t ret = count;
	u8 *label;

	if (sb_rdonly(sb))
		return -EROFS;

	label = kmalloc(count, GFP_NOFS);

	if (!label)
		return -ENOMEM;

	if (copy_from_user(label, buffer, ret)) {
		ret = -EFAULT;
		goto out;
	}

	/* 'ret' becomes the label length without trailing newlines. */
	while (ret > 0 && label[ret - 1] == '\n')
		ret -= 1;

	err = ntfs_set_label(sb->s_fs_info, label, ret);

	if (err < 0) {
		ntfs_err(sb, "failed (%d) to write label", err);
		ret = err;
		goto out;
	}

	/* Report the whole input as consumed, newlines included. */
	*ppos += count;
	ret = count;
out:
	kfree(label);
	return ret;
}

/* open /proc/fs/ntfs3/<dev>/label; the super block is the entry's data. */
static int ntfs3_label_open(struct inode *inode, struct file *file)
{
	return single_open(file, ntfs3_label_show, pde_data(inode));
}

/* volinfo is read-only: no .proc_write handler. */
static const struct proc_ops ntfs3_volinfo_fops = {
	.proc_read = seq_read,
	.proc_lseek = seq_lseek,
	.proc_release = single_release,
	.proc_open = ntfs3_volinfo_open,
};

static const struct proc_ops ntfs3_label_fops = {
	.proc_read = seq_read,
	.proc_lseek = seq_lseek,
	.proc_release = single_release,
	.proc_open = ntfs3_label_open,
	.proc_write = ntfs3_label_write,
};

/*
 * ntfs_create_procdir - Create /proc/fs/ntfs3/<sb->s_id>/{volinfo,label}.
 *
 * Does nothing if the root directory is missing. Failures are not
 * reported; sbi->procdir is set only if the directory was created.
 */
static void ntfs_create_procdir(struct super_block *sb)
{
	struct proc_dir_entry *e;

	if (!proc_info_root)
		return;

	e = proc_mkdir(sb->s_id, proc_info_root);
	if (e) {
		struct ntfs_sb_info *sbi = sb->s_fs_info;

		proc_create_data("volinfo", 0444, e, &ntfs3_volinfo_fops, sb);
		proc_create_data("label", 0644, e, &ntfs3_label_fops, sb);
		sbi->procdir = e;
	}
}

/* ntfs_remove_procdir - Undo ntfs_create_procdir(); no-op if never created. */
static void ntfs_remove_procdir(struct super_block *sb)
{
	struct ntfs_sb_info *sbi = sb->s_fs_info;

	if (!sbi->procdir)
		return;

	remove_proc_entry("label", sbi->procdir);
	remove_proc_entry("volinfo", sbi->procdir);
	remove_proc_entry(sb->s_id, proc_info_root);
	sbi->procdir = NULL;
}

/* Create /proc/fs/ntfs3; proc_info_root holds the proc_mkdir() result. */
static void ntfs_create_proc_root(void)
{
	proc_info_root = proc_mkdir("fs/ntfs3", NULL);
}

/* Remove /proc/fs/ntfs3 if it was created. */
static void ntfs_remove_proc_root(void)
{
	if (proc_info_root) {
		remove_proc_entry("fs/ntfs3", NULL);
		proc_info_root = NULL;
	}
}
#else
/* Empty stubs used when CONFIG_PROC_FS is not set. */
static void ntfs_create_procdir(struct super_block *sb)
{
}
static void ntfs_remove_procdir(struct super_block *sb)
{
}
static void ntfs_create_proc_root(void)
{
}
static void ntfs_remove_proc_root(void)
{
}
#endif

/* Slab cache that ntfs_alloc_inode()/ntfs_free_inode() use for ntfs_inode. */
static struct kmem_cache *ntfs_inode_cachep;

/*
 * ntfs_alloc_inode - super_operations::alloc_inode
 *
 * Allocates a struct ntfs_inode and zeroes only the part that precedes the
 * embedded vfs_inode.
 *
 * Return: pointer to the embedded vfs inode, or NULL on allocation failure.
 */
static struct inode *ntfs_alloc_inode(struct super_block *sb)
{
	struct ntfs_inode *ni = alloc_inode_sb(sb, ntfs_inode_cachep, GFP_NOFS);

	if (!ni)
		return NULL;

	memset(ni, 0, offsetof(struct ntfs_inode, vfs_inode));
	mutex_init(&ni->ni_lock);
	return &ni->vfs_inode;
}

/* ntfs_free_inode - super_operations::free_inode */
static void ntfs_free_inode(struct inode *inode)
{
	struct ntfs_inode *ni = ntfs_i(inode);

	mutex_destroy(&ni->ni_lock);
	kmem_cache_free(ntfs_inode_cachep, ni);
}

/*
 * init_once - One-time initialisation of the embedded vfs inode.
 * NOTE(review): presumably the constructor of ntfs_inode_cachep - the cache
 * creation is not visible here, confirm.
 */
static void init_once(void *foo)
{
	struct ntfs_inode *ni = foo;

	inode_init_once(&ni->vfs_inode);
}

/*
 * Noinline to reduce binary size.
*/
/*
 * ntfs3_put_sbi - Release the bitmaps, system inodes and indexes held by
 * @sbi. The sbi structure itself is freed by ntfs3_free_sbi().
 */
static noinline void ntfs3_put_sbi(struct ntfs_sb_info *sbi)
{
	wnd_close(&sbi->mft.bitmap);
	wnd_close(&sbi->used.bitmap);

	/* Drop each cached system inode and clear the pointer. */
	if (sbi->mft.ni) {
		iput(&sbi->mft.ni->vfs_inode);
		sbi->mft.ni = NULL;
	}

	if (sbi->security.ni) {
		iput(&sbi->security.ni->vfs_inode);
		sbi->security.ni = NULL;
	}

	if (sbi->reparse.ni) {
		iput(&sbi->reparse.ni->vfs_inode);
		sbi->reparse.ni = NULL;
	}

	if (sbi->objid.ni) {
		iput(&sbi->objid.ni->vfs_inode);
		sbi->objid.ni = NULL;
	}

	if (sbi->volume.ni) {
		iput(&sbi->volume.ni->vfs_inode);
		sbi->volume.ni = NULL;
	}

	ntfs_update_mftmirr(sbi, 0);

	indx_clear(&sbi->security.index_sii);
	indx_clear(&sbi->security.index_sdh);
	indx_clear(&sbi->reparse.index_r);
	indx_clear(&sbi->objid.index_o);
}

/*
 * ntfs3_free_sbi - Free @sbi and the buffers it owns.
 *
 * The upcase table is freed only when ntfs_put_shared() reports that no
 * other volume uses it (it returns NULL while the table is still shared).
 */
static void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
{
	kfree(sbi->new_rec);
	kvfree(ntfs_put_shared(sbi->upcase));
	kvfree(sbi->def_table);
	kfree(sbi->compress.lznt);
#ifdef CONFIG_NTFS3_LZX_XPRESS
	xpress_free_decompressor(sbi->compress.xpress);
	lzx_free_decompressor(sbi->compress.lzx);
#endif
	kfree(sbi);
}

/* ntfs_put_super - super_operations::put_super */
static void ntfs_put_super(struct super_block *sb)
{
	struct ntfs_sb_info *sbi = sb->s_fs_info;

	ntfs_remove_procdir(sb);

	/* Mark rw ntfs as clear, if possible. */
	ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
	ntfs3_put_sbi(sbi);
}

/*
 * ntfs_statfs - super_operations::statfs
 *
 * Sizes are reported in clusters; the free count is the number of zero
 * bits in the used-clusters bitmap. The 64-bit volume serial number is
 * split over the two halves of f_fsid.
 */
static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	struct wnd_bitmap *wnd = &sbi->used.bitmap;

	buf->f_type = sb->s_magic;
	buf->f_bsize = sbi->cluster_size;
	buf->f_blocks = wnd->nbits;

	buf->f_bfree = buf->f_bavail = wnd_zeroes(wnd);
	buf->f_fsid.val[0] = sbi->volume.ser_num;
	buf->f_fsid.val[1] = (sbi->volume.ser_num >> 32);
	buf->f_namelen = NTFS_NAME_LEN;

	return 0;
}

/*
 * ntfs_show_options - super_operations::show_options
 *
 * uid, gid and iocharset are always printed; dmask/fmask only when they
 * were set explicitly; the remaining flags only when enabled.
 */
static int ntfs_show_options(struct seq_file *m, struct dentry *root)
{
	struct super_block *sb = root->d_sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	struct ntfs_mount_options *opts = sbi->options;
	struct user_namespace *user_ns = seq_user_ns(m);

	seq_printf(m, ",uid=%u", from_kuid_munged(user_ns, opts->fs_uid));
	seq_printf(m, ",gid=%u", from_kgid_munged(user_ns, opts->fs_gid));
	/* Masks are stored inverted; flip them back for display. */
	if (opts->dmask)
		seq_printf(m, ",dmask=%04o", opts->fs_dmask_inv ^ 0xffff);
	if (opts->fmask)
		seq_printf(m, ",fmask=%04o", opts->fs_fmask_inv ^ 0xffff);
	if (opts->sys_immutable)
		seq_puts(m, ",sys_immutable");
	if (opts->discard)
		seq_puts(m, ",discard");
	if (opts->force)
		seq_puts(m, ",force");
	if (opts->sparse)
		seq_puts(m, ",sparse");
	if (opts->nohidden)
		seq_puts(m, ",nohidden");
	if (opts->hide_dot_files)
		seq_puts(m, ",hide_dot_files");
	if (opts->windows_names)
		seq_puts(m, ",windows_names");
	if (opts->showmeta)
		seq_puts(m, ",showmeta");
	if (sb->s_flags & SB_POSIXACL)
		seq_puts(m, ",acl");
	/* A NULL nls table means utf8, see ntfs_load_nls(). */
	if (opts->nls)
		seq_printf(m, ",iocharset=%s", opts->nls->charset);
	else
		seq_puts(m, ",iocharset=utf8");
	if (opts->prealloc)
		seq_puts(m, ",prealloc");
	if (opts->nocase)
		seq_puts(m, ",nocase");

	return 0;
}

/*
 * ntfs_shutdown - super_operations::shutdown
 */
static void ntfs_shutdown(struct super_block *sb)
{
	set_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags);
}

/*
 * ntfs_sync_fs - super_operations::sync_fs
 *
 * Writes the security, objid and reparse system inodes, if loaded. The
 * first error is remembered but the remaining inodes are still written.
 * The volume is marked clean only if nothing failed; the MFT mirror is
 * updated in either case.
 *
 * Return: 0, the first write error, or -EIO after a forced shutdown.
 */
static int ntfs_sync_fs(struct super_block *sb, int wait)
{
	int err = 0, err2;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	struct ntfs_inode *ni;
	struct inode *inode;

	if (unlikely(ntfs3_forced_shutdown(sb)))
		return -EIO;

	ni = sbi->security.ni;
	if (ni) {
		inode = &ni->vfs_inode;
		err2 = _ni_write_inode(inode, wait);
		if (err2 && !err)
			err = err2;
	}

	ni = sbi->objid.ni;
	if (ni) {
		inode = &ni->vfs_inode;
		err2 = _ni_write_inode(inode, wait);
		if (err2 && !err)
			err = err2;
	}

	ni = sbi->reparse.ni;
	if (ni) {
		inode = &ni->vfs_inode;
		err2 = _ni_write_inode(inode, wait);
		if (err2 && !err)
			err = err2;
	}

	if (!err)
		ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);

	ntfs_update_mftmirr(sbi, wait);

	return err;
}

static const struct super_operations ntfs_sops = {
	.alloc_inode = ntfs_alloc_inode,
	.free_inode = ntfs_free_inode,
	.evict_inode = ntfs_evict_inode,
	.put_super = ntfs_put_super,
	.statfs = ntfs_statfs,
	.show_options = ntfs_show_options,
	.shutdown = ntfs_shutdown,
	.sync_fs = ntfs_sync_fs,
	.write_inode = ntfs3_write_inode,
};

/*
 * ntfs_export_get_inode - Look up an inode by number and generation.
 *
 * Builds an MFT reference from @ino (low 32 bits, plus the next 16 bits
 * with CONFIG_NTFS3_64BIT_CLUSTER) and uses @generation as the sequence
 * number.
 *
 * Return: the inode, the ERR_PTR from ntfs_iget5(), or ERR_PTR(-ESTALE)
 * when the loaded inode is bad.
 */
static struct inode *ntfs_export_get_inode(struct super_block *sb, u64 ino,
					   u32 generation)
{
	struct MFT_REF ref;
	struct inode *inode;

	ref.low = cpu_to_le32(ino);
#ifdef CONFIG_NTFS3_64BIT_CLUSTER
	ref.high = cpu_to_le16(ino >> 32);
#else
	ref.high = 0;
#endif
	ref.seq = cpu_to_le16(generation);

	inode = ntfs_iget5(sb, &ref, NULL);
	if (!IS_ERR(inode) && is_bad_inode(inode)) {
		iput(inode);
		inode = ERR_PTR(-ESTALE);
	}

	return inode;
}

/* export_operations::fh_to_dentry, via the generic helper. */
static struct dentry *ntfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				    ntfs_export_get_inode);
}

/* export_operations::fh_to_parent, via the generic helper. */
static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				    ntfs_export_get_inode);
}

/* TODO: == ntfs_sync_inode */
static int ntfs_nfs_commit_metadata(struct inode *inode)
{
	return _ni_write_inode(inode, 1);
}

static const struct export_operations ntfs_export_ops = {
	.encode_fh = generic_encode_ino32_fh,
	.fh_to_dentry = ntfs_fh_to_dentry,
	.fh_to_parent = ntfs_fh_to_parent,
	.get_parent = ntfs3_get_parent,
	.commit_metadata = ntfs_nfs_commit_metadata,
};

/*
 * format_size_gb - Return Gb,Mb to print with "%u.%02u Gb".
 *
 * @bytes: size in bytes.
 * @mb:    receives the fractional part in hundredths of a Gb (0..99),
 *         rounded and capped at 99.
 *
 * Return: whole number of Gb (@bytes >> 30, truncated to 32 bits).
 */
static u32 format_size_gb(const u64 bytes, u32 *mb)
{
	/* Do simple right 30 bit shift of 64 bit value. */
	u64 kbytes = bytes >> 10;
	u32 kbytes32 = kbytes;

	/* Low 20 bits of kbytes are the remainder below one Gb. */
	*mb = (100 * (kbytes32 & 0xfffff) + 0x7ffff) >> 20;
	if (*mb >= 100)
		*mb = 99;

	return (kbytes32 >> 20) | (((u32)(kbytes >> 32)) << 12);
}

/*
 * true_sectors_per_clst - Decode the boot sector's sectors-per-cluster byte.
 *
 * Values up to 0x80 are the sector count itself. Values from 0xf4 up are
 * read as a negative s8 whose magnitude is a shift count. Anything in
 * between is invalid.
 *
 * Return: sectors per cluster, or -EINVAL converted to u32 (the caller
 * detects it by casting back to int).
 */
static u32 true_sectors_per_clst(const struct NTFS_BOOT *boot)
{
	if (boot->sectors_per_clusters <= 0x80)
		return boot->sectors_per_clusters;
	if (boot->sectors_per_clusters >= 0xf4) /* limit shift to 2MB max */
		return 1U << (-(s8)boot->sectors_per_clusters);
	return -EINVAL;
}

/*
 * ntfs_init_from_boot - Init internal info from on-disk boot sector.
 *
 * NTFS mount begins from boot - special formatted 512 bytes.
 * There are two boots: the first and the last 512 bytes of volume.
 * The content of boot is not changed during ntfs life.
 *
 * NOTE: ntfs.sys checks only first (primary) boot.
 * chkdsk checks both boots.
 *
 * @sb:          super block being mounted.
 * @sector_size: sector size of the media.
 * @dev_size:    size of the device in bytes.
 * @boot2:       receives a copy of the boot sector when it was read from a
 *               non-zero block and the mount is not read-only; the copy
 *               may be NULL if the allocation fails.
 *
 * If the primary boot is rejected with -EINVAL, the boot located in the
 * last sizeof(*boot) bytes of the device is tried once.
 *
 * Return: 0 on success, -EIO if block 0 cannot be read, -ENOMEM, or
 * -EINVAL for an invalid or unsupported boot sector.
 */
static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
			       u64 dev_size, struct NTFS_BOOT **boot2)
{
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	int err;
	u32 mb, gb, boot_sector_size, sct_per_clst, record_size;
	u64 sectors, clusters, mlcn, mlcn2, dev_size0;
	struct NTFS_BOOT *boot;
	struct buffer_head *bh;
	struct MFT_REC *rec;
	u16 fn, ao;
	u8 cluster_bits;
	u32 boot_off = 0;
	sector_t boot_block = 0;
	const char *hint = "Primary boot";

	/* Save original dev_size. Used with alternative boot. */
	dev_size0 = dev_size;

	sbi->volume.blocks = dev_size >> PAGE_SHIFT;

read_boot:
	bh = ntfs_bread(sb, boot_block);
	if (!bh)
		return boot_block ? -EINVAL : -EIO;

	/* Every 'goto out' below reports -EINVAL unless 'err' is changed. */
	err = -EINVAL;

	/* Corrupted image; do not read OOB */
	if (bh->b_size - sizeof(*boot) < boot_off)
		goto out;

	boot = (struct NTFS_BOOT *)Add2Ptr(bh->b_data, boot_off);

	/*
	 * NOTE(review): the signature literal may have lost padding spaces in
	 * this copy of the file - confirm it matches the size of
	 * boot->system_id.
	 */
	if (memcmp(boot->system_id, "NTFS ", sizeof("NTFS ") - 1)) {
		ntfs_err(sb, "%s signature is not NTFS.", hint);
		goto out;
	}

	/* 0x55AA is not mandatory. Thanks Maxim Suhanov */
	/*if (0x55 != boot->boot_magic[0] || 0xAA != boot->boot_magic[1])
	 *	goto out;
	 */

	/* bytes_per_sector is stored as two bytes, low byte first. */
	boot_sector_size = ((u32)boot->bytes_per_sector[1] << 8) |
			   boot->bytes_per_sector[0];
	if (boot_sector_size < SECTOR_SIZE ||
	    !is_power_of_2(boot_sector_size)) {
		ntfs_err(sb, "%s: invalid bytes per sector %u.", hint,
			 boot_sector_size);
		goto out;
	}

	/* cluster size: 512, 1K, 2K, 4K, ... 2M */
	sct_per_clst = true_sectors_per_clst(boot);
	if ((int)sct_per_clst < 0 || !is_power_of_2(sct_per_clst)) {
		ntfs_err(sb, "%s: invalid sectors per cluster %u.", hint,
			 sct_per_clst);
		goto out;
	}

	sbi->cluster_size = boot_sector_size * sct_per_clst;
	sbi->cluster_bits = cluster_bits = blksize_bits(sbi->cluster_size);
	sbi->cluster_mask = sbi->cluster_size - 1;
	sbi->cluster_mask_inv = ~(u64)sbi->cluster_mask;

	mlcn = le64_to_cpu(boot->mft_clst);
	mlcn2 = le64_to_cpu(boot->mft2_clst);
	sectors = le64_to_cpu(boot->sectors_per_volume);

	/* Both MFT start clusters must lie inside the volume. */
	if (mlcn * sct_per_clst >= sectors || mlcn2 * sct_per_clst >= sectors) {
		ntfs_err(
			sb,
			"%s: start of MFT 0x%llx (0x%llx) is out of volume 0x%llx.",
			hint, mlcn, mlcn2, sectors);
		goto out;
	}

	/*
	 * record_size >= 0: size in clusters.
	 * record_size <  0: size in bytes is 2^(-record_size).
	 */
	if (boot->record_size >= 0) {
		record_size = (u32)boot->record_size << cluster_bits;
	} else if (-boot->record_size <= MAXIMUM_SHIFT_BYTES_PER_MFT) {
		record_size = 1u << (-boot->record_size);
	} else {
		ntfs_err(sb, "%s: invalid record size %d.", hint,
			 boot->record_size);
		goto out;
	}

	sbi->record_size = record_size;
	sbi->record_bits = blksize_bits(record_size);
	sbi->attr_size_tr = (5 * record_size >> 4); // ~320 bytes

	/* Check MFT record size. */
	if (record_size < SECTOR_SIZE || !is_power_of_2(record_size)) {
		ntfs_err(sb, "%s: invalid bytes per MFT record %u (%d).", hint,
			 record_size, boot->record_size);
		goto out;
	}

	if (record_size > MAXIMUM_BYTES_PER_MFT) {
		ntfs_err(sb, "Unsupported bytes per MFT record %u.",
			 record_size);
		goto out;
	}

	/* index_size uses the same encoding as record_size. */
	if (boot->index_size >= 0) {
		sbi->index_size = (u32)boot->index_size << cluster_bits;
	} else if (-boot->index_size <= MAXIMUM_SHIFT_BYTES_PER_INDEX) {
		sbi->index_size = 1u << (-boot->index_size);
	} else {
		ntfs_err(sb, "%s: invalid index size %d.", hint,
			 boot->index_size);
		goto out;
	}

	/* Check index record size. */
	if (sbi->index_size < SECTOR_SIZE || !is_power_of_2(sbi->index_size)) {
		ntfs_err(sb, "%s: invalid bytes per index %u(%d).", hint,
			 sbi->index_size, boot->index_size);
		goto out;
	}

	if (sbi->index_size > MAXIMUM_BYTES_PER_INDEX) {
		ntfs_err(sb, "%s: unsupported bytes per index %u.", hint,
			 sbi->index_size);
		goto out;
	}

	sbi->volume.size = sectors * boot_sector_size;

	/* gb/mb are only used in the messages below. */
	gb = format_size_gb(sbi->volume.size + boot_sector_size, &mb);

	/*
	 * - Volume formatted and mounted with the same sector size.
	 * - Volume formatted 4K and mounted as 512.
	 * - Volume formatted 512 and mounted as 4K.
	 */
	if (boot_sector_size != sector_size) {
		ntfs_warn(
			sb,
			"Different NTFS sector size (%u) and media sector size (%u).",
			boot_sector_size, sector_size);
		dev_size += sector_size - 1;
	}

	sbi->mft.lbo = mlcn << cluster_bits;
	sbi->mft.lbo2 = mlcn2 << cluster_bits;

	/* Compare boot's cluster and sector. */
	if (sbi->cluster_size < boot_sector_size) {
		ntfs_err(sb, "%s: invalid bytes per cluster (%u).", hint,
			 sbi->cluster_size);
		goto out;
	}

	/* Compare boot's cluster and media sector. */
	if (sbi->cluster_size < sector_size) {
		/* No way to use ntfs_get_block in this case. */
		ntfs_err(
			sb,
			"Failed to mount 'cause NTFS's cluster size (%u) is less than media sector size (%u).",
			sbi->cluster_size, sector_size);
		goto out;
	}

	sbi->max_bytes_per_attr =
		record_size - ALIGN(MFTRECORD_FIXUP_OFFSET, 8) -
		ALIGN(((record_size >> SECTOR_SHIFT) * sizeof(short)), 8) -
		ALIGN(sizeof(enum ATTR_TYPE), 8);

	sbi->volume.ser_num = le64_to_cpu(boot->serial_num);

	/* Warning if RAW volume. */
	if (dev_size < sbi->volume.size + boot_sector_size) {
		u32 mb0, gb0;

		gb0 = format_size_gb(dev_size, &mb0);
		ntfs_warn(
			sb,
			"RAW NTFS volume: Filesystem size %u.%02u Gb > volume size %u.%02u Gb. Mount in read-only.",
			gb, mb, gb0, mb0);
		sb->s_flags |= SB_RDONLY;
	}

	clusters = sbi->volume.size >> cluster_bits;
#ifndef CONFIG_NTFS3_64BIT_CLUSTER
	/* 32 bits per cluster. */
	if (clusters >> 32) {
		ntfs_notice(
			sb,
			"NTFS %u.%02u Gb is too big to use 32 bits per cluster.",
			gb, mb);
		goto out;
	}
#elif BITS_PER_LONG < 64
#error "CONFIG_NTFS3_64BIT_CLUSTER incompatible in 32 bit OS"
#endif

	sbi->used.bitmap.nbits = clusters;

	/* Template MFT record, kept in sbi->new_rec. */
	rec = kzalloc(record_size, GFP_NOFS);
	if (!rec) {
		err = -ENOMEM;
		goto out;
	}

	sbi->new_rec = rec;
	rec->rhdr.sign = NTFS_FILE_SIGNATURE;
	rec->rhdr.fix_off = cpu_to_le16(MFTRECORD_FIXUP_OFFSET);
	fn = (sbi->record_size >> SECTOR_SHIFT) + 1;
	rec->rhdr.fix_num = cpu_to_le16(fn);
	/* Attributes start after the fixup array, aligned to 8 bytes. */
	ao = ALIGN(MFTRECORD_FIXUP_OFFSET + sizeof(short) * fn, 8);
	rec->attr_off = cpu_to_le16(ao);
	rec->used = cpu_to_le32(ao + ALIGN(sizeof(enum ATTR_TYPE), 8));
	rec->total = cpu_to_le32(sbi->record_size);
	((struct ATTRIB *)Add2Ptr(rec, ao))->type = ATTR_END;

	sb_set_blocksize(sb, min_t(u32, sbi->cluster_size, PAGE_SIZE));

	sbi->block_mask = sb->s_blocksize - 1;
	sbi->blocks_per_cluster = sbi->cluster_size >> sb->s_blocksize_bits;
	sbi->volume.blocks = sbi->volume.size >> sb->s_blocksize_bits;

	/* Maximum size for normal files. */
	sbi->maxbytes = (clusters << cluster_bits) - 1;

#ifdef CONFIG_NTFS3_64BIT_CLUSTER
	if (clusters >= (1ull << (64 - cluster_bits)))
		sbi->maxbytes = -1;
	sbi->maxbytes_sparse = -1;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
#else
	/* Maximum size for sparse file. */
	sbi->maxbytes_sparse = (1ull << (cluster_bits + 32)) - 1;
	sb->s_maxbytes = 0xFFFFFFFFull << cluster_bits;
#endif

	/*
	 * Compute the MFT zone at two steps.
	 * It would be nice if we are able to allocate 1/8 of
	 * total clusters for MFT but not more than 512 MB.
	 */
	sbi->zone_max = min_t(CLST, 0x20000000 >> cluster_bits, clusters >> 3);

	err = 0;

	if (bh->b_blocknr && !sb_rdonly(sb)) {
		/*
		 * Alternative boot is ok but primary is not ok.
		 * Do not update primary boot here 'cause it may be faked boot.
		 * Let ntfs to be mounted and update boot later.
		 */
		*boot2 = kmemdup(boot, sizeof(*boot), GFP_NOFS | __GFP_NOWARN);
	}

out:
	brelse(bh);

	/* Retry only once: boot_block is non-zero on the second pass. */
	if (err == -EINVAL && !boot_block && dev_size0 > PAGE_SHIFT) {
		u32 block_size = min_t(u32, sector_size, PAGE_SIZE);
		u64 lbo = dev_size0 - sizeof(*boot);

		boot_block = lbo >> blksize_bits(block_size);
		boot_off = lbo & (block_size - 1);
		if (boot_block && block_size >= boot_off + sizeof(*boot)) {
			/*
			 * Try alternative boot (last sector)
			 */
			sb_set_blocksize(sb, block_size);
			hint = "Alternative boot";
			dev_size = dev_size0; /* restore original size. */
			goto read_boot;
		}
	}

	return err;
}

/*
 * ntfs_fill_super - Try to mount.
*/ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) { int err; struct ntfs_sb_info *sbi = sb->s_fs_info; struct block_device *bdev = sb->s_bdev; struct ntfs_mount_options *options; struct inode *inode; struct ntfs_inode *ni; size_t i, tt, bad_len, bad_frags; CLST vcn, lcn, len; struct ATTRIB *attr; const struct VOLUME_INFO *info; u32 done, bytes; struct ATTR_DEF_ENTRY *t; u16 *shared; struct MFT_REF ref; bool ro = sb_rdonly(sb); struct NTFS_BOOT *boot2 = NULL; ref.high = 0; sbi->sb = sb; sbi->options = options = fc->fs_private; fc->fs_private = NULL; sb->s_flags |= SB_NODIRATIME; sb->s_magic = 0x7366746e; // "ntfs" sb->s_op = &ntfs_sops; sb->s_export_op = &ntfs_export_ops; sb->s_time_gran = NTFS_TIME_GRAN; // 100 nsec sb->s_xattr = ntfs_xattr_handlers; if (options->nocase) set_default_d_op(sb, &ntfs_dentry_ops); options->nls = ntfs_load_nls(options->nls_name); if (IS_ERR(options->nls)) { options->nls = NULL; errorf(fc, "Cannot load nls %s", options->nls_name); err = -EINVAL; goto out; } if (bdev_max_discard_sectors(bdev) && bdev_discard_granularity(bdev)) { sbi->discard_granularity = bdev_discard_granularity(bdev); sbi->discard_granularity_mask_inv = ~(u64)(sbi->discard_granularity - 1); } /* Parse boot. */ err = ntfs_init_from_boot(sb, bdev_logical_block_size(bdev), bdev_nr_bytes(bdev), &boot2); if (err) goto out; /* * Load $Volume. This should be done before $LogFile * 'cause 'sbi->volume.ni' is used in 'ntfs_set_state'. */ ref.low = cpu_to_le32(MFT_REC_VOL); ref.seq = cpu_to_le16(MFT_REC_VOL); inode = ntfs_iget5(sb, &ref, &NAME_VOLUME); if (IS_ERR(inode)) { err = PTR_ERR(inode); ntfs_err(sb, "Failed to load $Volume (%d).", err); goto out; } ni = ntfs_i(inode); /* Load and save label (not necessary). */ attr = ni_find_attr(ni, NULL, NULL, ATTR_LABEL, NULL, 0, NULL, NULL); if (!attr) { /* It is ok if no ATTR_LABEL */ } else if (!attr->non_res && !is_attr_ext(attr)) { /* $AttrDef allows labels to be up to 128 symbols. 
*/ err = utf16s_to_utf8s(resident_data(attr), le32_to_cpu(attr->res.data_size) >> 1, UTF16_LITTLE_ENDIAN, sbi->volume.label, sizeof(sbi->volume.label)); if (err < 0) sbi->volume.label[0] = 0; } else { /* Should we break mounting here? */ //err = -EINVAL; //goto put_inode_out; } attr = ni_find_attr(ni, attr, NULL, ATTR_VOL_INFO, NULL, 0, NULL, NULL); if (!attr || is_attr_ext(attr) || !(info = resident_data_ex(attr, SIZEOF_ATTRIBUTE_VOLUME_INFO))) { ntfs_err(sb, "$Volume is corrupted."); err = -EINVAL; goto put_inode_out; } sbi->volume.major_ver = info->major_ver; sbi->volume.minor_ver = info->minor_ver; sbi->volume.flags = info->flags; sbi->volume.ni = ni; if (info->flags & VOLUME_FLAG_DIRTY) { sbi->volume.real_dirty = true; ntfs_info(sb, "It is recommened to use chkdsk."); } /* Load $MFTMirr to estimate recs_mirr. */ ref.low = cpu_to_le32(MFT_REC_MIRR); ref.seq = cpu_to_le16(MFT_REC_MIRR); inode = ntfs_iget5(sb, &ref, &NAME_MIRROR); if (IS_ERR(inode)) { err = PTR_ERR(inode); ntfs_err(sb, "Failed to load $MFTMirr (%d).", err); goto out; } sbi->mft.recs_mirr = ntfs_up_cluster(sbi, inode->i_size) >> sbi->record_bits; iput(inode); /* Load LogFile to replay. */ ref.low = cpu_to_le32(MFT_REC_LOG); ref.seq = cpu_to_le16(MFT_REC_LOG); inode = ntfs_iget5(sb, &ref, &NAME_LOGFILE); if (IS_ERR(inode)) { err = PTR_ERR(inode); ntfs_err(sb, "Failed to load \x24LogFile (%d).", err); goto out; } ni = ntfs_i(inode); err = ntfs_loadlog_and_replay(ni, sbi); if (err) goto put_inode_out; iput(inode); if ((sbi->flags & NTFS_FLAGS_NEED_REPLAY) && !ro) { ntfs_warn(sb, "failed to replay log file. Can't mount rw!"); err = -EINVAL; goto out; } if ((sbi->volume.flags & VOLUME_FLAG_DIRTY) && !ro && !options->force) { ntfs_warn(sb, "volume is dirty and \"force\" flag is not set!"); err = -EINVAL; goto out; } /* Load $MFT. 
*/ ref.low = cpu_to_le32(MFT_REC_MFT); ref.seq = cpu_to_le16(1); inode = ntfs_iget5(sb, &ref, &NAME_MFT); if (IS_ERR(inode)) { err = PTR_ERR(inode); ntfs_err(sb, "Failed to load $MFT (%d).", err); goto out; } ni = ntfs_i(inode); sbi->mft.used = ni->i_valid >> sbi->record_bits; tt = inode->i_size >> sbi->record_bits; sbi->mft.next_free = MFT_REC_USER; err = wnd_init(&sbi->mft.bitmap, sb, tt); if (err) goto put_inode_out; err = ni_load_all_mi(ni); if (err) { ntfs_err(sb, "Failed to load $MFT's subrecords (%d).", err); goto put_inode_out; } sbi->mft.ni = ni; /* Load $Bitmap. */ ref.low = cpu_to_le32(MFT_REC_BITMAP); ref.seq = cpu_to_le16(MFT_REC_BITMAP); inode = ntfs_iget5(sb, &ref, &NAME_BITMAP); if (IS_ERR(inode)) { err = PTR_ERR(inode); ntfs_err(sb, "Failed to load $Bitmap (%d).", err); goto out; } #ifndef CONFIG_NTFS3_64BIT_CLUSTER if (inode->i_size >> 32) { err = -EINVAL; goto put_inode_out; } #endif /* Check bitmap boundary. */ tt = sbi->used.bitmap.nbits; if (inode->i_size < ntfs3_bitmap_size(tt)) { ntfs_err(sb, "$Bitmap is corrupted."); err = -EINVAL; goto put_inode_out; } err = wnd_init(&sbi->used.bitmap, sb, tt); if (err) { ntfs_err(sb, "Failed to initialize $Bitmap (%d).", err); goto put_inode_out; } iput(inode); /* Compute the MFT zone. */ err = ntfs_refresh_zone(sbi); if (err) { ntfs_err(sb, "Failed to initialize MFT zone (%d).", err); goto out; } /* Load $BadClus. */ ref.low = cpu_to_le32(MFT_REC_BADCLUST); ref.seq = cpu_to_le16(MFT_REC_BADCLUST); inode = ntfs_iget5(sb, &ref, &NAME_BADCLUS); if (IS_ERR(inode)) { err = PTR_ERR(inode); ntfs_err(sb, "Failed to load $BadClus (%d).", err); goto out; } ni = ntfs_i(inode); bad_len = bad_frags = 0; for (i = 0; run_get_entry(&ni->file.run, i, &vcn, &lcn, &len); i++) { if (lcn == SPARSE_LCN) continue; bad_len += len; bad_frags += 1; if (ro) continue; if (wnd_set_used_safe(&sbi->used.bitmap, lcn, len, &tt) || tt) { /* Bad blocks marked as free in bitmap. 
*/ ntfs_set_state(sbi, NTFS_DIRTY_ERROR); } } if (bad_len) { /* * Notice about bad blocks. * In normal cases these blocks are marked as used in bitmap. * And we never allocate space in it. */ ntfs_notice(sb, "Volume contains %zu bad blocks in %zu fragments.", bad_len, bad_frags); } iput(inode); /* Load $AttrDef. */ ref.low = cpu_to_le32(MFT_REC_ATTR); ref.seq = cpu_to_le16(MFT_REC_ATTR); inode = ntfs_iget5(sb, &ref, &NAME_ATTRDEF); if (IS_ERR(inode)) { err = PTR_ERR(inode); ntfs_err(sb, "Failed to load $AttrDef (%d)", err); goto out; } /* * Typical $AttrDef contains up to 20 entries. * Check for extremely large/small size. */ if (inode->i_size < sizeof(struct ATTR_DEF_ENTRY) || inode->i_size > 100 * sizeof(struct ATTR_DEF_ENTRY)) { ntfs_err(sb, "Looks like $AttrDef is corrupted (size=%llu).", inode->i_size); err = -EINVAL; goto put_inode_out; } bytes = inode->i_size; sbi->def_table = t = kvmalloc(bytes, GFP_KERNEL); if (!t) { err = -ENOMEM; goto put_inode_out; } /* Read the entire file. */ err = inode_read_data(inode, sbi->def_table, bytes); if (err) { ntfs_err(sb, "Failed to read $AttrDef (%d).", err); goto put_inode_out; } if (ATTR_STD != t->type) { ntfs_err(sb, "$AttrDef is corrupted."); err = -EINVAL; goto put_inode_out; } t += 1; sbi->def_entries = 1; done = sizeof(struct ATTR_DEF_ENTRY); while (done + sizeof(struct ATTR_DEF_ENTRY) <= bytes) { u32 t32 = le32_to_cpu(t->type); u64 sz = le64_to_cpu(t->max_sz); if ((t32 & 0xF) || le32_to_cpu(t[-1].type) >= t32) break; if (t->type == ATTR_REPARSE) sbi->reparse.max_size = sz; else if (t->type == ATTR_EA) sbi->ea_max_size = sz; done += sizeof(struct ATTR_DEF_ENTRY); t += 1; sbi->def_entries += 1; } iput(inode); /* Load $UpCase. 
*/ ref.low = cpu_to_le32(MFT_REC_UPCASE); ref.seq = cpu_to_le16(MFT_REC_UPCASE); inode = ntfs_iget5(sb, &ref, &NAME_UPCASE); if (IS_ERR(inode)) { err = PTR_ERR(inode); ntfs_err(sb, "Failed to load $UpCase (%d).", err); goto out; } if (inode->i_size != 0x10000 * sizeof(short)) { err = -EINVAL; ntfs_err(sb, "$UpCase is corrupted."); goto put_inode_out; } /* Read the entire file. */ err = inode_read_data(inode, sbi->upcase, 0x10000 * sizeof(short)); if (err) { ntfs_err(sb, "Failed to read $UpCase (%d).", err); goto put_inode_out; } #ifdef __BIG_ENDIAN { u16 *dst = sbi->upcase; for (i = 0; i < 0x10000; i++) __swab16s(dst++); } #endif shared = ntfs_set_shared(sbi->upcase, 0x10000 * sizeof(short)); if (shared && sbi->upcase != shared) { kvfree(sbi->upcase); sbi->upcase = shared; } iput(inode); if (is_ntfs3(sbi)) { /* Load $Secure. */ err = ntfs_security_init(sbi); if (err) { ntfs_err(sb, "Failed to initialize $Secure (%d).", err); goto out; } /* Load $Extend. */ err = ntfs_extend_init(sbi); if (err) { ntfs_warn(sb, "Failed to initialize $Extend."); goto load_root; } /* Load $Extend/$Reparse. */ err = ntfs_reparse_init(sbi); if (err) { ntfs_warn(sb, "Failed to initialize $Extend/$Reparse."); goto load_root; } /* Load $Extend/$ObjId. */ err = ntfs_objid_init(sbi); if (err) { ntfs_warn(sb, "Failed to initialize $Extend/$ObjId."); goto load_root; } } load_root: /* Load root. */ ref.low = cpu_to_le32(MFT_REC_ROOT); ref.seq = cpu_to_le16(MFT_REC_ROOT); inode = ntfs_iget5(sb, &ref, &NAME_ROOT); if (IS_ERR(inode)) { err = PTR_ERR(inode); ntfs_err(sb, "Failed to load root (%d).", err); goto out; } /* * Final check. Looks like this case should never occurs. */ if (!inode->i_op) { err = -EINVAL; ntfs_err(sb, "Failed to load root (%d).", err); goto put_inode_out; } sb->s_root = d_make_root(inode); if (!sb->s_root) { err = -ENOMEM; goto put_inode_out; } if (boot2) { /* * Alternative boot is ok but primary is not ok. * Volume is recognized as NTFS. Update primary boot. 
*/ struct buffer_head *bh0 = sb_getblk(sb, 0); if (bh0) { if (buffer_locked(bh0)) __wait_on_buffer(bh0); lock_buffer(bh0); memcpy(bh0->b_data, boot2, sizeof(*boot2)); set_buffer_uptodate(bh0); mark_buffer_dirty(bh0); unlock_buffer(bh0); if (!sync_dirty_buffer(bh0)) ntfs_warn(sb, "primary boot is updated"); put_bh(bh0); } kfree(boot2); } ntfs_create_procdir(sb); if (is_legacy_ntfs(sb)) sb->s_flags |= SB_RDONLY; return 0; put_inode_out: iput(inode); out: ntfs3_put_sbi(sbi); kfree(boot2); ntfs3_put_sbi(sbi); return err; } void ntfs_unmap_meta(struct super_block *sb, CLST lcn, CLST len) { struct ntfs_sb_info *sbi = sb->s_fs_info; struct block_device *bdev = sb->s_bdev; sector_t devblock = (u64)lcn * sbi->blocks_per_cluster; unsigned long blocks = (u64)len * sbi->blocks_per_cluster; unsigned long cnt = 0; unsigned long limit = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - sb->s_blocksize_bits); if (limit >= 0x2000) limit -= 0x1000; else if (limit < 32) limit = 32; else limit >>= 1; while (blocks--) { clean_bdev_aliases(bdev, devblock++, 1); if (cnt++ >= limit) { sync_blockdev(bdev); cnt = 0; } } } /* * ntfs_discard - Issue a discard request (trim for SSD). */ int ntfs_discard(struct ntfs_sb_info *sbi, CLST lcn, CLST len) { int err; u64 lbo, bytes, start, end; struct super_block *sb; if (sbi->used.next_free_lcn == lcn + len) sbi->used.next_free_lcn = lcn; if (sbi->flags & NTFS_FLAGS_NODISCARD) return -EOPNOTSUPP; if (!sbi->options->discard) return -EOPNOTSUPP; lbo = (u64)lcn << sbi->cluster_bits; bytes = (u64)len << sbi->cluster_bits; /* Align up 'start' on discard_granularity. */ start = (lbo + sbi->discard_granularity - 1) & sbi->discard_granularity_mask_inv; /* Align down 'end' on discard_granularity. 
*/ end = (lbo + bytes) & sbi->discard_granularity_mask_inv; sb = sbi->sb; if (start >= end) return 0; err = blkdev_issue_discard(sb->s_bdev, start >> 9, (end - start) >> 9, GFP_NOFS); if (err == -EOPNOTSUPP) sbi->flags |= NTFS_FLAGS_NODISCARD; return err; } static int ntfs_fs_get_tree(struct fs_context *fc) { return get_tree_bdev(fc, ntfs_fill_super); } /* * ntfs_fs_free - Free fs_context. * * Note that this will be called after fill_super and reconfigure * even when they pass. So they have to take pointers if they pass. */ static void ntfs_fs_free(struct fs_context *fc) { struct ntfs_mount_options *opts = fc->fs_private; struct ntfs_sb_info *sbi = fc->s_fs_info; if (sbi) { ntfs3_put_sbi(sbi); ntfs3_free_sbi(sbi); } if (opts) put_mount_options(opts); } // clang-format off static const struct fs_context_operations ntfs_context_ops = { .parse_param = ntfs_fs_parse_param, .get_tree = ntfs_fs_get_tree, .reconfigure = ntfs_fs_reconfigure, .free = ntfs_fs_free, }; // clang-format on /* * ntfs_init_fs_context - Initialize sbi and opts * * This will called when mount/remount. We will first initialize * options so that if remount we can use just that. */ static int __ntfs_init_fs_context(struct fs_context *fc) { struct ntfs_mount_options *opts; struct ntfs_sb_info *sbi; opts = kzalloc(sizeof(struct ntfs_mount_options), GFP_NOFS); if (!opts) return -ENOMEM; /* Default options. 
*/ opts->fs_uid = current_uid(); opts->fs_gid = current_gid(); opts->fs_fmask_inv = ~current_umask(); opts->fs_dmask_inv = ~current_umask(); if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) goto ok; sbi = kzalloc(sizeof(struct ntfs_sb_info), GFP_NOFS); if (!sbi) goto free_opts; sbi->upcase = kvmalloc(0x10000 * sizeof(short), GFP_KERNEL); if (!sbi->upcase) goto free_sbi; ratelimit_state_init(&sbi->msg_ratelimit, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); mutex_init(&sbi->compress.mtx_lznt); #ifdef CONFIG_NTFS3_LZX_XPRESS mutex_init(&sbi->compress.mtx_xpress); mutex_init(&sbi->compress.mtx_lzx); #endif fc->s_fs_info = sbi; ok: fc->fs_private = opts; fc->ops = &ntfs_context_ops; return 0; free_sbi: kfree(sbi); free_opts: kfree(opts); return -ENOMEM; } static int ntfs_init_fs_context(struct fs_context *fc) { return __ntfs_init_fs_context(fc); } static void ntfs3_kill_sb(struct super_block *sb) { struct ntfs_sb_info *sbi = sb->s_fs_info; kill_block_super(sb); if (sbi->options) put_mount_options(sbi->options); ntfs3_free_sbi(sbi); } // clang-format off static struct file_system_type ntfs_fs_type = { .owner = THIS_MODULE, .name = "ntfs3", .init_fs_context = ntfs_init_fs_context, .parameters = ntfs_fs_parameters, .kill_sb = ntfs3_kill_sb, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; #if IS_ENABLED(CONFIG_NTFS_FS) static int ntfs_legacy_init_fs_context(struct fs_context *fc) { int ret; ret = __ntfs_init_fs_context(fc); /* If ntfs3 is used as legacy ntfs enforce read-only mode. 
*/ fc->sb_flags |= SB_RDONLY; return ret; } static struct file_system_type ntfs_legacy_fs_type = { .owner = THIS_MODULE, .name = "ntfs", .init_fs_context = ntfs_legacy_init_fs_context, .parameters = ntfs_fs_parameters, .kill_sb = ntfs3_kill_sb, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("ntfs"); static inline void register_as_ntfs_legacy(void) { int err = register_filesystem(&ntfs_legacy_fs_type); if (err) pr_warn("ntfs3: Failed to register legacy ntfs filesystem driver: %d\n", err); } static inline void unregister_as_ntfs_legacy(void) { unregister_filesystem(&ntfs_legacy_fs_type); } bool is_legacy_ntfs(struct super_block *sb) { return sb->s_type == &ntfs_legacy_fs_type; } #else static inline void register_as_ntfs_legacy(void) {} static inline void unregister_as_ntfs_legacy(void) {} #endif // clang-format on static int __init init_ntfs_fs(void) { int err; if (IS_ENABLED(CONFIG_NTFS3_FS_POSIX_ACL)) pr_info("ntfs3: Enabled Linux POSIX ACLs support\n"); if (IS_ENABLED(CONFIG_NTFS3_64BIT_CLUSTER)) pr_notice( "ntfs3: Warning: Activated 64 bits per cluster. 
Windows does not support this\n"); if (IS_ENABLED(CONFIG_NTFS3_LZX_XPRESS)) pr_info("ntfs3: Read-only LZX/Xpress compression included\n"); ntfs_create_proc_root(); err = ntfs3_init_bitmap(); if (err) goto out2; ntfs_inode_cachep = kmem_cache_create( "ntfs_inode_cache", sizeof(struct ntfs_inode), 0, (SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT), init_once); if (!ntfs_inode_cachep) { err = -ENOMEM; goto out1; } register_as_ntfs_legacy(); err = register_filesystem(&ntfs_fs_type); if (err) goto out; return 0; out: kmem_cache_destroy(ntfs_inode_cachep); out1: ntfs3_exit_bitmap(); out2: ntfs_remove_proc_root(); return err; } static void __exit exit_ntfs_fs(void) { rcu_barrier(); kmem_cache_destroy(ntfs_inode_cachep); unregister_filesystem(&ntfs_fs_type); unregister_as_ntfs_legacy(); ntfs3_exit_bitmap(); ntfs_remove_proc_root(); } MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ntfs3 read/write filesystem"); #ifdef CONFIG_NTFS3_FS_POSIX_ACL MODULE_INFO(behaviour, "Enabled Linux POSIX ACLs support"); #endif #ifdef CONFIG_NTFS3_64BIT_CLUSTER MODULE_INFO( cluster, "Warning: Activated 64 bits per cluster. Windows does not support this"); #endif #ifdef CONFIG_NTFS3_LZX_XPRESS MODULE_INFO(compression, "Read-only lzx/xpress compression included"); #endif MODULE_AUTHOR("Konstantin Komarov"); MODULE_ALIAS_FS("ntfs3"); module_init(init_ntfs_fs); module_exit(exit_ntfs_fs); |
| 37 36 35 2 34 13 13 12 12 33 33 17 18 17 3 3 2 3 1 3 1 15 14 10 7 4 9 2 8 7 8 7 8 10 10 10 1 10 10 1 10 1 10 1 10 8 10 2 10 8 3 10 3 3 37 8 23 34 34 33 26 26 26 37 1 37 7 7 7 7 7 7 7 1 1 7 1 7 1 7 1 7 7 7 1 7 7 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 
473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_flow.c Generic flow classifier * * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/list.h> #include <linux/jhash.h> #include <linux/random.h> #include <linux/pkt_cls.h> #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/if_vlan.h> #include <linux/slab.h> #include <linux/module.h> #include <net/inet_sock.h> #include <net/pkt_cls.h> #include <net/ip.h> #include <net/route.h> #include <net/flow_dissector.h> #include <net/tc_wrapper.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #endif struct flow_head { struct list_head filters; struct rcu_head rcu; }; struct flow_filter { struct list_head list; struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_proto *tp; struct timer_list perturb_timer; u32 perturb_period; u32 handle; u32 nkeys; u32 
keymask; u32 mode; u32 mask; u32 xor; u32 rshift; u32 addend; u32 divisor; u32 baseclass; u32 hashrnd; struct rcu_work rwork; }; static inline u32 addr_fold(void *addr) { unsigned long a = (unsigned long)addr; return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0); } static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow) { __be32 src = flow_get_u32_src(flow); if (src) return ntohl(src); return addr_fold(skb->sk); } static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { __be32 dst = flow_get_u32_dst(flow); if (dst) return ntohl(dst); return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) { return flow->basic.ip_proto; } static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.src); return addr_fold(skb->sk); } static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.dst); return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_iif(const struct sk_buff *skb) { return skb->skb_iif; } static u32 flow_get_priority(const struct sk_buff *skb) { return skb->priority; } static u32 flow_get_mark(const struct sk_buff *skb) { return skb->mark; } static u32 flow_get_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) return addr_fold(skb_nfct(skb)); #else return 0; #endif } #if IS_ENABLED(CONFIG_NF_CONNTRACK) #define CTTUPLE(skb, member) \ ({ \ enum ip_conntrack_info ctinfo; \ const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); \ if (ct == NULL) \ goto fallback; \ ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member; \ }) #else #define CTTUPLE(skb, member) \ ({ \ goto fallback; \ 0; \ }) #endif static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) { switch (skb_protocol(skb, true)) { 
case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); case htons(ETH_P_IPV6): return ntohl(CTTUPLE(skb, src.u3.ip6[3])); } fallback: return flow_get_src(skb, flow); } static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); case htons(ETH_P_IPV6): return ntohl(CTTUPLE(skb, dst.u3.ip6[3])); } fallback: return flow_get_dst(skb, flow); } static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, src.u.all)); fallback: return flow_get_proto_src(skb, flow); } static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, dst.u.all)); fallback: return flow_get_proto_dst(skb, flow); } static u32 flow_get_rtclassid(const struct sk_buff *skb) { #ifdef CONFIG_IP_ROUTE_CLASSID if (skb_dst(skb)) return skb_dst(skb)->tclassid; #endif return 0; } static u32 flow_get_skuid(const struct sk_buff *skb) { struct sock *sk = skb_to_full_sk(skb); if (sk && sk->sk_socket && sk->sk_socket->file) { kuid_t skuid = sk->sk_socket->file->f_cred->fsuid; return from_kuid(&init_user_ns, skuid); } return 0; } static u32 flow_get_skgid(const struct sk_buff *skb) { struct sock *sk = skb_to_full_sk(skb); if (sk && sk->sk_socket && sk->sk_socket->file) { kgid_t skgid = sk->sk_socket->file->f_cred->fsgid; return from_kgid(&init_user_ns, skgid); } return 0; } static u32 flow_get_vlan_tag(const struct sk_buff *skb) { u16 tag; if (vlan_get_tag(skb, &tag) < 0) return 0; return tag & VLAN_VID_MASK; } static u32 flow_get_rxhash(struct sk_buff *skb) { return skb_get_hash(skb); } static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow) { switch (key) { case FLOW_KEY_SRC: return flow_get_src(skb, flow); case FLOW_KEY_DST: return flow_get_dst(skb, flow); case FLOW_KEY_PROTO: return flow_get_proto(skb, flow); case FLOW_KEY_PROTO_SRC: return 
flow_get_proto_src(skb, flow); case FLOW_KEY_PROTO_DST: return flow_get_proto_dst(skb, flow); case FLOW_KEY_IIF: return flow_get_iif(skb); case FLOW_KEY_PRIORITY: return flow_get_priority(skb); case FLOW_KEY_MARK: return flow_get_mark(skb); case FLOW_KEY_NFCT: return flow_get_nfct(skb); case FLOW_KEY_NFCT_SRC: return flow_get_nfct_src(skb, flow); case FLOW_KEY_NFCT_DST: return flow_get_nfct_dst(skb, flow); case FLOW_KEY_NFCT_PROTO_SRC: return flow_get_nfct_proto_src(skb, flow); case FLOW_KEY_NFCT_PROTO_DST: return flow_get_nfct_proto_dst(skb, flow); case FLOW_KEY_RTCLASSID: return flow_get_rtclassid(skb); case FLOW_KEY_SKUID: return flow_get_skuid(skb); case FLOW_KEY_SKGID: return flow_get_skgid(skb); case FLOW_KEY_VLAN_TAG: return flow_get_vlan_tag(skb); case FLOW_KEY_RXHASH: return flow_get_rxhash(skb); default: WARN_ON(1); return 0; } } #define FLOW_KEYS_NEEDED ((1 << FLOW_KEY_SRC) | \ (1 << FLOW_KEY_DST) | \ (1 << FLOW_KEY_PROTO) | \ (1 << FLOW_KEY_PROTO_SRC) | \ (1 << FLOW_KEY_PROTO_DST) | \ (1 << FLOW_KEY_NFCT_SRC) | \ (1 << FLOW_KEY_NFCT_DST) | \ (1 << FLOW_KEY_NFCT_PROTO_SRC) | \ (1 << FLOW_KEY_NFCT_PROTO_DST)) TC_INDIRECT_SCOPE int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct flow_head *head = rcu_dereference_bh(tp->root); struct flow_filter *f; u32 keymask; u32 classid; unsigned int n, key; int r; list_for_each_entry_rcu(f, &head->filters, list) { u32 keys[FLOW_KEY_MAX + 1]; struct flow_keys flow_keys; if (!tcf_em_tree_match(skb, &f->ematches, NULL)) continue; keymask = f->keymask; if (keymask & FLOW_KEYS_NEEDED) skb_flow_dissect_flow_keys(skb, &flow_keys, 0); for (n = 0; n < f->nkeys; n++) { key = ffs(keymask) - 1; keymask &= ~(1 << key); keys[n] = flow_key_get(skb, key, &flow_keys); } if (f->mode == FLOW_MODE_HASH) classid = jhash2(keys, f->nkeys, f->hashrnd); else { classid = keys[0]; classid = (classid & f->mask) ^ f->xor; classid = (classid >> f->rshift) + f->addend; } if (f->divisor) classid %= 
f->divisor; res->class = 0; res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid); r = tcf_exts_exec(skb, &f->exts, res); if (r < 0) continue; return r; } return -1; } static void flow_perturbation(struct timer_list *t) { struct flow_filter *f = timer_container_of(f, t, perturb_timer); get_random_bytes(&f->hashrnd, 4); if (f->perturb_period) mod_timer(&f->perturb_timer, jiffies + f->perturb_period); } static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_KEYS] = { .type = NLA_U32 }, [TCA_FLOW_MODE] = { .type = NLA_U32 }, [TCA_FLOW_BASECLASS] = { .type = NLA_U32 }, [TCA_FLOW_RSHIFT] = NLA_POLICY_MAX(NLA_U32, 31 /* BITS_PER_U32 - 1 */), [TCA_FLOW_ADDEND] = { .type = NLA_U32 }, [TCA_FLOW_MASK] = { .type = NLA_U32 }, [TCA_FLOW_XOR] = { .type = NLA_U32 }, [TCA_FLOW_DIVISOR] = { .type = NLA_U32 }, [TCA_FLOW_ACT] = { .type = NLA_NESTED }, [TCA_FLOW_POLICE] = { .type = NLA_NESTED }, [TCA_FLOW_EMATCHES] = { .type = NLA_NESTED }, [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; static void __flow_destroy_filter(struct flow_filter *f) { timer_shutdown_sync(&f->perturb_timer); tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); tcf_exts_put_net(&f->exts); kfree(f); } static void flow_destroy_filter_work(struct work_struct *work) { struct flow_filter *f = container_of(to_rcu_work(work), struct flow_filter, rwork); rtnl_lock(); __flow_destroy_filter(f); rtnl_unlock(); } static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, u32 flags, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *fold, *fnew; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FLOW_MAX + 1]; unsigned int nkeys = 0; unsigned int perturb_period = 0; u32 baseclass = 0; u32 keymask = 0; u32 mode; int err; if (opt == NULL) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_FLOW_MAX, opt, flow_policy, NULL); if 
(err < 0) return err; if (tb[TCA_FLOW_BASECLASS]) { baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]); if (TC_H_MIN(baseclass) == 0) return -EINVAL; } if (tb[TCA_FLOW_KEYS]) { keymask = nla_get_u32(tb[TCA_FLOW_KEYS]); nkeys = hweight32(keymask); if (nkeys == 0) return -EINVAL; if (fls(keymask) - 1 > FLOW_KEY_MAX) return -EOPNOTSUPP; if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) && sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns) return -EOPNOTSUPP; } fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); if (!fnew) return -ENOBUFS; err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches); if (err < 0) goto err1; err = tcf_exts_init(&fnew->exts, net, TCA_FLOW_ACT, TCA_FLOW_POLICE); if (err < 0) goto err2; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, flags, extack); if (err < 0) goto err2; fold = *arg; if (fold) { err = -EINVAL; if (fold->handle != handle && handle) goto err2; /* Copy fold into fnew */ fnew->tp = fold->tp; fnew->handle = fold->handle; fnew->nkeys = fold->nkeys; fnew->keymask = fold->keymask; fnew->mode = fold->mode; fnew->mask = fold->mask; fnew->xor = fold->xor; fnew->rshift = fold->rshift; fnew->addend = fold->addend; fnew->divisor = fold->divisor; fnew->baseclass = fold->baseclass; fnew->hashrnd = fold->hashrnd; mode = fold->mode; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) goto err2; if (mode == FLOW_MODE_HASH) perturb_period = fold->perturb_period; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) goto err2; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } } else { err = -EINVAL; if (!handle) goto err2; if (!tb[TCA_FLOW_KEYS]) goto err2; mode = FLOW_MODE_MAP; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) goto err2; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) goto err2; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } if (TC_H_MAJ(baseclass) == 0) { struct Qdisc *q = 
tcf_block_q(tp->chain->block); baseclass = TC_H_MAKE(q->handle, baseclass); } if (TC_H_MIN(baseclass) == 0) baseclass = TC_H_MAKE(baseclass, 1); fnew->handle = handle; fnew->mask = ~0U; fnew->tp = tp; get_random_bytes(&fnew->hashrnd, 4); } timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE); tcf_block_netif_keep_dst(tp->chain->block); if (tb[TCA_FLOW_KEYS]) { fnew->keymask = keymask; fnew->nkeys = nkeys; } fnew->mode = mode; if (tb[TCA_FLOW_MASK]) fnew->mask = nla_get_u32(tb[TCA_FLOW_MASK]); if (tb[TCA_FLOW_XOR]) fnew->xor = nla_get_u32(tb[TCA_FLOW_XOR]); if (tb[TCA_FLOW_RSHIFT]) fnew->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]); if (tb[TCA_FLOW_ADDEND]) fnew->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]); if (tb[TCA_FLOW_DIVISOR]) fnew->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]); if (baseclass) fnew->baseclass = baseclass; fnew->perturb_period = perturb_period; if (perturb_period) mod_timer(&fnew->perturb_timer, jiffies + perturb_period); if (!*arg) list_add_tail_rcu(&fnew->list, &head->filters); else list_replace_rcu(&fold->list, &fnew->list); *arg = fnew; if (fold) { tcf_exts_get_net(&fold->exts); tcf_queue_work(&fold->rwork, flow_destroy_filter_work); } return 0; err2: tcf_exts_destroy(&fnew->exts); tcf_em_tree_destroy(&fnew->ematches); err1: kfree(fnew); return err; } static int flow_delete(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f = arg; list_del_rcu(&f->list); tcf_exts_get_net(&f->exts); tcf_queue_work(&f->rwork, flow_destroy_filter_work); *last = list_empty(&head->filters); return 0; } static int flow_init(struct tcf_proto *tp) { struct flow_head *head; head = kzalloc(sizeof(*head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; INIT_LIST_HEAD(&head->filters); rcu_assign_pointer(tp->root, head); return 0; } static void flow_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct 
flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; list_for_each_entry_safe(f, next, &head->filters, list) { list_del_rcu(&f->list); if (tcf_exts_get_net(&f->exts)) tcf_queue_work(&f->rwork, flow_destroy_filter_work); else __flow_destroy_filter(f); } kfree_rcu(head, rcu); } static void *flow_get(struct tcf_proto *tp, u32 handle) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; list_for_each_entry(f, &head->filters, list) if (f->handle == handle) return f; return NULL; } static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct flow_filter *f = fh; struct nlattr *nest; if (f == NULL) return skb->len; t->tcm_handle = f->handle; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, TCA_FLOW_KEYS, f->keymask) || nla_put_u32(skb, TCA_FLOW_MODE, f->mode)) goto nla_put_failure; if (f->mask != ~0 || f->xor != 0) { if (nla_put_u32(skb, TCA_FLOW_MASK, f->mask) || nla_put_u32(skb, TCA_FLOW_XOR, f->xor)) goto nla_put_failure; } if (f->rshift && nla_put_u32(skb, TCA_FLOW_RSHIFT, f->rshift)) goto nla_put_failure; if (f->addend && nla_put_u32(skb, TCA_FLOW_ADDEND, f->addend)) goto nla_put_failure; if (f->divisor && nla_put_u32(skb, TCA_FLOW_DIVISOR, f->divisor)) goto nla_put_failure; if (f->baseclass && nla_put_u32(skb, TCA_FLOW_BASECLASS, f->baseclass)) goto nla_put_failure; if (f->perturb_period && nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ)) goto nla_put_failure; if (tcf_exts_dump(skb, &f->exts) < 0) goto nla_put_failure; #ifdef CONFIG_NET_EMATCH if (f->ematches.hdr.nmatches && tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0) goto nla_put_failure; #endif nla_nest_end(skb, nest); if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static void flow_walk(struct tcf_proto *tp, struct 
tcf_walker *arg, bool rtnl_held) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; list_for_each_entry(f, &head->filters, list) { if (!tc_cls_stats_dump(tp, arg, f)) break; } } static struct tcf_proto_ops cls_flow_ops __read_mostly = { .kind = "flow", .classify = flow_classify, .init = flow_init, .destroy = flow_destroy, .change = flow_change, .delete = flow_delete, .get = flow_get, .dump = flow_dump, .walk = flow_walk, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_CLS("flow"); static int __init cls_flow_init(void) { return register_tcf_proto_ops(&cls_flow_ops); } static void __exit cls_flow_exit(void) { unregister_tcf_proto_ops(&cls_flow_ops); } module_init(cls_flow_init); module_exit(cls_flow_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("TC flow classifier"); |
| 25 25 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | /* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __PSP_PSP_H #define __PSP_PSP_H #include <linux/list.h> #include <linux/lockdep.h> #include <linux/mutex.h> #include <net/netns/generic.h> #include <net/psp.h> #include <net/sock.h> extern struct xarray psp_devs; extern struct mutex psp_devs_lock; void psp_dev_free(struct psp_dev *psd); int psp_dev_check_access(struct psp_dev *psd, struct net *net); void psp_nl_notify_dev(struct psp_dev *psd, u32 cmd); struct psp_assoc *psp_assoc_create(struct psp_dev *psd); struct psp_dev *psp_dev_get_for_sock(struct sock *sk); void psp_dev_tx_key_del(struct psp_dev *psd, struct psp_assoc *pas); int psp_sock_assoc_set_rx(struct sock *sk, struct psp_assoc *pas, struct psp_key_parsed *key, struct netlink_ext_ack *extack); int psp_sock_assoc_set_tx(struct sock *sk, struct psp_dev *psd, u32 version, struct psp_key_parsed *key, struct netlink_ext_ack *extack); void psp_assocs_key_rotated(struct psp_dev *psd); static inline void psp_dev_get(struct psp_dev *psd) { refcount_inc(&psd->refcnt); } static inline bool psp_dev_tryget(struct psp_dev *psd) { return refcount_inc_not_zero(&psd->refcnt); } static inline void psp_dev_put(struct psp_dev *psd) { if (refcount_dec_and_test(&psd->refcnt)) psp_dev_free(psd); } static inline bool psp_dev_is_registered(struct psp_dev *psd) { lockdep_assert_held(&psd->lock); return !!psd->ops; } #endif /* __PSP_PSP_H */ |
| 314 310 246 70 312 314 18010 17980 285 153 153 153 2 94 91 93 49 50 49 18 18 18 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 | // SPDX-License-Identifier: GPL-2.0 /* * SafeSetID Linux Security Module * * Author: Micah Morton <mortonm@chromium.org> * * Copyright (C) 2018 The Chromium OS Authors. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, as * published by the Free Software Foundation. 
* */ #define pr_fmt(fmt) "SafeSetID: " fmt #include <linux/lsm_hooks.h> #include <linux/module.h> #include <linux/ptrace.h> #include <linux/sched/task_stack.h> #include <linux/security.h> #include <uapi/linux/lsm.h> #include "lsm.h" /* Flag indicating whether initialization completed */ int safesetid_initialized __initdata; struct setid_ruleset __rcu *safesetid_setuid_rules; struct setid_ruleset __rcu *safesetid_setgid_rules; /* Compute a decision for a transition from @src to @dst under @policy. */ enum sid_policy_type _setid_policy_lookup(struct setid_ruleset *policy, kid_t src, kid_t dst) { struct setid_rule *rule; enum sid_policy_type result = SIDPOL_DEFAULT; if (policy->type == UID) { hash_for_each_possible(policy->rules, rule, next, __kuid_val(src.uid)) { if (!uid_eq(rule->src_id.uid, src.uid)) continue; if (uid_eq(rule->dst_id.uid, dst.uid)) return SIDPOL_ALLOWED; result = SIDPOL_CONSTRAINED; } } else if (policy->type == GID) { hash_for_each_possible(policy->rules, rule, next, __kgid_val(src.gid)) { if (!gid_eq(rule->src_id.gid, src.gid)) continue; if (gid_eq(rule->dst_id.gid, dst.gid)){ return SIDPOL_ALLOWED; } result = SIDPOL_CONSTRAINED; } } else { /* Should not reach here, report the ID as contrainsted */ result = SIDPOL_CONSTRAINED; } return result; } /* * Compute a decision for a transition from @src to @dst under the active * policy. 
*/ static enum sid_policy_type setid_policy_lookup(kid_t src, kid_t dst, enum setid_type new_type) { enum sid_policy_type result = SIDPOL_DEFAULT; struct setid_ruleset *pol; rcu_read_lock(); if (new_type == UID) pol = rcu_dereference(safesetid_setuid_rules); else if (new_type == GID) pol = rcu_dereference(safesetid_setgid_rules); else { /* Should not reach here */ result = SIDPOL_CONSTRAINED; rcu_read_unlock(); return result; } if (pol) { pol->type = new_type; result = _setid_policy_lookup(pol, src, dst); } rcu_read_unlock(); return result; } static int safesetid_security_capable(const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts) { /* We're only interested in CAP_SETUID and CAP_SETGID. */ if (cap != CAP_SETUID && cap != CAP_SETGID) return 0; /* * If CAP_SET{U/G}ID is currently used for a setid or setgroups syscall, we * want to let it go through here; the real security check happens later, in * the task_fix_set{u/g}id or task_fix_setgroups hooks. */ if ((opts & CAP_OPT_INSETID) != 0) return 0; switch (cap) { case CAP_SETUID: /* * If no policy applies to this task, allow the use of CAP_SETUID for * other purposes. */ if (setid_policy_lookup((kid_t){.uid = cred->uid}, INVALID_ID, UID) == SIDPOL_DEFAULT) return 0; /* * Reject use of CAP_SETUID for functionality other than calling * set*uid() (e.g. setting up userns uid mappings). */ pr_warn("Operation requires CAP_SETUID, which is not available to UID %u for operations besides approved set*uid transitions\n", __kuid_val(cred->uid)); return -EPERM; case CAP_SETGID: /* * If no policy applies to this task, allow the use of CAP_SETGID for * other purposes. */ if (setid_policy_lookup((kid_t){.gid = cred->gid}, INVALID_ID, GID) == SIDPOL_DEFAULT) return 0; /* * Reject use of CAP_SETUID for functionality other than calling * set*gid() (e.g. setting up userns gid mappings). 
*/ pr_warn("Operation requires CAP_SETGID, which is not available to GID %u for operations besides approved set*gid transitions\n", __kgid_val(cred->gid)); return -EPERM; default: /* Error, the only capabilities were checking for is CAP_SETUID/GID */ return 0; } return 0; } /* * Check whether a caller with old credentials @old is allowed to switch to * credentials that contain @new_id. */ static bool id_permitted_for_cred(const struct cred *old, kid_t new_id, enum setid_type new_type) { bool permitted; /* If our old creds already had this ID in it, it's fine. */ if (new_type == UID) { if (uid_eq(new_id.uid, old->uid) || uid_eq(new_id.uid, old->euid) || uid_eq(new_id.uid, old->suid)) return true; } else if (new_type == GID){ if (gid_eq(new_id.gid, old->gid) || gid_eq(new_id.gid, old->egid) || gid_eq(new_id.gid, old->sgid)) return true; } else /* Error, new_type is an invalid type */ return false; /* * Transitions to new UIDs require a check against the policy of the old * RUID. */ permitted = setid_policy_lookup((kid_t){.uid = old->uid}, new_id, new_type) != SIDPOL_CONSTRAINED; if (!permitted) { if (new_type == UID) { pr_warn("UID transition ((%d,%d,%d) -> %d) blocked\n", __kuid_val(old->uid), __kuid_val(old->euid), __kuid_val(old->suid), __kuid_val(new_id.uid)); } else if (new_type == GID) { pr_warn("GID transition ((%d,%d,%d) -> %d) blocked\n", __kgid_val(old->gid), __kgid_val(old->egid), __kgid_val(old->sgid), __kgid_val(new_id.gid)); } else /* Error, new_type is an invalid type */ return false; } return permitted; } /* * Check whether there is either an exception for user under old cred struct to * set*uid to user under new cred struct, or the UID transition is allowed (by * Linux set*uid rules) even without CAP_SETUID. */ static int safesetid_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { /* Do nothing if there are no setuid restrictions for our old RUID. 
*/ if (setid_policy_lookup((kid_t){.uid = old->uid}, INVALID_ID, UID) == SIDPOL_DEFAULT) return 0; if (id_permitted_for_cred(old, (kid_t){.uid = new->uid}, UID) && id_permitted_for_cred(old, (kid_t){.uid = new->euid}, UID) && id_permitted_for_cred(old, (kid_t){.uid = new->suid}, UID) && id_permitted_for_cred(old, (kid_t){.uid = new->fsuid}, UID)) return 0; /* * Kill this process to avoid potential security vulnerabilities * that could arise from a missing allowlist entry preventing a * privileged process from dropping to a lesser-privileged one. */ force_sig(SIGKILL); return -EACCES; } static int safesetid_task_fix_setgid(struct cred *new, const struct cred *old, int flags) { /* Do nothing if there are no setgid restrictions for our old RGID. */ if (setid_policy_lookup((kid_t){.gid = old->gid}, INVALID_ID, GID) == SIDPOL_DEFAULT) return 0; if (id_permitted_for_cred(old, (kid_t){.gid = new->gid}, GID) && id_permitted_for_cred(old, (kid_t){.gid = new->egid}, GID) && id_permitted_for_cred(old, (kid_t){.gid = new->sgid}, GID) && id_permitted_for_cred(old, (kid_t){.gid = new->fsgid}, GID)) return 0; /* * Kill this process to avoid potential security vulnerabilities * that could arise from a missing allowlist entry preventing a * privileged process from dropping to a lesser-privileged one. */ force_sig(SIGKILL); return -EACCES; } static int safesetid_task_fix_setgroups(struct cred *new, const struct cred *old) { int i; /* Do nothing if there are no setgid restrictions for our old RGID. */ if (setid_policy_lookup((kid_t){.gid = old->gid}, INVALID_ID, GID) == SIDPOL_DEFAULT) return 0; get_group_info(new->group_info); for (i = 0; i < new->group_info->ngroups; i++) { if (!id_permitted_for_cred(old, (kid_t){.gid = new->group_info->gid[i]}, GID)) { put_group_info(new->group_info); /* * Kill this process to avoid potential security vulnerabilities * that could arise from a missing allowlist entry preventing a * privileged process from dropping to a lesser-privileged one. 
*/ force_sig(SIGKILL); return -EACCES; } } put_group_info(new->group_info); return 0; } static const struct lsm_id safesetid_lsmid = { .name = "safesetid", .id = LSM_ID_SAFESETID, }; static struct security_hook_list safesetid_security_hooks[] = { LSM_HOOK_INIT(task_fix_setuid, safesetid_task_fix_setuid), LSM_HOOK_INIT(task_fix_setgid, safesetid_task_fix_setgid), LSM_HOOK_INIT(task_fix_setgroups, safesetid_task_fix_setgroups), LSM_HOOK_INIT(capable, safesetid_security_capable) }; static int __init safesetid_security_init(void) { security_add_hooks(safesetid_security_hooks, ARRAY_SIZE(safesetid_security_hooks), &safesetid_lsmid); /* Report that SafeSetID successfully initialized */ safesetid_initialized = 1; return 0; } DEFINE_LSM(safesetid_security_init) = { .init = safesetid_security_init, .name = "safesetid", }; |
| 249 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Virtio PCI driver - common functionality for all device versions * * This module allows virtio devices to be used over a virtual PCI device. * This can be used with QEMU based VMMs like KVM or Xen. * * Copyright IBM Corp. 2007 * Copyright Red Hat, Inc. 2014 * * Authors: * Anthony Liguori <aliguori@us.ibm.com> * Rusty Russell <rusty@rustcorp.com.au> * Michael S. 
Tsirkin <mst@redhat.com> */ #include "virtio_pci_common.h" static bool force_legacy = false; #if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY) module_param(force_legacy, bool, 0444); MODULE_PARM_DESC(force_legacy, "Force legacy mode for transitional virtio 1 devices"); #endif bool vp_is_avq(struct virtio_device *vdev, unsigned int index) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) return false; return index == vp_dev->admin_vq.vq_index; } /* wait for pending irq handlers */ void vp_synchronize_vectors(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; if (vp_dev->intx_enabled) synchronize_irq(vp_dev->pci_dev->irq); for (i = 0; i < vp_dev->msix_vectors; ++i) synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); } /* the notify function used when creating a virt queue */ bool vp_notify(struct virtqueue *vq) { /* we write the queue's selector into the notification register to * signal the other end */ iowrite16(vq->index, (void __iomem *)vq->priv); return true; } /* Notify all slow path virtqueues on an interrupt. */ static void vp_vring_slow_path_interrupt(int irq, struct virtio_pci_device *vp_dev) { struct virtio_pci_vq_info *info; unsigned long flags; spin_lock_irqsave(&vp_dev->lock, flags); list_for_each_entry(info, &vp_dev->slow_virtqueues, node) vring_interrupt(irq, info->vq); spin_unlock_irqrestore(&vp_dev->lock, flags); } /* Handle a configuration change: Tell driver if it wants to know. */ static irqreturn_t vp_config_changed(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; virtio_config_changed(&vp_dev->vdev); vp_vring_slow_path_interrupt(irq, vp_dev); return IRQ_HANDLED; } /* Notify all virtqueues on an interrupt. 
*/ static irqreturn_t vp_vring_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; struct virtio_pci_vq_info *info; irqreturn_t ret = IRQ_NONE; unsigned long flags; spin_lock_irqsave(&vp_dev->lock, flags); list_for_each_entry(info, &vp_dev->virtqueues, node) { if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) ret = IRQ_HANDLED; } spin_unlock_irqrestore(&vp_dev->lock, flags); return ret; } /* A small wrapper to also acknowledge the interrupt when it's handled. * I really need an EIO hook for the vring so I can ack the interrupt once we * know that we'll be handling the IRQ but before we invoke the callback since * the callback may notify the host which results in the host attempting to * raise an interrupt that we would then mask once we acknowledged the * interrupt. */ static irqreturn_t vp_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; u8 isr; /* reading the ISR has the effect of also clearing it so it's very * important to save off the value. */ isr = ioread8(vp_dev->isr); /* It's definitely not us if the ISR was not high */ if (!isr) return IRQ_NONE; /* Configuration change? Tell driver if it wants to know. 
*/ if (isr & VIRTIO_PCI_ISR_CONFIG) vp_config_changed(irq, opaque); return vp_vring_interrupt(irq, opaque); } static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, bool per_vq_vectors, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); unsigned int flags = PCI_IRQ_MSIX; unsigned int i, v; int err = -ENOMEM; vp_dev->msix_vectors = nvectors; vp_dev->msix_names = kmalloc_array(nvectors, sizeof(*vp_dev->msix_names), GFP_KERNEL); if (!vp_dev->msix_names) goto error; vp_dev->msix_affinity_masks = kcalloc(nvectors, sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL); if (!vp_dev->msix_affinity_masks) goto error; for (i = 0; i < nvectors; ++i) if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], GFP_KERNEL)) goto error; if (!per_vq_vectors) desc = NULL; if (desc) { flags |= PCI_IRQ_AFFINITY; desc->pre_vectors++; /* virtio config vector */ } err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, nvectors, flags, desc); if (err < 0) goto error; vp_dev->msix_enabled = 1; /* Set the vector used for configuration */ v = vp_dev->msix_used_vectors; snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-config", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), vp_config_changed, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; ++vp_dev->msix_used_vectors; v = vp_dev->config_vector(vp_dev, v); /* Verify we had enough resources to assign the vector */ if (v == VIRTIO_MSI_NO_VECTOR) { err = -EBUSY; goto error; } if (!per_vq_vectors) { /* Shared vector for all VQs */ v = vp_dev->msix_used_vectors; snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-virtqueues", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), vp_vring_interrupt, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; ++vp_dev->msix_used_vectors; } return 0; error: return err; } static bool vp_is_slow_path_vector(u16 msix_vec) { return msix_vec == 
VP_MSIX_CONFIG_VECTOR; } static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx, u16 msix_vec, struct virtio_pci_vq_info **p_info) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); struct virtqueue *vq; unsigned long flags; /* fill out our structure that represents an active queue */ if (!info) return ERR_PTR(-ENOMEM); vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx, msix_vec); if (IS_ERR(vq)) goto out_info; info->vq = vq; if (callback) { spin_lock_irqsave(&vp_dev->lock, flags); if (!vp_is_slow_path_vector(msix_vec)) list_add(&info->node, &vp_dev->virtqueues); else list_add(&info->node, &vp_dev->slow_virtqueues); spin_unlock_irqrestore(&vp_dev->lock, flags); } else { INIT_LIST_HEAD(&info->node); } *p_info = info; return vq; out_info: kfree(info); return vq; } static void vp_del_vq(struct virtqueue *vq, struct virtio_pci_vq_info *info) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); unsigned long flags; /* * If it fails during re-enable reset vq. This way we won't rejoin * info->node to the queue. Prevent unexpected irqs. */ if (!vq->reset) { spin_lock_irqsave(&vp_dev->lock, flags); list_del(&info->node); spin_unlock_irqrestore(&vp_dev->lock, flags); } vp_dev->del_vq(info); kfree(info); } /* the config->del_vqs() implementation */ void vp_del_vqs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info; struct virtqueue *vq, *n; int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { info = vp_is_avq(vdev, vq->index) ? 
vp_dev->admin_vq.info : vp_dev->vqs[vq->index]; if (vp_dev->per_vq_vectors) { int v = info->msix_vector; if (v != VIRTIO_MSI_NO_VECTOR && !vp_is_slow_path_vector(v)) { int irq = pci_irq_vector(vp_dev->pci_dev, v); irq_update_affinity_hint(irq, NULL); free_irq(irq, vq); } } vp_del_vq(vq, info); } vp_dev->per_vq_vectors = false; if (vp_dev->intx_enabled) { free_irq(vp_dev->pci_dev->irq, vp_dev); vp_dev->intx_enabled = 0; } for (i = 0; i < vp_dev->msix_used_vectors; ++i) free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); if (vp_dev->msix_affinity_masks) { for (i = 0; i < vp_dev->msix_vectors; i++) free_cpumask_var(vp_dev->msix_affinity_masks[i]); } if (vp_dev->msix_enabled) { /* Disable the vector used for configuration */ vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); pci_free_irq_vectors(vp_dev->pci_dev); vp_dev->msix_enabled = 0; } vp_dev->msix_vectors = 0; vp_dev->msix_used_vectors = 0; kfree(vp_dev->msix_names); vp_dev->msix_names = NULL; kfree(vp_dev->msix_affinity_masks); vp_dev->msix_affinity_masks = NULL; kfree(vp_dev->vqs); vp_dev->vqs = NULL; } enum vp_vq_vector_policy { VP_VQ_VECTOR_POLICY_EACH, VP_VQ_VECTOR_POLICY_SHARED_SLOW, VP_VQ_VECTOR_POLICY_SHARED, }; static struct virtqueue * vp_find_one_vq_msix(struct virtio_device *vdev, int queue_idx, vq_callback_t *callback, const char *name, bool ctx, bool slow_path, int *allocated_vectors, enum vp_vq_vector_policy vector_policy, struct virtio_pci_vq_info **p_info) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; u16 msix_vec; int err; if (!callback) msix_vec = VIRTIO_MSI_NO_VECTOR; else if (vector_policy == VP_VQ_VECTOR_POLICY_EACH || (vector_policy == VP_VQ_VECTOR_POLICY_SHARED_SLOW && !slow_path)) msix_vec = (*allocated_vectors)++; else if (vector_policy != VP_VQ_VECTOR_POLICY_EACH && slow_path) msix_vec = VP_MSIX_CONFIG_VECTOR; else msix_vec = VP_MSIX_VQ_VECTOR; vq = vp_setup_vq(vdev, queue_idx, callback, name, ctx, msix_vec, p_info); if (IS_ERR(vq)) return vq; if 
(vector_policy == VP_VQ_VECTOR_POLICY_SHARED || msix_vec == VIRTIO_MSI_NO_VECTOR || vp_is_slow_path_vector(msix_vec)) return vq; /* allocate per-vq irq if available and necessary */ snprintf(vp_dev->msix_names[msix_vec], sizeof(*vp_dev->msix_names), "%s-%s", dev_name(&vp_dev->vdev.dev), name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), vring_interrupt, 0, vp_dev->msix_names[msix_vec], vq); if (err) { vp_del_vq(vq, *p_info); return ERR_PTR(err); } return vq; } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[], enum vp_vq_vector_policy vector_policy, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; struct virtqueue_info *vqi; int i, err, nvectors, allocated_vectors, queue_idx = 0; struct virtqueue *vq; bool per_vq_vectors; u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; if (vp_dev->avq_index) { err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); if (err) goto error_find; } per_vq_vectors = vector_policy != VP_VQ_VECTOR_POLICY_SHARED; if (per_vq_vectors) { /* Best option: one for change interrupt, one per vq. */ nvectors = 1; for (i = 0; i < nvqs; ++i) { vqi = &vqs_info[i]; if (vqi->name && vqi->callback) ++nvectors; } if (avq_num && vector_policy == VP_VQ_VECTOR_POLICY_EACH) ++nvectors; } else { /* Second best: one for change, shared for all vqs. 
*/ nvectors = 2; } err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors, desc); if (err) goto error_find; vp_dev->per_vq_vectors = per_vq_vectors; allocated_vectors = vp_dev->msix_used_vectors; for (i = 0; i < nvqs; ++i) { vqi = &vqs_info[i]; if (!vqi->name) { vqs[i] = NULL; continue; } vqs[i] = vp_find_one_vq_msix(vdev, queue_idx++, vqi->callback, vqi->name, vqi->ctx, false, &allocated_vectors, vector_policy, &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; } } if (!avq_num) return 0; sprintf(avq->name, "avq.%u", avq->vq_index); vq = vp_find_one_vq_msix(vdev, avq->vq_index, vp_modern_avq_done, avq->name, false, true, &allocated_vectors, vector_policy, &vp_dev->admin_vq.info); if (IS_ERR(vq)) { err = PTR_ERR(vq); goto error_find; } return 0; error_find: vp_del_vqs(vdev); return err; } static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; int i, err, queue_idx = 0; struct virtqueue *vq; u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; if (vp_dev->avq_index) { err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); if (err) goto out_del_vqs; } err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) goto out_del_vqs; vp_dev->intx_enabled = 1; vp_dev->per_vq_vectors = false; for (i = 0; i < nvqs; ++i) { struct virtqueue_info *vqi = &vqs_info[i]; if (!vqi->name) { vqs[i] = NULL; continue; } vqs[i] = vp_setup_vq(vdev, queue_idx++, vqi->callback, vqi->name, vqi->ctx, VIRTIO_MSI_NO_VECTOR, &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto out_del_vqs; } } if (!avq_num) return 0; sprintf(avq->name, "avq.%u", avq->vq_index); vq = vp_setup_vq(vdev, queue_idx++, vp_modern_avq_done, avq->name, false, 
VIRTIO_MSI_NO_VECTOR, &vp_dev->admin_vq.info); if (IS_ERR(vq)) { err = PTR_ERR(vq); goto out_del_vqs; } return 0; out_del_vqs: vp_del_vqs(vdev); return err; } /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[], struct irq_affinity *desc) { int err; /* Try MSI-X with one vector per queue. */ err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, VP_VQ_VECTOR_POLICY_EACH, desc); if (!err) return 0; /* Fallback: MSI-X with one shared vector for config and * slow path queues, one vector per queue for the rest. */ err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, VP_VQ_VECTOR_POLICY_SHARED_SLOW, desc); if (!err) return 0; /* Fallback: MSI-X with one vector for config, one shared for queues. */ err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, VP_VQ_VECTOR_POLICY_SHARED, desc); if (!err) return 0; /* Is there an interrupt? If not give up. */ if (!(to_vp_device(vdev)->pci_dev->irq)) return err; /* Finally fall back to regular interrupts. 
*/ return vp_find_vqs_intx(vdev, nvqs, vqs, vqs_info); } const char *vp_bus_name(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); return pci_name(vp_dev->pci_dev); } /* Setup the affinity for a virtqueue: * - force the affinity for per vq vector * - OR over all affinities for shared MSI * - ignore the affinity request if we're using INTX */ int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask) { struct virtio_device *vdev = vq->vdev; struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; struct cpumask *mask; unsigned int irq; if (!vq->callback) return -EINVAL; if (vp_dev->msix_enabled) { mask = vp_dev->msix_affinity_masks[info->msix_vector]; irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); if (!cpu_mask) irq_update_affinity_hint(irq, NULL); else { cpumask_copy(mask, cpu_mask); irq_set_affinity_and_hint(irq, mask); } } return 0; } const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); if (!vp_dev->per_vq_vectors || vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR || vp_is_slow_path_vector(vp_dev->vqs[index]->msix_vector)) return NULL; return pci_irq_get_affinity(vp_dev->pci_dev, vp_dev->vqs[index]->msix_vector); } #ifdef CONFIG_PM_SLEEP static int virtio_pci_freeze(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret; ret = virtio_device_freeze(&vp_dev->vdev); if (!ret) pci_disable_device(pci_dev); return ret; } static int virtio_pci_restore(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret; ret = pci_enable_device(pci_dev); if (ret) return ret; pci_set_master(pci_dev); return virtio_device_restore(&vp_dev->vdev); } static bool vp_supports_pm_no_reset(struct device *dev) { struct pci_dev 
*pci_dev = to_pci_dev(dev); u16 pmcsr; if (!pci_dev->pm_cap) return false; pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, &pmcsr); if (PCI_POSSIBLE_ERROR(pmcsr)) { dev_err(dev, "Unable to query pmcsr"); return false; } return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET; } static int virtio_pci_suspend(struct device *dev) { return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev); } static int virtio_pci_resume(struct device *dev) { return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_restore(dev); } static const struct dev_pm_ops virtio_pci_pm_ops = { .suspend = virtio_pci_suspend, .resume = virtio_pci_resume, .freeze = virtio_pci_freeze, .thaw = virtio_pci_restore, .poweroff = virtio_pci_freeze, .restore = virtio_pci_restore, }; #endif /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ static const struct pci_device_id virtio_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) }, { 0 } }; MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); static void virtio_pci_release_dev(struct device *_d) { struct virtio_device *vdev = dev_to_virtio(_d); struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* As struct device is a kobject, it's not safe to * free the memory (including the reference counter itself) * until it's release callback. 
*/ kfree(vp_dev); } static int virtio_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) { struct virtio_pci_device *vp_dev, *reg_dev = NULL; int rc; /* allocate our structure and fill it out */ vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL); if (!vp_dev) return -ENOMEM; pci_set_drvdata(pci_dev, vp_dev); vp_dev->vdev.dev.parent = &pci_dev->dev; vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->pci_dev = pci_dev; INIT_LIST_HEAD(&vp_dev->virtqueues); INIT_LIST_HEAD(&vp_dev->slow_virtqueues); spin_lock_init(&vp_dev->lock); /* enable the device */ rc = pci_enable_device(pci_dev); if (rc) goto err_enable_device; if (force_legacy) { rc = virtio_pci_legacy_probe(vp_dev); /* Also try modern mode if we can't map BAR0 (no IO space). */ if (rc == -ENODEV || rc == -ENOMEM) rc = virtio_pci_modern_probe(vp_dev); if (rc) goto err_probe; } else { rc = virtio_pci_modern_probe(vp_dev); if (rc == -ENODEV) rc = virtio_pci_legacy_probe(vp_dev); if (rc) goto err_probe; } pci_set_master(pci_dev); rc = register_virtio_device(&vp_dev->vdev); reg_dev = vp_dev; if (rc) goto err_register; return 0; err_register: if (vp_dev->is_legacy) virtio_pci_legacy_remove(vp_dev); else virtio_pci_modern_remove(vp_dev); err_probe: pci_disable_device(pci_dev); err_enable_device: if (reg_dev) put_device(&vp_dev->vdev.dev); else kfree(vp_dev); return rc; } static void virtio_pci_remove(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct device *dev = get_device(&vp_dev->vdev.dev); /* * Device is marked broken on surprise removal so that virtio upper * layers can abort any ongoing operation. 
*/ if (!pci_device_is_present(pci_dev)) virtio_break_device(&vp_dev->vdev); pci_disable_sriov(pci_dev); unregister_virtio_device(&vp_dev->vdev); if (vp_dev->is_legacy) virtio_pci_legacy_remove(vp_dev); else virtio_pci_modern_remove(vp_dev); pci_disable_device(pci_dev); put_device(dev); } static int virtio_pci_sriov_configure(struct pci_dev *pci_dev, int num_vfs) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct virtio_device *vdev = &vp_dev->vdev; int ret; if (!(vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_DRIVER_OK)) return -EBUSY; if (!__virtio_test_bit(vdev, VIRTIO_F_SR_IOV)) return -EINVAL; if (pci_vfs_assigned(pci_dev)) return -EPERM; if (num_vfs == 0) { pci_disable_sriov(pci_dev); return 0; } ret = pci_enable_sriov(pci_dev, num_vfs); if (ret < 0) return ret; return num_vfs; } static void virtio_pci_reset_prepare(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret = 0; ret = virtio_device_reset_prepare(&vp_dev->vdev); if (ret) { if (ret != -EOPNOTSUPP) dev_warn(&pci_dev->dev, "Reset prepare failure: %d", ret); return; } if (pci_is_enabled(pci_dev)) pci_disable_device(pci_dev); } static void virtio_pci_reset_done(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret; if (pci_is_enabled(pci_dev)) return; ret = pci_enable_device(pci_dev); if (!ret) { pci_set_master(pci_dev); ret = virtio_device_reset_done(&vp_dev->vdev); } if (ret && ret != -EOPNOTSUPP) dev_warn(&pci_dev->dev, "Reset done failure: %d", ret); } static const struct pci_error_handlers virtio_pci_err_handler = { .reset_prepare = virtio_pci_reset_prepare, .reset_done = virtio_pci_reset_done, }; static struct pci_driver virtio_pci_driver = { .name = "virtio-pci", .id_table = virtio_pci_id_table, .probe = virtio_pci_probe, .remove = virtio_pci_remove, #ifdef CONFIG_PM_SLEEP .driver.pm = &virtio_pci_pm_ops, #endif .sriov_configure = virtio_pci_sriov_configure, .err_handler = 
&virtio_pci_err_handler, }; struct virtio_device *virtio_pci_vf_get_pf_dev(struct pci_dev *pdev) { struct virtio_pci_device *pf_vp_dev; pf_vp_dev = pci_iov_get_pf_drvdata(pdev, &virtio_pci_driver); if (IS_ERR(pf_vp_dev)) return NULL; return &pf_vp_dev->vdev; } module_pci_driver(virtio_pci_driver); MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>"); MODULE_DESCRIPTION("virtio-pci"); MODULE_LICENSE("GPL"); MODULE_VERSION("1"); |
| 7 1 7 12 5 83 83 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | // SPDX-License-Identifier: GPL-2.0-or-later /* * UDPLITEv6 An implementation of the UDP-Lite protocol over IPv6. * See also net/ipv4/udplite.c * * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk> * * Changes: * Fixes: */ #define pr_fmt(fmt) "UDPLite6: " fmt #include <linux/export.h> #include <linux/proc_fs.h> #include "udp_impl.h" static int udplitev6_sk_init(struct sock *sk) { udpv6_init_sock(sk); pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); return 0; } static int udplitev6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); } static int udplitev6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { return __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table); } static const struct inet6_protocol udplitev6_protocol = { .handler = udplitev6_rcv, .err_handler = udplitev6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; struct proto udplitev6_prot = { .name = "UDPLITEv6", .owner = THIS_MODULE, .close = udp_lib_close, .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, .init = udplitev6_sk_init, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, .memory_allocated = &net_aligned_data.udp_memory_allocated, 
.per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, .sysctl_mem = sysctl_udp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp6_sock), .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6), .h.udp_table = &udplite_table, }; static struct inet_protosw udplite6_protosw = { .type = SOCK_DGRAM, .protocol = IPPROTO_UDPLITE, .prot = &udplitev6_prot, .ops = &inet6_dgram_ops, .flags = INET_PROTOSW_PERMANENT, }; int __init udplitev6_init(void) { int ret; ret = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); if (ret) goto out; ret = inet6_register_protosw(&udplite6_protosw); if (ret) goto out_udplitev6_protocol; out: return ret; out_udplitev6_protocol: inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); goto out; } void udplitev6_exit(void) { inet6_unregister_protosw(&udplite6_protosw); inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); } #ifdef CONFIG_PROC_FS static struct udp_seq_afinfo udplite6_seq_afinfo = { .family = AF_INET6, .udp_table = &udplite_table, }; static int __net_init udplite6_proc_init_net(struct net *net) { if (!proc_create_net_data("udplite6", 0444, net->proc_net, &udp6_seq_ops, sizeof(struct udp_iter_state), &udplite6_seq_afinfo)) return -ENOMEM; return 0; } static void __net_exit udplite6_proc_exit_net(struct net *net) { remove_proc_entry("udplite6", net->proc_net); } static struct pernet_operations udplite6_net_ops = { .init = udplite6_proc_init_net, .exit = udplite6_proc_exit_net, }; int __init udplite6_proc_init(void) { return register_pernet_subsys(&udplite6_net_ops); } void udplite6_proc_exit(void) { unregister_pernet_subsys(&udplite6_net_ops); } #endif |
| 3 2 1 3 2 1 1 1 2 9 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 | /* * Copyright 2016 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software") * to deal in the software without restriction, including without limitation * on the rights to use, copy, modify, merge, publish, distribute, sub * license, and/or sell copies of the Software, and to permit persons to whom * them Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTIBILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include <linux/dma-buf.h> #include <linux/dma-resv.h> #include <drm/drm_file.h> #include "vgem_drv.h" #define VGEM_FENCE_TIMEOUT (10*HZ) struct vgem_fence { struct dma_fence base; struct spinlock lock; struct timer_list timer; }; static const char *vgem_fence_get_driver_name(struct dma_fence *fence) { return "vgem"; } static const char *vgem_fence_get_timeline_name(struct dma_fence *fence) { return "unbound"; } static void vgem_fence_release(struct dma_fence *base) { struct vgem_fence *fence = container_of(base, typeof(*fence), base); timer_delete_sync(&fence->timer); dma_fence_free(&fence->base); } static const struct dma_fence_ops vgem_fence_ops = { .get_driver_name = vgem_fence_get_driver_name, .get_timeline_name = vgem_fence_get_timeline_name, .release = vgem_fence_release, }; static void vgem_fence_timeout(struct timer_list *t) { struct vgem_fence *fence = timer_container_of(fence, t, timer); dma_fence_signal(&fence->base); } static struct dma_fence *vgem_fence_create(struct vgem_file *vfile, unsigned int flags) { struct vgem_fence *fence; fence = kzalloc(sizeof(*fence), GFP_KERNEL); if (!fence) return NULL; spin_lock_init(&fence->lock); dma_fence_init(&fence->base, &vgem_fence_ops, &fence->lock, dma_fence_context_alloc(1), 1); timer_setup(&fence->timer, vgem_fence_timeout, 0); /* We force the fence to expire within 10s to prevent driver hangs */ mod_timer(&fence->timer, jiffies + VGEM_FENCE_TIMEOUT); return &fence->base; } /* * vgem_fence_attach_ioctl (DRM_IOCTL_VGEM_FENCE_ATTACH): * * Create and attach a fence to the vGEM handle. This fence is then exposed * via the dma-buf reservation object and visible to consumers of the exported * dma-buf. If the flags contain VGEM_FENCE_WRITE, the fence indicates the * vGEM buffer is being written to by the client and is exposed as an exclusive * fence, otherwise the fence indicates the client is current reading from the * buffer and all future writes should wait for the client to signal its * completion. 
Note that if a conflicting fence is already on the dma-buf (i.e. * an exclusive fence when adding a read, or any fence when adding a write), * -EBUSY is reported. Serialisation between operations should be handled * by waiting upon the dma-buf. * * This returns the handle for the new fence that must be signaled within 10 * seconds (or otherwise it will automatically expire). See * vgem_fence_signal_ioctl (DRM_IOCTL_VGEM_FENCE_SIGNAL). * * If the vGEM handle does not exist, vgem_fence_attach_ioctl returns -ENOENT. */ int vgem_fence_attach_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_vgem_fence_attach *arg = data; struct vgem_file *vfile = file->driver_priv; struct dma_resv *resv; struct drm_gem_object *obj; enum dma_resv_usage usage; struct dma_fence *fence; int ret; if (arg->flags & ~VGEM_FENCE_WRITE) return -EINVAL; if (arg->pad) return -EINVAL; obj = drm_gem_object_lookup(file, arg->handle); if (!obj) return -ENOENT; fence = vgem_fence_create(vfile, arg->flags); if (!fence) { ret = -ENOMEM; goto err; } /* Check for a conflicting fence */ resv = obj->resv; usage = dma_resv_usage_rw(arg->flags & VGEM_FENCE_WRITE); if (!dma_resv_test_signaled(resv, usage)) { ret = -EBUSY; goto err_fence; } /* Expose the fence via the dma-buf */ dma_resv_lock(resv, NULL); ret = dma_resv_reserve_fences(resv, 1); if (!ret) dma_resv_add_fence(resv, fence, arg->flags & VGEM_FENCE_WRITE ? 
DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ); dma_resv_unlock(resv); /* Record the fence in our idr for later signaling */ if (ret == 0) { mutex_lock(&vfile->fence_mutex); ret = idr_alloc(&vfile->fence_idr, fence, 1, 0, GFP_KERNEL); mutex_unlock(&vfile->fence_mutex); if (ret > 0) { arg->out_fence = ret; ret = 0; } } err_fence: if (ret) { dma_fence_signal(fence); dma_fence_put(fence); } err: drm_gem_object_put(obj); return ret; } /* * vgem_fence_signal_ioctl (DRM_IOCTL_VGEM_FENCE_SIGNAL): * * Signal and consume a fence ealier attached to a vGEM handle using * vgem_fence_attach_ioctl (DRM_IOCTL_VGEM_FENCE_ATTACH). * * All fences must be signaled within 10s of attachment or otherwise they * will automatically expire (and a vgem_fence_signal_ioctl returns -ETIMEDOUT). * * Signaling a fence indicates to all consumers of the dma-buf that the * client has completed the operation associated with the fence, and that the * buffer is then ready for consumption. * * If the fence does not exist (or has already been signaled by the client), * vgem_fence_signal_ioctl returns -ENOENT. 
*/ int vgem_fence_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct vgem_file *vfile = file->driver_priv; struct drm_vgem_fence_signal *arg = data; struct dma_fence *fence; int ret = 0; if (arg->flags) return -EINVAL; mutex_lock(&vfile->fence_mutex); fence = idr_replace(&vfile->fence_idr, NULL, arg->fence); mutex_unlock(&vfile->fence_mutex); if (!fence) return -ENOENT; if (IS_ERR(fence)) return PTR_ERR(fence); if (dma_fence_is_signaled(fence)) ret = -ETIMEDOUT; dma_fence_signal(fence); dma_fence_put(fence); return ret; } int vgem_fence_open(struct vgem_file *vfile) { mutex_init(&vfile->fence_mutex); idr_init_base(&vfile->fence_idr, 1); return 0; } static int __vgem_fence_idr_fini(int id, void *p, void *data) { dma_fence_signal(p); dma_fence_put(p); return 0; } void vgem_fence_close(struct vgem_file *vfile) { idr_for_each(&vfile->fence_idr, __vgem_fence_idr_fini, vfile); idr_destroy(&vfile->fence_idr); mutex_destroy(&vfile->fence_mutex); } |
| 20 3 3 3 95 45 54 95 274 139 139 139 48 1 1 1 116 1268 834 834 801 781 517 512 274 1267 1869 1871 1267 25 24 1247 1267 1873 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 | #include <linux/init.h> 
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/netfilter/nf_tables.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_arp.h>
#include <net/netfilter/nf_tables_ipv4.h>
#include <net/netfilter/nf_tables_ipv6.h>

#ifdef CONFIG_NF_TABLES_IPV4
/* IPv4 hook entry: fill in the packet info, then run the chain in @priv. */
static unsigned int nft_do_chain_ipv4(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);
	nft_set_pktinfo_ipv4(&pkt);

	return nft_do_chain(&pkt, priv);
}

/* "filter" chain type for the IPv4 family, usable on all five inet hooks. */
static const struct nft_chain_type nft_chain_filter_ipv4 = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,
	.family		= NFPROTO_IPV4,
	.hook_mask	= (1 << NF_INET_LOCAL_IN) |
			  (1 << NF_INET_LOCAL_OUT) |
			  (1 << NF_INET_FORWARD) |
			  (1 << NF_INET_PRE_ROUTING) |
			  (1 << NF_INET_POST_ROUTING),
	.hooks		= {
		[NF_INET_LOCAL_IN]	= nft_do_chain_ipv4,
		[NF_INET_LOCAL_OUT]	= nft_do_chain_ipv4,
		[NF_INET_FORWARD]	= nft_do_chain_ipv4,
		[NF_INET_PRE_ROUTING]	= nft_do_chain_ipv4,
		[NF_INET_POST_ROUTING]	= nft_do_chain_ipv4,
	},
};

static void nft_chain_filter_ipv4_init(void)
{
	nft_register_chain_type(&nft_chain_filter_ipv4);
}
static void nft_chain_filter_ipv4_fini(void)
{
	nft_unregister_chain_type(&nft_chain_filter_ipv4);
}

#else
/* No-op stubs when IPv4 support is not configured. */
static inline void nft_chain_filter_ipv4_init(void) {}
static inline void nft_chain_filter_ipv4_fini(void) {}
#endif /* CONFIG_NF_TABLES_IPV4 */

#ifdef CONFIG_NF_TABLES_ARP
/* ARP hook entry: no protocol-specific packet info is set up. */
static unsigned int nft_do_chain_arp(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);
	nft_set_pktinfo_unspec(&pkt);

	return nft_do_chain(&pkt, priv);
}

/* "filter" chain type for the ARP family (input and output hooks). */
static const struct nft_chain_type nft_chain_filter_arp = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,
	.family		= NFPROTO_ARP,
	.owner		= THIS_MODULE,
	.hook_mask	= (1 << NF_ARP_IN) |
			  (1 << NF_ARP_OUT),
	.hooks		= {
		[NF_ARP_IN]		= nft_do_chain_arp,
		[NF_ARP_OUT]		= nft_do_chain_arp,
	},
};

static void nft_chain_filter_arp_init(void)
{
	nft_register_chain_type(&nft_chain_filter_arp);
}

static void nft_chain_filter_arp_fini(void)
{
	nft_unregister_chain_type(&nft_chain_filter_arp);
}
#else
/* No-op stubs when ARP support is not configured. */
static inline void nft_chain_filter_arp_init(void) {}
static inline void nft_chain_filter_arp_fini(void) {}
#endif /* CONFIG_NF_TABLES_ARP */

#ifdef CONFIG_NF_TABLES_IPV6
/* IPv6 hook entry: fill in the packet info, then run the chain in @priv. */
static unsigned int nft_do_chain_ipv6(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);
	nft_set_pktinfo_ipv6(&pkt);

	return nft_do_chain(&pkt, priv);
}

/* "filter" chain type for the IPv6 family, usable on all five inet hooks. */
static const struct nft_chain_type nft_chain_filter_ipv6 = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,
	.family		= NFPROTO_IPV6,
	.hook_mask	= (1 << NF_INET_LOCAL_IN) |
			  (1 << NF_INET_LOCAL_OUT) |
			  (1 << NF_INET_FORWARD) |
			  (1 << NF_INET_PRE_ROUTING) |
			  (1 << NF_INET_POST_ROUTING),
	.hooks		= {
		[NF_INET_LOCAL_IN]	= nft_do_chain_ipv6,
		[NF_INET_LOCAL_OUT]	= nft_do_chain_ipv6,
		[NF_INET_FORWARD]	= nft_do_chain_ipv6,
		[NF_INET_PRE_ROUTING]	= nft_do_chain_ipv6,
		[NF_INET_POST_ROUTING]	= nft_do_chain_ipv6,
	},
};

static void nft_chain_filter_ipv6_init(void)
{
	nft_register_chain_type(&nft_chain_filter_ipv6);
}

static void nft_chain_filter_ipv6_fini(void)
{
	nft_unregister_chain_type(&nft_chain_filter_ipv6);
}
#else
/* No-op stubs when IPv6 support is not configured. */
static inline void nft_chain_filter_ipv6_init(void) {}
static inline void nft_chain_filter_ipv6_fini(void) {}
#endif /* CONFIG_NF_TABLES_IPV6 */

#ifdef CONFIG_NF_TABLES_INET
/*
 * inet hook entry: pick the IPv4 or IPv6 packet-info setup from the hook
 * state's protocol family; any other family gets no protocol-specific setup.
 */
static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);

	switch (state->pf) {
	case NFPROTO_IPV4:
		nft_set_pktinfo_ipv4(&pkt);
		break;
	case NFPROTO_IPV6:
		nft_set_pktinfo_ipv6(&pkt);
		break;
	default:
		break;
	}

	return nft_do_chain(&pkt, priv);
}

/*
 * inet ingress hook entry.  Works on a local copy of the hook state whose
 * family and hook number are rewritten according to skb->protocol.  Packets
 * that are neither IPv4 nor IPv6 are accepted without running the chain;
 * packets whose ingress packet-info setup fails are dropped.
 */
static unsigned int nft_do_chain_inet_ingress(void *priv, struct sk_buff *skb,
					      const struct nf_hook_state *state)
{
	struct nf_hook_state ingress_state = *state;
	struct nft_pktinfo pkt;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		/* Original hook is NFPROTO_NETDEV and NF_NETDEV_INGRESS. */
		ingress_state.pf = NFPROTO_IPV4;
		ingress_state.hook = NF_INET_INGRESS;
		nft_set_pktinfo(&pkt, skb, &ingress_state);

		if (nft_set_pktinfo_ipv4_ingress(&pkt) < 0)
			return NF_DROP;
		break;
	case htons(ETH_P_IPV6):
		ingress_state.pf = NFPROTO_IPV6;
		ingress_state.hook = NF_INET_INGRESS;
		nft_set_pktinfo(&pkt, skb, &ingress_state);

		if (nft_set_pktinfo_ipv6_ingress(&pkt) < 0)
			return NF_DROP;
		break;
	default:
		return NF_ACCEPT;
	}

	return nft_do_chain(&pkt, priv);
}

/* "filter" chain type for the inet family; ingress has its own handler. */
static const struct nft_chain_type nft_chain_filter_inet = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,
	.family		= NFPROTO_INET,
	.hook_mask	= (1 << NF_INET_INGRESS) |
			  (1 << NF_INET_LOCAL_IN) |
			  (1 << NF_INET_LOCAL_OUT) |
			  (1 << NF_INET_FORWARD) |
			  (1 << NF_INET_PRE_ROUTING) |
			  (1 << NF_INET_POST_ROUTING),
	.hooks		= {
		[NF_INET_INGRESS]	= nft_do_chain_inet_ingress,
		[NF_INET_LOCAL_IN]	= nft_do_chain_inet,
		[NF_INET_LOCAL_OUT]	= nft_do_chain_inet,
		[NF_INET_FORWARD]	= nft_do_chain_inet,
		[NF_INET_PRE_ROUTING]	= nft_do_chain_inet,
		[NF_INET_POST_ROUTING]	= nft_do_chain_inet,
	},
};

static void nft_chain_filter_inet_init(void)
{
	nft_register_chain_type(&nft_chain_filter_inet);
}

static void nft_chain_filter_inet_fini(void)
{
	nft_unregister_chain_type(&nft_chain_filter_inet);
}
#else
/* No-op stubs when inet support is not configured. */
static inline void nft_chain_filter_inet_init(void) {}
static inline void nft_chain_filter_inet_fini(void) {}
#endif /* CONFIG_NF_TABLES_INET */

#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE)
/*
 * Bridge hook entry: choose the packet-info setup from the Ethernet header's
 * protocol field, using the validating IPv4/IPv6 variants.
 */
static unsigned int
nft_do_chain_bridge(void *priv,
		    struct sk_buff *skb,
		    const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);

	switch (eth_hdr(skb)->h_proto) {
	case htons(ETH_P_IP):
		nft_set_pktinfo_ipv4_validate(&pkt);
		break;
	case htons(ETH_P_IPV6):
		nft_set_pktinfo_ipv6_validate(&pkt);
		break;
	default:
		nft_set_pktinfo_unspec(&pkt);
		break;
	}

	return nft_do_chain(&pkt, priv);
}

/* "filter" chain type for the bridge family, usable on all five hooks. */
static const struct nft_chain_type nft_chain_filter_bridge = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,
	.family		= NFPROTO_BRIDGE,
	.hook_mask	= (1 << NF_BR_PRE_ROUTING) |
			  (1 << NF_BR_LOCAL_IN) |
			  (1 << NF_BR_FORWARD) |
			  (1 << NF_BR_LOCAL_OUT) |
			  (1 << NF_BR_POST_ROUTING),
	.hooks		= {
		[NF_BR_PRE_ROUTING]	= nft_do_chain_bridge,
		[NF_BR_LOCAL_IN]	= nft_do_chain_bridge,
		[NF_BR_FORWARD]		= nft_do_chain_bridge,
		[NF_BR_LOCAL_OUT]	= nft_do_chain_bridge,
		[NF_BR_POST_ROUTING]	= nft_do_chain_bridge,
	},
};

static void nft_chain_filter_bridge_init(void)
{
	nft_register_chain_type(&nft_chain_filter_bridge);
}

static void nft_chain_filter_bridge_fini(void)
{
	nft_unregister_chain_type(&nft_chain_filter_bridge);
}
#else
/* No-op stubs when bridge support is not configured. */
static inline void nft_chain_filter_bridge_init(void) {}
static inline void nft_chain_filter_bridge_fini(void) {}
#endif /* CONFIG_NF_TABLES_BRIDGE */

#ifdef CONFIG_NF_TABLES_NETDEV
/*
 * netdev hook entry: choose the packet-info setup from skb->protocol, using
 * the validating IPv4/IPv6 variants.
 */
static unsigned int nft_do_chain_netdev(void *priv, struct sk_buff *skb,
					const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		nft_set_pktinfo_ipv4_validate(&pkt);
		break;
	case htons(ETH_P_IPV6):
		nft_set_pktinfo_ipv6_validate(&pkt);
		break;
	default:
		nft_set_pktinfo_unspec(&pkt);
		break;
	}

	return nft_do_chain(&pkt, priv);
}

/* "filter" chain type for the netdev family (ingress and egress hooks). */
static const struct nft_chain_type nft_chain_filter_netdev = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,
	.family		= NFPROTO_NETDEV,
	.hook_mask	= (1 << NF_NETDEV_INGRESS) |
			  (1 << NF_NETDEV_EGRESS),
	.hooks		= {
		[NF_NETDEV_INGRESS]	= nft_do_chain_netdev,
		[NF_NETDEV_EGRESS]	= nft_do_chain_netdev,
	},
};

/*
 * Apply a register/unregister event for @dev to the hooks of one base chain.
 *
 * For every hook, the existing per-device ops (if any) are looked up and the
 * hook's interface name is compared with the device name over
 * hook->ifnamelen bytes.  @changename is set by the caller when the event
 * was synthesised from NETDEV_CHANGENAME.
 *
 * Returns 1 when duplicating or registering the ops fails, 0 otherwise.
 */
static int nft_netdev_event(unsigned long event, struct net_device *dev,
			    struct nft_base_chain *basechain, bool changename)
{
	struct nft_table *table = basechain->chain.table;
	struct nf_hook_ops *ops;
	struct nft_hook *hook;
	bool match;

	list_for_each_entry(hook, &basechain->hook_list, list) {
		ops = nft_hook_find_ops(hook, dev);
		match = !strncmp(hook->ifname, dev->name, hook->ifnamelen);

		switch (event) {
		case NETDEV_UNREGISTER:
			/* NOP if not found or new name still matching */
			if (!ops || (changename && match))
				continue;

			/* Dormant tables have no hooks registered. */
			if (!(table->flags & NFT_TABLE_F_DORMANT))
				nf_unregister_net_hook(dev_net(dev), ops);

			list_del_rcu(&ops->list);
			kfree_rcu(ops, rcu);
			break;
		case NETDEV_REGISTER:
			/* NOP if not matching or already registered */
			if (!match || (changename && ops))
				continue;

			/* Clone the chain's template ops for this device. */
			ops = kmemdup(&basechain->ops,
				      sizeof(struct nf_hook_ops),
				      GFP_KERNEL_ACCOUNT);
			if (!ops)
				return 1;

			ops->dev = dev;

			if (!(table->flags & NFT_TABLE_F_DORMANT) &&
			    nf_register_net_hook(dev_net(dev), ops)) {
				kfree(ops);
				return 1;
			}
			list_add_tail_rcu(&ops->list, &hook->ops_list);
			break;
		}
		/*
		 * Reached only when a case above ended with "break" (or the
		 * event matched no case): the walk stops after the first hook
		 * that was not skipped with "continue".
		 */
		break;
	}
	return 0;
}

/*
 * Walk every netdev-family table, and every inet-family table's ingress
 * base chains, in the device's namespace and apply the event to each base
 * chain.  Returns 1 as soon as one chain reports failure, 0 otherwise.
 */
static int __nf_tables_netdev_event(unsigned long event,
				    struct net_device *dev,
				    bool changename)
{
	struct nft_base_chain *basechain;
	struct nftables_pernet *nft_net;
	struct nft_chain *chain;
	struct nft_table *table;

	nft_net = nft_pernet(dev_net(dev));
	list_for_each_entry(table, &nft_net->tables, list) {
		if (table->family != NFPROTO_NETDEV &&
		    table->family != NFPROTO_INET)
			continue;

		list_for_each_entry(chain, &table->chains, list) {
			if (!nft_is_base_chain(chain))
				continue;

			basechain = nft_base_chain(chain);
			/* In inet tables only ingress chains are per-device. */
			if (table->family == NFPROTO_INET &&
			    basechain->ops.hooknum != NF_INET_INGRESS)
				continue;

			if (nft_netdev_event(event, dev, basechain, changename))
				return 1;
		}
	}
	return 0;
}

/*
 * Netdevice notifier callback.  Only REGISTER, UNREGISTER and CHANGENAME are
 * handled, under the namespace's commit mutex.  A rename is processed as a
 * register pass followed by an unregister pass, both with changename set.
 * Returns NOTIFY_BAD when a pass reports failure, NOTIFY_DONE otherwise.
 */
static int nf_tables_netdev_event(struct notifier_block *this,
				  unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct nftables_pernet *nft_net;
	int ret = NOTIFY_DONE;

	if (event != NETDEV_REGISTER &&
	    event != NETDEV_UNREGISTER &&
	    event != NETDEV_CHANGENAME)
		return NOTIFY_DONE;

	nft_net = nft_pernet(dev_net(dev));
	mutex_lock(&nft_net->commit_mutex);

	if (event == NETDEV_CHANGENAME) {
		if (__nf_tables_netdev_event(NETDEV_REGISTER, dev, true)) {
			ret = NOTIFY_BAD;
			goto out_unlock;
		}
		__nf_tables_netdev_event(NETDEV_UNREGISTER, dev, true);
	} else if (__nf_tables_netdev_event(event, dev, false)) {
		ret = NOTIFY_BAD;
	}
out_unlock:
	mutex_unlock(&nft_net->commit_mutex);
	return ret;
}

static struct notifier_block nf_tables_netdev_notifier = {
	.notifier_call	= nf_tables_netdev_event,
};

/*
 * Register the netdev chain type and the netdevice notifier.  If the
 * notifier cannot be registered the chain type is unregistered again and the
 * error is returned.
 */
static int nft_chain_filter_netdev_init(void)
{
	int err;

	nft_register_chain_type(&nft_chain_filter_netdev);

	err = register_netdevice_notifier(&nf_tables_netdev_notifier);
	if (err)
		goto err_register_netdevice_notifier;

	return 0;

err_register_netdevice_notifier:
	nft_unregister_chain_type(&nft_chain_filter_netdev);

	return err;
}

/* Unregister the netdev chain type and the netdevice notifier. */
static void nft_chain_filter_netdev_fini(void)
{
	nft_unregister_chain_type(&nft_chain_filter_netdev);
	unregister_netdevice_notifier(&nf_tables_netdev_notifier);
}
#else
/* No-op stubs when netdev support is not configured. */
static inline int nft_chain_filter_netdev_init(void) { return 0; }
static inline void nft_chain_filter_netdev_fini(void) {}
#endif /* CONFIG_NF_TABLES_NETDEV */

/*
 * Register all configured filter chain types.  Only the netdev step can
 * fail; its error is returned before any other family is registered.
 */
int __init nft_chain_filter_init(void)
{
	int err;

	err = nft_chain_filter_netdev_init();
	if (err < 0)
		return err;

	nft_chain_filter_ipv4_init();
	nft_chain_filter_ipv6_init();
	nft_chain_filter_arp_init();
	nft_chain_filter_inet_init();
	nft_chain_filter_bridge_init();

	return 0;
}

/* Unregister all filter chain types, in reverse order of registration. */
void nft_chain_filter_fini(void)
{
	nft_chain_filter_bridge_fini();
	nft_chain_filter_inet_fini();
	nft_chain_filter_arp_fini();
	nft_chain_filter_ipv6_fini();
	nft_chain_filter_ipv4_fini();
	nft_chain_filter_netdev_fini();
} |
| 4 4 4 4 4 4 4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Glue Code for the AVX/AES-NI/GFNI assembler implementation of the ARIA Cipher * * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com> */ #include <crypto/algapi.h> #include <crypto/aria.h> #include <linux/crypto.h> #include <linux/err.h> #include <linux/export.h> #include <linux/module.h> #include <linux/types.h> #include "ecb_cbc_helpers.h" #include "aria-avx.h" asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(aria_aesni_avx_encrypt_16way); asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(aria_aesni_avx_decrypt_16way); asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); EXPORT_SYMBOL_GPL(aria_aesni_avx_ctr_crypt_16way); asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_encrypt_16way); asmlinkage void aria_aesni_avx_gfni_decrypt_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_decrypt_16way); asmlinkage void 
aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_ctr_crypt_16way); static struct aria_avx_ops aria_ops; struct aria_avx_request_ctx { u8 keystream[ARIA_AESNI_PARALLEL_BLOCK_SIZE]; }; static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey) { ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS); ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_encrypt_16way); ECB_BLOCK(1, aria_encrypt); ECB_WALK_END(); } static int ecb_do_decrypt(struct skcipher_request *req, const u32 *rkey) { ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS); ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_decrypt_16way); ECB_BLOCK(1, aria_decrypt); ECB_WALK_END(); } static int aria_avx_ecb_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); return ecb_do_encrypt(req, ctx->enc_key[0]); } static int aria_avx_ecb_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); return ecb_do_decrypt(req, ctx->dec_key[0]); } static int aria_avx_set_key(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { return aria_set_key(&tfm->base, key, keylen); } static int aria_avx_ctr_encrypt(struct skcipher_request *req) { struct aria_avx_request_ctx *req_ctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; unsigned int nbytes; int err; err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) { kernel_fpu_begin(); aria_ops.aria_ctr_crypt_16way(ctx, dst, src, &req_ctx->keystream[0], walk.iv); kernel_fpu_end(); dst += 
ARIA_AESNI_PARALLEL_BLOCK_SIZE; src += ARIA_AESNI_PARALLEL_BLOCK_SIZE; nbytes -= ARIA_AESNI_PARALLEL_BLOCK_SIZE; } while (nbytes >= ARIA_BLOCK_SIZE) { memcpy(&req_ctx->keystream[0], walk.iv, ARIA_BLOCK_SIZE); crypto_inc(walk.iv, ARIA_BLOCK_SIZE); aria_encrypt(ctx, &req_ctx->keystream[0], &req_ctx->keystream[0]); crypto_xor_cpy(dst, src, &req_ctx->keystream[0], ARIA_BLOCK_SIZE); dst += ARIA_BLOCK_SIZE; src += ARIA_BLOCK_SIZE; nbytes -= ARIA_BLOCK_SIZE; } if (walk.nbytes == walk.total && nbytes > 0) { memcpy(&req_ctx->keystream[0], walk.iv, ARIA_BLOCK_SIZE); crypto_inc(walk.iv, ARIA_BLOCK_SIZE); aria_encrypt(ctx, &req_ctx->keystream[0], &req_ctx->keystream[0]); crypto_xor_cpy(dst, src, &req_ctx->keystream[0], nbytes); dst += nbytes; src += nbytes; nbytes = 0; } err = skcipher_walk_done(&walk, nbytes); } return err; } static int aria_avx_init_tfm(struct crypto_skcipher *tfm) { crypto_skcipher_set_reqsize(tfm, sizeof(struct aria_avx_request_ctx)); return 0; } static struct skcipher_alg aria_algs[] = { { .base.cra_name = "ecb(aria)", .base.cra_driver_name = "ecb-aria-avx", .base.cra_priority = 400, .base.cra_blocksize = ARIA_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct aria_ctx), .base.cra_module = THIS_MODULE, .min_keysize = ARIA_MIN_KEY_SIZE, .max_keysize = ARIA_MAX_KEY_SIZE, .setkey = aria_avx_set_key, .encrypt = aria_avx_ecb_encrypt, .decrypt = aria_avx_ecb_decrypt, }, { .base.cra_name = "ctr(aria)", .base.cra_driver_name = "ctr-aria-avx", .base.cra_priority = 400, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct aria_ctx), .base.cra_module = THIS_MODULE, .min_keysize = ARIA_MIN_KEY_SIZE, .max_keysize = ARIA_MAX_KEY_SIZE, .ivsize = ARIA_BLOCK_SIZE, .chunksize = ARIA_BLOCK_SIZE, .walksize = 16 * ARIA_BLOCK_SIZE, .setkey = aria_avx_set_key, .encrypt = aria_avx_ctr_encrypt, .decrypt = aria_avx_ctr_encrypt, .init = aria_avx_init_tfm, } }; static int __init aria_avx_init(void) { const char *feature_name; if (!boot_cpu_has(X86_FEATURE_AVX) || 
!boot_cpu_has(X86_FEATURE_AES) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX or AES-NI instructions are not detected.\n"); return -ENODEV; } if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &feature_name)) { pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } if (boot_cpu_has(X86_FEATURE_GFNI)) { aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way; aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way; aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way; } else { aria_ops.aria_encrypt_16way = aria_aesni_avx_encrypt_16way; aria_ops.aria_decrypt_16way = aria_aesni_avx_decrypt_16way; aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_ctr_crypt_16way; } return crypto_register_skciphers(aria_algs, ARRAY_SIZE(aria_algs)); } static void __exit aria_avx_exit(void) { crypto_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs)); } module_init(aria_avx_init); module_exit(aria_avx_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Taehee Yoo <ap420073@gmail.com>"); MODULE_DESCRIPTION("ARIA Cipher Algorithm, AVX/AES-NI/GFNI optimized"); MODULE_ALIAS_CRYPTO("aria"); MODULE_ALIAS_CRYPTO("aria-aesni-avx"); |
| 2 2 1 1 2 2 2 2 2 2 2 1 1 1 1 1 1 5 3 5 2 2 1 5 5 5 5 5 3 7 2 2 1 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 2 2 3 3 3 19 1 1 7 4 2 3 3 2 1 7 4 2 7 2 1 7 1 1 20 15 2 4 1 3 4 4 1 1 2 15 41 4 7 3 3 3 2 5 4 3 2 4 2 2 3 3 2 1 1 1 2 3 4 3 2 1 5 4 3 3 3 2 2 1 8 7 7 6 5 2 1 41 19 5 1 5 2 1 1 2 1 1 3 1 1 1 1 1 18 8 7 5 3 4 4 3 3 2 1 1 3 2 8 8 6 5 4 6 4 2 1 1 1 3 3 3 8 7 7 8 5 4 3 2 2 5 1 1 1 1 1 1 2 2 2 2 2 2 2 7 2 5 3 1 2 1 1 1 281 24 1 24 303 43 43 43 43 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 
416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 
916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 
1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 
1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 
2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright © 2019 Oracle and/or its affiliates. All rights reserved. * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
* * KVM Xen emulation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "x86.h" #include "xen.h" #include "hyperv.h" #include "irq.h" #include <linux/eventfd.h> #include <linux/kvm_host.h> #include <linux/sched/stat.h> #include <trace/events/kvm.h> #include <xen/interface/xen.h> #include <xen/interface/vcpu.h> #include <xen/interface/version.h> #include <xen/interface/event_channel.h> #include <xen/interface/sched.h> #include <asm/xen/cpuid.h> #include <asm/pvclock.h> #include "cpuid.h" #include "trace.h" static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm); static int kvm_xen_setattr_evtchn(struct kvm *kvm, struct kvm_xen_hvm_attr *data); static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r); DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ); static int kvm_xen_shared_info_init(struct kvm *kvm) { struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache; struct pvclock_wall_clock *wc; u32 *wc_sec_hi; u32 wc_version; u64 wall_nsec; int ret = 0; int idx = srcu_read_lock(&kvm->srcu); read_lock_irq(&gpc->lock); while (!kvm_gpc_check(gpc, PAGE_SIZE)) { read_unlock_irq(&gpc->lock); ret = kvm_gpc_refresh(gpc, PAGE_SIZE); if (ret) goto out; read_lock_irq(&gpc->lock); } /* * This code mirrors kvm_write_wall_clock() except that it writes * directly through the pfn cache and doesn't mark the page dirty. 
*/ wall_nsec = kvm_get_wall_clock_epoch(kvm); /* Paranoia checks on the 32-bit struct layout */ BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900); BUILD_BUG_ON(offsetof(struct compat_shared_info, arch.wc_sec_hi) != 0x924); BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); #ifdef CONFIG_X86_64 /* Paranoia checks on the 64-bit struct layout */ BUILD_BUG_ON(offsetof(struct shared_info, wc) != 0xc00); BUILD_BUG_ON(offsetof(struct shared_info, wc_sec_hi) != 0xc0c); if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { struct shared_info *shinfo = gpc->khva; wc_sec_hi = &shinfo->wc_sec_hi; wc = &shinfo->wc; } else #endif { struct compat_shared_info *shinfo = gpc->khva; wc_sec_hi = &shinfo->arch.wc_sec_hi; wc = &shinfo->wc; } /* Increment and ensure an odd value */ wc_version = wc->version = (wc->version + 1) | 1; smp_wmb(); wc->nsec = do_div(wall_nsec, NSEC_PER_SEC); wc->sec = (u32)wall_nsec; *wc_sec_hi = wall_nsec >> 32; smp_wmb(); wc->version = wc_version + 1; read_unlock_irq(&gpc->lock); kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE); out: srcu_read_unlock(&kvm->srcu, idx); return ret; } void kvm_xen_inject_timer_irqs(struct kvm_vcpu *vcpu) { if (atomic_read(&vcpu->arch.xen.timer_pending) > 0) { struct kvm_xen_evtchn e; e.vcpu_id = vcpu->vcpu_id; e.vcpu_idx = vcpu->vcpu_idx; e.port = vcpu->arch.xen.timer_virq; e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; kvm_xen_set_evtchn(&e, vcpu->kvm); vcpu->arch.xen.timer_expires = 0; atomic_set(&vcpu->arch.xen.timer_pending, 0); } } static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer) { struct kvm_vcpu *vcpu = container_of(timer, struct kvm_vcpu, arch.xen.timer); struct kvm_xen_evtchn e; int rc; if (atomic_read(&vcpu->arch.xen.timer_pending)) return HRTIMER_NORESTART; e.vcpu_id = vcpu->vcpu_id; e.vcpu_idx = vcpu->vcpu_idx; e.port = vcpu->arch.xen.timer_virq; e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; rc = kvm_xen_set_evtchn_fast(&e, vcpu->kvm); if 
(rc != -EWOULDBLOCK) { vcpu->arch.xen.timer_expires = 0; return HRTIMER_NORESTART; } atomic_inc(&vcpu->arch.xen.timer_pending); kvm_make_request(KVM_REQ_UNBLOCK, vcpu); kvm_vcpu_kick(vcpu); return HRTIMER_NORESTART; } static int xen_get_guest_pvclock(struct kvm_vcpu *vcpu, struct pvclock_vcpu_time_info *hv_clock, struct gfn_to_pfn_cache *gpc, unsigned int offset) { unsigned long flags; int r; read_lock_irqsave(&gpc->lock, flags); while (!kvm_gpc_check(gpc, offset + sizeof(*hv_clock))) { read_unlock_irqrestore(&gpc->lock, flags); r = kvm_gpc_refresh(gpc, offset + sizeof(*hv_clock)); if (r) return r; read_lock_irqsave(&gpc->lock, flags); } memcpy(hv_clock, gpc->khva + offset, sizeof(*hv_clock)); read_unlock_irqrestore(&gpc->lock, flags); /* * Sanity check TSC shift+multiplier to verify the guest's view of time * is more or less consistent. */ if (hv_clock->tsc_shift != vcpu->arch.pvclock_tsc_shift || hv_clock->tsc_to_system_mul != vcpu->arch.pvclock_tsc_mul) return -EINVAL; return 0; } static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, bool linux_wa) { struct kvm_vcpu_xen *xen = &vcpu->arch.xen; int64_t kernel_now, delta; uint64_t guest_now; int r = -EOPNOTSUPP; /* * The guest provides the requested timeout in absolute nanoseconds * of the KVM clock — as *it* sees it, based on the scaled TSC and * the pvclock information provided by KVM. * * The kernel doesn't support hrtimers based on CLOCK_MONOTONIC_RAW * so use CLOCK_MONOTONIC. In the timescales covered by timers, the * difference won't matter much as there is no cumulative effect. * * Calculate the time for some arbitrary point in time around "now" * in terms of both kvmclock and CLOCK_MONOTONIC. Calculate the * delta between the kvmclock "now" value and the guest's requested * timeout, apply the "Linux workaround" described below, and add * the resulting delta to the CLOCK_MONOTONIC "now" value, to get * the absolute CLOCK_MONOTONIC time at which the timer should * fire. 
*/ do { struct pvclock_vcpu_time_info hv_clock; uint64_t host_tsc, guest_tsc; if (!static_cpu_has(X86_FEATURE_CONSTANT_TSC) || !vcpu->kvm->arch.use_master_clock) break; /* * If both Xen PV clocks are active, arbitrarily try to use the * compat clock first, but also try to use the non-compat clock * if the compat clock is unusable. The two PV clocks hold the * same information, but it's possible one (or both) is stale * and/or currently unreachable. */ if (xen->vcpu_info_cache.active) r = xen_get_guest_pvclock(vcpu, &hv_clock, &xen->vcpu_info_cache, offsetof(struct compat_vcpu_info, time)); if (r && xen->vcpu_time_info_cache.active) r = xen_get_guest_pvclock(vcpu, &hv_clock, &xen->vcpu_time_info_cache, 0); if (r) break; if (!IS_ENABLED(CONFIG_64BIT) || !kvm_get_monotonic_and_clockread(&kernel_now, &host_tsc)) { /* * Don't fall back to get_kvmclock_ns() because it's * broken; it has a systemic error in its results * because it scales directly from host TSC to * nanoseconds, and doesn't scale first to guest TSC * and *then* to nanoseconds as the guest does. * * There is a small error introduced here because time * continues to elapse between the ktime_get() and the * subsequent rdtsc(). But not the systemic drift due * to get_kvmclock_ns(). */ kernel_now = ktime_get(); /* This is CLOCK_MONOTONIC */ host_tsc = rdtsc(); } /* Calculate the guest kvmclock as the guest would do it. */ guest_tsc = kvm_read_l1_tsc(vcpu, host_tsc); guest_now = __pvclock_read_cycles(&hv_clock, guest_tsc); } while (0); if (r) { /* * Without CONSTANT_TSC, get_kvmclock_ns() is the only option. * * Also if the guest PV clock hasn't been set up yet, as is * likely to be the case during migration when the vCPU has * not been run yet. It would be possible to calculate the * scaling factors properly in that case but there's not much * point in doing so. The get_kvmclock_ns() drift accumulates * over time, so it's OK to use it at startup. 
Besides, on * migration there's going to be a little bit of skew in the * precise moment at which timers fire anyway. Often they'll * be in the "past" by the time the VM is running again after * migration. */ guest_now = get_kvmclock_ns(vcpu->kvm); kernel_now = ktime_get(); } delta = guest_abs - guest_now; /* * Xen has a 'Linux workaround' in do_set_timer_op() which checks for * negative absolute timeout values (caused by integer overflow), and * for values about 13 days in the future (2^50ns) which would be * caused by jiffies overflow. For those cases, Xen sets the timeout * 100ms in the future (not *too* soon, since if a guest really did * set a long timeout on purpose we don't want to keep churning CPU * time by waking it up). Emulate Xen's workaround when starting the * timer in response to __HYPERVISOR_set_timer_op. */ if (linux_wa && unlikely((int64_t)guest_abs < 0 || (delta > 0 && (uint32_t) (delta >> 50) != 0))) { delta = 100 * NSEC_PER_MSEC; guest_abs = guest_now + delta; } /* * Avoid races with the old timer firing. Checking timer_expires * to avoid calling hrtimer_cancel() will only have false positives * so is fine. 
*/ if (vcpu->arch.xen.timer_expires) hrtimer_cancel(&vcpu->arch.xen.timer); atomic_set(&vcpu->arch.xen.timer_pending, 0); vcpu->arch.xen.timer_expires = guest_abs; if (delta <= 0) xen_timer_callback(&vcpu->arch.xen.timer); else hrtimer_start(&vcpu->arch.xen.timer, ktime_add_ns(kernel_now, delta), HRTIMER_MODE_ABS_HARD); } static void kvm_xen_stop_timer(struct kvm_vcpu *vcpu) { hrtimer_cancel(&vcpu->arch.xen.timer); vcpu->arch.xen.timer_expires = 0; atomic_set(&vcpu->arch.xen.timer_pending, 0); } static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) { struct kvm_vcpu_xen *vx = &v->arch.xen; struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache; struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache; size_t user_len, user_len1, user_len2; struct vcpu_runstate_info rs; unsigned long flags; size_t times_ofs; uint8_t *update_bit = NULL; uint64_t entry_time; uint64_t *rs_times; int *rs_state; /* * The only difference between 32-bit and 64-bit versions of the * runstate struct is the alignment of uint64_t in 32-bit, which * means that the 64-bit version has an additional 4 bytes of * padding after the first field 'state'. Let's be really really * paranoid about that, and matching it with our internal data * structures that we memcpy into it... */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0); BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0); BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); #ifdef CONFIG_X86_64 /* * The 64-bit structure has 4 bytes of padding before 'state_entry_time' * so each subsequent field is shifted by 4, and it's 4 bytes longer. 
*/ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4); BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) != offsetof(struct compat_vcpu_runstate_info, time) + 4); BUILD_BUG_ON(sizeof(struct vcpu_runstate_info) != 0x2c + 4); #endif /* * The state field is in the same place at the start of both structs, * and is the same size (int) as vx->current_runstate. */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != offsetof(struct compat_vcpu_runstate_info, state)); BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) != sizeof(vx->current_runstate)); BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) != sizeof(vx->current_runstate)); /* * The state_entry_time field is 64 bits in both versions, and the * XEN_RUNSTATE_UPDATE flag is in the top bit, which given that x86 * is little-endian means that it's in the last *byte* of the word. * That detail is important later. */ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) != sizeof(uint64_t)); BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) != sizeof(uint64_t)); BUILD_BUG_ON((XEN_RUNSTATE_UPDATE >> 56) != 0x80); /* * The time array is four 64-bit quantities in both versions, matching * the vx->runstate_times and immediately following state_entry_time. 
*/ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != offsetof(struct vcpu_runstate_info, time) - sizeof(uint64_t)); BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) != offsetof(struct compat_vcpu_runstate_info, time) - sizeof(uint64_t)); BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != sizeof_field(struct compat_vcpu_runstate_info, time)); BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != sizeof(vx->runstate_times)); if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) { user_len = sizeof(struct vcpu_runstate_info); times_ofs = offsetof(struct vcpu_runstate_info, state_entry_time); } else { user_len = sizeof(struct compat_vcpu_runstate_info); times_ofs = offsetof(struct compat_vcpu_runstate_info, state_entry_time); } /* * There are basically no alignment constraints. The guest can set it * up so it crosses from one page to the next, and at arbitrary byte * alignment (and the 32-bit ABI doesn't align the 64-bit integers * anyway, even if the overall struct had been 64-bit aligned). */ if ((gpc1->gpa & ~PAGE_MASK) + user_len >= PAGE_SIZE) { user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK); user_len2 = user_len - user_len1; } else { user_len1 = user_len; user_len2 = 0; } BUG_ON(user_len1 + user_len2 != user_len); retry: /* * Attempt to obtain the GPC lock on *both* (if there are two) * gfn_to_pfn caches that cover the region. */ if (atomic) { local_irq_save(flags); if (!read_trylock(&gpc1->lock)) { local_irq_restore(flags); return; } } else { read_lock_irqsave(&gpc1->lock, flags); } while (!kvm_gpc_check(gpc1, user_len1)) { read_unlock_irqrestore(&gpc1->lock, flags); /* When invoked from kvm_sched_out() we cannot sleep */ if (atomic) return; if (kvm_gpc_refresh(gpc1, user_len1)) return; read_lock_irqsave(&gpc1->lock, flags); } if (likely(!user_len2)) { /* * Set up three pointers directly to the runstate_info * struct in the guest (via the GPC). 
* * • @rs_state → state field * • @rs_times → state_entry_time field. * • @update_bit → last byte of state_entry_time, which * contains the XEN_RUNSTATE_UPDATE bit. */ rs_state = gpc1->khva; rs_times = gpc1->khva + times_ofs; if (v->kvm->arch.xen.runstate_update_flag) update_bit = ((void *)(&rs_times[1])) - 1; } else { /* * The guest's runstate_info is split across two pages and we * need to hold and validate both GPCs simultaneously. We can * declare a lock ordering GPC1 > GPC2 because nothing else * takes them more than one at a time. Set a subclass on the * gpc1 lock to make lockdep shut up about it. */ lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_); if (atomic) { if (!read_trylock(&gpc2->lock)) { read_unlock_irqrestore(&gpc1->lock, flags); return; } } else { read_lock(&gpc2->lock); } if (!kvm_gpc_check(gpc2, user_len2)) { read_unlock(&gpc2->lock); read_unlock_irqrestore(&gpc1->lock, flags); /* When invoked from kvm_sched_out() we cannot sleep */ if (atomic) return; /* * Use kvm_gpc_activate() here because if the runstate * area was configured in 32-bit mode and only extends * to the second page now because the guest changed to * 64-bit mode, the second GPC won't have been set up. */ if (kvm_gpc_activate(gpc2, gpc1->gpa + user_len1, user_len2)) return; /* * We dropped the lock on GPC1 so we have to go all the * way back and revalidate that too. */ goto retry; } /* * In this case, the runstate_info struct will be assembled on * the kernel stack (compat or not as appropriate) and will * be copied to GPC1/GPC2 with a dual memcpy. Set up the three * rs pointers accordingly. */ rs_times = &rs.state_entry_time; /* * The rs_state pointer points to the start of what we'll * copy to the guest, which in the case of a compat guest * is the 32-bit field that the compiler thinks is padding. */ rs_state = ((void *)rs_times) - times_ofs; /* * The update_bit is still directly in the guest memory, * via one GPC or the other. 
*/ if (v->kvm->arch.xen.runstate_update_flag) { if (user_len1 >= times_ofs + sizeof(uint64_t)) update_bit = gpc1->khva + times_ofs + sizeof(uint64_t) - 1; else update_bit = gpc2->khva + times_ofs + sizeof(uint64_t) - 1 - user_len1; } #ifdef CONFIG_X86_64 /* * Don't leak kernel memory through the padding in the 64-bit * version of the struct. */ memset(&rs, 0, offsetof(struct vcpu_runstate_info, state_entry_time)); #endif } /* * First, set the XEN_RUNSTATE_UPDATE bit in the top bit of the * state_entry_time field, directly in the guest. We need to set * that (and write-barrier) before writing to the rest of the * structure, and clear it last. Just as Xen does, we address the * single *byte* in which it resides because it might be in a * different cache line to the rest of the 64-bit word, due to * the (lack of) alignment constraints. */ entry_time = vx->runstate_entry_time; if (update_bit) { entry_time |= XEN_RUNSTATE_UPDATE; *update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56; smp_wmb(); } /* * Now assemble the actual structure, either on our kernel stack * or directly in the guest according to how the rs_state and * rs_times pointers were set up above. */ *rs_state = vx->current_runstate; rs_times[0] = entry_time; memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times)); /* For the split case, we have to then copy it to the guest. */ if (user_len2) { memcpy(gpc1->khva, rs_state, user_len1); memcpy(gpc2->khva, ((void *)rs_state) + user_len1, user_len2); } smp_wmb(); /* Finally, clear the XEN_RUNSTATE_UPDATE bit. 
*/ if (update_bit) { entry_time &= ~XEN_RUNSTATE_UPDATE; *update_bit = entry_time >> 56; smp_wmb(); } if (user_len2) { kvm_gpc_mark_dirty_in_slot(gpc2); read_unlock(&gpc2->lock); } kvm_gpc_mark_dirty_in_slot(gpc1); read_unlock_irqrestore(&gpc1->lock, flags); } void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) { struct kvm_vcpu_xen *vx = &v->arch.xen; u64 now = get_kvmclock_ns(v->kvm); u64 delta_ns = now - vx->runstate_entry_time; u64 run_delay = current->sched_info.run_delay; if (unlikely(!vx->runstate_entry_time)) vx->current_runstate = RUNSTATE_offline; /* * Time waiting for the scheduler isn't "stolen" if the * vCPU wasn't running anyway. */ if (vx->current_runstate == RUNSTATE_running) { u64 steal_ns = run_delay - vx->last_steal; delta_ns -= steal_ns; vx->runstate_times[RUNSTATE_runnable] += steal_ns; } vx->last_steal = run_delay; vx->runstate_times[vx->current_runstate] += delta_ns; vx->current_runstate = state; vx->runstate_entry_time = now; if (vx->runstate_cache.active) kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable); } void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) { struct kvm_lapic_irq irq = { }; irq.dest_id = v->vcpu_id; irq.vector = v->arch.xen.upcall_vector; irq.dest_mode = APIC_DEST_PHYSICAL; irq.shorthand = APIC_DEST_NOSHORT; irq.delivery_mode = APIC_DM_FIXED; irq.level = 1; kvm_irq_delivery_to_apic(v->kvm, NULL, &irq, NULL); } /* * On event channel delivery, the vcpu_info may not have been accessible. * In that case, there are bits in vcpu->arch.xen.evtchn_pending_sel which * need to be marked into the vcpu_info (and evtchn_upcall_pending set). * Do so now that we can sleep in the context of the vCPU to bring the * page in, and refresh the pfn cache for it. 
*/
/*
 * Returns nothing: the function bails out silently when no bits are pending
 * in the in-kernel shadow or when the vcpu_info cache cannot be refreshed.
 */
void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
{
	unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel);
	struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
	unsigned long flags;

	if (!evtchn_pending_sel)
		return;

	/*
	 * Yes, this is an open-coded loop. But that's just what put_user()
	 * does anyway. Page it in and retry the instruction. We're just a
	 * little more honest about it.
	 */
	read_lock_irqsave(&gpc->lock, flags);
	while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
		/* The refresh is done with the cache lock dropped. */
		read_unlock_irqrestore(&gpc->lock, flags);

		if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info)))
			return;

		read_lock_irqsave(&gpc->lock, flags);
	}

	/* Now gpc->khva is a valid kernel address for the vcpu_info */
	if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) {
		struct vcpu_info *vi = gpc->khva;

		/*
		 * Locked OR of the snapshot into the guest's
		 * evtchn_pending_sel, then invert the snapshot and locked-AND
		 * it into the in-kernel shadow so that exactly the bits just
		 * transferred are cleared there.
		 */
		asm volatile(LOCK_PREFIX "orq %0, %1\n"
			     "notq %0\n"
			     LOCK_PREFIX "andq %0, %2\n"
			     : "=r" (evtchn_pending_sel),
			       "+m" (vi->evtchn_pending_sel),
			       "+m" (v->arch.xen.evtchn_pending_sel)
			     : "0" (evtchn_pending_sel));
		WRITE_ONCE(vi->evtchn_upcall_pending, 1);
	} else {
		/* Same sequence on the 32-bit wide compat field. */
		u32 evtchn_pending_sel32 = evtchn_pending_sel;
		struct compat_vcpu_info *vi = gpc->khva;

		asm volatile(LOCK_PREFIX "orl %0, %1\n"
			     "notl %0\n"
			     LOCK_PREFIX "andl %0, %2\n"
			     : "=r" (evtchn_pending_sel32),
			       "+m" (vi->evtchn_pending_sel),
			       "+m" (v->arch.xen.evtchn_pending_sel)
			     : "0" (evtchn_pending_sel32));
		WRITE_ONCE(vi->evtchn_upcall_pending, 1);
	}

	kvm_gpc_mark_dirty_in_slot(gpc);
	read_unlock_irqrestore(&gpc->lock, flags);

	/* For the per-vCPU lapic vector, deliver it as MSI. */
	if (v->arch.xen.upcall_vector)
		kvm_xen_inject_vcpu_vector(v);
}

/*
 * Read the evtchn_upcall_pending byte from the vcpu_info of vCPU @v.
 *
 * Returns the value of that byte. If the vcpu_info cache is not valid the
 * function returns 1 when it is not allowed to sleep, and 0 when refreshing
 * the cache fails.
 */
int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
{
	struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
	unsigned long flags;
	u8 rc = 0;

	/*
	 * If the global upcall vector (HVMIRQ_callback_vector) is set and
	 * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
*/

	/* No need for compat handling here */
	BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
		     offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
	BUILD_BUG_ON(sizeof(rc) !=
		     sizeof_field(struct vcpu_info, evtchn_upcall_pending));
	BUILD_BUG_ON(sizeof(rc) !=
		     sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));

	read_lock_irqsave(&gpc->lock, flags);
	while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
		read_unlock_irqrestore(&gpc->lock, flags);

		/*
		 * This function gets called from kvm_vcpu_block() after setting the
		 * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
		 * from a HLT. So we really mustn't sleep. If the page ended up absent
		 * at that point, just return 1 in order to trigger an immediate wake,
		 * and we'll end up getting called again from a context where we *can*
		 * fault in the page and wait for it.
		 */
		if (in_atomic() || !task_is_running(current))
			return 1;

		if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) {
			/*
			 * If this failed, userspace has screwed up the
			 * vcpu_info mapping. No interrupts for you.
			 */
			return 0;
		}
		read_lock_irqsave(&gpc->lock, flags);
	}

	rc = ((struct vcpu_info *)gpc->khva)->evtchn_upcall_pending;
	read_unlock_irqrestore(&gpc->lock, flags);
	return rc;
}

/*
 * Set the VM-wide Xen attribute selected by data->type.
 *
 * Returns 0 on success, -ENOENT for a type not handled here, -EINVAL for a
 * rejected value, -EOPNOTSUPP for the runstate update flag when
 * sched_info_on() is false, or the result of the helper called for the
 * attribute (kvm_gpc_activate*(), kvm_xen_shared_info_init(),
 * kvm_xen_setattr_evtchn()).
 */
int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
	int r = -ENOENT;

	switch (data->type) {
	case KVM_XEN_ATTR_TYPE_LONG_MODE:
		/* Long mode cannot be selected unless built with CONFIG_64BIT. */
		if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) {
			r = -EINVAL;
		} else {
			mutex_lock(&kvm->arch.xen.xen_lock);
			kvm->arch.xen.long_mode = !!data->u.long_mode;

			/*
			 * Re-initialize shared_info to put the wallclock in the
			 * correct place. Whilst it's not necessary to do this
			 * unless the mode is actually changed, it does no harm
			 * to make the call anyway.
			 */
			r = kvm->arch.xen.shinfo_cache.active ?
				kvm_xen_shared_info_init(kvm) : 0;
			mutex_unlock(&kvm->arch.xen.xen_lock);
		}
		break;

	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
	case KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA: {
		int idx;

		mutex_lock(&kvm->arch.xen.xen_lock);

		idx = srcu_read_lock(&kvm->srcu);

		if (data->type == KVM_XEN_ATTR_TYPE_SHARED_INFO) {
			gfn_t gfn = data->u.shared_info.gfn;

			/* KVM_XEN_INVALID_GFN deactivates the cache. */
			if (gfn == KVM_XEN_INVALID_GFN) {
				kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache);
				r = 0;
			} else {
				r = kvm_gpc_activate(&kvm->arch.xen.shinfo_cache,
						     gfn_to_gpa(gfn), PAGE_SIZE);
			}
		} else {
			void __user * hva = u64_to_user_ptr(data->u.shared_info.hva);

			/* A page-aligned HVA is required; zero deactivates the cache. */
			if (!PAGE_ALIGNED(hva)) {
				r = -EINVAL;
			} else if (!hva) {
				kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache);
				r = 0;
			} else {
				r = kvm_gpc_activate_hva(&kvm->arch.xen.shinfo_cache,
							 (unsigned long)hva, PAGE_SIZE);
			}
		}

		srcu_read_unlock(&kvm->srcu, idx);

		/* (Re)initialize the page whenever the cache ends up active. */
		if (!r && kvm->arch.xen.shinfo_cache.active)
			r = kvm_xen_shared_info_init(kvm);

		mutex_unlock(&kvm->arch.xen.xen_lock);
		break;
	}
	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
		/* Zero is accepted; non-zero vectors below 0x10 are rejected. */
		if (data->u.vector && data->u.vector < 0x10)
			r = -EINVAL;
		else {
			mutex_lock(&kvm->arch.xen.xen_lock);
			kvm->arch.xen.upcall_vector = data->u.vector;
			mutex_unlock(&kvm->arch.xen.xen_lock);
			r = 0;
		}
		break;

	case KVM_XEN_ATTR_TYPE_EVTCHN:
		r = kvm_xen_setattr_evtchn(kvm, data);
		break;

	case KVM_XEN_ATTR_TYPE_XEN_VERSION:
		mutex_lock(&kvm->arch.xen.xen_lock);
		kvm->arch.xen.xen_version = data->u.xen_version;
		mutex_unlock(&kvm->arch.xen.xen_lock);
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		mutex_lock(&kvm->arch.xen.xen_lock);
		kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag;
		mutex_unlock(&kvm->arch.xen.xen_lock);
		r = 0;
		break;

	default:
		break;
	}

	return r;
}

/*
 * Read back the VM-wide Xen attribute selected by data->type into @data.
 * The whole lookup runs with xen_lock held.
 *
 * Returns 0 on success, -ENOENT for a type not handled here, or -EOPNOTSUPP
 * for the runstate update flag when sched_info_on() is false.
 */
int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
	int r = -ENOENT;

	mutex_lock(&kvm->arch.xen.xen_lock);

	switch (data->type) {
	case KVM_XEN_ATTR_TYPE_LONG_MODE:
		data->u.long_mode = kvm->arch.xen.long_mode;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
		/* Report KVM_XEN_INVALID_GFN unless the cache is active by GPA. */
		if (kvm_gpc_is_gpa_active(&kvm->arch.xen.shinfo_cache))
			data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
		else
			data->u.shared_info.gfn = KVM_XEN_INVALID_GFN;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA:
		/* Report zero unless the cache is active by HVA. */
		if (kvm_gpc_is_hva_active(&kvm->arch.xen.shinfo_cache))
			data->u.shared_info.hva = kvm->arch.xen.shinfo_cache.uhva;
		else
			data->u.shared_info.hva = 0;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
		data->u.vector = kvm->arch.xen.upcall_vector;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_XEN_VERSION:
		data->u.xen_version = kvm->arch.xen.xen_version;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		data->u.runstate_update_flag = kvm->arch.xen.runstate_update_flag;
		r = 0;
		break;

	default:
		break;
	}

	mutex_unlock(&kvm->arch.xen.xen_lock);
	return r;
}

/*
 * Set the per-vCPU Xen attribute selected by data->type.
 *
 * The whole operation runs with the VM's xen_lock held and inside an SRCU
 * read-side section on kvm->srcu. The result starts out as -ENOENT and is
 * replaced by the handler for the matching attribute type.
 */
int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
	int idx, r = -ENOENT;

	mutex_lock(&vcpu->kvm->arch.xen.xen_lock);
	idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (data->type) {
	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA:
		/* No compat necessary here.
*/
		BUILD_BUG_ON(sizeof(struct vcpu_info) !=
			     sizeof(struct compat_vcpu_info));
		BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
			     offsetof(struct compat_vcpu_info, time));

		if (data->type == KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO) {
			/* KVM_XEN_INVALID_GPA deactivates the cache. */
			if (data->u.gpa == KVM_XEN_INVALID_GPA) {
				kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
				r = 0;
				break;
			}

			r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache,
					     data->u.gpa, sizeof(struct vcpu_info));
		} else {
			/* A zero HVA deactivates the cache. */
			if (data->u.hva == 0) {
				kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
				r = 0;
				break;
			}

			r = kvm_gpc_activate_hva(&vcpu->arch.xen.vcpu_info_cache,
						 data->u.hva, sizeof(struct vcpu_info));
		}

		/* A newly activated cache triggers a clock update request. */
		if (!r)
			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);

		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
		if (data->u.gpa == KVM_XEN_INVALID_GPA) {
			kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache);
			r = 0;
			break;
		}

		r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_time_info_cache,
				     data->u.gpa,
				     sizeof(struct pvclock_vcpu_time_info));
		if (!r)
			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: {
		size_t sz, sz1, sz2;

		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.gpa == KVM_XEN_INVALID_GPA) {
			r = 0;
			/*
			 * Also reached by goto when activating either cache
			 * fails; r then holds that error.
			 */
		deactivate_out:
			kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache);
			kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache);
			break;
		}

		/*
		 * If the guest switches to 64-bit mode after setting the runstate
		 * address, that's actually OK. kvm_xen_update_runstate_guest()
		 * will cope.
		 */
		if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode)
			sz = sizeof(struct vcpu_runstate_info);
		else
			sz = sizeof(struct compat_vcpu_runstate_info);

		/* How much fits in the (first) page? */
		sz1 = PAGE_SIZE - (data->u.gpa & ~PAGE_MASK);
		r = kvm_gpc_activate(&vcpu->arch.xen.runstate_cache,
				     data->u.gpa, sz1);
		if (r)
			goto deactivate_out;

		/* Either map the second page, or deactivate the second GPC */
		if (sz1 >= sz) {
			kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache);
		} else {
			sz2 = sz - sz1;
			/* gpa + sz1 is the start of the following page. */
			BUG_ON((data->u.gpa + sz1) & ~PAGE_MASK);
			r = kvm_gpc_activate(&vcpu->arch.xen.runstate2_cache,
					     data->u.gpa + sz1, sz2);
			if (r)
				goto deactivate_out;
		}

		kvm_xen_update_runstate_guest(vcpu, false);
		break;
	}
	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline) {
			r = -EINVAL;
			break;
		}

		kvm_xen_update_runstate(vcpu, data->u.runstate.state);
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline) {
			r = -EINVAL;
			break;
		}
		/* The entry time must equal the sum of the four counters. */
		if (data->u.runstate.state_entry_time !=
		    (data->u.runstate.time_running +
		     data->u.runstate.time_runnable +
		     data->u.runstate.time_blocked +
		     data->u.runstate.time_offline)) {
			r = -EINVAL;
			break;
		}
		/* The entry time must not lie beyond the current kvmclock. */
		if (get_kvmclock_ns(vcpu->kvm) <
		    data->u.runstate.state_entry_time) {
			r = -EINVAL;
			break;
		}

		vcpu->arch.xen.current_runstate = data->u.runstate.state;
		vcpu->arch.xen.runstate_entry_time =
			data->u.runstate.state_entry_time;
		vcpu->arch.xen.runstate_times[RUNSTATE_running] =
			data->u.runstate.time_running;
		vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
			data->u.runstate.time_runnable;
		vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
			data->u.runstate.time_blocked;
		vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
			data->u.runstate.time_offline;
		vcpu->arch.xen.last_steal = current->sched_info.run_delay;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		/*
		 * A state of (u64)-1 is accepted: further down it leaves
		 * the current runstate unchanged and only adjusts the times.
		 */
		if (data->u.runstate.state > RUNSTATE_offline &&
		    data->u.runstate.state != (u64)-1) {
			r = -EINVAL;
			break;
		}
		/* The adjustment must add up */
		if (data->u.runstate.state_entry_time !=
		    (data->u.runstate.time_running +
		     data->u.runstate.time_runnable +
		     data->u.runstate.time_blocked +
		     data->u.runstate.time_offline)) {
			r = -EINVAL;
			break;
		}

		/* The adjusted entry time must not lie beyond the current kvmclock. */
		if (get_kvmclock_ns(vcpu->kvm) <
		    (vcpu->arch.xen.runstate_entry_time +
		     data->u.runstate.state_entry_time)) {
			r = -EINVAL;
			break;
		}

		vcpu->arch.xen.runstate_entry_time +=
			data->u.runstate.state_entry_time;
		vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
			data->u.runstate.time_running;
		vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
			data->u.runstate.time_runnable;
		vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
			data->u.runstate.time_blocked;
		vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
			data->u.runstate.time_offline;

		if (data->u.runstate.state <= RUNSTATE_offline)
			kvm_xen_update_runstate(vcpu, data->u.runstate.state);
		else if (vcpu->arch.xen.runstate_cache.active)
			kvm_xen_update_runstate_guest(vcpu, false);
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID:
		if (data->u.vcpu_id >= KVM_MAX_VCPUS)
			r = -EINVAL;
		else {
			vcpu->arch.xen.vcpu_id = data->u.vcpu_id;
			r = 0;
		}
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_TIMER:
		/* A non-zero port is only accepted with 2-level priority. */
		if (data->u.timer.port &&
		    data->u.timer.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) {
			r = -EINVAL;
			break;
		}

		/* Stop the timer (if it's running) before changing the vector */
		kvm_xen_stop_timer(vcpu);
		vcpu->arch.xen.timer_virq = data->u.timer.port;

		/* Start the timer if the new value has a valid vector+expiry.
*/
		if (data->u.timer.port && data->u.timer.expires_ns)
			kvm_xen_start_timer(vcpu, data->u.timer.expires_ns, false);

		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR:
		/* Zero is accepted; non-zero vectors below 0x10 are rejected. */
		if (data->u.vector && data->u.vector < 0x10)
			r = -EINVAL;
		else {
			vcpu->arch.xen.upcall_vector = data->u.vector;
			r = 0;
		}
		break;

	default:
		break;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	mutex_unlock(&vcpu->kvm->arch.xen.xen_lock);
	return r;
}

/*
 * Read back the per-vCPU Xen attribute selected by data->type into @data.
 * The whole lookup runs with the VM's xen_lock held.
 *
 * Returns 0 on success, -ENOENT for a type not handled here (and for
 * RUNSTATE_ADDR while the runstate cache is inactive), -EOPNOTSUPP for the
 * runstate attributes when sched_info_on() is false, and -EINVAL for
 * RUNSTATE_ADJUST, which cannot be read.
 */
int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
	int r = -ENOENT;

	mutex_lock(&vcpu->kvm->arch.xen.xen_lock);

	switch (data->type) {
	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
		/* Report KVM_XEN_INVALID_GPA unless the cache is active by GPA. */
		if (kvm_gpc_is_gpa_active(&vcpu->arch.xen.vcpu_info_cache))
			data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
		else
			data->u.gpa = KVM_XEN_INVALID_GPA;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA:
		/* Report zero unless the cache is active by HVA. */
		if (kvm_gpc_is_hva_active(&vcpu->arch.xen.vcpu_info_cache))
			data->u.hva = vcpu->arch.xen.vcpu_info_cache.uhva;
		else
			data->u.hva = 0;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
		if (vcpu->arch.xen.vcpu_time_info_cache.active)
			data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
		else
			data->u.gpa = KVM_XEN_INVALID_GPA;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		/* With an inactive cache r keeps its initial -ENOENT. */
		if (vcpu->arch.xen.runstate_cache.active) {
			data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
			r = 0;
		}
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		data->u.runstate.state = vcpu->arch.xen.current_runstate;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		data->u.runstate.state = vcpu->arch.xen.current_runstate;
		data->u.runstate.state_entry_time =
			vcpu->arch.xen.runstate_entry_time;
		data->u.runstate.time_running =
			vcpu->arch.xen.runstate_times[RUNSTATE_running];
		data->u.runstate.time_runnable =
			vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
		data->u.runstate.time_blocked =
			vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
		data->u.runstate.time_offline =
			vcpu->arch.xen.runstate_times[RUNSTATE_offline];
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
		/* The adjustment is write-only. */
		r = -EINVAL;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID:
		data->u.vcpu_id = vcpu->arch.xen.vcpu_id;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_TIMER:
		/*
		 * Ensure a consistent snapshot of state is captured, with a
		 * timer either being pending, or the event channel delivered
		 * to the corresponding bit in the shared_info. Not still
		 * lurking in the timer_pending flag for deferred delivery.
		 * Purely as an optimisation, if the timer_expires field is
		 * zero, that means the timer isn't active (or even in the
		 * timer_pending flag) and there is no need to cancel it.
		 */
		if (vcpu->arch.xen.timer_expires) {
			hrtimer_cancel(&vcpu->arch.xen.timer);
			kvm_xen_inject_timer_irqs(vcpu);
		}

		data->u.timer.port = vcpu->arch.xen.timer_virq;
		data->u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
		data->u.timer.expires_ns = vcpu->arch.xen.timer_expires;

		/*
		 * The hrtimer may trigger and raise the IRQ immediately,
		 * while the returned state causes it to be set up and
		 * raised again on the destination system after migration.
		 * That's fine, as the guest won't even have had a chance
		 * to run and handle the interrupt. Asserting an already
		 * pending event channel is idempotent.
*/
		if (vcpu->arch.xen.timer_expires)
			hrtimer_start_expires(&vcpu->arch.xen.timer,
					      HRTIMER_MODE_ABS_HARD);

		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR:
		data->u.vector = vcpu->arch.xen.upcall_vector;
		r = 0;
		break;

	default:
		break;
	}

	mutex_unlock(&vcpu->kvm->arch.xen.xen_lock);
	return r;
}

/*
 * Fill one guest page with the Xen hypercall page contents.
 *
 * @data encodes the request: the page-aligned part is the guest address to
 * write to, the bits below PAGE_SIZE are the index of the page wanted.
 *
 * The VM's long_mode flag is first synchronised with is_long_mode(vcpu),
 * re-initializing an active shared_info if it changed.
 *
 * Returns 0 on success and 1 on failure; a failing memdup_user() is
 * reported as its negative errno.
 */
int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm *kvm = vcpu->kvm;
	u32 page_num = data & ~PAGE_MASK;
	u64 page_addr = data & PAGE_MASK;
	bool lm = is_long_mode(vcpu);
	int r = 0;

	mutex_lock(&kvm->arch.xen.xen_lock);
	if (kvm->arch.xen.long_mode != lm) {
		kvm->arch.xen.long_mode = lm;

		/*
		 * Re-initialize shared_info to put the wallclock in the
		 * correct place.
		 */
		if (kvm->arch.xen.shinfo_cache.active &&
		    kvm_xen_shared_info_init(kvm))
			r = 1;
	}
	mutex_unlock(&kvm->arch.xen.xen_lock);

	if (r)
		return r;

	/*
	 * If Xen hypercall intercept is enabled, fill the hypercall
	 * page with VMCALL/VMMCALL instructions since that's what
	 * we catch. Else the VMM has provided the hypercall pages
	 * with instructions of its own choosing, so use those.
	 */
	if (kvm_xen_hypercall_enabled(kvm)) {
		/*
		 * One 32-byte stub per hypercall number: opcode 0xb8 with a
		 * 32-bit immediate at bytes 0-4, the patched hypercall
		 * instruction from byte 5, 0xc3 at byte 8 and 0xcc padding
		 * for the remainder.
		 */
		u8 instructions[32];
		int i;

		/* The generated page is a single page: only index 0 exists. */
		if (page_num)
			return 1;

		/* mov imm32, %eax */
		instructions[0] = 0xb8;

		/* vmcall / vmmcall */
		kvm_x86_call(patch_hypercall)(vcpu, instructions + 5);

		/* ret */
		instructions[8] = 0xc3;

		/* int3 to pad */
		memset(instructions + 9, 0xcc, sizeof(instructions) - 9);

		for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
			/* The immediate is the stub's own index. */
			*(u32 *)&instructions[1] = i;
			if (kvm_vcpu_write_guest(vcpu,
						 page_addr + (i * sizeof(instructions)),
						 instructions, sizeof(instructions)))
				return 1;
		}
	} else {
		/*
		 * Note, truncation is a non-issue as 'lm' is guaranteed to be
		 * false for a 32-bit kernel, i.e. when hva_t is only 4 bytes.
		 */
		hva_t blob_addr = lm ? kvm->arch.xen.hvm_config.blob_addr_64
				     : kvm->arch.xen.hvm_config.blob_addr_32;
		u8 blob_size = lm ? kvm->arch.xen.hvm_config.blob_size_64
				  : kvm->arch.xen.hvm_config.blob_size_32;
		u8 *page;
		int ret;

		/* blob_size is compared against the page index. */
		if (page_num >= blob_size)
			return 1;

		blob_addr += page_num * PAGE_SIZE;

		/* Copy the requested page from the userspace blob. */
		page = memdup_user((u8 __user *)blob_addr, PAGE_SIZE);
		if (IS_ERR(page))
			return PTR_ERR(page);

		ret = kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE);
		kfree(page);
		if (ret)
			return 1;
	}
	return 0;
}

/*
 * Install a new Xen HVM configuration @xhc for @kvm.
 *
 * Returns -EINVAL if @xhc carries flags outside the permitted set, combines
 * hypercall interception with a hypercall blob, or names an MSR outside
 * [KVM_XEN_MSR_MIN_INDEX, KVM_XEN_MSR_MAX_INDEX]; 0 otherwise.
 */
int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
{
	/* Only some feature flags need to be *enabled* by userspace */
	u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
		KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
		KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
	u32 old_flags;

	if (xhc->flags & ~permitted_flags)
		return -EINVAL;

	/*
	 * With hypercall interception the kernel generates its own
	 * hypercall page so it must not be provided.
	 */
	if ((xhc->flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) &&
	    (xhc->blob_addr_32 || xhc->blob_addr_64 ||
	     xhc->blob_size_32 || xhc->blob_size_64))
		return -EINVAL;

	/*
	 * Restrict the MSR to the range that is unofficially reserved for
	 * synthetic, virtualization-defined MSRs, e.g. to prevent confusing
	 * KVM by colliding with a real MSR that requires special handling.
*/
	if (xhc->msr &&
	    (xhc->msr < KVM_XEN_MSR_MIN_INDEX || xhc->msr > KVM_XEN_MSR_MAX_INDEX))
		return -EINVAL;

	mutex_lock(&kvm->arch.xen.xen_lock);

	/*
	 * Adjust the kvm_xen_enabled static key only when the MSR goes
	 * from unset to set, or from set to unset.
	 */
	if (xhc->msr && !kvm->arch.xen.hvm_config.msr)
		static_branch_inc(&kvm_xen_enabled.key);
	else if (!xhc->msr && kvm->arch.xen.hvm_config.msr)
		static_branch_slow_dec_deferred(&kvm_xen_enabled);

	old_flags = kvm->arch.xen.hvm_config.flags;
	memcpy(&kvm->arch.xen.hvm_config, xhc, sizeof(*xhc));

	mutex_unlock(&kvm->arch.xen.xen_lock);

	/* A change of the PVCLOCK_TSC_UNSTABLE flag needs a clock update. */
	if ((old_flags ^ xhc->flags) & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)
		kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);

	return 0;
}

/*
 * Store @result in the guest's RAX and skip the emulated instruction.
 * Returns the result of kvm_skip_emulated_instruction().
 */
static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	kvm_rax_write(vcpu, result);
	return kvm_skip_emulated_instruction(vcpu);
}

/*
 * Complete a hypercall using the result found in run->xen.u.hcall.result.
 * If the vCPU's linear RIP no longer matches the saved hypercall_rip, the
 * result is not applied and 1 is returned.
 */
static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;

	if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.xen.hypercall_rip)))
		return 1;

	return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
}

/*
 * Exclusive upper bound for event channel port numbers: callers reject
 * ports greater than or equal to this value. It is EVTCHN_2L_NR_CHANNELS
 * for a long mode guest on a CONFIG_64BIT build, otherwise
 * COMPAT_EVTCHN_2L_NR_CHANNELS.
 */
static inline int max_evtchn_port(struct kvm *kvm)
{
	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
		return EVTCHN_2L_NR_CHANNELS;
	else
		return COMPAT_EVTCHN_2L_NR_CHANNELS;
}

/*
 * Check whether any of the @nr_ports ports in @ports has its bit set in the
 * evtchn_pending bitmap of shared_info.
 *
 * Returns true if at least one port is pending, and also if the shared_info
 * cache is not valid, in which case nothing is inspected. Returns false
 * only when the bitmap was inspected and none of the ports is pending.
 */
static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
			       evtchn_port_t *ports)
{
	struct kvm *kvm = vcpu->kvm;
	struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
	unsigned long *pending_bits;
	unsigned long flags;
	bool ret = true;
	int idx, i;

	idx = srcu_read_lock(&kvm->srcu);
	read_lock_irqsave(&gpc->lock, flags);
	if (!kvm_gpc_check(gpc, PAGE_SIZE))
		goto out_rcu;

	ret = false;
	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
		struct shared_info *shinfo = gpc->khva;
		pending_bits = (unsigned long *)&shinfo->evtchn_pending;
	} else {
		struct compat_shared_info *shinfo = gpc->khva;
		pending_bits = (unsigned long *)&shinfo->evtchn_pending;
	}

	for (i = 0; i < nr_ports; i++) {
		if (test_bit(ports[i], pending_bits)) {
			ret = true;
			break;
		}
	}

 out_rcu:
	read_unlock_irqrestore(&gpc->lock, flags);
	srcu_read_unlock(&kvm->srcu, idx);

	return ret;
}

/*
 * Handle the SCHEDOP_poll hypercall whose argument structure lives at guest
 * virtual address @param.
 *
 * Returns false, leaving *r untouched, when the LAPIC is not in-kernel or
 * KVM_XEN_HVM_CONFIG_EVTCHN_SEND is not set. Otherwise returns true with
 * the hypercall result in *r: 0, -EFAULT for a failed guest read, -EINVAL
 * for more than 128 ports or an out-of-range port, or -ENOMEM.
 */
static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
				 u64 param, u64 *r)
{
	struct sched_poll sched_poll;
	evtchn_port_t port, *ports;
	struct x86_exception e;
	int i;

	if (!lapic_in_kernel(vcpu) ||
	    !(vcpu->kvm->arch.xen.hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
		return false;

	if (IS_ENABLED(CONFIG_64BIT) && !longmode) {
		struct compat_sched_poll sp32;

		/* Sanity check that the compat struct definition is correct */
		BUILD_BUG_ON(sizeof(sp32) != 16);

		if (kvm_read_guest_virt(vcpu, param, &sp32, sizeof(sp32), &e)) {
			*r = -EFAULT;
			return true;
		}

		/*
		 * This is a 32-bit pointer to an array of evtchn_port_t which
		 * are uint32_t, so once it's converted no further compat
		 * handling is needed.
		 */
		sched_poll.ports = (void *)(unsigned long)(sp32.ports);
		sched_poll.nr_ports = sp32.nr_ports;
		sched_poll.timeout = sp32.timeout;
	} else {
		if (kvm_read_guest_virt(vcpu, param, &sched_poll,
					sizeof(sched_poll), &e)) {
			*r = -EFAULT;
			return true;
		}
	}

	/* Only the multi-port case allocates; otherwise use the on-stack port. */
	if (unlikely(sched_poll.nr_ports > 1)) {
		/* Xen (unofficially) limits number of pollers to 128 */
		if (sched_poll.nr_ports > 128) {
			*r = -EINVAL;
			return true;
		}

		ports = kmalloc_array(sched_poll.nr_ports,
				      sizeof(*ports), GFP_KERNEL);
		if (!ports) {
			*r = -ENOMEM;
			return true;
		}
	} else
		ports = &port;

	if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports,
				sched_poll.nr_ports * sizeof(*ports), &e)) {
		*r = -EFAULT;
		goto out;
	}

	for (i = 0; i < sched_poll.nr_ports; i++) {
		if (ports[i] >= max_evtchn_port(vcpu->kvm)) {
			*r = -EINVAL;
			goto out;
		}
	}

	/*
	 * Record what is being polled: the port itself for a single
	 * port, -1 otherwise. kvm_xen_check_poller() matches on either.
	 */
	if (sched_poll.nr_ports == 1)
		vcpu->arch.xen.poll_evtchn = port;
	else
		vcpu->arch.xen.poll_evtchn = -1;

	set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask);

	/* Halt only if none of the ports is pending already. */
	if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports)) {
		kvm_set_mp_state(vcpu, KVM_MP_STATE_HALTED);

		if (sched_poll.timeout)
			mod_timer(&vcpu->arch.xen.poll_timer,
				  jiffies + nsecs_to_jiffies(sched_poll.timeout));

		kvm_vcpu_halt(vcpu);

		if (sched_poll.timeout)
			timer_delete(&vcpu->arch.xen.poll_timer);

		kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
	}

	vcpu->arch.xen.poll_evtchn = 0;
	*r = 0;
out:
	/* Really, this is only needed in case of timeout */
	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask);

	/* Mirrors the allocation condition above. */
	if (unlikely(sched_poll.nr_ports > 1))
		kfree(ports);
	return true;
}

/*
 * Wake the vCPU that embeds timer @t as arch.xen.poll_timer: request an
 * unblock and kick it. Presumably installed as the poll_timer callback;
 * the registration is not visible here.
 */
static void cancel_evtchn_poll(struct timer_list *t)
{
	struct kvm_vcpu *vcpu = timer_container_of(vcpu, t,
						   arch.xen.poll_timer);

	kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
	kvm_vcpu_kick(vcpu);
}

/*
 * Handle the sched_op hypercall sub-command @cmd.
 *
 * Returns true with the result in *r when the command was handled here,
 * false for any other sub-command.
 */
static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode,
				   int cmd, u64 param, u64 *r)
{
	switch (cmd) {
	case SCHEDOP_poll:
		if (kvm_xen_schedop_poll(vcpu, longmode, param, r))
			return true;
		/* A poll that was not handled is treated as a yield. */
		fallthrough;
	case SCHEDOP_yield:
		kvm_vcpu_on_spin(vcpu, true);
		*r = 0;
		return true;
	default:
		break;
	}

	return false;
}

/*
 * Packed variant of struct vcpu_set_singleshot_timer: a 64-bit timeout
 * followed by 32-bit flags with no trailing padding.
 */
struct compat_vcpu_set_singleshot_timer {
	uint64_t timeout_abs_ns;
	uint32_t flags;
} __attribute__((packed));

/*
 * Handle the vcpu_op hypercall sub-command @cmd aimed at Xen vCPU id
 * @vcpu_id, with its argument at guest virtual address @param.
 *
 * Returns false when the Xen timer is not enabled for @vcpu or @cmd is not
 * one of the single-shot timer commands; otherwise true with the result in
 * *r.
 */
static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
				  int vcpu_id, u64 param, u64 *r)
{
	struct vcpu_set_singleshot_timer oneshot;
	struct x86_exception e;

	if (!kvm_xen_timer_enabled(vcpu))
		return false;

	switch (cmd) {
	case VCPUOP_set_singleshot_timer:
		/* Only the calling vCPU's own timer may be set. */
		if (vcpu->arch.xen.vcpu_id != vcpu_id) {
			*r = -EINVAL;
			return true;
		}

		/*
		 * The only difference for 32-bit compat is the 4 bytes of
		 * padding after the interesting part of the structure. So
		 * for a faithful emulation of Xen we have to *try* to copy
		 * the padding and return -EFAULT if we can't. Otherwise we
		 * might as well just have copied the 12-byte 32-bit struct.
*/
		BUILD_BUG_ON(offsetof(struct compat_vcpu_set_singleshot_timer, timeout_abs_ns) !=
			     offsetof(struct vcpu_set_singleshot_timer, timeout_abs_ns));
		BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, timeout_abs_ns) !=
			     sizeof_field(struct vcpu_set_singleshot_timer, timeout_abs_ns));
		BUILD_BUG_ON(offsetof(struct compat_vcpu_set_singleshot_timer, flags) !=
			     offsetof(struct vcpu_set_singleshot_timer, flags));
		BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, flags) !=
			     sizeof_field(struct vcpu_set_singleshot_timer, flags));

		/* Read the full struct in long mode, the packed one otherwise. */
		if (kvm_read_guest_virt(vcpu, param, &oneshot, longmode ? sizeof(oneshot) :
					sizeof(struct compat_vcpu_set_singleshot_timer), &e)) {
			*r = -EFAULT;
			return true;
		}

		kvm_xen_start_timer(vcpu, oneshot.timeout_abs_ns, false);
		*r = 0;
		return true;

	case VCPUOP_stop_singleshot_timer:
		/* Only the calling vCPU's own timer may be stopped. */
		if (vcpu->arch.xen.vcpu_id != vcpu_id) {
			*r = -EINVAL;
			return true;
		}
		kvm_xen_stop_timer(vcpu);
		*r = 0;
		return true;
	}

	return false;
}

/*
 * Handle the set_timer_op hypercall: a non-zero @timeout starts the Xen
 * timer, zero stops it.
 *
 * Returns false when the Xen timer is not enabled for @vcpu; otherwise true
 * with *r set to 0.
 */
static bool kvm_xen_hcall_set_timer_op(struct kvm_vcpu *vcpu, uint64_t timeout,
				       u64 *r)
{
	if (!kvm_xen_timer_enabled(vcpu))
		return false;

	if (timeout)
		kvm_xen_start_timer(vcpu, timeout, true);
	else
		kvm_xen_stop_timer(vcpu);

	*r = 0;
	return true;
}

/*
 * Entry point for a hypercall made by @vcpu.
 *
 * The hypercall number is read from RAX. Inputs with bit 31 set are passed
 * to kvm_hv_hypercall() when Hyper-V hypercalls are enabled. Hypercalls
 * made at CPL0 that one of the in-kernel handlers accepts are completed
 * here; everything else is described in vcpu->run as a KVM_EXIT_XEN /
 * KVM_EXIT_XEN_HCALL exit and 0 is returned.
 */
int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
{
	bool longmode;
	u64 input, params[6], r = -ENOSYS;
	bool handled = false;
	u8 cpl;

	input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);

	/* Hyper-V hypercalls get bit 31 set in EAX */
	if ((input & 0x80000000) &&
	    kvm_hv_hypercall_enabled(vcpu))
		return kvm_hv_hypercall(vcpu);

	longmode = is_64_bit_hypercall(vcpu);
	if (!longmode) {
		/* Arguments are the low 32 bits of rbx, rcx, rdx, rsi, rdi, rbp. */
		params[0] = (u32)kvm_rbx_read(vcpu);
		params[1] = (u32)kvm_rcx_read(vcpu);
		params[2] = (u32)kvm_rdx_read(vcpu);
		params[3] = (u32)kvm_rsi_read(vcpu);
		params[4] = (u32)kvm_rdi_read(vcpu);
		params[5] = (u32)kvm_rbp_read(vcpu);
	}
#ifdef CONFIG_X86_64
	else {
		/* Arguments are in rdi, rsi, rdx, r10, r8, r9. */
		params[0] = (u64)kvm_rdi_read(vcpu);
		params[1] = (u64)kvm_rsi_read(vcpu);
		params[2] = (u64)kvm_rdx_read(vcpu);
		params[3] = (u64)kvm_r10_read(vcpu);
		params[4] = (u64)kvm_r8_read(vcpu);
		params[5] = (u64)kvm_r9_read(vcpu);
	}
#endif
	cpl = kvm_x86_call(get_cpl)(vcpu);
	trace_kvm_xen_hypercall(cpl, input, params[0], params[1], params[2],
				params[3], params[4], params[5]);

	/*
	 * Only allow hypercall acceleration for CPL0. The rare hypercalls that
	 * are permitted in guest userspace can be handled by the VMM.
	 */
	if (unlikely(cpl > 0))
		goto handle_in_userspace;

	switch (input) {
	case __HYPERVISOR_xen_version:
		/* Answered in-kernel only when a version has been configured. */
		if (params[0] == XENVER_version && vcpu->kvm->arch.xen.xen_version) {
			r = vcpu->kvm->arch.xen.xen_version;
			handled = true;
		}
		break;
	case __HYPERVISOR_event_channel_op:
		if (params[0] == EVTCHNOP_send)
			handled = kvm_xen_hcall_evtchn_send(vcpu, params[1], &r);
		break;
	case __HYPERVISOR_sched_op:
		handled = kvm_xen_hcall_sched_op(vcpu, longmode, params[0],
						 params[1], &r);
		break;
	case __HYPERVISOR_vcpu_op:
		handled = kvm_xen_hcall_vcpu_op(vcpu, longmode, params[0], params[1],
						params[2], &r);
		break;
	case __HYPERVISOR_set_timer_op: {
		u64 timeout = params[0];
		/* In 32-bit mode, the 64-bit timeout is in two 32-bit params.
*/
		if (!longmode)
			timeout |= params[1] << 32;
		handled = kvm_xen_hcall_set_timer_op(vcpu, timeout, &r);
		break;
	}
	default:
		break;
	}

	if (handled)
		return kvm_xen_hypercall_set_result(vcpu, r);

handle_in_userspace:
	/* Describe the hypercall in the run structure for the exit. */
	vcpu->run->exit_reason = KVM_EXIT_XEN;
	vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
	vcpu->run->xen.u.hcall.longmode = longmode;
	vcpu->run->xen.u.hcall.cpl = cpl;
	vcpu->run->xen.u.hcall.input = input;
	vcpu->run->xen.u.hcall.params[0] = params[0];
	vcpu->run->xen.u.hcall.params[1] = params[1];
	vcpu->run->xen.u.hcall.params[2] = params[2];
	vcpu->run->xen.u.hcall.params[3] = params[3];
	vcpu->run->xen.u.hcall.params[4] = params[4];
	vcpu->run->xen.u.hcall.params[5] = params[5];
	/* Saved so that the completion handler can verify RIP is unchanged. */
	vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu);
	vcpu->arch.complete_userspace_io =
		kvm_xen_hypercall_complete_userspace;

	return 0;
}

/*
 * Wake @vcpu if it is polling on @port: its poll_evtchn is either @port or
 * -1, and its bit in the VM's poll_mask was still set (the bit is cleared
 * by the test).
 */
static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port)
{
	int poll_evtchn = vcpu->arch.xen.poll_evtchn;

	if ((poll_evtchn == port || poll_evtchn == -1) &&
	    test_and_clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask)) {
		kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
		kvm_vcpu_kick(vcpu);
	}
}

/*
 * The return value from this function is propagated to kvm_set_irq() API,
 * so it returns:
 *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
 *  = 0   Interrupt was coalesced (previous irq is still pending)
 *  > 0   Number of CPUs interrupt was delivered to
 *
 * It is also called directly from kvm_arch_set_irq_inatomic(), where the
 * only check on its return value is a comparison with -EWOULDBLOCK.
 */
int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
{
	struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
	struct kvm_vcpu *vcpu;
	unsigned long *pending_bits, *mask_bits;
	unsigned long flags;
	int port_word_bit;
	bool kick_vcpu = false;
	int vcpu_idx, idx, rc;

	/*
	 * Use the cached vCPU index when there is one; otherwise look
	 * the vCPU up by id and cache its index for next time.
	 */
	vcpu_idx = READ_ONCE(xe->vcpu_idx);
	if (vcpu_idx >= 0)
		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
	else {
		vcpu = kvm_get_vcpu_by_id(kvm, xe->vcpu_id);
		if (!vcpu)
			return -EINVAL;
		WRITE_ONCE(xe->vcpu_idx, vcpu->vcpu_idx);
	}

	if (xe->port >= max_evtchn_port(kvm))
		return -EINVAL;

	/* Result if the shared_info cache turns out not to be valid. */
	rc = -EWOULDBLOCK;

	idx = srcu_read_lock(&kvm->srcu);

	read_lock_irqsave(&gpc->lock, flags);
	if (!kvm_gpc_check(gpc, PAGE_SIZE))
		goto out_rcu;

	/* port_word_bit is the index of the bitmap word holding the port. */
	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
		struct shared_info *shinfo = gpc->khva;
		pending_bits = (unsigned long *)&shinfo->evtchn_pending;
		mask_bits = (unsigned long *)&shinfo->evtchn_mask;
		port_word_bit = xe->port / 64;
	} else {
		struct compat_shared_info *shinfo = gpc->khva;
		pending_bits = (unsigned long *)&shinfo->evtchn_pending;
		mask_bits = (unsigned long *)&shinfo->evtchn_mask;
		port_word_bit = xe->port / 32;
	}

	/*
	 * If this port wasn't already set, and if it isn't masked, then
	 * we try to set the corresponding bit in the in-kernel shadow of
	 * evtchn_pending_sel for the target vCPU. And if *that* wasn't
	 * already set, then we kick the vCPU in question to write to the
	 * *real* evtchn_pending_sel in its own guest vcpu_info struct.
	 */
	if (test_and_set_bit(xe->port, pending_bits)) {
		rc = 0; /* It was already raised */
	} else if (test_bit(xe->port, mask_bits)) {
		rc = -ENOTCONN; /* Masked */
		kvm_xen_check_poller(vcpu, xe->port);
	} else {
		rc = 1; /* Delivered to the bitmap in shared_info. */
		/* Now switch to the vCPU's vcpu_info to set the index and pending_sel */
		read_unlock_irqrestore(&gpc->lock, flags);
		gpc = &vcpu->arch.xen.vcpu_info_cache;

		read_lock_irqsave(&gpc->lock, flags);
		if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
			/*
			 * Could not access the vcpu_info. Set the bit in-kernel
			 * and prod the vCPU to deliver it for itself.
			 */
			if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
				kick_vcpu = true;
			goto out_rcu;
		}

		if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
			struct vcpu_info *vcpu_info = gpc->khva;
			if (!test_and_set_bit(port_word_bit, &vcpu_info->evtchn_pending_sel)) {
				WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
				kick_vcpu = true;
			}
		} else {
			struct compat_vcpu_info *vcpu_info = gpc->khva;
			if (!test_and_set_bit(port_word_bit,
					      (unsigned long *)&vcpu_info->evtchn_pending_sel)) {
				WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
				kick_vcpu = true;
			}
		}

		/* For the per-vCPU lapic vector, deliver it as MSI. */
		if (kick_vcpu && vcpu->arch.xen.upcall_vector) {
			kvm_xen_inject_vcpu_vector(vcpu);
			kick_vcpu = false;
		}
	}

 out_rcu:
	/* gpc is whichever cache lock is held at this point. */
	read_unlock_irqrestore(&gpc->lock, flags);
	srcu_read_unlock(&kvm->srcu, idx);

	if (kick_vcpu) {
		kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
		kvm_vcpu_kick(vcpu);
	}

	return rc;
}

/*
 * Deliver event channel @xe, refreshing the shared_info cache as needed.
 *
 * The fast path is tried first. While it reports -EWOULDBLOCK the
 * shared_info cache is refreshed and delivery retried, until delivery gives
 * another result or the refresh fails. When current->mm differs from
 * kvm->mm the caller must be a thread without an mm, which then borrows
 * kvm->mm for the duration of the loop.
 *
 * Returns the result of kvm_xen_set_evtchn_fast(), the error from
 * kvm_gpc_refresh(), or -EINVAL if called from a thread owning a
 * different mm.
 */
static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
{
	bool mm_borrowed = false;
	int rc;

	rc = kvm_xen_set_evtchn_fast(xe, kvm);
	if (rc != -EWOULDBLOCK)
		return rc;

	if (current->mm != kvm->mm) {
		/*
		 * If not on a thread which already belongs to this KVM,
		 * we'd better be in the irqfd workqueue.
		 */
		if (WARN_ON_ONCE(current->mm))
			return -EINVAL;

		kthread_use_mm(kvm->mm);
		mm_borrowed = true;
	}

	/*
	 * It is theoretically possible for the page to be unmapped
	 * and the MMU notifier to invalidate the shared_info before
	 * we even get to use it. In that case, this looks like an
	 * infinite loop. It was tempting to do it via the userspace
	 * HVA instead... but that just *hides* the fact that it's
	 * an infinite loop, because if a fault occurs and it waits
	 * for the page to come back, it can *still* immediately
	 * fault and have to wait again, repeatedly.
	 *
	 * Conversely, the page could also have been reinstated by
	 * another thread before we even obtain the mutex above, so
	 * check again *first* before remapping it.
	 */
	do {
		struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
		int idx;

		rc = kvm_xen_set_evtchn_fast(xe, kvm);
		if (rc != -EWOULDBLOCK)
			break;

		idx = srcu_read_lock(&kvm->srcu);
		rc = kvm_gpc_refresh(gpc, PAGE_SIZE);
		srcu_read_unlock(&kvm->srcu, idx);
	} while(!rc);

	if (mm_borrowed)
		kthread_unuse_mm(kvm->mm);

	return rc;
}

/*
 * This is the version called from kvm_set_irq() as the .set function.
 * A zero @level is rejected with -EINVAL.
 */
static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
			 int irq_source_id, int level, bool line_status)
{
	if (!level)
		return -EINVAL;

	return kvm_xen_set_evtchn(&e->xen_evtchn, kvm);
}

/*
 * Set up an event channel interrupt from the KVM IRQ routing table.
 * Used for e.g. PIRQ from passed through physical devices.
 *
 * Returns 0 on success, or -EINVAL if the requested priority is not
 * KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL.
 */
int kvm_xen_setup_evtchn(struct kvm *kvm,
			 struct kvm_kernel_irq_routing_entry *e,
			 const struct kvm_irq_routing_entry *ue)

{
	struct kvm_vcpu *vcpu;

	/*
	 * Don't check for the port being within range of max_evtchn_port().
	 * Userspace can configure whatever targets it likes; events just won't
	 * be delivered if/while the target is invalid, just like userspace can
	 * configure MSIs which target non-existent APICs.
	 *
	 * This allows the IRQ routing table to be restored *independently*
	 * of other things like creating vCPUs on Live Migration and Live
	 * Update, without imposing an ordering dependency on userspace. In
	 * this particular case, the problematic ordering would be with
	 * setting the Xen 'long mode' flag, which changes max_evtchn_port()
	 * to allow 4096 instead of 1024 event channels.
	 */

	/* We only support 2 level event channels for now */
	if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
		return -EINVAL;

	/*
	 * Xen gives us interesting mappings from vCPU index to APIC ID,
	 * which means kvm_get_vcpu_by_id() has to iterate over all vCPUs
	 * to find it. Do that once at setup time, instead of every time.
	 * But beware that on live update / live migration, the routing
	 * table might be reinstated before the vCPU threads have finished
	 * recreating their vCPUs.
	 */
	vcpu = kvm_get_vcpu_by_id(kvm, ue->u.xen_evtchn.vcpu);
	if (vcpu)
		e->xen_evtchn.vcpu_idx = vcpu->vcpu_idx;
	else
		e->xen_evtchn.vcpu_idx = -1;

	e->xen_evtchn.port = ue->u.xen_evtchn.port;
	e->xen_evtchn.vcpu_id = ue->u.xen_evtchn.vcpu;
	e->xen_evtchn.priority = ue->u.xen_evtchn.priority;
	e->set = evtchn_set_fn;

	return 0;
}

/*
 * Explicit event sending from userspace with KVM_XEN_HVM_EVTCHN_SEND ioctl.
 *
 * Returns -EINVAL for port zero, an out-of-range port or a priority other
 * than 2-level; 0 when the event was delivered, already pending or masked;
 * otherwise the error from kvm_xen_set_evtchn().
 */
int kvm_xen_hvm_evtchn_send(struct kvm *kvm, struct kvm_irq_routing_xen_evtchn *uxe)
{
	struct kvm_xen_evtchn e;
	int ret;

	if (!uxe->port || uxe->port >= max_evtchn_port(kvm))
		return -EINVAL;

	/* We only support 2 level event channels for now */
	if (uxe->priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
		return -EINVAL;

	e.port = uxe->port;
	e.vcpu_id = uxe->vcpu;
	/* No cached index: the vCPU is looked up by id on delivery. */
	e.vcpu_idx = -1;
	e.priority = uxe->priority;

	ret = kvm_xen_set_evtchn(&e, kvm);

	/*
	 * None of that 'return 1 if it actually got delivered' nonsense.
	 * We don't care if it was masked (-ENOTCONN) either.
	 */
	if (ret > 0 || ret == -ENOTCONN)
		ret = 0;

	return ret;
}

/*
 * Support for *outbound* event channel events via the EVTCHNOP_send hypercall.
 */
struct evtchnfd {
	u32 send_port;
	u32 type;
	union {
		struct kvm_xen_evtchn port;
		struct {
			u32 port; /* zero */
			struct eventfd_ctx *ctx;
		} eventfd;
	} deliver;
};

/*
 * Update target vCPU or priority for a registered sending channel.
 */
static int kvm_xen_eventfd_update(struct kvm *kvm,
				  struct kvm_xen_hvm_attr *data)
{
	u32 port = data->u.evtchn.send_port;
	struct evtchnfd *evtchnfd;
	int ret;

	/* Protect writes to evtchnfd as well as the idr lookup.
*/ mutex_lock(&kvm->arch.xen.xen_lock); evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port); ret = -ENOENT; if (!evtchnfd) goto out_unlock; /* For an UPDATE, nothing may change except the priority/vcpu */ ret = -EINVAL; if (evtchnfd->type != data->u.evtchn.type) goto out_unlock; /* * Port cannot change, and if it's zero that was an eventfd * which can't be changed either. */ if (!evtchnfd->deliver.port.port || evtchnfd->deliver.port.port != data->u.evtchn.deliver.port.port) goto out_unlock; /* We only support 2 level event channels for now */ if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) goto out_unlock; evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority; if (evtchnfd->deliver.port.vcpu_id != data->u.evtchn.deliver.port.vcpu) { evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu; evtchnfd->deliver.port.vcpu_idx = -1; } ret = 0; out_unlock: mutex_unlock(&kvm->arch.xen.xen_lock); return ret; } /* * Configure the target (eventfd or local port delivery) for sending on * a given event channel. 
*/ static int kvm_xen_eventfd_assign(struct kvm *kvm, struct kvm_xen_hvm_attr *data) { u32 port = data->u.evtchn.send_port; struct eventfd_ctx *eventfd = NULL; struct evtchnfd *evtchnfd; int ret = -EINVAL; evtchnfd = kzalloc(sizeof(struct evtchnfd), GFP_KERNEL); if (!evtchnfd) return -ENOMEM; switch(data->u.evtchn.type) { case EVTCHNSTAT_ipi: /* IPI must map back to the same port# */ if (data->u.evtchn.deliver.port.port != data->u.evtchn.send_port) goto out_noeventfd; /* -EINVAL */ break; case EVTCHNSTAT_interdomain: if (data->u.evtchn.deliver.port.port) { if (data->u.evtchn.deliver.port.port >= max_evtchn_port(kvm)) goto out_noeventfd; /* -EINVAL */ } else { eventfd = eventfd_ctx_fdget(data->u.evtchn.deliver.eventfd.fd); if (IS_ERR(eventfd)) { ret = PTR_ERR(eventfd); goto out_noeventfd; } } break; case EVTCHNSTAT_virq: case EVTCHNSTAT_closed: case EVTCHNSTAT_unbound: case EVTCHNSTAT_pirq: default: /* Unknown event channel type */ goto out; /* -EINVAL */ } evtchnfd->send_port = data->u.evtchn.send_port; evtchnfd->type = data->u.evtchn.type; if (eventfd) { evtchnfd->deliver.eventfd.ctx = eventfd; } else { /* We only support 2 level event channels for now */ if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) goto out; /* -EINVAL; */ evtchnfd->deliver.port.port = data->u.evtchn.deliver.port.port; evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu; evtchnfd->deliver.port.vcpu_idx = -1; evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority; } mutex_lock(&kvm->arch.xen.xen_lock); ret = idr_alloc(&kvm->arch.xen.evtchn_ports, evtchnfd, port, port + 1, GFP_KERNEL); mutex_unlock(&kvm->arch.xen.xen_lock); if (ret >= 0) return 0; if (ret == -ENOSPC) ret = -EEXIST; out: if (eventfd) eventfd_ctx_put(eventfd); out_noeventfd: kfree(evtchnfd); return ret; } static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port) { struct evtchnfd *evtchnfd; mutex_lock(&kvm->arch.xen.xen_lock); evtchnfd = 
idr_remove(&kvm->arch.xen.evtchn_ports, port); mutex_unlock(&kvm->arch.xen.xen_lock); if (!evtchnfd) return -ENOENT; synchronize_srcu(&kvm->srcu); if (!evtchnfd->deliver.port.port) eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx); kfree(evtchnfd); return 0; } static int kvm_xen_eventfd_reset(struct kvm *kvm) { struct evtchnfd *evtchnfd, **all_evtchnfds; int i; int n = 0; mutex_lock(&kvm->arch.xen.xen_lock); /* * Because synchronize_srcu() cannot be called inside the * critical section, first collect all the evtchnfd objects * in an array as they are removed from evtchn_ports. */ idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) n++; all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL); if (!all_evtchnfds) { mutex_unlock(&kvm->arch.xen.xen_lock); return -ENOMEM; } n = 0; idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) { all_evtchnfds[n++] = evtchnfd; idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port); } mutex_unlock(&kvm->arch.xen.xen_lock); synchronize_srcu(&kvm->srcu); while (n--) { evtchnfd = all_evtchnfds[n]; if (!evtchnfd->deliver.port.port) eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx); kfree(evtchnfd); } kfree(all_evtchnfds); return 0; } static int kvm_xen_setattr_evtchn(struct kvm *kvm, struct kvm_xen_hvm_attr *data) { u32 port = data->u.evtchn.send_port; if (data->u.evtchn.flags == KVM_XEN_EVTCHN_RESET) return kvm_xen_eventfd_reset(kvm); if (!port || port >= max_evtchn_port(kvm)) return -EINVAL; if (data->u.evtchn.flags == KVM_XEN_EVTCHN_DEASSIGN) return kvm_xen_eventfd_deassign(kvm, port); if (data->u.evtchn.flags == KVM_XEN_EVTCHN_UPDATE) return kvm_xen_eventfd_update(kvm, data); if (data->u.evtchn.flags) return -EINVAL; return kvm_xen_eventfd_assign(kvm, data); } static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r) { struct evtchnfd *evtchnfd; struct evtchn_send send; struct x86_exception e; /* Sanity check: this structure is the same for 32-bit and 64-bit */ 
BUILD_BUG_ON(sizeof(send) != 4); if (kvm_read_guest_virt(vcpu, param, &send, sizeof(send), &e)) { *r = -EFAULT; return true; } /* * evtchnfd is protected by kvm->srcu; the idr lookup instead * is protected by RCU. */ rcu_read_lock(); evtchnfd = idr_find(&vcpu->kvm->arch.xen.evtchn_ports, send.port); rcu_read_unlock(); if (!evtchnfd) return false; if (evtchnfd->deliver.port.port) { int ret = kvm_xen_set_evtchn(&evtchnfd->deliver.port, vcpu->kvm); if (ret < 0 && ret != -ENOTCONN) return false; } else { eventfd_signal(evtchnfd->deliver.eventfd.ctx); } *r = 0; return true; } void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu) { vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx; vcpu->arch.xen.poll_evtchn = 0; timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0); hrtimer_setup(&vcpu->arch.xen.timer, xen_timer_callback, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm); kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm); kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm); kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm); } void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) { if (kvm_xen_timer_enabled(vcpu)) kvm_xen_stop_timer(vcpu); kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache); kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); timer_delete_sync(&vcpu->arch.xen.poll_timer); } void kvm_xen_init_vm(struct kvm *kvm) { mutex_init(&kvm->arch.xen.xen_lock); idr_init(&kvm->arch.xen.evtchn_ports); kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm); } void kvm_xen_destroy_vm(struct kvm *kvm) { struct evtchnfd *evtchnfd; int i; kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache); idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) { if (!evtchnfd->deliver.port.port) eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx); kfree(evtchnfd); } idr_destroy(&kvm->arch.xen.evtchn_ports); if 
(kvm->arch.xen.hvm_config.msr) static_branch_slow_dec_deferred(&kvm_xen_enabled); } |
| 164 21 39 39 22 55 16 23 1 3 32 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 |
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_TTY_PORT_H
#define _LINUX_TTY_PORT_H

#include <linux/kfifo.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/tty_buffer.h>
#include <linux/wait.h>

struct attribute_group;
struct tty_driver;
struct tty_port;
struct tty_struct;

/**
 * struct tty_port_operations -- operations on tty_port
 * @carrier_raised: return true if the carrier is raised on @port
 * @dtr_rts: raise the DTR line if @active is true, otherwise lower DTR
 * @shutdown: called when the last close completes or a hangup finishes IFF the
 *	port was initialized. Do not use to free resources. Turn off the device
 *	only. Called under the port mutex to serialize against @activate and
 *	@shutdown.
 * @activate: called under the port mutex from tty_port_open(), serialized using
 *	the port mutex. Supposed to turn on the device.
 *
 *	FIXME: long term getting the tty argument *out* of this would be good
 *	for consoles.
 *
 * @destruct: called on the final put of a port. Free resources, possibly incl.
 *	the port itself.
 */
struct tty_port_operations {
	bool (*carrier_raised)(struct tty_port *port);
	void (*dtr_rts)(struct tty_port *port, bool active);
	void (*shutdown)(struct tty_port *port);
	int (*activate)(struct tty_port *port, struct tty_struct *tty);
	void (*destruct)(struct tty_port *port);
};

/*
 * Client callbacks reached through tty_port::client_ops. When a port does
 * not set its own, tty_port_default_client_ops is used (see the @client_ops
 * description of struct tty_port below).
 */
struct tty_port_client_operations {
	size_t (*receive_buf)(struct tty_port *port, const u8 *cp, const u8 *fp, size_t count);
	void (*lookahead_buf)(struct tty_port *port, const u8 *cp,
			      const u8 *fp, size_t count);
	void (*write_wakeup)(struct tty_port *port);
};

extern const struct tty_port_client_operations tty_port_default_client_ops;

/**
 * struct tty_port -- port level information
 *
 * @buf: buffer for this port, locked internally
 * @tty: back pointer to &struct tty_struct, valid only if the tty is open. Use
 *	 tty_port_tty_get() to obtain it (and tty_kref_put() to release).
 * @itty: internal back pointer to &struct tty_struct. Avoid this. It should be
 *	  eliminated in the long term.
 * @ops: tty port operations (like activate, shutdown), see &struct
 *	 tty_port_operations
 * @client_ops: tty port client operations (like receive_buf, write_wakeup).
 *		By default, tty_port_default_client_ops is used.
 * @lock: lock protecting @tty
 * @blocked_open: # of procs waiting for open in tty_port_block_til_ready()
 * @count: usage count
 * @open_wait: open waiters queue (waiting e.g. for a carrier)
 * @delta_msr_wait: modem status change queue (waiting for MSR changes)
 * @flags: user TTY flags (%ASYNC_)
 * @iflags: internal flags (%TTY_PORT_)
 * @console: when set, the port is a console
 * @mutex: locking, for open, shutdown and other port operations
 * @buf_mutex: @xmit_buf alloc lock
 * @xmit_buf: optional xmit buffer used by some drivers
 * @xmit_fifo: optional xmit buffer used by some drivers
 * @close_delay: delay in jiffies to wait when closing the port
 * @closing_wait: delay in jiffies for output to be sent before closing
 * @drain_delay: set to zero if no pure time based drain is needed else set to
 *		 size of fifo
 * @kref: references counter. Reaching zero calls @ops->destruct() if non-%NULL
 *	  or frees the port otherwise.
 * @client_data: pointer to private data, for @client_ops
 *
 * Each device keeps its own port level information. &struct tty_port was
 * introduced as a common structure for such information. As every TTY device
 * shall have a backing tty_port structure, every driver can use these members.
 *
 * The tty port has a different lifetime to the tty so must be kept apart.
 * In addition be careful as tty -> port mappings are valid for the life
 * of the tty object but in many cases port -> tty mappings are valid only
 * until a hangup so don't use the wrong path.
 *
 * Tty port shall be initialized by tty_port_init() and shut down either by
 * tty_port_destroy() (refcounting not used), or tty_port_put() (refcounting).
 *
 * There are a lot of helpers around &struct tty_port too. To name the most
 * significant ones: tty_port_open(), tty_port_close() (or
 * tty_port_close_start() and tty_port_close_end() separately if need be), and
 * tty_port_hangup(). These call @ops->activate() and @ops->shutdown() as
 * needed.
 */
struct tty_port {
	struct tty_bufhead	buf;
	struct tty_struct	*tty;
	struct tty_struct	*itty;
	const struct tty_port_operations *ops;
	const struct tty_port_client_operations *client_ops;
	spinlock_t		lock;
	int			blocked_open;
	int			count;
	wait_queue_head_t	open_wait;
	wait_queue_head_t	delta_msr_wait;
	unsigned long		flags;
	unsigned long		iflags;
	unsigned char		console:1;
	struct mutex		mutex;
	struct mutex		buf_mutex;
	u8			*xmit_buf;
	DECLARE_KFIFO_PTR(xmit_fifo, u8);
	unsigned int		close_delay;
	unsigned int		closing_wait;
	int			drain_delay;
	struct kref		kref;
	void			*client_data;
};

/* tty_port::iflags bits -- use atomic bit ops */
#define TTY_PORT_INITIALIZED	0	/* device is initialized */
#define TTY_PORT_SUSPENDED	1	/* device is suspended */
#define TTY_PORT_ACTIVE		2	/* device is open */

/*
 * uart drivers: use the uart_port::status field and the UPSTAT_* defines
 * for s/w-based flow control steering and carrier detection status
 */
#define TTY_PORT_CTS_FLOW	3	/* h/w flow control enabled */
#define TTY_PORT_CHECK_CD	4	/* carrier detect enabled */
#define TTY_PORT_KOPENED	5	/* device exclusively opened by kernel */

void tty_port_init(struct tty_port *port);
void tty_port_link_device(struct tty_port *port, struct tty_driver *driver,
		unsigned index);
struct device *tty_port_register_device(struct tty_port *port,
		struct tty_driver *driver, unsigned index,
		struct device *device);
struct device *tty_port_register_device_attr(struct tty_port *port,
		struct tty_driver *driver, unsigned index,
		struct device *device, void *drvdata,
		const struct attribute_group **attr_grp);
struct device *tty_port_register_device_attr_serdev(struct tty_port *port,
		struct tty_driver *driver, unsigned index,
		struct device *host, struct device *parent, void *drvdata,
		const struct attribute_group **attr_grp);
void tty_port_unregister_device(struct tty_port *port,
		struct tty_driver *driver, unsigned index);
int tty_port_alloc_xmit_buf(struct tty_port *port);
void tty_port_free_xmit_buf(struct tty_port *port);
void tty_port_destroy(struct tty_port *port);
void tty_port_put(struct tty_port *port);

/*
 * Take a reference on @port via kref_get_unless_zero(). Returns @port when
 * the reference was taken, or NULL when @port is NULL or no reference could
 * be taken.
 */
static inline struct tty_port *tty_port_get(struct tty_port *port)
{
	if (port && kref_get_unless_zero(&port->kref))
		return port;
	return NULL;
}

/* If the cts flow control is enabled, return true. */
static inline bool tty_port_cts_enabled(const struct tty_port *port)
{
	return test_bit(TTY_PORT_CTS_FLOW, &port->iflags);
}

/* Set or clear the TTY_PORT_CTS_FLOW bit in @port->iflags according to @val. */
static inline void tty_port_set_cts_flow(struct tty_port *port, bool val)
{
	assign_bit(TTY_PORT_CTS_FLOW, &port->iflags, val);
}

/* Return true if the TTY_PORT_ACTIVE bit is set in @port->iflags. */
static inline bool tty_port_active(const struct tty_port *port)
{
	return test_bit(TTY_PORT_ACTIVE, &port->iflags);
}

/* Set or clear the TTY_PORT_ACTIVE bit in @port->iflags according to @val. */
static inline void tty_port_set_active(struct tty_port *port, bool val)
{
	assign_bit(TTY_PORT_ACTIVE, &port->iflags, val);
}

/* Return true if the TTY_PORT_CHECK_CD bit is set in @port->iflags. */
static inline bool tty_port_check_carrier(const struct tty_port *port)
{
	return test_bit(TTY_PORT_CHECK_CD, &port->iflags);
}

/* Set or clear the TTY_PORT_CHECK_CD bit in @port->iflags according to @val. */
static inline void tty_port_set_check_carrier(struct tty_port *port, bool val)
{
	assign_bit(TTY_PORT_CHECK_CD, &port->iflags, val);
}

/* Return true if the TTY_PORT_SUSPENDED bit is set in @port->iflags. */
static inline bool tty_port_suspended(const struct tty_port *port)
{
	return test_bit(TTY_PORT_SUSPENDED, &port->iflags);
}

/* Set or clear the TTY_PORT_SUSPENDED bit in @port->iflags according to @val. */
static inline void tty_port_set_suspended(struct tty_port *port, bool val)
{
	assign_bit(TTY_PORT_SUSPENDED, &port->iflags, val);
}

/* Return true if the TTY_PORT_INITIALIZED bit is set in @port->iflags. */
static inline bool tty_port_initialized(const struct tty_port *port)
{
	return test_bit(TTY_PORT_INITIALIZED, &port->iflags);
}

/* Set or clear the TTY_PORT_INITIALIZED bit in @port->iflags according to @val. */
static inline void tty_port_set_initialized(struct tty_port *port, bool val)
{
	assign_bit(TTY_PORT_INITIALIZED, &port->iflags, val);
}

/* Return true if the TTY_PORT_KOPENED bit is set in @port->iflags. */
static inline bool tty_port_kopened(const struct tty_port *port)
{
	return test_bit(TTY_PORT_KOPENED, &port->iflags);
}

/* Set or clear the TTY_PORT_KOPENED bit in @port->iflags according to @val. */
static inline void tty_port_set_kopened(struct tty_port *port, bool val)
{
	assign_bit(TTY_PORT_KOPENED, &port->iflags, val);
}

struct tty_struct *tty_port_tty_get(struct tty_port *port);
void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
bool tty_port_carrier_raised(struct tty_port *port);
void tty_port_raise_dtr_rts(struct tty_port *port);
void tty_port_lower_dtr_rts(struct tty_port *port);
void tty_port_hangup(struct tty_port *port);
void __tty_port_tty_hangup(struct tty_port *port, bool check_clocal, bool async);
void tty_port_tty_wakeup(struct tty_port *port);
int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty,
		struct file *filp);
int tty_port_close_start(struct tty_port *port, struct tty_struct *tty,
		struct file *filp);
void tty_port_close_end(struct tty_port *port, struct tty_struct *tty);
void tty_port_close(struct tty_port *port, struct tty_struct *tty,
		struct file *filp);
int tty_port_install(struct tty_port *port, struct tty_driver *driver,
		struct tty_struct *tty);
int tty_port_open(struct tty_port *port, struct tty_struct *tty,
		struct file *filp);

/* Return the sum of @port->count and @port->blocked_open. */
static inline int tty_port_users(struct tty_port *port)
{
	return port->count + port->blocked_open;
}

/**
 * tty_port_tty_hangup - helper to hang up a tty asynchronously
 * @port: tty port
 * @check_clocal: hang only ttys with %CLOCAL unset?
 */
static inline void tty_port_tty_hangup(struct tty_port *port, bool check_clocal)
{
	__tty_port_tty_hangup(port, check_clocal, true);
}

/**
 * tty_port_tty_vhangup - helper to hang up a tty synchronously
 * @port: tty port
 */
static inline void tty_port_tty_vhangup(struct tty_port *port)
{
	__tty_port_tty_hangup(port, false, false);
}

#ifdef CONFIG_TTY
void tty_kref_put(struct tty_struct *tty);

/*
 * Guard class "tty_port_tty": the constructor below obtains the tty with
 * tty_port_tty_get() and the unlock action releases it with tty_kref_put().
 * NOTE(review): scoped_tty() refers to a variable named 'scope' that is not
 * declared in this header; presumably it is provided by the scoped-guard
 * macro at the use site -- confirm against <linux/cleanup.h>.
 */
__DEFINE_CLASS_IS_CONDITIONAL(tty_port_tty, true);
__DEFINE_UNLOCK_GUARD(tty_port_tty, struct tty_struct, tty_kref_put(_T->lock));
static inline class_tty_port_tty_t class_tty_port_tty_constructor(struct tty_port *tport)
{
	class_tty_port_tty_t _t = {
		.lock = tty_port_tty_get(tport),
	};
	return _t;
}

#define scoped_tty()	((struct tty_struct *)(__guard_ptr(tty_port_tty)(&scope)))
#endif

#endif |
| 14 9 45 45 7 35 35 35 35 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 
1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 |
#include <linux/gfp.h>
#include <linux/initrd.h>
#include <linux/ioport.h>
#include <linux/swap.h>
#include <linux/memblock.h>
#include <linux/swapfile.h>
#include <linux/swapops.h>
#include <linux/kmemleak.h>
#include <linux/sched/task.h>
#include <linux/execmem.h>

#include <asm/set_memory.h>
#include <asm/cpu_device_id.h>
#include <asm/e820/api.h>
#include <asm/init.h>
#include <asm/page.h>
#include <asm/page_types.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/proto.h>
#include <asm/dma.h>		/* for MAX_DMA_PFN */
#include <asm/kaslr.h>
#include <asm/hypervisor.h>
#include <asm/cpufeature.h>
#include <asm/pti.h>
#include <asm/text-patching.h>
#include <asm/memtype.h>
#include <asm/paravirt.h>
#include <asm/mmu_context.h>

/*
 * We need to define the tracepoints somewhere, and tlb.c
 * is only compiled when SMP=y.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/tlb.h>

#include "mm_internal.h"

/*
 * Tables translating between page_cache_type_t and pte encoding.
 *
 * The default values are defined statically as minimal supported mode;
 * WC and WT fall back to UC-.  pat_init() updates these values to support
 * more cache modes, WC and WT, when it is safe to do so.  See pat_init()
 * for the details.  Note, __early_ioremap() used during early boot-time
 * takes pgprot_t (pte encoding) and does not use these tables.
 *
 *   Index into __cachemode2pte_tbl[] is the cachemode.
 *
 *   Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte
 *   (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
 */
static uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
	[_PAGE_CACHE_MODE_WB      ]	= 0         | 0        ,
	[_PAGE_CACHE_MODE_WC      ]	= 0         | _PAGE_PCD,
	[_PAGE_CACHE_MODE_UC_MINUS]	= 0         | _PAGE_PCD,
	[_PAGE_CACHE_MODE_UC      ]	= _PAGE_PWT | _PAGE_PCD,
	[_PAGE_CACHE_MODE_WT      ]	= 0         | _PAGE_PCD,
	[_PAGE_CACHE_MODE_WP      ]	= 0         | _PAGE_PCD,
};

/*
 * Translate a page cache mode into pte protection bits using
 * __cachemode2pte_tbl[]. Mode 0 returns 0 without consulting the table.
 */
unsigned long cachemode2protval(enum page_cache_mode pcm)
{
	if (likely(pcm == 0))
		return 0;
	return __cachemode2pte_tbl[pcm];
}
EXPORT_SYMBOL(cachemode2protval);

static uint8_t __pte2cachemode_tbl[8] = {
	[__pte2cm_idx( 0        | 0         | 0        )] = _PAGE_CACHE_MODE_WB,
	[__pte2cm_idx(_PAGE_PWT | 0         | 0        )] = _PAGE_CACHE_MODE_UC_MINUS,
	[__pte2cm_idx( 0        | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC_MINUS,
	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC,
	[__pte2cm_idx( 0        | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
	[__pte2cm_idx(_PAGE_PWT | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
	[__pte2cm_idx(0         | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
};

/*
 * Check that the write-protect PAT entry is set for write-protect.
 * To do this without making assumptions how PAT has been set up (Xen has
 * another layout than the kernel), translate the _PAGE_CACHE_MODE_WP cache
 * mode via the __cachemode2pte_tbl[] into protection bits (those protection
 * bits will select a cache mode of WP or better), and then translate the
 * protection bits back into the cache mode using __pte2cm_idx() and the
 * __pte2cachemode_tbl[] array. This will return the really used cache mode.
 */
bool x86_has_pat_wp(void)
{
	uint16_t prot = __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP];

	return __pte2cachemode_tbl[__pte2cm_idx(prot)] == _PAGE_CACHE_MODE_WP;
}

/*
 * Translate the cache attribute bits of @pgprot (those selected by
 * _PAGE_CACHE_MASK) into a page cache mode using __pte2cachemode_tbl[].
 * If none of those bits are set, mode 0 is returned without a lookup.
 */
enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
{
	unsigned long masked;

	masked = pgprot_val(pgprot) & _PAGE_CACHE_MASK;
	if (likely(masked == 0))
		return 0;
	return __pte2cachemode_tbl[__pte2cm_idx(masked)];
}

/*
 * Early page-table buffer, expressed as page frame numbers: pgt_buf_end is
 * the next pfn to hand out and pgt_buf_top is the limit (see
 * early_alloc_pgt_buf() and alloc_low_pages()).
 */
static unsigned long __initdata pgt_buf_start;
static unsigned long __initdata pgt_buf_end;
static unsigned long __initdata pgt_buf_top;

static unsigned long min_pfn_mapped;

static bool __initdata can_use_brk_pgt = true;

/*
 * Pages returned are already directly mapped.
 *
 * Changing that is likely to break Xen, see commit:
 *
 *    279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve
 *
 * for detailed information.
 */
__ref void *alloc_low_pages(unsigned int num)
{
	unsigned long pfn;
	int i;

	/* Once after_bootmem is set, use the page allocator (zeroed pages). */
	if (after_bootmem) {
		unsigned int order;

		order = get_order((unsigned long)num << PAGE_SHIFT);
		return (void *)__get_free_pages(GFP_ATOMIC | __GFP_ZERO, order);
	}

	/*
	 * If the early buffer cannot satisfy the request (or may not be
	 * used), try memblock within [min_pfn_mapped, max_pfn_mapped) and,
	 * failing that, extend brk when that is permitted. Otherwise carve
	 * the pages out of the early buffer.
	 */
	if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
		unsigned long ret = 0;

		if (min_pfn_mapped < max_pfn_mapped) {
			ret = memblock_phys_alloc_range(
					PAGE_SIZE * num, PAGE_SIZE,
					min_pfn_mapped << PAGE_SHIFT,
					max_pfn_mapped << PAGE_SHIFT);
		}
		if (!ret && can_use_brk_pgt)
			ret = __pa(extend_brk(PAGE_SIZE * num, PAGE_SIZE));

		if (!ret)
			panic("alloc_low_pages: can not alloc memory");

		pfn = ret >> PAGE_SHIFT;
	} else {
		pfn = pgt_buf_end;
		pgt_buf_end += num;
	}

	/* Clear every page through its direct-map address before returning. */
	for (i = 0; i < num; i++) {
		void *adr;

		adr = __va((pfn + i) << PAGE_SHIFT);
		clear_page(adr);
	}

	return __va(pfn << PAGE_SHIFT);
}

/*
 * By default need to be able to allocate page tables below PGD firstly for
 * the 0-ISA_END_ADDRESS range and secondly for the initial PMD_SIZE mapping.
 * With KASLR memory randomization, depending on the machine e820 memory and the
 * PUD alignment, twice that many pages may be needed when KASLR memory
 * randomization is enabled.
 */

#define INIT_PGD_PAGE_TABLES    4

#ifndef CONFIG_RANDOMIZE_MEMORY
#define INIT_PGD_PAGE_COUNT      (2 * INIT_PGD_PAGE_TABLES)
#else
#define INIT_PGD_PAGE_COUNT      (4 * INIT_PGD_PAGE_TABLES)
#endif

#define INIT_PGT_BUF_SIZE	(INIT_PGD_PAGE_COUNT * PAGE_SIZE)
RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);

/*
 * Take INIT_PGT_BUF_SIZE bytes from brk and record them as the early
 * page-table buffer: pgt_buf_start/pgt_buf_end are set to its first pfn and
 * pgt_buf_top to the pfn just past its end.
 */
void  __init early_alloc_pgt_buf(void)
{
	unsigned long tables = INIT_PGT_BUF_SIZE;
	phys_addr_t base;

	base = __pa(extend_brk(tables, PAGE_SIZE));

	pgt_buf_start = base >> PAGE_SHIFT;
	pgt_buf_end = pgt_buf_start;
	pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
}

int after_bootmem;

early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES);

/*
 * One memory range to map: @start and @end are byte addresses (save_mr()
 * stores pfn << PAGE_SHIFT) and @page_size_mask is the mask recorded for
 * that range.
 */
struct map_range {
	unsigned long start;
	unsigned long end;
	unsigned page_size_mask;
};

/* Bits (1 << PG_LEVEL_*) accumulated by probe_page_size_mask(). */
static int page_size_mask;

/*
 * Save some of cr4 feature set we're using (e.g.  Pentium 4MB
 * enable and PPro Global page enable), so that any CPUs that boot
 * up after us can get the correct flags. Invoked on the boot CPU.
 */
static inline void cr4_set_bits_and_update_boot(unsigned long mask)
{
	mmu_cr4_features |= mask;
	if (trampoline_cr4_features)
		*trampoline_cr4_features = mmu_cr4_features;
	cr4_set_bits(mask);
}

/*
 * Work out which large page sizes may be used (recorded in page_size_mask),
 * enable CR4.PSE and CR4.PGE when the boot CPU has them, and derive
 * __supported_pte_mask and __default_kernel_pte_mask accordingly.
 */
static void __init probe_page_size_mask(void)
{
	/*
	 * For pagealloc debugging, identity mapping will use small pages.
	 * This will simplify cpa(), which otherwise needs to support splitting
	 * large pages into small in interrupt context, etc.
	 */
	if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled())
		page_size_mask |= 1 << PG_LEVEL_2M;
	else
		direct_gbpages = 0;

	/* Enable PSE if available */
	if (boot_cpu_has(X86_FEATURE_PSE))
		cr4_set_bits_and_update_boot(X86_CR4_PSE);

	/* Enable PGE if available */
	__supported_pte_mask &= ~_PAGE_GLOBAL;
	if (boot_cpu_has(X86_FEATURE_PGE)) {
		cr4_set_bits_and_update_boot(X86_CR4_PGE);
		__supported_pte_mask |= _PAGE_GLOBAL;
	}

	/* By default everything is supported: */
	__default_kernel_pte_mask = __supported_pte_mask;
	/* Except when with PTI where the kernel is mostly non-Global: */
	if (cpu_feature_enabled(X86_FEATURE_PTI))
		__default_kernel_pte_mask &= ~_PAGE_GLOBAL;

	/* Enable 1 GB linear kernel mappings if available: */
	if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
		printk(KERN_INFO "Using GB pages for direct mapping\n");
		page_size_mask |= 1 << PG_LEVEL_1G;
	} else {
		direct_gbpages = 0;
	}
}

/*
 * INVLPG may not properly flush Global entries on
 * these CPUs.  New microcode fixes the issue.
 *
 * The second argument of each entry is compared against the running
 * microcode revision in setup_pcid(): PCID is disabled when the revision
 * is lower than that value.
 */
static const struct x86_cpu_id invlpg_miss_ids[] = {
	X86_MATCH_VFM(INTEL_ALDERLAKE,	    0x2e),
	X86_MATCH_VFM(INTEL_ALDERLAKE_L,    0x42c),
	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0x11),
	X86_MATCH_VFM(INTEL_RAPTORLAKE,	    0x118),
	X86_MATCH_VFM(INTEL_RAPTORLAKE_P,   0x4117),
	X86_MATCH_VFM(INTEL_RAPTORLAKE_S,   0x2e),
	{}
};

/*
 * Enable CR4.PCIDE on this CPU when PCID can be used, or clear the PCID
 * capability when it cannot. Does nothing unless CONFIG_X86_64 is enabled
 * and the boot CPU has X86_FEATURE_PCID.
 */
static void setup_pcid(void)
{
	const struct x86_cpu_id *invlpg_miss_match;

	if (!IS_ENABLED(CONFIG_X86_64))
		return;

	if (!boot_cpu_has(X86_FEATURE_PCID))
		return;

	invlpg_miss_match = x86_match_cpu(invlpg_miss_ids);

	/* Affected CPU running microcode older than the listed revision. */
	if (invlpg_miss_match &&
	    boot_cpu_data.microcode < invlpg_miss_match->driver_data) {
		pr_info("Incomplete global flushes, disabling PCID");
		setup_clear_cpu_cap(X86_FEATURE_PCID);
		return;
	}

	if (boot_cpu_has(X86_FEATURE_PGE)) {
		/*
		 * This can't be cr4_set_bits_and_update_boot() -- the
		 * trampoline code can't handle CR4.PCIDE and it wouldn't
		 * do any good anyway.  Despite the name,
		 * cr4_set_bits_and_update_boot() doesn't actually cause
		 * the bits in question to remain set all the way through
		 * the secondary boot asm.
		 *
		 * Instead, we brute-force it and set CR4.PCIDE manually in
		 * start_secondary().
		 */
		cr4_set_bits(X86_CR4_PCIDE);
	} else {
		/*
		 * flush_tlb_all(), as currently implemented, won't work if
		 * PCID is on but PGE is not.  Since that combination
		 * doesn't exist on real hardware, there's no reason to try
		 * to fully support it, but it's polite to avoid corrupting
		 * data if we're on an improperly configured VM.
		 */
		setup_clear_cpu_cap(X86_FEATURE_PCID);
	}
}

/* Capacity of the map_range table filled in through save_mr(). */
#ifdef CONFIG_X86_32
#define NR_RANGE_MR 3
#else /* CONFIG_X86_64 */
#define NR_RANGE_MR 5
#endif

/*
 * Record the range [start_pfn, end_pfn) in @mr at index @nr_range,
 * converted to byte addresses, together with @page_size_mask. A range with
 * start_pfn >= end_pfn is ignored.
 *
 * Returns the updated number of used entries; panics if the table already
 * holds NR_RANGE_MR entries. Note that the @page_size_mask parameter
 * shadows the file-scope variable of the same name.
 */
static int __meminit save_mr(struct map_range *mr, int nr_range,
			     unsigned long start_pfn, unsigned long end_pfn,
			     unsigned long page_size_mask)
{
	if (start_pfn < end_pfn) {
		if (nr_range >= NR_RANGE_MR)
			panic("run out of range for init_memory_mapping\n");
		mr[nr_range].start = start_pfn<<PAGE_SHIFT;
		mr[nr_range].end   = end_pfn<<PAGE_SHIFT;
		mr[nr_range].page_size_mask = page_size_mask;
		nr_range++;
	}

	return nr_range;
}

/*
 * adjust the page_size_mask for small range to go with
 * big page size instead small one if nearby are ram too.
*/
static void __ref adjust_range_page_size_mask(struct map_range *mr,
							 int nr_range)
{
	int i;

	for (i = 0; i < nr_range; i++) {
		/* 2M is globally allowed but not yet enabled for this range */
		if ((page_size_mask & (1<<PG_LEVEL_2M)) &&
		    !(mr[i].page_size_mask & (1<<PG_LEVEL_2M))) {
			unsigned long start = round_down(mr[i].start, PMD_SIZE);
			unsigned long end = round_up(mr[i].end, PMD_SIZE);

#ifdef CONFIG_X86_32
			/* Do not widen past max_low_pfn on 32-bit */
			if ((end >> PAGE_SHIFT) > max_low_pfn)
				continue;
#endif

			/* Whole PMD-aligned span is memory: allow 2M pages */
			if (memblock_is_region_memory(start, end - start))
				mr[i].page_size_mask |= 1<<PG_LEVEL_2M;
		}
		/* Same check one level up, for 1G pages */
		if ((page_size_mask & (1<<PG_LEVEL_1G)) &&
		    !(mr[i].page_size_mask & (1<<PG_LEVEL_1G))) {
			unsigned long start = round_down(mr[i].start, PUD_SIZE);
			unsigned long end = round_up(mr[i].end, PUD_SIZE);

			if (memblock_is_region_memory(start, end - start))
				mr[i].page_size_mask |= 1<<PG_LEVEL_1G;
		}
	}
}

/*
 * Return a short name for the largest page size enabled in
 * @mr->page_size_mask: "1G", "2M" (or "4M", see below) or "4k".
 */
static const char *page_size_string(struct map_range *mr)
{
	static const char str_1g[] = "1G";
	static const char str_2m[] = "2M";
	static const char str_4m[] = "4M";
	static const char str_4k[] = "4k";

	if (mr->page_size_mask & (1<<PG_LEVEL_1G))
		return str_1g;
	/*
	 * 32-bit without PAE has a 4M large page size.
	 * PG_LEVEL_2M is misnamed, but we can at least
	 * print out the right size in the string.
	 */
	if (IS_ENABLED(CONFIG_X86_32) &&
	    !IS_ENABLED(CONFIG_X86_PAE) &&
	    mr->page_size_mask & (1<<PG_LEVEL_2M))
		return str_4m;

	if (mr->page_size_mask & (1<<PG_LEVEL_2M))
		return str_2m;

	return str_4k;
}

/*
 * Split the physical range [start, end) into entries of @mr, starting at
 * index @nr_range, so that each entry can be mapped with one set of page
 * sizes: a small-page head up to the first PMD boundary, a 2M-capable
 * range, on 64-bit a 1G-capable range followed by a 2M-capable tail, and
 * finally a small-page tail.  Adjacent entries that end up with the same
 * page_size_mask are merged.  Returns the resulting number of entries.
 */
static int __meminit split_mem_range(struct map_range *mr, int nr_range,
				     unsigned long start,
				     unsigned long end)
{
	unsigned long start_pfn, end_pfn, limit_pfn;
	unsigned long pfn;
	int i;

	limit_pfn = PFN_DOWN(end);

	/* head if not big page alignment ? */
	pfn = start_pfn = PFN_DOWN(start);
#ifdef CONFIG_X86_32
	/*
	 * Don't use a large page for the first 2/4MB of memory
	 * because there are often fixed size MTRRs in there
	 * and overlapping MTRRs into large pages can cause
	 * slowdowns.
	 */
	if (pfn == 0)
		end_pfn = PFN_DOWN(PMD_SIZE);
	else
		end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
#else /* CONFIG_X86_64 */
	end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
#endif
	if (end_pfn > limit_pfn)
		end_pfn = limit_pfn;
	if (start_pfn < end_pfn) {
		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
		pfn = end_pfn;
	}

	/* big page (2M) range */
	start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
#ifdef CONFIG_X86_32
	end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
#else /* CONFIG_X86_64 */
	/* Stop at the next PUD boundary, but never beyond the aligned limit */
	end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE));
	if (end_pfn > round_down(limit_pfn, PFN_DOWN(PMD_SIZE)))
		end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
#endif

	if (start_pfn < end_pfn) {
		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
				page_size_mask & (1<<PG_LEVEL_2M));
		pfn = end_pfn;
	}

#ifdef CONFIG_X86_64
	/* big page (1G) range */
	start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE));
	end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE));
	if (start_pfn < end_pfn) {
		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
				page_size_mask &
				 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
		pfn = end_pfn;
	}

	/* tail is not big page (1G) alignment */
	start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
	end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
	if (start_pfn < end_pfn) {
		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
				page_size_mask & (1<<PG_LEVEL_2M));
		pfn = end_pfn;
	}
#endif

	/* tail is not big page (2M) alignment */
	start_pfn = pfn;
	end_pfn = limit_pfn;
	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);

	if (!after_bootmem)
		adjust_range_page_size_mask(mr, nr_range);

	/* try to merge same page size and continuous */
	for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
		unsigned long old_start;
		if (mr[i].end != mr[i+1].start ||
		    mr[i].page_size_mask != mr[i+1].page_size_mask)
			continue;
		/* move it */
		old_start = mr[i].start;
		memmove(&mr[i], &mr[i+1],
			(nr_range - 1 - i) * sizeof(struct map_range));
		/* i-- so the merged entry is compared with its new neighbour */
		mr[i--].start = old_start;
		nr_range--;
	}

	for (i = 0; i < nr_range; i++)
		pr_debug(" [mem %#010lx-%#010lx] page %s\n",
				mr[i].start, mr[i].end - 1,
				page_size_string(&mr[i]));

	return nr_range;
}

/* pfn ranges that have been direct-mapped so far, and how many are in use */
struct range pfn_mapped[E820_MAX_ENTRIES];
int nr_pfn_mapped;

/*
 * Record the pfn range [start_pfn, end_pfn) in pfn_mapped[] and raise
 * max_pfn_mapped (and max_low_pfn_mapped for the part below 4GB) to match.
 */
static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
{
	nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_MAX_ENTRIES,
					     nr_pfn_mapped, start_pfn, end_pfn);
	nr_pfn_mapped = clean_sort_range(pfn_mapped, E820_MAX_ENTRIES);

	max_pfn_mapped = max(max_pfn_mapped, end_pfn);

	/* 1UL<<(32-PAGE_SHIFT) is the pfn of the 4GB boundary */
	if (start_pfn < (1UL<<(32-PAGE_SHIFT)))
		max_low_pfn_mapped = max(max_low_pfn_mapped,
					 min(end_pfn, 1UL<<(32-PAGE_SHIFT)));
}

/*
 * Return true if [start_pfn, end_pfn) lies completely inside a single
 * entry of pfn_mapped[].
 */
bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
{
	int i;

	for (i = 0; i < nr_pfn_mapped; i++)
		if ((start_pfn >= pfn_mapped[i].start) &&
		    (end_pfn <= pfn_mapped[i].end))
			return true;

	return false;
}

/*
 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
 * This runs before bootmem is initialized and gets pages directly from
 * the physical memory. To access them they are temporarily mapped.
 *
 * Returns the value handed back by the last kernel_physical_mapping_init()
 * call, converted to a pfn (0 if the range produced no sub-ranges).
 */
unsigned long __ref init_memory_mapping(unsigned long start,
					unsigned long end, pgprot_t prot)
{
	struct map_range mr[NR_RANGE_MR];
	unsigned long ret = 0;
	int nr_range, i;

	pr_debug("init_memory_mapping: [mem %#010lx-%#010lx]\n",
	       start, end - 1);

	memset(mr, 0, sizeof(mr));
	nr_range = split_mem_range(mr, 0, start, end);

	for (i = 0; i < nr_range; i++)
		ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
						   mr[i].page_size_mask,
						   prot);

	add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT);

	return ret >> PAGE_SHIFT;
}

/*
 * We need to iterate through the E820 memory map and create direct mappings
 * for only E820_TYPE_RAM and E820_KERN_RESERVED regions. We cannot simply
 * create direct mappings for all pfns from [0 to max_low_pfn) and
 * [4GB to max_pfn) because of possible memory holes in high addresses
 * that cannot be marked as UC by fixed/variable range MTRRs.
 * Depending on the alignment of E820 ranges, this may possibly result
 * in using smaller size (i.e.
4K instead of 2M or 1G) page tables. * * init_mem_mapping() calls init_range_memory_mapping() with big range. * That range would have hole in the middle or ends, and only ram parts * will be mapped in init_range_memory_mapping(). */ static unsigned long __init init_range_memory_mapping( unsigned long r_start, unsigned long r_end) { unsigned long start_pfn, end_pfn; unsigned long mapped_ram_size = 0; int i; for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end); u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end); if (start >= end) continue; /* * if it is overlapping with brk pgt, we need to * alloc pgt buf from memblock instead. */ can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >= min(end, (u64)pgt_buf_top<<PAGE_SHIFT); init_memory_mapping(start, end, PAGE_KERNEL); mapped_ram_size += end - start; can_use_brk_pgt = true; } return mapped_ram_size; } static unsigned long __init get_new_step_size(unsigned long step_size) { /* * Initial mapped size is PMD_SIZE (2M). * We can not set step_size to be PUD_SIZE (1G) yet. * In worse case, when we cross the 1G boundary, and * PG_LEVEL_2M is not set, we will need 1+1+512 pages (2M + 8k) * to map 1G range with PTE. Hence we use one less than the * difference of page table level shifts. * * Don't need to worry about overflow in the top-down case, on 32bit, * when step_size is 0, round_down() returns 0 for start, and that * turns it into 0x100000000ULL. * In the bottom-up case, round_up(x, 0) returns 0 though too, which * needs to be taken into consideration by the code below. */ return step_size << (PMD_SHIFT - PAGE_SHIFT - 1); } /** * memory_map_top_down - Map [map_start, map_end) top down * @map_start: start address of the target memory range * @map_end: end address of the target memory range * * This function will setup direct mapping for memory range * [map_start, map_end) in top-down. 
That said, the page tables
 * will be allocated at the end of the memory, and we map the
 * memory in top-down.
 */
static void __init memory_map_top_down(unsigned long map_start,
				       unsigned long map_end)
{
	unsigned long real_end, last_start;
	unsigned long step_size;
	unsigned long addr;
	unsigned long mapped_ram_size = 0;

	/*
	 * Systems that have many reserved areas near top of the memory,
	 * e.g. QEMU with less than 1G RAM and EFI enabled, or Xen, will
	 * require lots of 4K mappings which may exhaust pgt_buf.
	 * Start with top-most PMD_SIZE range aligned at PMD_SIZE to ensure
	 * there is enough mapped memory that can be allocated from
	 * memblock.
	 */
	addr = memblock_phys_alloc_range(PMD_SIZE, PMD_SIZE, map_start,
					 map_end);
	if (!addr) {
		pr_warn("Failed to release memory for alloc_low_pages()\n");
		real_end = max(map_start, ALIGN_DOWN(map_end, PMD_SIZE));
	} else {
		/* The block was only needed to find its address: give it back. */
		memblock_phys_free(addr, PMD_SIZE);
		real_end = addr + PMD_SIZE;
	}

	/* step_size need to be small so pgt_buf from BRK could cover it */
	step_size = PMD_SIZE;
	max_pfn_mapped = 0; /* will get exact value next */
	min_pfn_mapped = real_end >> PAGE_SHIFT;
	last_start = real_end;

	/*
	 * We start from the top (real_end) and go to the bottom.
	 * Once the BRK page-table buffer is exhausted or unusable,
	 * alloc_low_pages() takes the pages for new page tables from the
	 * already mapped range [min_pfn_mapped, max_pfn_mapped).
	 */
	while (last_start > map_start) {
		unsigned long start;

		if (last_start > step_size) {
			start = round_down(last_start - 1, step_size);
			if (start < map_start)
				start = map_start;
		} else {
			start = map_start;
		}
		mapped_ram_size += init_range_memory_mapping(start,
							last_start);
		last_start = start;
		min_pfn_mapped = last_start >> PAGE_SHIFT;
		/* Enough RAM is mapped to back a larger step: grow it. */
		if (mapped_ram_size >= step_size)
			step_size = get_new_step_size(step_size);
	}

	/* Finally map whatever lies above the starting point. */
	if (real_end < map_end)
		init_range_memory_mapping(real_end, map_end);
}

/**
 * memory_map_bottom_up - Map [map_start, map_end) bottom up
 * @map_start: start address of the target memory range
 * @map_end: end address of the target memory range
 *
 * This function will setup direct mapping for memory range
 * [map_start, map_end) in bottom-up. Since we have limited the
 * bottom-up allocation above the kernel, the page tables will
 * be allocated just above the kernel and we map the memory
 * in [map_start, map_end) in bottom-up.
 */
static void __init memory_map_bottom_up(unsigned long map_start,
					unsigned long map_end)
{
	unsigned long next, start;
	unsigned long mapped_ram_size = 0;
	/* step_size need to be small so pgt_buf from BRK could cover it */
	unsigned long step_size = PMD_SIZE;

	start = map_start;
	min_pfn_mapped = start >> PAGE_SHIFT;

	/*
	 * We start from the bottom (@map_start) and go to the top (@map_end).
	 * Once the BRK page-table buffer is exhausted or unusable,
	 * alloc_low_pages() takes the pages for new page tables from the
	 * already mapped range [min_pfn_mapped, max_pfn_mapped).
	 */
	while (start < map_end) {
		/* step_size may have become 0, see get_new_step_size() */
		if (step_size && map_end - start > step_size) {
			next = round_up(start + 1, step_size);
			if (next > map_end)
				next = map_end;
		} else {
			next = map_end;
		}

		mapped_ram_size += init_range_memory_mapping(start, next);
		start = next;

		/* Enough RAM is mapped to back a larger step: grow it. */
		if (mapped_ram_size >= step_size)
			step_size = get_new_step_size(step_size);
	}
}

/*
 * The real mode trampoline, which is required for bootstrapping CPUs
 * occupies only a small area under the low 1MB.  See reserve_real_mode()
 * for details.
*
 * If KASLR is disabled the first PGD entry of the direct mapping is copied
 * to map the real mode trampoline.
 *
 * If KASLR is enabled, copy only the PUD which covers the low 1MB
 * area. This limits the randomization granularity to 1GB for both 4-level
 * and 5-level paging.
 */
static void __init init_trampoline(void)
{
#ifdef CONFIG_X86_64
	/*
	 * The code below will alias kernel page-tables in the user-range of the
	 * address space, including the Global bit. So global TLB entries will
	 * be created when using the trampoline page-table.
	 */
	if (!kaslr_memory_enabled())
		trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
	else
		init_trampoline_kaslr();
#endif
}

/*
 * Build the kernel direct mapping of physical memory.
 *
 * Probes the usable page sizes and PCID, maps the ISA range, sets up the
 * trampoline, maps the remaining memory in the direction memblock
 * allocates in, and then loads swapper_pg_dir and flushes the TLB.
 */
void __init init_mem_mapping(void)
{
	unsigned long end;

	pti_check_boottime_disable();
	probe_page_size_mask();
	setup_pcid();

	/* Upper bound of the mapping: all memory on 64-bit, lowmem on 32-bit */
#ifdef CONFIG_X86_64
	end = max_pfn << PAGE_SHIFT;
#else
	end = max_low_pfn << PAGE_SHIFT;
#endif

	/* the ISA range is always mapped regardless of memory holes */
	init_memory_mapping(0, ISA_END_ADDRESS, PAGE_KERNEL);

	/* Init the trampoline, possibly with KASLR memory offset */
	init_trampoline();

	/*
	 * If the allocation is in bottom-up direction, we setup direct mapping
	 * in bottom-up, otherwise we setup direct mapping in top-down.
	 */
	if (memblock_bottom_up()) {
		unsigned long kernel_end = __pa_symbol(_end);

		/*
		 * we need two separate calls here. This is because we want to
		 * allocate page tables above the kernel. So we first map
		 * [kernel_end, end) to make memory above the kernel be mapped
		 * as soon as possible. And then use page tables allocated above
		 * the kernel to map [ISA_END_ADDRESS, kernel_end).
		 */
		memory_map_bottom_up(kernel_end, end);
		memory_map_bottom_up(ISA_END_ADDRESS, kernel_end);
	} else {
		memory_map_top_down(ISA_END_ADDRESS, end);
	}

#ifdef CONFIG_X86_64
	if (max_pfn > max_low_pfn) {
		/* can we preserve max_low_pfn ?*/
		max_low_pfn = max_pfn;
	}
#else
	early_ioremap_page_table_range_init();
#endif

	/* Switch to the page tables that were just populated */
	load_cr3(swapper_pg_dir);
	__flush_tlb_all();

	x86_init.hyper.init_mem_mapping();

	early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
}

/*
 * Initialize an mm_struct to be used during poking and a pointer to be used
 * during patching.
 */
void __init poking_init(void)
{
	spinlock_t *ptl;
	pte_t *ptep;

	text_poke_mm = mm_alloc();
	BUG_ON(!text_poke_mm);

	/* Xen PV guests need the PGD to be pinned. */
	paravirt_enter_mmap(text_poke_mm);

	set_notrack_mm(text_poke_mm);

	/*
	 * Randomize the poking address, but make sure that the following page
	 * will be mapped at the same PMD. We need 2 pages, so find space for 3,
	 * and adjust the address if the PMD ends after the first one.
	 */
	text_poke_mm_addr = TASK_UNMAPPED_BASE;
	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
		text_poke_mm_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
			(TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE);

	/* The second page would start a new PMD: move up by one page */
	if (((text_poke_mm_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
		text_poke_mm_addr += PAGE_SIZE;

	/*
	 * We need to trigger the allocation of the page-tables that will be
	 * needed for poking now. Later, poking may be performed in an atomic
	 * section, which might cause allocation to fail.
	 */
	ptep = get_locked_pte(text_poke_mm, text_poke_mm_addr, &ptl);
	BUG_ON(!ptep);
	pte_unmap_unlock(ptep, ptl);
}

/*
 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
 * is valid. The argument is a physical page number.
 *
 * On x86, access has to be given to the first megabyte of RAM because that
 * area traditionally contains BIOS code and data regions used by X, dosemu,
 * and similar apps.
Since they map the entire memory range, the whole range * must be allowed (for mapping), but any areas that would otherwise be * disallowed are flagged as being "zero filled" instead of rejected. * Access has to be given to non-kernel-ram areas as well, these contain the * PCI mmio resources as well as potential bios/acpi data regions. */ int devmem_is_allowed(unsigned long pagenr) { if (region_intersects(PFN_PHYS(pagenr), PAGE_SIZE, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE) != REGION_DISJOINT) { /* * For disallowed memory regions in the low 1MB range, * request that the page be shown as all zeros. */ if (pagenr < 256) return 2; return 0; } /* * This must follow RAM test, since System RAM is considered a * restricted resource under CONFIG_STRICT_DEVMEM. */ if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) { /* Low 1MB bypasses iomem restrictions. */ if (pagenr < 256) return 1; return 0; } return 1; } void free_init_pages(const char *what, unsigned long begin, unsigned long end) { unsigned long begin_aligned, end_aligned; /* Make sure boundaries are page aligned */ begin_aligned = PAGE_ALIGN(begin); end_aligned = end & PAGE_MASK; if (WARN_ON(begin_aligned != begin || end_aligned != end)) { begin = begin_aligned; end = end_aligned; } if (begin >= end) return; /* * If debugging page accesses then do not free this memory but * mark them not present - any buggy init-section access will * create a kernel page fault: */ if (debug_pagealloc_enabled()) { pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n", begin, end - 1); /* * Inform kmemleak about the hole in the memory since the * corresponding pages will be unmapped. */ kmemleak_free_part((void *)begin, end - begin); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); } else { /* * We just marked the kernel text read only above, now that * we are going to free part of that, we need to make that * writeable and non-executable first. 
*/ set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what); } } /* * begin/end can be in the direct map or the "high kernel mapping" * used for the kernel image only. free_init_pages() will do the * right thing for either kind of address. */ void free_kernel_image_pages(const char *what, void *begin, void *end) { unsigned long begin_ul = (unsigned long)begin; unsigned long end_ul = (unsigned long)end; unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT; free_init_pages(what, begin_ul, end_ul); /* * PTI maps some of the kernel into userspace. For performance, * this includes some kernel areas that do not contain secrets. * Those areas might be adjacent to the parts of the kernel image * being freed, which may contain secrets. Remove the "high kernel * image mapping" for these freed areas, ensuring they are not even * potentially vulnerable to Meltdown regardless of the specific * optimizations PTI is currently using. * * The "noalias" prevents unmapping the direct map alias which is * needed to access the freed pages. * * This is only valid for 64bit kernels. 32bit has only one mapping * which can't be treated in this way for obvious reasons. 
*/ if (IS_ENABLED(CONFIG_X86_64) && cpu_feature_enabled(X86_FEATURE_PTI)) set_memory_np_noalias(begin_ul, len_pages); } void __ref free_initmem(void) { e820__reallocate_tables(); mem_encrypt_free_decrypted_mem(); free_kernel_image_pages("unused kernel image (initmem)", &__init_begin, &__init_end); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { /* * end could be not aligned, and We can not align that, * decompressor could be confused by aligned initrd_end * We already reserve the end partial page before in * - i386_start_kernel() * - x86_64_start_kernel() * - relocate_initrd() * So here We can do PAGE_ALIGN() safely to get partial page to be freed */ free_init_pages("initrd", start, PAGE_ALIGN(end)); } #endif void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); #ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = min(MAX_DMA_PFN, max_low_pfn); #endif #ifdef CONFIG_ZONE_DMA32 max_zone_pfns[ZONE_DMA32] = min(MAX_DMA32_PFN, max_low_pfn); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM max_zone_pfns[ZONE_HIGHMEM] = max_pfn; #endif free_area_init(max_zone_pfns); } __visible DEFINE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate) = { .loaded_mm = &init_mm, .next_asid = 1, .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ }; #ifdef CONFIG_ADDRESS_MASKING DEFINE_PER_CPU(u64, tlbstate_untag_mask); EXPORT_PER_CPU_SYMBOL(tlbstate_untag_mask); #endif void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) { /* entry 0 MUST be WB (hardwired to speed up translations) */ BUG_ON(!entry && cache != _PAGE_CACHE_MODE_WB); __cachemode2pte_tbl[cache] = __cm_idx2pte(entry); __pte2cachemode_tbl[entry] = cache; } #ifdef CONFIG_SWAP unsigned long arch_max_swapfile_size(void) { unsigned long pages; pages = generic_max_swapfile_size(); if (boot_cpu_has_bug(X86_BUG_L1TF) && l1tf_mitigation != L1TF_MITIGATION_OFF) 
{ /* Limit the swap file size to MAX_PA/2 for L1TF workaround */ unsigned long long l1tf_limit = l1tf_pfn_limit(); /* * We encode swap offsets also with 3 bits below those for pfn * which makes the usable limit higher. */ #if CONFIG_PGTABLE_LEVELS > 2 l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT; #endif pages = min_t(unsigned long long, l1tf_limit, pages); } return pages; } #endif #ifdef CONFIG_EXECMEM static struct execmem_info execmem_info __ro_after_init; #ifdef CONFIG_ARCH_HAS_EXECMEM_ROX void execmem_fill_trapping_insns(void *ptr, size_t size) { memset(ptr, INT3_INSN_OPCODE, size); } #endif struct execmem_info __init *execmem_arch_setup(void) { unsigned long start, offset = 0; enum execmem_range_flags flags; pgprot_t pgprot; if (kaslr_enabled()) offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE; start = MODULES_VADDR + offset; if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) && cpu_feature_enabled(X86_FEATURE_PSE)) { pgprot = PAGE_KERNEL_ROX; flags = EXECMEM_KASAN_SHADOW | EXECMEM_ROX_CACHE; } else { pgprot = PAGE_KERNEL; flags = EXECMEM_KASAN_SHADOW; } execmem_info = (struct execmem_info){ .ranges = { [EXECMEM_MODULE_TEXT] = { .flags = flags, .start = start, .end = MODULES_END, .pgprot = pgprot, .alignment = MODULE_ALIGN, }, [EXECMEM_KPROBES] = { .flags = flags, .start = start, .end = MODULES_END, .pgprot = PAGE_KERNEL_ROX, .alignment = MODULE_ALIGN, }, [EXECMEM_FTRACE] = { .flags = flags, .start = start, .end = MODULES_END, .pgprot = pgprot, .alignment = MODULE_ALIGN, }, [EXECMEM_BPF] = { .flags = EXECMEM_KASAN_SHADOW, .start = start, .end = MODULES_END, .pgprot = PAGE_KERNEL, .alignment = MODULE_ALIGN, }, [EXECMEM_MODULE_DATA] = { .flags = EXECMEM_KASAN_SHADOW, .start = start, .end = MODULES_END, .pgprot = PAGE_KERNEL, .alignment = MODULE_ALIGN, }, }, }; return &execmem_info; } #endif /* CONFIG_EXECMEM */ |
| 220 221 127 126 23 76 149 133 23 149 23 149 149 128 23 148 132 18 148 10 12 21 144 131 21 145 6 25 145 76 45 45 44 21 21 4 4 4 19 76 77 77 54 54 131 21 145 144 145 131 131 21 21 21 21 21 3 3 3 21 21 20 21 131 129 129 131 145 145 145 3 144 145 145 145 144 21 145 3 77 54 45 52 45 53 54 77 77 77 45 45 45 77 77 63 63 62 37 37 11 4 37 37 37 66 7 7 7 7 6 7 7 67 1 11 11 1 8 10 10 10 10 2 1 1 9 8 9 9 9 1 9 100 90 100 11 64 4 4 4 64 10 5 5 5 10 132 132 133 133 133 133 131 131 129 131 3 133 57 57 57 54 54 54 41 41 109 221 10 10 10 10 9 1 11 9 11 1 9 9 9 9 9 287 3 3 2 2 2 1 1 1 1 2 1 1 1 1 1 5 4 5 4 2 4 3 4 4 12 12 8 8 8 8 8 6 6 6 8 12 8 4 4 4 4 4 1 3 3 3 4 1 1 1 1 1 1 4 4 12 109 109 109 109 109 16 16 16 16 16 16 15 15 16 23 23 23 2 2 2 2 23 21 21 21 12 12 9 12 36 36 19 19 19 19 6 10 4 5 5 5 1 1 1 1 1 1 1 1 1 3 3 3 3 3 3 1 6 2 5 8 2 2 2 2 2 2 2 2 1 64 1 1 267 108 40 25 123 2 224 128 268 2 11 6 4 1 151 1 3 3 3 3 3 3 3 1 197 196 197 196 136 197 131 196 197 17 197 152 3 197 1 193 193 193 65 65 152 152 193 193 193 192 148 146 126 193 193 2 4 5 4 5 3 5 2 2 2 5 5 5 5 5 5 5 2 2 2 2 5 1 4 5 4 5 8 4 8 6 4 6 6 2 1 4 4 4 4 4 4 8 2 9 9 3 3 3 12 12 2 10 3 9 7 1 8 1 7 6 6 5 6 1 6 6 6 21 20 18 18 3 18 17 12 12 7 15 21 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 
723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 
1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 
1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 
1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include 
<linux/slab.h>
#include <net/switchdev.h>

#include "br_private.h"
#include "br_private_tunnel.h"

/* Forward declaration: called by __vlan_add() and __vlan_del() in this file. */
static void nbp_vlan_set_vlan_dev_state(struct net_bridge_port *p, u16 vid);

/* rhashtable obj_cmpfn callback: compares the u16 key carried in @arg with
 * the vid of the VLAN entry @ptr. Returns 0 on a match, non-zero otherwise.
 */
static inline int br_vlan_cmp(struct rhashtable_compare_arg *arg,
			      const void *ptr)
{
	const struct net_bridge_vlan *vle = ptr;
	u16 vid = *(u16 *)arg->key;

	return vle->vid != vid;
}

/* Parameters of the per-group VLAN hash table: entries are linked through
 * their vnode member and keyed by their u16 vid member, with br_vlan_cmp()
 * as the comparison callback.
 */
static const struct rhashtable_params br_vlan_rht_params = {
	.head_offset = offsetof(struct net_bridge_vlan, vnode),
	.key_offset = offsetof(struct net_bridge_vlan, vid),
	.key_len = sizeof(u16),
	.nelem_hint = 3,
	.max_size = VLAN_N_VID,
	.obj_cmpfn = br_vlan_cmp,
	.automatic_shrinking = true,
};

/* Look up @vid in the hash table @tbl using br_vlan_rht_params; returns
 * whatever rhashtable_lookup_fast() returns for that key.
 */
static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid)
{
	return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params);
}

/* Make @v the PVID of @vg. Nothing is done when @v is already the PVID;
 * otherwise a write barrier is issued, the group's pvid state is set from
 * @v->state and then vg->pvid is set to @v->vid.
 */
static void __vlan_add_pvid(struct net_bridge_vlan_group *vg,
			    const struct net_bridge_vlan *v)
{
	if (vg->pvid == v->vid)
		return;

	smp_wmb();
	br_vlan_set_pvid_state(vg, v->state);
	vg->pvid = v->vid;
}

/* Clear the PVID of @vg (set it to 0), but only if it currently is @vid. */
static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
{
	if (vg->pvid != vid)
		return;

	smp_wmb();
	vg->pvid = 0;
}

/* Update the BRIDGE_VLAN_INFO_PVID and BRIDGE_VLAN_INFO_UNTAGGED flags of @v.
 * If @commit is false, return just whether the BRIDGE_VLAN_INFO_PVID and
 * BRIDGE_VLAN_INFO_UNTAGGED bits of @flags would produce any change onto @v.
*/
static bool __vlan_flags_update(struct net_bridge_vlan *v, u16 flags,
				bool commit)
{
	struct net_bridge_vlan_group *vg;
	bool change;

	/* pick the group @v belongs to: the bridge's or the port's */
	if (br_vlan_is_master(v))
		vg = br_vlan_group(v->br);
	else
		vg = nbp_vlan_group(v->port);

	/* check if anything would be changed on commit */
	change = !!(flags & BRIDGE_VLAN_INFO_PVID) == !!(vg->pvid != v->vid) ||
		 ((flags ^ v->flags) & BRIDGE_VLAN_INFO_UNTAGGED);

	if (!commit)
		goto out;

	/* PVID bit set: @v becomes the group's PVID; clear: it stops being it */
	if (flags & BRIDGE_VLAN_INFO_PVID)
		__vlan_add_pvid(vg, v);
	else
		__vlan_delete_pvid(vg, v->vid);

	/* mirror the UNTAGGED bit of @flags into @v->flags */
	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
		v->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
	else
		v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED;

out:
	return change;
}

/* Report whether applying @flags to @v would change its PVID or UNTAGGED
 * setting, without modifying anything.
 */
static bool __vlan_flags_would_change(struct net_bridge_vlan *v, u16 flags)
{
	return __vlan_flags_update(v, flags, false);
}

/* Apply the PVID and UNTAGGED bits of @flags to @v. */
static void __vlan_flags_commit(struct net_bridge_vlan *v, u16 flags)
{
	__vlan_flags_update(v, flags, true);
}

/* Add @v's vid on @dev. When switchdev reports -EOPNOTSUPP the result of
 * vlan_vid_add() with the bridge's vlan_proto is returned instead; otherwise
 * @v is flagged BR_VLFLAG_ADDED_BY_SWITCHDEV and the switchdev result is
 * returned (the flag is set regardless of that result).
 */
static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
			  struct net_bridge_vlan *v, u16 flags,
			  struct netlink_ext_ack *extack)
{
	int err;

	/* Try switchdev op first. In case it is not supported, fallback to
	 * 8021q add.
	 */
	err = br_switchdev_port_vlan_add(dev, v->vid, flags, false, extack);
	if (err == -EOPNOTSUPP)
		return vlan_vid_add(dev, br->vlan_proto, v->vid);
	v->priv_flags |= BR_VLFLAG_ADDED_BY_SWITCHDEV;
	return err;
}

/* Link @v into the vlan_list of its group. The list is walked from the tail
 * and @v is inserted right after the first entry found whose vid is not
 * greater than @v's (or at the head when there is none).
 */
static void __vlan_add_list(struct net_bridge_vlan *v)
{
	struct net_bridge_vlan_group *vg;
	struct list_head *headp, *hpos;
	struct net_bridge_vlan *vent;

	if (br_vlan_is_master(v))
		vg = br_vlan_group(v->br);
	else
		vg = nbp_vlan_group(v->port);

	headp = &vg->vlan_list;
	list_for_each_prev(hpos, headp) {
		vent = list_entry(hpos, struct net_bridge_vlan, vlist);
		if (v->vid >= vent->vid)
			break;
	}
	list_add_rcu(&v->vlist, hpos);
}

/* Unlink @v from its group's vlan_list using list_del_rcu(). */
static void __vlan_del_list(struct net_bridge_vlan *v)
{
	list_del_rcu(&v->vlist);
}

/* Remove @v's vid from @dev. Returns the switchdev result, with -EOPNOTSUPP
 * reported as 0.
 */
static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
			  const struct net_bridge_vlan *v)
{
	int err;

	/* Try switchdev op first.
In case it is not supported, fallback to
	 * 8021q del.
	 */
	err = br_switchdev_port_vlan_del(dev, v->vid);
	/* the 8021q filter entry only exists if switchdev did not add the vid */
	if (!(v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV))
		vlan_vid_del(dev, br->vlan_proto, v->vid);
	return err == -EOPNOTSUPP ? 0 : err;
}

/* Returns a master vlan, if it didn't exist it gets created. In all cases
 * a reference is taken to the master vlan before returning.
 *
 * Returns NULL when br_vlan_add() fails or the entry cannot be found after
 * it was added.
 */
static struct net_bridge_vlan *
br_vlan_get_master(struct net_bridge *br, u16 vid,
		   struct netlink_ext_ack *extack)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge_vlan *masterv;

	vg = br_vlan_group(br);
	masterv = br_vlan_find(vg, vid);
	if (!masterv) {
		bool changed;

		/* missing global ctx, create it now */
		if (br_vlan_add(br, vid, 0, &changed, extack))
			return NULL;
		masterv = br_vlan_find(vg, vid);
		if (WARN_ON(!masterv))
			return NULL;
		/* freshly created entry: the caller holds the first reference */
		refcount_set(&masterv->refcnt, 1);
		return masterv;
	}
	refcount_inc(&masterv->refcnt);

	return masterv;
}

/* RCU callback freeing a master vlan: releases its per-cpu stats and the
 * entry itself. Warns if the entry is not a master vlan.
 */
static void br_master_vlan_rcu_free(struct rcu_head *rcu)
{
	struct net_bridge_vlan *v;

	v = container_of(rcu, struct net_bridge_vlan, rcu);
	WARN_ON(!br_vlan_is_master(v));
	free_percpu(v->stats);
	v->stats = NULL;
	kfree(v);
}

/* Drop one reference on @masterv (ignored if it is not a master vlan). When
 * the last reference goes away the entry is removed from the bridge group's
 * hash table and list, its multicast context is torn down, and it is freed
 * through call_rcu().
 */
static void br_vlan_put_master(struct net_bridge_vlan *masterv)
{
	struct net_bridge_vlan_group *vg;

	if (!br_vlan_is_master(masterv))
		return;

	vg = br_vlan_group(masterv->br);
	if (refcount_dec_and_test(&masterv->refcnt)) {
		rhashtable_remove_fast(&vg->vlan_hash,
				       &masterv->vnode, br_vlan_rht_params);
		__vlan_del_list(masterv);
		br_multicast_toggle_one_vlan(masterv, false);
		br_multicast_ctx_deinit(&masterv->br_mcast_ctx);
		call_rcu(&masterv->rcu, br_master_vlan_rcu_free);
	}
}

/* RCU callback freeing a port vlan. Warns if the entry is a master vlan. */
static void nbp_vlan_rcu_free(struct rcu_head *rcu)
{
	struct net_bridge_vlan *v;

	v = container_of(rcu, struct net_bridge_vlan, rcu);
	WARN_ON(br_vlan_is_master(v));
	/* if we had per-port stats configured then free them here */
	if (v->priv_flags & BR_VLFLAG_PER_PORT_STATS)
		free_percpu(v->stats);
	v->stats = NULL;
	kfree(v);
}

/* Set the initial state of @v: delegated to br_mst_vlan_init_state() when
 * BROPT_MST_ENABLED is set on the bridge, otherwise forwarding with msti 0.
 */
static void br_vlan_init_state(struct net_bridge_vlan *v)
{
struct net_bridge *br;

	/* find the bridge that owns @v, directly or through its port */
	if (br_vlan_is_master(v))
		br = v->br;
	else
		br = v->port->br;

	if (br_opt_get(br, BROPT_MST_ENABLED)) {
		br_mst_vlan_init_state(v);
		return;
	}

	v->state = BR_STATE_FORWARDING;
	v->msti = 0;
}

/* This is the shared VLAN add function which works for both ports and bridge
 * devices. There are four possible calls to this function in terms of the
 * vlan entry type:
 * 1. vlan is being added on a port (no master flags, global entry exists)
 * 2. vlan is being added on a bridge (both master and brentry flags)
 * 3. vlan is being added on a port, but a global entry didn't exist which
 *    is being created right now (master flag set, brentry flag unset), the
 *    global entry is used for global per-vlan features, but not for filtering
 * 4. same as 3 but with both master and brentry flags set so the entry
 *    will be used for filtering in both the port and the bridge
 *
 * Returns 0 on success or a negative errno; on failure the steps already
 * taken are undone through the out_fdb_insert/out_filt labels.
 */
static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
		      struct netlink_ext_ack *extack)
{
	struct net_bridge_vlan *masterv = NULL;
	struct net_bridge_port *p = NULL;
	struct net_bridge_vlan_group *vg;
	struct net_device *dev;
	struct net_bridge *br;
	int err;

	/* @p stays NULL for a bridge (master) entry and is set for a port one */
	if (br_vlan_is_master(v)) {
		br = v->br;
		dev = br->dev;
		vg = br_vlan_group(br);
	} else {
		p = v->port;
		br = p->br;
		dev = p->dev;
		vg = nbp_vlan_group(p);
	}

	if (p) {
		/* Add VLAN to the device filter if it is supported.
		 * This ensures tagged traffic enters the bridge when
		 * promiscuous mode is disabled by br_manage_promisc().
*/
		err = __vlan_vid_add(dev, br, v, flags, extack);
		if (err)
			goto out;

		/* need to work on the master vlan too */
		if (flags & BRIDGE_VLAN_INFO_MASTER) {
			bool changed;

			err = br_vlan_add(br, v->vid,
					  flags | BRIDGE_VLAN_INFO_BRENTRY,
					  &changed, extack);
			if (err)
				goto out_filt;

			if (changed)
				br_vlan_notify(br, NULL, v->vid, 0,
					       RTM_NEWVLAN);
		}

		/* take a reference on the global entry for this vid */
		masterv = br_vlan_get_master(br, v->vid, extack);
		if (!masterv) {
			err = -ENOMEM;
			goto out_filt;
		}
		v->brvlan = masterv;
		/* per-port stats get their own counters, otherwise the
		 * port entry shares the master entry's counters
		 */
		if (br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)) {
			v->stats =
			     netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
			if (!v->stats) {
				err = -ENOMEM;
				goto out_filt;
			}
			v->priv_flags |= BR_VLFLAG_PER_PORT_STATS;
		} else {
			v->stats = masterv->stats;
		}
		br_multicast_port_ctx_init(p, v, &v->port_mcast_ctx);
	} else {
		if (br_vlan_should_use(v)) {
			err = br_switchdev_port_vlan_add(dev, v->vid, flags,
							 false, extack);
			if (err && err != -EOPNOTSUPP)
				goto out;
		}
		br_multicast_ctx_init(br, v, &v->br_mcast_ctx);
		v->priv_flags |= BR_VLFLAG_GLOBAL_MCAST_ENABLED;
	}

	/* Add the dev mac and count the vlan only if it's usable */
	if (br_vlan_should_use(v)) {
		if (!br_opt_get(br, BROPT_FDB_LOCAL_VLAN_0)) {
			err = br_fdb_add_local(br, p, dev->dev_addr, v->vid);
			if (err) {
				br_err(br, "failed insert local address into bridge forwarding table\n");
				goto out_filt;
			}
		}
		vg->num_vlans++;
	}

	/* set the state before publishing */
	br_vlan_init_state(v);

	err = rhashtable_lookup_insert_fast(&vg->vlan_hash, &v->vnode,
					    br_vlan_rht_params);
	if (err)
		goto out_fdb_insert;

	__vlan_add_list(v);
	__vlan_flags_commit(v, flags);
	br_multicast_toggle_one_vlan(v, true);

	if (p)
		nbp_vlan_set_vlan_dev_state(p, v->vid);
out:
	return err;

	/* error unwinding: each label undoes the steps taken before it */
out_fdb_insert:
	if (br_vlan_should_use(v)) {
		br_fdb_find_delete_local(br, p, dev->dev_addr, v->vid);
		vg->num_vlans--;
	}

out_filt:
	if (p) {
		__vlan_vid_del(dev, br, v);
		if (masterv) {
			/* only free stats that are not shared with the master */
			if (v->stats && masterv->stats != v->stats)
				free_percpu(v->stats);
			v->stats = NULL;

			br_vlan_put_master(masterv);
			v->brvlan = NULL;
		}
	} else {
		br_switchdev_port_vlan_del(dev,
v->vid); } goto out; } static int __vlan_del(struct net_bridge_vlan *v) { struct net_bridge_vlan *masterv = v; struct net_bridge_vlan_group *vg; struct net_bridge_port *p = NULL; int err = 0; if (br_vlan_is_master(v)) { vg = br_vlan_group(v->br); } else { p = v->port; vg = nbp_vlan_group(v->port); masterv = v->brvlan; } __vlan_delete_pvid(vg, v->vid); if (p) { err = __vlan_vid_del(p->dev, p->br, v); if (err) goto out; } else { err = br_switchdev_port_vlan_del(v->br->dev, v->vid); if (err && err != -EOPNOTSUPP) goto out; err = 0; } if (br_vlan_should_use(v)) { v->flags &= ~BRIDGE_VLAN_INFO_BRENTRY; vg->num_vlans--; } if (masterv != v) { vlan_tunnel_info_del(vg, v); rhashtable_remove_fast(&vg->vlan_hash, &v->vnode, br_vlan_rht_params); __vlan_del_list(v); nbp_vlan_set_vlan_dev_state(p, v->vid); br_multicast_toggle_one_vlan(v, false); br_multicast_port_ctx_deinit(&v->port_mcast_ctx); call_rcu(&v->rcu, nbp_vlan_rcu_free); } br_vlan_put_master(masterv); out: return err; } static void __vlan_group_free(struct net_bridge_vlan_group *vg) { WARN_ON(!list_empty(&vg->vlan_list)); rhashtable_destroy(&vg->vlan_hash); vlan_tunnel_deinit(vg); kfree(vg); } static void __vlan_flush(const struct net_bridge *br, const struct net_bridge_port *p, struct net_bridge_vlan_group *vg) { struct net_bridge_vlan *vlan, *tmp; u16 v_start = 0, v_end = 0; int err; __vlan_delete_pvid(vg, vg->pvid); list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist) { /* take care of disjoint ranges */ if (!v_start) { v_start = vlan->vid; } else if (vlan->vid - v_end != 1) { /* found range end, notify and start next one */ br_vlan_notify(br, p, v_start, v_end, RTM_DELVLAN); v_start = vlan->vid; } v_end = vlan->vid; err = __vlan_del(vlan); if (err) { br_err(br, "port %u(%s) failed to delete vlan %d: %pe\n", (unsigned int) p->port_no, p->dev->name, vlan->vid, ERR_PTR(err)); } } /* notify about the last/whole vlan range */ if (v_start) br_vlan_notify(br, p, v_start, v_end, RTM_DELVLAN); } struct sk_buff 
*br_handle_vlan(struct net_bridge *br,
			       const struct net_bridge_port *p,
			       struct net_bridge_vlan_group *vg,
			       struct sk_buff *skb)
{
	struct pcpu_sw_netstats *stats;
	struct net_bridge_vlan *v;
	u16 vid;

	/* If this packet was not filtered at input, let it pass */
	if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
		goto out;

	/* At this point, we know that the frame was filtered and contains
	 * a valid vlan id.  If the vlan id has untagged flag set,
	 * send untagged; otherwise, send tagged.
	 */
	br_vlan_get_tag(skb, &vid);
	v = br_vlan_find(vg, vid);
	/* Vlan entry must be configured at this point.  The
	 * only exception is the bridge is set in promisc mode and the
	 * packet is destined for the bridge device.  In this case
	 * pass the packet as is.
	 */
	if (!v || !br_vlan_should_use(v)) {
		if ((br->dev->flags & IFF_PROMISC) && skb->dev == br->dev) {
			goto out;
		} else {
			/* no usable vlan entry: the skb is freed and NULL
			 * is returned to the caller
			 */
			kfree_skb(skb);
			return NULL;
		}
	}
	/* account the frame in the vlan's tx counters when stats are on */
	if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
		stats = this_cpu_ptr(v->stats);
		u64_stats_update_begin(&stats->syncp);
		u64_stats_add(&stats->tx_bytes, skb->len);
		u64_stats_inc(&stats->tx_packets);
		u64_stats_update_end(&stats->syncp);
	}

	/* If the skb will be sent using forwarding offload, the assumption is
	 * that the switchdev will inject the packet into hardware together
	 * with the bridge VLAN, so that it can be forwarded according to that
	 * VLAN. The switchdev should deal with popping the VLAN header in
	 * hardware on each egress port as appropriate. So only strip the VLAN
	 * header if forwarding offload is not being used.
*/
	if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED &&
	    !br_switchdev_frame_uses_tx_fwd_offload(skb))
		__vlan_hwaccel_clear_tag(skb);

	/* a non-zero return from the egress tunnel handler drops the frame */
	if (p && (p->flags & BR_VLAN_TUNNEL) &&
	    br_handle_egress_vlan_tunnel(skb, v)) {
		kfree_skb(skb);
		return NULL;
	}
out:
	return skb;
}

/* Called under RCU */
/* Ingress VLAN filtering for @skb against the group @vg. On acceptance *@vid
 * holds the vlan the frame was classified into and true is returned; *@state
 * and *@vlan may be updated as well. On rejection the skb is freed (or has
 * already been consumed by a failed untag/insert) and false is returned.
 */
static bool __allowed_ingress(const struct net_bridge *br,
			      struct net_bridge_vlan_group *vg,
			      struct sk_buff *skb, u16 *vid,
			      u8 *state,
			      struct net_bridge_vlan **vlan)
{
	struct pcpu_sw_netstats *stats;
	struct net_bridge_vlan *v;
	bool tagged;

	BR_INPUT_SKB_CB(skb)->vlan_filtered = true;
	/* If vlan tx offload is disabled on bridge device and frame was
	 * sent from vlan device on the bridge device, it does not have
	 * HW accelerated vlan tag.
	 */
	if (unlikely(!skb_vlan_tag_present(skb) &&
		     skb->protocol == br->vlan_proto)) {
		skb = skb_vlan_untag(skb);
		if (unlikely(!skb))
			return false;
	}

	if (!br_vlan_get_tag(skb, vid)) {
		/* Tagged frame */
		if (skb->vlan_proto != br->vlan_proto) {
			/* Protocol-mismatch, empty out vlan_tci for new tag */
			skb_push(skb, ETH_HLEN);
			skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto,
							skb_vlan_tag_get(skb));
			if (unlikely(!skb))
				return false;

			skb_pull(skb, ETH_HLEN);
			skb_reset_mac_len(skb);
			/* from here on the frame is handled as untagged */
			*vid = 0;
			tagged = false;
		} else {
			tagged = true;
		}
	} else {
		/* Untagged frame */
		tagged = false;
	}

	if (!*vid) {
		u16 pvid = br_get_pvid(vg);

		/* Frame had a tag with VID 0 or did not have a tag.
		 * See if pvid is set on this port.  That tells us which
		 * vlan untagged or priority-tagged traffic belongs to.
		 */
		if (!pvid)
			goto drop;

		/* PVID is set on this port.  Any untagged or priority-tagged
		 * ingress frame is considered to belong to this vlan.
		 */
		*vid = pvid;
		if (likely(!tagged))
			/* Untagged Frame. */
			__vlan_hwaccel_put_tag(skb, br->vlan_proto, pvid);
		else
			/* Priority-tagged Frame.
			 * At this point, we know that skb->vlan_tci VID
			 * field was 0.
			 * We update only VID field and preserve PCP field.
*/
			skb->vlan_tci |= pvid;

		/* if snooping and stats are disabled we can avoid the lookup */
		if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) &&
		    !br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
			/* the cached pvid state stands in for the vlan's */
			if (*state == BR_STATE_FORWARDING) {
				*state = br_vlan_get_pvid_state(vg);
				if (!br_vlan_state_allowed(*state, true))
					goto drop;
			}
			return true;
		}
	}
	v = br_vlan_find(vg, *vid);
	if (!v || !br_vlan_should_use(v))
		goto drop;

	/* a forwarding caller state is replaced by the vlan's own state */
	if (*state == BR_STATE_FORWARDING) {
		*state = br_vlan_get_state(v);
		if (!br_vlan_state_allowed(*state, true))
			goto drop;
	}

	/* account the frame in the vlan's rx counters when stats are on */
	if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
		stats = this_cpu_ptr(v->stats);
		u64_stats_update_begin(&stats->syncp);
		u64_stats_add(&stats->rx_bytes, skb->len);
		u64_stats_inc(&stats->rx_packets);
		u64_stats_update_end(&stats->syncp);
	}

	*vlan = v;

	return true;

drop:
	kfree_skb(skb);
	return false;
}

/* Ingress check entry point: always resets *@vlan to NULL, accepts every
 * frame (marking it as not vlan-filtered) when BROPT_VLAN_ENABLED is off,
 * and otherwise defers to __allowed_ingress().
 */
bool br_allowed_ingress(const struct net_bridge *br,
			struct net_bridge_vlan_group *vg, struct sk_buff *skb,
			u16 *vid, u8 *state,
			struct net_bridge_vlan **vlan)
{
	/* If VLAN filtering is disabled on the bridge, all packets are
	 * permitted.
	 */
	*vlan = NULL;
	if (!br_opt_get(br, BROPT_VLAN_ENABLED)) {
		BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
		return true;
	}

	return __allowed_ingress(br, vg, skb, vid, state, vlan);
}

/* Called under RCU. */
/* Egress check: a frame that was vlan-filtered on input may leave through
 * @vg only if its vid has a usable entry there whose state allows it.
 */
bool br_allowed_egress(struct net_bridge_vlan_group *vg,
		       const struct sk_buff *skb)
{
	const struct net_bridge_vlan *v;
	u16 vid;

	/* If this packet was not filtered at input, let it pass */
	if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
		return true;

	br_vlan_get_tag(skb, &vid);
	v = br_vlan_find(vg, vid);
	if (v && br_vlan_should_use(v) &&
	    br_vlan_state_allowed(br_vlan_get_state(v), false))
		return true;

	return false;
}

/* Called under RCU */
/* Decide whether the source address of @skb received on port @p should be
 * learned; *@vid is set to the vlan the frame is classified into.
 */
bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge *br = p->br;
	struct net_bridge_vlan *v;

	/* If filtering was disabled at input, let it pass.
*/
	if (!br_opt_get(br, BROPT_VLAN_ENABLED))
		return true;

	/* a port without a vlan group or without any vlan learns nothing */
	vg = nbp_vlan_group_rcu(p);
	if (!vg || !vg->num_vlans)
		return false;

	/* a tag of a foreign protocol is treated like no tag at all */
	if (!br_vlan_get_tag(skb, vid) && skb->vlan_proto != br->vlan_proto)
		*vid = 0;

	if (!*vid) {
		*vid = br_get_pvid(vg);
		if (!*vid ||
		    !br_vlan_state_allowed(br_vlan_get_pvid_state(vg), true))
			return false;

		return true;
	}

	v = br_vlan_find(vg, *vid);
	if (v && br_vlan_state_allowed(br_vlan_get_state(v), true))
		return true;

	return false;
}

/* Handle br_vlan_add() for a vid that already has an entry @vlan in the
 * bridge group @vg: either turn a context-only master entry into a real
 * bridge entry (BRIDGE_VLAN_INFO_BRENTRY requested) or update the flags of
 * an existing bridge entry. *@changed is set to true when something was
 * modified. Returns 0 or a negative errno.
 */
static int br_vlan_add_existing(struct net_bridge *br,
				struct net_bridge_vlan_group *vg,
				struct net_bridge_vlan *vlan,
				u16 flags, bool *changed,
				struct netlink_ext_ack *extack)
{
	bool becomes_brentry = false;
	bool would_change = false;
	int err;

	if (!br_vlan_is_brentry(vlan)) {
		/* Trying to change flags of non-existent bridge vlan */
		if (!(flags & BRIDGE_VLAN_INFO_BRENTRY))
			return -EINVAL;

		becomes_brentry = true;
	} else {
		would_change = __vlan_flags_would_change(vlan, flags);
	}

	/* Master VLANs that aren't brentries weren't notified before,
	 * time to notify them now.
	 */
	if (becomes_brentry || would_change) {
		err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags,
						 would_change, extack);
		if (err && err != -EOPNOTSUPP)
			return err;
	}

	if (becomes_brentry) {
		/* It was only kept for port vlans, now make it real */
		err = br_fdb_add_local(br, NULL, br->dev->dev_addr, vlan->vid);
		if (err) {
			br_err(br, "failed to insert local address into bridge forwarding table\n");
			goto err_fdb_insert;
		}

		/* the bridge entry itself now holds a reference */
		refcount_inc(&vlan->refcnt);
		vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
		vg->num_vlans++;
		*changed = true;
		br_multicast_toggle_one_vlan(vlan, true);
	}

	__vlan_flags_commit(vlan, flags);
	if (would_change)
		*changed = true;

	return 0;

err_fdb_insert:
	/* undo the switchdev add done above */
	br_switchdev_port_vlan_del(br->dev, vlan->vid);
	return err;
}

/* Must be protected by RTNL.
 * Must be called with vid in range from 1 to 4094 inclusive.
* changed must be true only if the vlan was created or updated
 */
/* Add @vid to the bridge itself. An existing entry is handed to
 * br_vlan_add_existing(); otherwise a new master entry with its own per-cpu
 * stats is allocated and inserted through __vlan_add(). Returns 0 or a
 * negative errno; the new entry is freed again on failure.
 */
int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed,
		struct netlink_ext_ack *extack)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge_vlan *vlan;
	int ret;

	ASSERT_RTNL();

	*changed = false;
	vg = br_vlan_group(br);
	vlan = br_vlan_find(vg, vid);
	if (vlan)
		return br_vlan_add_existing(br, vg, vlan, flags, changed,
					    extack);

	vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
	if (!vlan)
		return -ENOMEM;

	vlan->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!vlan->stats) {
		kfree(vlan);
		return -ENOMEM;
	}
	vlan->vid = vid;
	/* the PVID bit is not kept in the entry's flags; the PVID is applied
	 * from @flags inside __vlan_add() instead
	 */
	vlan->flags = flags | BRIDGE_VLAN_INFO_MASTER;
	vlan->flags &= ~BRIDGE_VLAN_INFO_PVID;
	vlan->br = br;
	/* a real bridge entry starts out holding one reference */
	if (flags & BRIDGE_VLAN_INFO_BRENTRY)
		refcount_set(&vlan->refcnt, 1);
	ret = __vlan_add(vlan, flags, extack);
	if (ret) {
		free_percpu(vlan->stats);
		kfree(vlan);
	} else {
		*changed = true;
	}

	return ret;
}

/* Must be protected by RTNL.
 * Must be called with vid in range from 1 to 4094 inclusive.
 */
/* Delete @vid from the bridge itself. Returns -ENOENT when there is no
 * bridge entry for @vid, otherwise the result of __vlan_del() after the
 * related fdb entries and tunnel info have been removed.
 */
int br_vlan_delete(struct net_bridge *br, u16 vid)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge_vlan *v;

	ASSERT_RTNL();

	vg = br_vlan_group(br);
	v = br_vlan_find(vg, vid);
	if (!v || !br_vlan_is_brentry(v))
		return -ENOENT;

	br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid);
	br_fdb_delete_by_port(br, NULL, vid, 0);

	vlan_tunnel_info_del(vg, v);

	return __vlan_del(v);
}

/* Remove all of the bridge's own vlans and free its vlan group. The group
 * pointer is cleared and synchronize_net() is called before the group is
 * freed.
 */
void br_vlan_flush(struct net_bridge *br)
{
	struct net_bridge_vlan_group *vg;

	ASSERT_RTNL();

	vg = br_vlan_group(br);
	__vlan_flush(br, NULL, vg);
	RCU_INIT_POINTER(br->vlgrp, NULL);
	synchronize_net();
	__vlan_group_free(vg);
}

/* Find the entry for @vid in @vg; returns NULL when @vg is NULL, otherwise
 * the result of the hash table lookup.
 */
struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid)
{
	if (!vg)
		return NULL;

	return br_vlan_lookup(&vg->vlan_hash, vid);
}

/* Must be protected by RTNL.
*/
/* Recompute the last byte of the bridge group address from the vlan
 * settings, under br->lock. Nothing is done when BROPT_GROUP_ADDR_SET is
 * set on the bridge.
 */
static void recalculate_group_addr(struct net_bridge *br)
{
	if (br_opt_get(br, BROPT_GROUP_ADDR_SET))
		return;

	spin_lock_bh(&br->lock);
	if (!br_opt_get(br, BROPT_VLAN_ENABLED) ||
	    br->vlan_proto == htons(ETH_P_8021Q)) {
		/* Bridge Group Address */
		br->group_addr[5] = 0x00;
	} else { /* vlan_enabled && ETH_P_8021AD */
		/* Provider Bridge Group Address */
		br->group_addr[5] = 0x08;
	}
	spin_unlock_bh(&br->lock);
}

/* Must be protected by RTNL. */
/* Recompute br->group_fwd_mask_required from the vlan settings; in the
 * 802.1ad case the bit indexed by the last group address byte is cleared.
 */
void br_recalculate_fwd_mask(struct net_bridge *br)
{
	if (!br_opt_get(br, BROPT_VLAN_ENABLED) ||
	    br->vlan_proto == htons(ETH_P_8021Q))
		br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;
	else /* vlan_enabled && ETH_P_8021AD */
		br->group_fwd_mask_required = BR_GROUPFWD_8021AD &
					      ~(1u << br->group_addr[5]);
}

/* Turn vlan filtering on (@val non-zero) or off (@val zero). Returns 0 when
 * nothing needs to change or on success, or the switchdev error, in which
 * case the option is toggled back.
 */
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val,
			  struct netlink_ext_ack *extack)
{
	struct switchdev_attr attr = {
		.orig_dev = br->dev,
		.id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
		.flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
		.u.vlan_filtering = val,
	};
	int err;

	if (br_opt_get(br, BROPT_VLAN_ENABLED) == !!val)
		return 0;

	/* the option is flipped before switchdev is told about it */
	br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val);

	err = switchdev_port_attr_set(br->dev, &attr, extack);
	if (err && err != -EOPNOTSUPP) {
		br_opt_toggle(br, BROPT_VLAN_ENABLED, !val);
		return err;
	}

	br_manage_promisc(br);
	recalculate_group_addr(br);
	br_recalculate_fwd_mask(br);
	if (!val && br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
		br_info(br, "vlan filtering disabled, automatically disabling multicast vlan snooping\n");
		br_multicast_toggle_vlan_snooping(br, false, NULL);
	}

	return 0;
}

/* Report whether BROPT_VLAN_ENABLED is set on the bridge whose private data
 * hangs off @dev.
 */
bool br_vlan_enabled(const struct net_device *dev)
{
	struct net_bridge *br = netdev_priv(dev);

	return br_opt_get(br, BROPT_VLAN_ENABLED);
}
EXPORT_SYMBOL_GPL(br_vlan_enabled);

/* Store the bridge's vlan protocol, converted with ntohs(), in *@p_proto.
 * Always returns 0.
 */
int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto)
{
	struct net_bridge *br = netdev_priv(dev);

	*p_proto = ntohs(br->vlan_proto);

	return 0;
}
EXPORT_SYMBOL_GPL(br_vlan_get_proto);

int __br_vlan_set_proto(struct net_bridge *br,
__be16 proto,
			struct netlink_ext_ack *extack)
{
	struct switchdev_attr attr = {
		.orig_dev = br->dev,
		.id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL,
		.flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
		.u.vlan_protocol = ntohs(proto),
	};
	int err = 0;
	struct net_bridge_port *p;
	struct net_bridge_vlan *vlan;
	struct net_bridge_vlan_group *vg;
	__be16 oldproto = br->vlan_proto;

	if (br->vlan_proto == proto)
		return 0;

	err = switchdev_port_attr_set(br->dev, &attr, extack);
	if (err && err != -EOPNOTSUPP)
		return err;

	/* Add VLANs for the new proto to the device filter. */
	list_for_each_entry(p, &br->port_list, list) {
		vg = nbp_vlan_group(p);
		list_for_each_entry(vlan, &vg->vlan_list, vlist) {
			/* vids installed through switchdev are skipped */
			if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
				continue;
			err = vlan_vid_add(p->dev, proto, vlan->vid);
			if (err)
				goto err_filt;
		}
	}

	br->vlan_proto = proto;

	recalculate_group_addr(br);
	br_recalculate_fwd_mask(br);

	/* Delete VLANs for the old proto from the device filter. */
	list_for_each_entry(p, &br->port_list, list) {
		vg = nbp_vlan_group(p);
		list_for_each_entry(vlan, &vg->vlan_list, vlist) {
			if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
				continue;
			vlan_vid_del(p->dev, oldproto, vlan->vid);
		}
	}

	return 0;

err_filt:
	/* restore the old protocol in switchdev, then remove the new-proto
	 * vids added so far: first the ones before the failing vlan on the
	 * current port, then all of them on the ports handled earlier
	 */
	attr.u.vlan_protocol = ntohs(oldproto);
	switchdev_port_attr_set(br->dev, &attr, NULL);

	list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) {
		if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
			continue;
		vlan_vid_del(p->dev, proto, vlan->vid);
	}

	list_for_each_entry_continue_reverse(p, &br->port_list, list) {
		vg = nbp_vlan_group(p);
		list_for_each_entry(vlan, &vg->vlan_list, vlist) {
			if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
				continue;
			vlan_vid_del(p->dev, proto, vlan->vid);
		}
	}

	return err;
}

/* Validate @val as a vlan ethertype (host byte order) and switch the bridge
 * to it. Returns -EPROTONOSUPPORT when eth_type_vlan() rejects it, otherwise
 * the result of __br_vlan_set_proto().
 */
int br_vlan_set_proto(struct net_bridge *br, unsigned long val,
		      struct netlink_ext_ack *extack)
{
	if (!eth_type_vlan(htons(val)))
		return -EPROTONOSUPPORT;

	return __br_vlan_set_proto(br, htons(val), extack);
}

/* Enable (@val == 1) or disable (@val == 0) BROPT_VLAN_STATS_ENABLED; any
 * other value yields -EINVAL.
 */
int br_vlan_set_stats(struct net_bridge *br, unsigned long val)
{
switch (val) {
	case 0:
	case 1:
		br_opt_toggle(br, BROPT_VLAN_STATS_ENABLED, !!val);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/* Enable (@val == 1) or disable (@val == 0) BROPT_VLAN_STATS_PER_PORT.
 * Returns -EBUSY when any port has vlans counted in its group and -EINVAL
 * for any other @val.
 */
int br_vlan_set_stats_per_port(struct net_bridge *br, unsigned long val)
{
	struct net_bridge_port *p;

	/* allow to change the option if there are no port vlans configured */
	list_for_each_entry(p, &br->port_list, list) {
		struct net_bridge_vlan_group *vg = nbp_vlan_group(p);

		if (vg->num_vlans)
			return -EBUSY;
	}

	switch (val) {
	case 0:
	case 1:
		br_opt_toggle(br, BROPT_VLAN_STATS_PER_PORT, !!val);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/* Report whether @vid is the PVID of @vg and its entry is both usable and
 * flagged untagged.
 */
static bool vlan_default_pvid(struct net_bridge_vlan_group *vg, u16 vid)
{
	struct net_bridge_vlan *v;

	if (vid != vg->pvid)
		return false;

	v = br_vlan_lookup(&vg->vlan_hash, vid);
	if (v && br_vlan_should_use(v) &&
	    (v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
		return true;

	return false;
}

/* Remove the current default_pvid vlan from the bridge and from every port
 * where vlan_default_pvid() still holds for it, sending RTM_DELVLAN for each
 * successful deletion, then set br->default_pvid to 0.
 */
static void br_vlan_disable_default_pvid(struct net_bridge *br)
{
	struct net_bridge_port *p;
	u16 pvid = br->default_pvid;

	/* Disable default_pvid on all ports where it is still
	 * configured.
	 */
	if (vlan_default_pvid(br_vlan_group(br), pvid)) {
		if (!br_vlan_delete(br, pvid))
			br_vlan_notify(br, NULL, pvid, 0, RTM_DELVLAN);
	}

	list_for_each_entry(p, &br->port_list, list) {
		if (vlan_default_pvid(nbp_vlan_group(p), pvid) &&
		    !nbp_vlan_delete(p, pvid))
			br_vlan_notify(br, p, pvid, 0, RTM_DELVLAN);
	}

	br->default_pvid = 0;
}

/* Change the bridge's default PVID to @pvid; a @pvid of 0 disables the
 * default PVID altogether. Returns 0 or a negative errno.
 */
int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid,
			       struct netlink_ext_ack *extack)
{
	const struct net_bridge_vlan *pvent;
	struct net_bridge_vlan_group *vg;
	struct net_bridge_port *p;
	unsigned long *changed;
	bool vlchange;
	u16 old_pvid;
	int err = 0;

	if (!pvid) {
		br_vlan_disable_default_pvid(br);
		return 0;
	}

	/* bitmap of what was switched over: bit 0 is used for the bridge
	 * itself, bit port_no for each port
	 */
	changed = bitmap_zalloc(BR_MAX_PORTS, GFP_KERNEL);
	if (!changed)
		return -ENOMEM;

	old_pvid = br->default_pvid;

	/* Update default_pvid config only if we do not conflict with
	 * user configuration.
*/
	vg = br_vlan_group(br);
	pvent = br_vlan_find(vg, pvid);
	if ((!old_pvid || vlan_default_pvid(vg, old_pvid)) &&
	    (!pvent || !br_vlan_should_use(pvent))) {
		err = br_vlan_add(br, pvid,
				  BRIDGE_VLAN_INFO_PVID |
				  BRIDGE_VLAN_INFO_UNTAGGED |
				  BRIDGE_VLAN_INFO_BRENTRY,
				  &vlchange, extack);
		if (err)
			goto out;

		/* NOTE(review): RTM_DELVLAN is sent here when br_vlan_delete()
		 * returns non-zero, whereas br_vlan_disable_default_pvid()
		 * notifies when it returns zero - confirm this is intended.
		 */
		if (br_vlan_delete(br, old_pvid))
			br_vlan_notify(br, NULL, old_pvid, 0, RTM_DELVLAN);
		br_vlan_notify(br, NULL, pvid, 0, RTM_NEWVLAN);
		__set_bit(0, changed);
	}

	list_for_each_entry(p, &br->port_list, list) {
		/* Update default_pvid config only if we do not conflict with
		 * user configuration.
		 */
		vg = nbp_vlan_group(p);
		if ((old_pvid &&
		     !vlan_default_pvid(vg, old_pvid)) ||
		    br_vlan_find(vg, pvid))
			continue;

		err = nbp_vlan_add(p, pvid,
				   BRIDGE_VLAN_INFO_PVID |
				   BRIDGE_VLAN_INFO_UNTAGGED,
				   &vlchange, extack);
		if (err)
			goto err_port;
		/* NOTE(review): same polarity as the bridge case above */
		if (nbp_vlan_delete(p, old_pvid))
			br_vlan_notify(br, p, old_pvid, 0, RTM_DELVLAN);
		br_vlan_notify(p->br, p, pvid, 0, RTM_NEWVLAN);
		__set_bit(p->port_no, changed);
	}

	br->default_pvid = pvid;

out:
	bitmap_free(changed);
	return err;

err_port:
	/* roll back the ports already switched over, newest first */
	list_for_each_entry_continue_reverse(p, &br->port_list, list) {
		if (!test_bit(p->port_no, changed))
			continue;

		if (old_pvid) {
			nbp_vlan_add(p, old_pvid,
				     BRIDGE_VLAN_INFO_PVID |
				     BRIDGE_VLAN_INFO_UNTAGGED,
				     &vlchange, NULL);
			br_vlan_notify(p->br, p, old_pvid, 0, RTM_NEWVLAN);
		}
		nbp_vlan_delete(p, pvid);
		br_vlan_notify(br, p, pvid, 0, RTM_DELVLAN);
	}

	/* then roll back the bridge itself if it was switched over */
	if (test_bit(0, changed)) {
		if (old_pvid) {
			br_vlan_add(br, old_pvid,
				    BRIDGE_VLAN_INFO_PVID |
				    BRIDGE_VLAN_INFO_UNTAGGED |
				    BRIDGE_VLAN_INFO_BRENTRY,
				    &vlchange, NULL);
			br_vlan_notify(br, NULL, old_pvid, 0, RTM_NEWVLAN);
		}
		br_vlan_delete(br, pvid);
		br_vlan_notify(br, NULL, pvid, 0, RTM_DELVLAN);
	}
	goto out;
}

/* Validate @val and change the bridge's default PVID. Returns -EINVAL for
 * @val >= VLAN_VID_MASK, 0 when the value is unchanged, -EPERM while vlan
 * filtering is enabled, otherwise the result of
 * __br_vlan_set_default_pvid().
 */
int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val,
			     struct netlink_ext_ack *extack)
{
	u16 pvid = val;
	int err = 0;

	if (val >= VLAN_VID_MASK)
		return -EINVAL;

	if (pvid == br->default_pvid)
		goto out;

	/* Only allow default pvid change when filtering is disabled
*/
	if (br_opt_get(br, BROPT_VLAN_ENABLED)) {
		pr_info_once("Please disable vlan filtering to change default_pvid\n");
		err = -EPERM;
		goto out;
	}
	err = __br_vlan_set_default_pvid(br, pvid, extack);
out:
	return err;
}

/* Allocate and publish the bridge's vlan group, and set the bridge defaults:
 * vlan_proto 802.1Q and default_pvid 1. Returns 0 or a negative errno; on
 * failure everything allocated here is released again.
 */
int br_vlan_init(struct net_bridge *br)
{
	struct net_bridge_vlan_group *vg;
	int ret = -ENOMEM;

	vg = kzalloc(sizeof(*vg), GFP_KERNEL);
	if (!vg)
		goto out;
	ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params);
	if (ret)
		goto err_rhtbl;
	ret = vlan_tunnel_init(vg);
	if (ret)
		goto err_tunnel_init;
	INIT_LIST_HEAD(&vg->vlan_list);
	br->vlan_proto = htons(ETH_P_8021Q);
	br->default_pvid = 1;
	rcu_assign_pointer(br->vlgrp, vg);

out:
	return ret;

err_tunnel_init:
	rhashtable_destroy(&vg->vlan_hash);
err_rhtbl:
	kfree(vg);

	goto out;
}

/* Set up vlan state for the new port @p: push the bridge's vlan filtering
 * setting to the port device through switchdev, allocate and publish the
 * port's vlan group, and add the bridge's default PVID (if any) to the port.
 * Returns 0 or a negative errno; on failure the group is unpublished and
 * freed.
 */
int nbp_vlan_init(struct net_bridge_port *p, struct netlink_ext_ack *extack)
{
	struct switchdev_attr attr = {
		.orig_dev = p->br->dev,
		.id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
		.flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
		.u.vlan_filtering = br_opt_get(p->br, BROPT_VLAN_ENABLED),
	};
	struct net_bridge_vlan_group *vg;
	int ret = -ENOMEM;

	vg = kzalloc(sizeof(struct net_bridge_vlan_group), GFP_KERNEL);
	if (!vg)
		goto out;

	ret = switchdev_port_attr_set(p->dev, &attr, extack);
	if (ret && ret != -EOPNOTSUPP)
		goto err_vlan_enabled;

	ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params);
	if (ret)
		goto err_rhtbl;
	ret = vlan_tunnel_init(vg);
	if (ret)
		goto err_tunnel_init;
	INIT_LIST_HEAD(&vg->vlan_list);
	rcu_assign_pointer(p->vlgrp, vg);
	if (p->br->default_pvid) {
		bool changed;

		ret = nbp_vlan_add(p, p->br->default_pvid,
				   BRIDGE_VLAN_INFO_PVID |
				   BRIDGE_VLAN_INFO_UNTAGGED,
				   &changed, extack);
		if (ret)
			goto err_vlan_add;
		br_vlan_notify(p->br, p, p->br->default_pvid, 0, RTM_NEWVLAN);
	}
out:
	return ret;

err_vlan_add:
	/* the group was already published: unpublish it and call
	 * synchronize_rcu() before tearing it down
	 */
	RCU_INIT_POINTER(p->vlgrp, NULL);
	synchronize_rcu();
	vlan_tunnel_deinit(vg);
err_tunnel_init:
	rhashtable_destroy(&vg->vlan_hash);
err_rhtbl:
err_vlan_enabled:
	kfree(vg);

	goto out;
}

/* Must be protected by RTNL.
* Must be called with vid in range from 1 to 4094 inclusive.
 * changed must be true only if the vlan was created or updated
 *
 * If the vlan already exists on the port only its flags are updated (and
 * offered to switchdev when they would actually change); otherwise a new
 * vlan entry is allocated and inserted through __vlan_add().
 * Returns 0 on success or a negative errno.
 */
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
		 bool *changed, struct netlink_ext_ack *extack)
{
	struct net_bridge_vlan *vlan;
	int ret;

	ASSERT_RTNL();

	*changed = false;
	vlan = br_vlan_find(nbp_vlan_group(port), vid);
	if (vlan) {
		bool would_change = __vlan_flags_would_change(vlan, flags);

		if (would_change) {
			/* Pass the flags to the hardware bridge */
			ret = br_switchdev_port_vlan_add(port->dev, vid, flags,
							 true, extack);
			if (ret && ret != -EOPNOTSUPP)
				return ret;
		}

		__vlan_flags_commit(vlan, flags);
		*changed = would_change;

		return 0;
	}

	vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
	if (!vlan)
		return -ENOMEM;

	vlan->vid = vid;
	vlan->port = port;
	ret = __vlan_add(vlan, flags, extack);
	if (ret)
		kfree(vlan);
	else
		*changed = true;

	return ret;
}

/* Must be protected by RTNL.
 * Must be called with vid in range from 1 to 4094 inclusive.
 *
 * Removes @vid from @port: the port's local fdb entry for its own address
 * and the other fdb entries of the port in that vlan are deleted first.
 * Returns -ENOENT if the vlan is not configured on the port, otherwise the
 * result of __vlan_del().
 */
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
{
	struct net_bridge_vlan *v;

	ASSERT_RTNL();

	v = br_vlan_find(nbp_vlan_group(port), vid);
	if (!v)
		return -ENOENT;
	br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid);
	br_fdb_delete_by_port(port->br, port, vid, 0);

	return __vlan_del(v);
}

/* Flush every vlan of @port and free its vlan group. The group pointer is
 * cleared and synchronize_net() is called before the group is freed.
 * Asserts that RTNL is held.
 */
void nbp_vlan_flush(struct net_bridge_port *port)
{
	struct net_bridge_vlan_group *vg;

	ASSERT_RTNL();

	vg = nbp_vlan_group(port);
	__vlan_flush(port->br, port, vg);
	RCU_INIT_POINTER(port->vlgrp, NULL);
	synchronize_net();
	__vlan_group_free(vg);
}

/* Sum the per-cpu rx/tx packet and byte counters of vlan @v into @stats.
 * @stats is zeroed first; each cpu's counters are read under the
 * u64_stats fetch_begin/fetch_retry loop so a consistent snapshot is added.
 */
void br_vlan_get_stats(const struct net_bridge_vlan *v,
		       struct pcpu_sw_netstats *stats)
{
	int i;

	memset(stats, 0, sizeof(*stats));
	for_each_possible_cpu(i) {
		u64 rxpackets, rxbytes, txpackets, txbytes;
		struct pcpu_sw_netstats *cpu_stats;
		unsigned int start;

		cpu_stats = per_cpu_ptr(v->stats, i);
		do {
			start = u64_stats_fetch_begin(&cpu_stats->syncp);
			rxpackets = u64_stats_read(&cpu_stats->rx_packets);
			rxbytes = u64_stats_read(&cpu_stats->rx_bytes);
			txbytes = u64_stats_read(&cpu_stats->tx_bytes);
			txpackets = u64_stats_read(&cpu_stats->tx_packets);
		} while (u64_stats_fetch_retry(&cpu_stats->syncp, start));

		u64_stats_add(&stats->rx_packets, rxpackets);
		u64_stats_add(&stats->rx_bytes, rxbytes);
		u64_stats_add(&stats->tx_bytes, txbytes);
		u64_stats_add(&stats->tx_packets, txpackets);
	}
}

/* Report the PVID of @dev, which must be a bridge port or a bridge master
 * (-EINVAL otherwise). Asserts that RTNL is held. Returns 0 and stores the
 * result of br_get_pvid() in @p_pvid.
 */
int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge_port *p;

	ASSERT_RTNL();
	p = br_port_get_check_rtnl(dev);
	if (p)
		vg = nbp_vlan_group(p);
	else if (netif_is_bridge_master(dev))
		vg = br_vlan_group(netdev_priv(dev));
	else
		return -EINVAL;

	*p_pvid = br_get_pvid(vg);
	return 0;
}
EXPORT_SYMBOL_GPL(br_vlan_get_pvid);

/* Same as br_vlan_get_pvid() but uses the _rcu port/group accessors and does
 * not assert RTNL.
 */
int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge_port *p;

	p = br_port_get_check_rcu(dev);
	if (p)
		vg = nbp_vlan_group_rcu(p);
	else if (netif_is_bridge_master(dev))
		vg = br_vlan_group_rcu(netdev_priv(dev));
	else
		return -EINVAL;

	*p_pvid = br_get_pvid(vg);
	return 0;
}
EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);

/* Fill the vlan id/proto/mode of a forward-path entry for bridge @br.
 *
 * The mode defaults to DEV_PATH_BR_VLAN_KEEP and nothing else is written
 * when vlan filtering is disabled. Otherwise the innermost vlan recorded in
 * @ctx is used if its protocol matches the bridge's; if not, the mode
 * becomes DEV_PATH_BR_VLAN_TAG and the bridge's PVID is used.
 */
void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
				    struct net_device_path_ctx *ctx,
				    struct net_device_path *path)
{
	struct net_bridge_vlan_group *vg;
	int idx = ctx->num_vlans - 1;
	u16 vid;

	path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;

	if (!br_opt_get(br, BROPT_VLAN_ENABLED))
		return;

	vg = br_vlan_group_rcu(br);

	if (idx >= 0 &&
	    ctx->vlan[idx].proto == br->vlan_proto) {
		vid = ctx->vlan[idx].id;
	} else {
		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
		vid = br_get_pvid(vg);
	}

	path->bridge.vlan_id = vid;
	path->bridge.vlan_proto = br->vlan_proto;
}

/* Adjust path->bridge.vlan_mode for egress through port @dst.
 *
 * Returns 0 when filtering is disabled or the vlan is tagged on @dst, and
 * -EINVAL when path->bridge.vlan_id is not usable on @dst. For an untagged
 * vlan the mode is rewritten: TAG becomes KEEP, otherwise UNTAG_HW when the
 * vlan carries BR_VLFLAG_ADDED_BY_SWITCHDEV, else UNTAG.
 */
int br_vlan_fill_forward_path_mode(struct net_bridge *br,
				   struct net_bridge_port *dst,
				   struct net_device_path *path)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge_vlan *v;

	if (!br_opt_get(br, BROPT_VLAN_ENABLED))
		return 0;

	vg = nbp_vlan_group_rcu(dst);
	v = br_vlan_find(vg, path->bridge.vlan_id);
	if (!v || !br_vlan_should_use(v))
		return -EINVAL;

	if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
		return 0;

	if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
	else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
	else
		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;

	return 0;
}

/* Look up vlan @vid on @dev (bridge port or bridge master) and copy its vid
 * and flags into @p_vinfo, adding BRIDGE_VLAN_INFO_PVID when @vid is the
 * group's PVID. Asserts that RTNL is held.
 * Returns 0, -EINVAL for a non-bridge device, or -ENOENT if @vid is absent.
 */
int br_vlan_get_info(const struct net_device *dev, u16 vid,
		     struct bridge_vlan_info *p_vinfo)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge_vlan *v;
	struct net_bridge_port *p;

	ASSERT_RTNL();
	p = br_port_get_check_rtnl(dev);
	if (p)
		vg = nbp_vlan_group(p);
	else if (netif_is_bridge_master(dev))
		vg = br_vlan_group(netdev_priv(dev));
	else
		return -EINVAL;

	v = br_vlan_find(vg, vid);
	if (!v)
		return -ENOENT;

	p_vinfo->vid = vid;
	p_vinfo->flags = v->flags;
	if (vid == br_get_pvid(vg))
		p_vinfo->flags |= BRIDGE_VLAN_INFO_PVID;
	return 0;
}
EXPORT_SYMBOL_GPL(br_vlan_get_info);

/* Same as br_vlan_get_info() but uses the _rcu port/group accessors and does
 * not assert RTNL.
 */
int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid,
			 struct bridge_vlan_info *p_vinfo)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge_vlan *v;
	struct net_bridge_port *p;

	p = br_port_get_check_rcu(dev);
	if (p)
		vg = nbp_vlan_group_rcu(p);
	else if (netif_is_bridge_master(dev))
		vg = br_vlan_group_rcu(netdev_priv(dev));
	else
		return -EINVAL;

	v = br_vlan_find(vg, vid);
	if (!v)
		return -ENOENT;

	p_vinfo->vid = vid;
	p_vinfo->flags = v->flags;
	if (vid == br_get_pvid(vg))
		p_vinfo->flags |= BRIDGE_VLAN_INFO_PVID;
	return 0;
}
EXPORT_SYMBOL_GPL(br_vlan_get_info_rcu);

/* Non-zero iff @dev is a vlan device with VLAN_FLAG_BRIDGE_BINDING set. */
static int br_vlan_is_bind_vlan_dev(const struct net_device *dev)
{
	return is_vlan_dev(dev) &&
		!!(vlan_dev_priv(dev)->flags & VLAN_FLAG_BRIDGE_BINDING);
}

/* netdev_walk_all_upper_dev_rcu() callback wrapper around
 * br_vlan_is_bind_vlan_dev(); @priv is unused.
 */
static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev,
			       __always_unused struct netdev_nested_priv *priv)
{
	return br_vlan_is_bind_vlan_dev(dev);
}

/* True if the upper-device walk from @dev reports a bridge-binding vlan
 * device.
 */
static bool br_vlan_has_upper_bind_vlan_dev(struct net_device *dev)
{
	int found;

	rcu_read_lock();
	found = netdev_walk_all_upper_dev_rcu(dev, br_vlan_is_bind_vlan_dev_fn,
					      NULL);
	rcu_read_unlock();

	return !!found;
}

/* Walk state for br_vlan_match_bind_vlan_dev_fn(): @vid is the vlan id to
 * look for, @result receives the matching device.
 */
struct br_vlan_bind_walk_data {
	u16 vid;
	struct net_device *result;
};

/* Upper-device walk callback: records @dev in the walk data and returns 1
 * when it is a bridge-binding vlan device whose vlan id equals data->vid;
 * returns 0 otherwise.
 */
static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev,
					  struct netdev_nested_priv *priv)
{
	struct br_vlan_bind_walk_data *data = priv->data;
	int found = 0;

	if (br_vlan_is_bind_vlan_dev(dev) &&
	    vlan_dev_priv(dev)->vlan_id == data->vid) {
		data->result = dev;
		found = 1;
	}

	return found;
}

/* Return the bridge-binding vlan upper device of @dev with vlan id @vid, or
 * NULL if the walk finds none (data.result is zero-initialised by the
 * designated initialiser).
 */
static struct net_device *
br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid)
{
	struct br_vlan_bind_walk_data data = {
		.vid = vid,
	};
	struct netdev_nested_priv priv = {
		.data = (void *)&data,
	};

	rcu_read_lock();
	netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn,
				      &priv);
	rcu_read_unlock();

	return data.result;
}

/* True when @dev has IFF_UP set and netif_oper_up() holds. */
static bool br_vlan_is_dev_up(const struct net_device *dev)
{
	return  !!(dev->flags & IFF_UP) && netif_oper_up(dev);
}

/* Set the carrier of @vlan_dev from the state of bridge @br.
 *
 * Carrier is turned off if the bridge device itself has no carrier.
 * Otherwise it is turned on iff at least one port that has the vlan
 * configured is up, and off if none is.
 */
static void br_vlan_set_vlan_dev_state(const struct net_bridge *br,
				       struct net_device *vlan_dev)
{
	u16 vid = vlan_dev_priv(vlan_dev)->vlan_id;
	struct net_bridge_vlan_group *vg;
	struct net_bridge_port *p;
	bool has_carrier = false;

	if (!netif_carrier_ok(br->dev)) {
		netif_carrier_off(vlan_dev);
		return;
	}

	list_for_each_entry(p, &br->port_list, list) {
		vg = nbp_vlan_group(p);
		if (br_vlan_find(vg, vid) && br_vlan_is_dev_up(p->dev)) {
			has_carrier = true;
			break;
		}
	}

	if (has_carrier)
		netif_carrier_on(vlan_dev);
	else
		netif_carrier_off(vlan_dev);
}

/* Refresh the carrier of every bridge-binding vlan device that corresponds
 * to a vlan configured on port @p.
 */
static void br_vlan_set_all_vlan_dev_state(struct net_bridge_port *p)
{
	struct net_bridge_vlan_group *vg = nbp_vlan_group(p);
	struct net_bridge_vlan *vlan;
	struct net_device *vlan_dev;

	list_for_each_entry(vlan, &vg->vlan_list, vlist) {
		vlan_dev = br_vlan_get_upper_bind_vlan_dev(p->br->dev,
							   vlan->vid);
		if (vlan_dev) {
			if (br_vlan_is_dev_up(p->dev)) {
				/* this port alone is enough for carrier, no
				 * need to scan the other ports
				 */
				if (netif_carrier_ok(p->br->dev))
					netif_carrier_on(vlan_dev);
			} else {
				br_vlan_set_vlan_dev_state(p->br, vlan_dev);
			}
		}
	}
}

/* Update BROPT_VLAN_BRIDGE_BINDING on the bridge behind @br_dev: set it when
 * @enable is true, otherwise set it according to whether any bridge-binding
 * vlan upper device is still found.
 */
static void br_vlan_toggle_bridge_binding(struct net_device *br_dev,
					  bool enable)
{
	struct net_bridge *br = netdev_priv(br_dev);

	if (enable)
		br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING, true);
	else
		br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING,
			      br_vlan_has_upper_bind_vlan_dev(br_dev));
}

/* React to @upper_dev being linked to / unlinked from bridge device @dev.
 * Only bridge-binding vlan devices are of interest; on link the new upper's
 * carrier is initialised from the bridge state.
 */
static void br_vlan_upper_change(struct net_device *dev,
				 struct net_device *upper_dev,
				 bool linking)
{
	struct net_bridge *br = netdev_priv(dev);

	if (!br_vlan_is_bind_vlan_dev(upper_dev))
		return;

	br_vlan_toggle_bridge_binding(dev, linking);
	if (linking)
		br_vlan_set_vlan_dev_state(br, upper_dev);
}

/* Walk state for br_vlan_link_state_change_fn(). */
struct br_vlan_link_state_walk_data {
	struct net_bridge *br;
};

/* Upper-device walk callback: refresh the carrier of each bridge-binding
 * vlan device. Always returns 0, so the walk is never stopped here.
 */
static int br_vlan_link_state_change_fn(struct net_device *vlan_dev,
					struct netdev_nested_priv *priv)
{
	struct br_vlan_link_state_walk_data *data = priv->data;

	if (br_vlan_is_bind_vlan_dev(vlan_dev))
		br_vlan_set_vlan_dev_state(data->br, vlan_dev);

	return 0;
}

/* Refresh the carrier of all bridge-binding vlan upper devices of @dev. */
static void br_vlan_link_state_change(struct net_device *dev,
				      struct net_bridge *br)
{
	struct br_vlan_link_state_walk_data data = {
		.br = br
	};
	struct netdev_nested_priv priv = {
		.data = (void *)&data,
	};

	rcu_read_lock();
	netdev_walk_all_upper_dev_rcu(dev, br_vlan_link_state_change_fn,
				      &priv);
	rcu_read_unlock();
}

/* Must be protected by RTNL.
 *
 * Refresh the carrier of the bridge-binding vlan device for @vid, if bridge
 * binding is enabled on the port's bridge and such a device exists.
 */
static void nbp_vlan_set_vlan_dev_state(struct net_bridge_port *p, u16 vid)
{
	struct net_device *vlan_dev;

	if (!br_opt_get(p->br, BROPT_VLAN_BRIDGE_BINDING))
		return;

	vlan_dev = br_vlan_get_upper_bind_vlan_dev(p->br->dev, vid);
	if (vlan_dev)
		br_vlan_set_vlan_dev_state(p->br, vlan_dev);
}

/* Must be protected by RTNL.
*/ int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr) { struct netdev_notifier_changeupper_info *info; struct net_bridge *br = netdev_priv(dev); int vlcmd = 0, ret = 0; bool changed = false; switch (event) { case NETDEV_REGISTER: ret = br_vlan_add(br, br->default_pvid, BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_BRENTRY, &changed, NULL); vlcmd = RTM_NEWVLAN; break; case NETDEV_UNREGISTER: changed = !br_vlan_delete(br, br->default_pvid); vlcmd = RTM_DELVLAN; break; case NETDEV_CHANGEUPPER: info = ptr; br_vlan_upper_change(dev, info->upper_dev, info->linking); break; case NETDEV_CHANGE: case NETDEV_UP: if (!br_opt_get(br, BROPT_VLAN_BRIDGE_BINDING)) break; br_vlan_link_state_change(dev, br); break; } if (changed) br_vlan_notify(br, NULL, br->default_pvid, 0, vlcmd); return ret; } void br_vlan_vlan_upper_event(struct net_device *br_dev, struct net_device *vlan_dev, unsigned long event) { struct vlan_dev_priv *vlan = vlan_dev_priv(vlan_dev); struct net_bridge *br = netdev_priv(br_dev); bool bridge_binding; switch (event) { case NETDEV_CHANGE: case NETDEV_UP: break; default: return; } bridge_binding = vlan->flags & VLAN_FLAG_BRIDGE_BINDING; br_vlan_toggle_bridge_binding(br_dev, bridge_binding); if (bridge_binding) br_vlan_set_vlan_dev_state(br, vlan_dev); else if (!bridge_binding && netif_carrier_ok(br_dev)) netif_carrier_on(vlan_dev); } /* Must be protected by RTNL. 
*/
/* Port link-state hook: on CHANGE/DOWN/UP of port @p refresh the carrier of
 * the bridge-binding vlan devices for the port's vlans. Does nothing unless
 * BROPT_VLAN_BRIDGE_BINDING is set on the bridge.
 */
void br_vlan_port_event(struct net_bridge_port *p, unsigned long event)
{
	if (!br_opt_get(p->br, BROPT_VLAN_BRIDGE_BINDING))
		return;

	switch (event) {
	case NETDEV_CHANGE:
	case NETDEV_DOWN:
	case NETDEV_UP:
		br_vlan_set_all_vlan_dev_state(p);
		break;
	}
}

/* Append a BRIDGE_VLANDB_ENTRY_STATS nest with the rx/tx byte and packet
 * counters of @v to @skb. Returns false (with the nest cancelled) if the
 * attributes do not fit.
 */
static bool br_vlan_stats_fill(struct sk_buff *skb,
			       const struct net_bridge_vlan *v)
{
	struct pcpu_sw_netstats stats;
	struct nlattr *nest;

	nest = nla_nest_start(skb, BRIDGE_VLANDB_ENTRY_STATS);
	if (!nest)
		return false;

	br_vlan_get_stats(v, &stats);
	if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_BYTES,
			      u64_stats_read(&stats.rx_bytes),
			      BRIDGE_VLANDB_STATS_PAD) ||
	    nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_PACKETS,
			      u64_stats_read(&stats.rx_packets),
			      BRIDGE_VLANDB_STATS_PAD) ||
	    nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_BYTES,
			      u64_stats_read(&stats.tx_bytes),
			      BRIDGE_VLANDB_STATS_PAD) ||
	    nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_PACKETS,
			      u64_stats_read(&stats.tx_packets),
			      BRIDGE_VLANDB_STATS_PAD))
		goto out_err;

	nla_nest_end(skb, nest);

	return true;

out_err:
	nla_nest_cancel(skb, nest);
	return false;
}

/* v_opts is used to dump the options which must be equal in the whole range
 *
 * Append one BRIDGE_VLANDB_ENTRY nest describing @vid (or the range
 * @vid..@vid_range) to @skb. Only the UNTAGGED and PVID bits of @flags are
 * copied into the entry info. Returns false, with the nest cancelled, if
 * anything does not fit.
 */
static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range,
			      const struct net_bridge_vlan *v_opts,
			      const struct net_bridge_port *p,
			      u16 flags,
			      bool dump_stats)
{
	struct bridge_vlan_info info;
	struct nlattr *nest;

	nest = nla_nest_start(skb, BRIDGE_VLANDB_ENTRY);
	if (!nest)
		return false;

	memset(&info, 0, sizeof(info));
	info.vid = vid;
	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
		info.flags |= BRIDGE_VLAN_INFO_UNTAGGED;
	if (flags & BRIDGE_VLAN_INFO_PVID)
		info.flags |= BRIDGE_VLAN_INFO_PVID;

	if (nla_put(skb, BRIDGE_VLANDB_ENTRY_INFO, sizeof(info), &info))
		goto out_err;

	/* the range end is emitted only for a real, non-PVID range */
	if (vid_range && vid < vid_range &&
	    !(flags & BRIDGE_VLAN_INFO_PVID) &&
	    nla_put_u16(skb, BRIDGE_VLANDB_ENTRY_RANGE, vid_range))
		goto out_err;

	if (v_opts) {
		if (!br_vlan_opts_fill(skb, v_opts, p))
			goto out_err;

		if (dump_stats && !br_vlan_stats_fill(skb, v_opts))
			goto out_err;
	}

	nla_nest_end(skb, nest);

	return true;

out_err:
	nla_nest_cancel(skb, nest);
	return false;
}

/* Size of the skb allocated by br_vlan_notify() for one notification. */
static size_t rtnl_vlan_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct br_vlan_msg))
		+ nla_total_size(0) /* BRIDGE_VLANDB_ENTRY */
		+ nla_total_size(sizeof(u16)) /* BRIDGE_VLANDB_ENTRY_RANGE */
		+ nla_total_size(sizeof(struct bridge_vlan_info)) /* BRIDGE_VLANDB_ENTRY_INFO */
		+ br_vlan_opts_nl_size(); /* bridge vlan options */
}

/* Send an RTM_NEWVLAN/RTM_DELVLAN notification for @vid (or the range
 * @vid..@vid_range) to RTNLGRP_BRVLAN.
 *
 * The notification is for port @p when it is non-NULL, otherwise for the
 * bridge device of @br. For RTM_NEWVLAN the vlan must exist and be usable,
 * otherwise nothing is sent. Allocation or fill failures are reported via
 * rtnl_set_sk_err(); any other @cmd is dropped silently.
 */
void br_vlan_notify(const struct net_bridge *br,
		    const struct net_bridge_port *p,
		    u16 vid, u16 vid_range,
		    int cmd)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge_vlan *v = NULL;
	struct br_vlan_msg *bvm;
	struct nlmsghdr *nlh;
	struct sk_buff *skb;
	int err = -ENOBUFS;
	struct net *net;
	u16 flags = 0;
	int ifindex;

	/* right now notifications are done only with rtnl held */
	ASSERT_RTNL();

	if (p) {
		ifindex = p->dev->ifindex;
		vg = nbp_vlan_group(p);
		net = dev_net(p->dev);
	} else {
		ifindex = br->dev->ifindex;
		vg = br_vlan_group(br);
		net = dev_net(br->dev);
	}

	skb = nlmsg_new(rtnl_vlan_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto out_err;

	err = -EMSGSIZE;
	nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*bvm), 0);
	if (!nlh)
		goto out_err;
	bvm = nlmsg_data(nlh);
	memset(bvm, 0, sizeof(*bvm));
	bvm->family = AF_BRIDGE;
	bvm->ifindex = ifindex;

	switch (cmd) {
	case RTM_NEWVLAN:
		/* need to find the vlan due to flags/options */
		v = br_vlan_find(vg, vid);
		if (!v || !br_vlan_should_use(v))
			goto out_kfree;

		flags = v->flags;
		if (br_get_pvid(vg) == v->vid)
			flags |= BRIDGE_VLAN_INFO_PVID;
		break;
	case RTM_DELVLAN:
		break;
	default:
		goto out_kfree;
	}

	if (!br_vlan_fill_vids(skb, vid, vid_range, v, p, flags, false))
		goto out_err;

	nlmsg_end(skb, nlh);
	rtnl_notify(skb, net, 0, RTNLGRP_BRVLAN, NULL, GFP_KERNEL);
	return;

out_err:
	rtnl_set_sk_err(net, RTNLGRP_BRVLAN, err);
out_kfree:
	kfree_skb(skb);
}

/* check if v_curr can enter a range ending in range_end */
bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
			     const struct net_bridge_vlan *range_end)
{
	return v_curr->vid - range_end->vid == 1 &&
	       range_end->flags == v_curr->flags &&
	       br_vlan_opts_eq_range(v_curr, range_end);
}

/* Dump the vlans of one bridge or bridge port device into @skb as a single
 * RTM_NEWVLAN multipart message, compressing consecutive compatible vlans
 * into ranges.
 *
 * cb->args[1] holds the number of vlans already dumped for this device: it
 * is the resume index on entry and is updated on exit (0 when the device was
 * dumped completely). Returns 0, -EINVAL for a non-bridge device, or
 * -EMSGSIZE when @skb is full. Uses the _rcu accessors and list iterator.
 */
static int br_vlan_dump_dev(const struct net_device *dev,
			    struct sk_buff *skb,
			    struct netlink_callback *cb,
			    u32 dump_flags)
{
	struct net_bridge_vlan *v, *range_start = NULL, *range_end = NULL;
	bool dump_global = !!(dump_flags & BRIDGE_VLANDB_DUMPF_GLOBAL);
	bool dump_stats = !!(dump_flags & BRIDGE_VLANDB_DUMPF_STATS);
	struct net_bridge_vlan_group *vg;
	int idx = 0, s_idx = cb->args[1];
	struct nlmsghdr *nlh = NULL;
	struct net_bridge_port *p;
	struct br_vlan_msg *bvm;
	struct net_bridge *br;
	int err = 0;
	u16 pvid;

	if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
		return -EINVAL;

	if (netif_is_bridge_master(dev)) {
		br = netdev_priv(dev);
		vg = br_vlan_group_rcu(br);
		p = NULL;
	} else {
		/* global options are dumped only for bridge devices */
		if (dump_global)
			return 0;

		p = br_port_get_rcu(dev);
		if (WARN_ON(!p))
			return -EINVAL;
		vg = nbp_vlan_group_rcu(p);
		br = p->br;
	}

	if (!vg)
		return 0;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RTM_NEWVLAN, sizeof(*bvm), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;
	bvm = nlmsg_data(nlh);
	memset(bvm, 0, sizeof(*bvm));
	bvm->family = PF_BRIDGE;
	bvm->ifindex = dev->ifindex;
	pvid = br_get_pvid(vg);

	/* idx must stay at range's beginning until it is filled in */
	list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
		if (!dump_global && !br_vlan_should_use(v))
			continue;
		if (idx < s_idx) {
			idx++;
			continue;
		}

		if (!range_start) {
			range_start = v;
			range_end = v;
			continue;
		}

		if (dump_global) {
			if (br_vlan_global_opts_can_enter_range(v, range_end))
				goto update_end;
			if (!br_vlan_global_opts_fill(skb, range_start->vid,
						      range_end->vid,
						      range_start)) {
				err = -EMSGSIZE;
				break;
			}
			/* advance number of filled vlans */
			idx += range_end->vid - range_start->vid + 1;

			range_start = v;
		} else if (dump_stats || v->vid == pvid ||
			   !br_vlan_can_enter_range(v, range_end)) {
			/* stats dumps and the PVID never join a range */
			u16 vlan_flags = br_vlan_flags(range_start, pvid);

			if (!br_vlan_fill_vids(skb, range_start->vid,
					       range_end->vid, range_start,
					       p, vlan_flags, dump_stats)) {
				err = -EMSGSIZE;
				break;
			}
			/* advance number of filled vlans */
			idx += range_end->vid - range_start->vid + 1;

			range_start = v;
		}
update_end:
		range_end = v;
	}

	/* err will be 0 and range_start will be set in 3 cases here:
	 * - first vlan (range_start == range_end)
	 * - last vlan (range_start == range_end, not in range)
	 * - last vlan range (range_start != range_end, in range)
	 */
	if (!err && range_start) {
		if (dump_global &&
		    !br_vlan_global_opts_fill(skb, range_start->vid,
					      range_end->vid, range_start))
			err = -EMSGSIZE;
		else if (!dump_global &&
			 !br_vlan_fill_vids(skb, range_start->vid,
					    range_end->vid, range_start,
					    p, br_vlan_flags(range_start, pvid),
					    dump_stats))
			err = -EMSGSIZE;
	}

	cb->args[1] = err ? idx : 0;

	nlmsg_end(skb, nlh);

	return err;
}

/* Attribute policy for RTM_GETVLAN dump requests. */
static const struct nla_policy br_vlan_db_dump_pol[BRIDGE_VLANDB_DUMP_MAX + 1] = {
	[BRIDGE_VLANDB_DUMP_FLAGS] = { .type = NLA_U32 },
};

/* RTM_GETVLAN dump handler.
 *
 * With a non-zero ifindex in the request only that device is dumped;
 * otherwise every netdev of the namespace is visited, resuming from
 * cb->args[0]. Returns skb->len while a dump is in progress or complete via
 * the normal path, or a negative errno.
 */
static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlattr *dtb[BRIDGE_VLANDB_DUMP_MAX + 1];
	int idx = 0, err = 0, s_idx = cb->args[0];
	struct net *net = sock_net(skb->sk);
	struct br_vlan_msg *bvm;
	struct net_device *dev;
	u32 dump_flags = 0;

	err = nlmsg_parse(cb->nlh, sizeof(*bvm), dtb, BRIDGE_VLANDB_DUMP_MAX,
			  br_vlan_db_dump_pol, cb->extack);
	if (err < 0)
		return err;

	bvm = nlmsg_data(cb->nlh);
	if (dtb[BRIDGE_VLANDB_DUMP_FLAGS])
		dump_flags = nla_get_u32(dtb[BRIDGE_VLANDB_DUMP_FLAGS]);

	rcu_read_lock();
	if (bvm->ifindex) {
		dev = dev_get_by_index_rcu(net, bvm->ifindex);
		if (!dev) {
			err = -ENODEV;
			goto out_err;
		}
		err = br_vlan_dump_dev(dev, skb, cb, dump_flags);
		/* if the dump completed without an error we return 0 here */
		if (err != -EMSGSIZE)
			goto out_err;
	} else {
		for_each_netdev_rcu(net, dev) {
			if (idx < s_idx)
				goto skip;

			err = br_vlan_dump_dev(dev, skb, cb, dump_flags);
			if (err == -EMSGSIZE)
				break;
skip:
			idx++;
		}
	}
	cb->args[0] = idx;
	rcu_read_unlock();

	return skb->len;

out_err:
	rcu_read_unlock();

	return err;
}

/* Attribute policy for the contents of a BRIDGE_VLANDB_ENTRY nest. */
static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] = {
	[BRIDGE_VLANDB_ENTRY_INFO]	=
		NLA_POLICY_EXACT_LEN(sizeof(struct bridge_vlan_info)),
	[BRIDGE_VLANDB_ENTRY_RANGE]	= { .type = NLA_U16 },
	[BRIDGE_VLANDB_ENTRY_STATE]	= { .type = NLA_U8 },
	[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED },
	[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER]	= { .type = NLA_U8 },
	[BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS]	= { .type = NLA_REJECT },
	[BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS]	= { .type = NLA_U32 },
	[BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS]	= NLA_POLICY_MAX(NLA_U8, 1),
};

/* Process one BRIDGE_VLANDB_ENTRY attribute of an RTM_NEWVLAN/RTM_DELVLAN
 * request for @dev (bridge or bridge port).
 *
 * The entry is validated, an optional BRIDGE_VLANDB_ENTRY_RANGE turns it
 * into a begin/end pair, the vlan(s) are added or deleted through
 * br_process_vlan_info() unless BRIDGE_VLAN_INFO_ONLY_OPTS was given on a
 * NEWVLAN, and for NEWVLAN the per-vlan options are then applied.
 * Returns 0 or a negative errno.
 */
static int br_vlan_rtm_process_one(struct net_device *dev,
				   const struct nlattr *attr,
				   int cmd, struct netlink_ext_ack *extack)
{
	struct bridge_vlan_info *vinfo, vrange_end, *vinfo_last = NULL;
	struct nlattr *tb[BRIDGE_VLANDB_ENTRY_MAX + 1];
	bool changed = false, skip_processing = false;
	struct net_bridge_vlan_group *vg;
	struct net_bridge_port *p = NULL;
	int err = 0, cmdmap = 0;
	struct net_bridge *br;

	if (netif_is_bridge_master(dev)) {
		br = netdev_priv(dev);
		vg = br_vlan_group(br);
	} else {
		p = br_port_get_rtnl(dev);
		if (WARN_ON(!p))
			return -ENODEV;
		br = p->br;
		vg = nbp_vlan_group(p);
	}

	if (WARN_ON(!vg))
		return -ENODEV;

	err = nla_parse_nested(tb, BRIDGE_VLANDB_ENTRY_MAX, attr,
			       br_vlan_db_policy, extack);
	if (err)
		return err;

	if (!tb[BRIDGE_VLANDB_ENTRY_INFO]) {
		NL_SET_ERR_MSG_MOD(extack, "Missing vlan entry info");
		return -EINVAL;
	}
	memset(&vrange_end, 0, sizeof(vrange_end));

	vinfo = nla_data(tb[BRIDGE_VLANDB_ENTRY_INFO]);
	if (vinfo->flags & (BRIDGE_VLAN_INFO_RANGE_BEGIN |
			    BRIDGE_VLAN_INFO_RANGE_END)) {
		NL_SET_ERR_MSG_MOD(extack, "Old-style vlan ranges are not allowed when using RTM vlan calls");
		return -EINVAL;
	}
	if (!br_vlan_valid_id(vinfo->vid, extack))
		return -EINVAL;

	if (tb[BRIDGE_VLANDB_ENTRY_RANGE]) {
		vrange_end.vid = nla_get_u16(tb[BRIDGE_VLANDB_ENTRY_RANGE]);
		/* validate user-provided flags without RANGE_BEGIN */
		vrange_end.flags = BRIDGE_VLAN_INFO_RANGE_END | vinfo->flags;
		vinfo->flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;

		/* vinfo_last is the range start, vinfo the range end */
		vinfo_last = vinfo;
		vinfo = &vrange_end;

		if (!br_vlan_valid_id(vinfo->vid, extack) ||
		    !br_vlan_valid_range(vinfo, vinfo_last, extack))
			return -EINVAL;
	}

	/* map the vlan command onto the link command br_process_vlan_info()
	 * is called with
	 */
	switch (cmd) {
	case RTM_NEWVLAN:
		cmdmap = RTM_SETLINK;
		skip_processing = !!(vinfo->flags & BRIDGE_VLAN_INFO_ONLY_OPTS);
		break;
	case RTM_DELVLAN:
		cmdmap = RTM_DELLINK;
		break;
	}

	if (!skip_processing) {
		struct bridge_vlan_info *tmp_last = vinfo_last;

		/* br_process_vlan_info may overwrite vinfo_last */
		err = br_process_vlan_info(br, p, cmdmap, vinfo, &tmp_last,
					   &changed, extack);

		/* notify first if anything changed */
		if (changed)
			br_ifinfo_notify(cmdmap, br, p);

		if (err)
			return err;
	}

	/* deal with options */
	if (cmd == RTM_NEWVLAN) {
		struct net_bridge_vlan *range_start, *range_end;

		if (vinfo_last) {
			range_start = br_vlan_find(vg, vinfo_last->vid);
			range_end = br_vlan_find(vg, vinfo->vid);
		} else {
			range_start = br_vlan_find(vg, vinfo->vid);
			range_end = range_start;
		}

		err = br_vlan_process_options(br, p, range_start, range_end,
					      tb, extack);
	}

	return err;
}

/* RTM_NEWVLAN/RTM_DELVLAN handler.
 *
 * Resolves the target device from the message header, requires it to be a
 * bridge or bridge port, and dispatches every BRIDGE_VLANDB_ENTRY and
 * BRIDGE_VLANDB_GLOBAL_OPTIONS attribute; other attribute types are
 * skipped. Processing stops at the first error. Returns -EINVAL if no
 * processable attribute was present.
 */
static int br_vlan_rtm_process(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct br_vlan_msg *bvm;
	struct net_device *dev;
	struct nlattr *attr;
	int err, vlans = 0;
	int rem;

	/* this should validate the header and check for remaining bytes */
	err = nlmsg_parse(nlh, sizeof(*bvm), NULL, BRIDGE_VLANDB_MAX, NULL,
			  extack);
	if (err < 0)
		return err;

	bvm = nlmsg_data(nlh);
	dev = __dev_get_by_index(net, bvm->ifindex);
	if (!dev)
		return -ENODEV;

	if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) {
		NL_SET_ERR_MSG_MOD(extack, "The device is not a valid bridge or bridge port");
		return -EINVAL;
	}

	nlmsg_for_each_attr(attr, nlh, sizeof(*bvm), rem) {
		switch (nla_type(attr)) {
		case BRIDGE_VLANDB_ENTRY:
			err = br_vlan_rtm_process_one(dev, attr,
						      nlh->nlmsg_type,
						      extack);
			break;
		case BRIDGE_VLANDB_GLOBAL_OPTIONS:
			err = br_vlan_rtm_process_global_options(dev, attr,
								 nlh->nlmsg_type,
								 extack);
			break;
		default:
			continue;
		}

		vlans++;
		if (err)
			break;
	}
	if (!vlans) {
		NL_SET_ERR_MSG_MOD(extack, "No vlans found to process");
		err = -EINVAL;
	}

	return err;
}

/* rtnetlink handlers registered for the PF_BRIDGE vlan messages: NEWVLAN and
 * DELVLAN share the doit handler, GETVLAN only has a dump handler.
 */
static const struct rtnl_msg_handler br_vlan_rtnl_msg_handlers[] = {
	{THIS_MODULE, PF_BRIDGE, RTM_NEWVLAN, br_vlan_rtm_process, NULL, 0},
	{THIS_MODULE, PF_BRIDGE, RTM_DELVLAN, br_vlan_rtm_process, NULL, 0},
	{THIS_MODULE, PF_BRIDGE, RTM_GETVLAN, NULL, br_vlan_rtm_dump, 0},
};

/* Register the vlan rtnetlink handlers; returns rtnl_register_many()'s
 * result.
 */
int br_vlan_rtnl_init(void)
{
	return rtnl_register_many(br_vlan_rtnl_msg_handlers);
}

/* Unregister the vlan rtnetlink handlers. */
void br_vlan_rtnl_uninit(void)
{
	rtnl_unregister_many(br_vlan_rtnl_msg_handlers);
}
|
| 179 153 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | /* SPDX-License-Identifier: GPL-2.0 * * FUSE: Filesystem in Userspace * Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> */ #ifndef _FS_FUSE_DEV_I_H #define _FS_FUSE_DEV_I_H #include <linux/types.h> /* Ordinary requests have even IDs, while interrupts IDs are odd */ #define FUSE_INT_REQ_BIT (1ULL << 0) #define FUSE_REQ_ID_STEP (1ULL << 1) extern struct wait_queue_head fuse_dev_waitq; struct fuse_arg; struct fuse_args; struct fuse_pqueue; struct fuse_req; struct fuse_iqueue; struct fuse_forget_link; struct fuse_copy_state { struct fuse_req *req; struct iov_iter *iter; struct pipe_buffer *pipebufs; struct pipe_buffer *currbuf; struct pipe_inode_info *pipe; unsigned long nr_segs; struct page *pg; unsigned int len; unsigned int offset; bool write:1; bool move_folios:1; bool is_uring:1; struct { unsigned int copied_sz; /* copied size into the user buffer */ } ring; }; #define FUSE_DEV_SYNC_INIT ((struct fuse_dev *) 1) #define FUSE_DEV_PTR_MASK (~1UL) static inline struct fuse_dev *__fuse_get_dev(struct file *file) { /* * Lockless access is OK, because file->private data is set * once during mount and is valid until the file is released. 
*/ struct fuse_dev *fud = READ_ONCE(file->private_data); return (typeof(fud)) ((unsigned long) fud & FUSE_DEV_PTR_MASK); } struct fuse_dev *fuse_get_dev(struct file *file); unsigned int fuse_req_hash(u64 unique); struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique); void fuse_dev_end_requests(struct list_head *head); void fuse_copy_init(struct fuse_copy_state *cs, bool write, struct iov_iter *iter); int fuse_copy_args(struct fuse_copy_state *cs, unsigned int numargs, unsigned int argpages, struct fuse_arg *args, int zeroing); int fuse_copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args, unsigned int nbytes); void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *forget); void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req); bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock); bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list); #endif |
| 219 262 981 126 1244 86 22 21 30 21 10 22 97 112 112 112 112 112 112 110 29 20 35 105 104 22 16 35 32 35 28 112 50 13 49 50 13 50 3 50 44 50 50 17 44 44 50 45 13 13 13 50 71 1 245 4169 9 3752 3744 3743 3752 15015 14942 14947 831 657 647 78 67 259 5 5 5 2919 142 31 5606 203 5606 148 11 34 31 23 27 7 27 27 2540 87 1987 102 644 645 645 26 26 644 617 114 114 506 195 512 504 504 165 334 59 55 8 8 197 1360 7 250 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 
424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 
924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 
1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* internal.h: mm/ internal definitions * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) */ #ifndef __MM_INTERNAL_H #define __MM_INTERNAL_H #include <linux/fs.h> #include <linux/khugepaged.h> #include <linux/mm.h> #include <linux/mm_inline.h> #include <linux/pagemap.h> #include <linux/pagewalk.h> #include <linux/rmap.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/swap_cgroup.h> #include <linux/tracepoint-defs.h> /* Internal core VMA manipulation functions. */ #include "vma.h" struct folio_batch; /* * Maintains state across a page table move. The operation assumes both source * and destination VMAs already exist and are specified by the user. * * Partial moves are permitted, but the old and new ranges must both reside * within a VMA. * * mmap lock must be held in write and VMA write locks must be held on any VMA * that is visible. * * Use the PAGETABLE_MOVE() macro to initialise this struct. * * The old_addr and new_addr fields are updated as the page table move is * executed. * * NOTE: The page table move is affected by reading from [old_addr, old_end), * and old_addr may be updated for better page table alignment, so len_in * represents the length of the range being copied as specified by the user. */ struct pagetable_move_control { struct vm_area_struct *old; /* Source VMA. */ struct vm_area_struct *new; /* Destination VMA. */ unsigned long old_addr; /* Address from which the move begins. */ unsigned long old_end; /* Exclusive address at which old range ends. */ unsigned long new_addr; /* Address to move page tables to. */ unsigned long len_in; /* Bytes to remap specified by user. */ bool need_rmap_locks; /* Do rmap locks need to be taken? */ bool for_stack; /* Is this an early temp stack being moved? 
*/ }; #define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \ struct pagetable_move_control name = { \ .old = old_, \ .new = new_, \ .old_addr = old_addr_, \ .old_end = (old_addr_) + (len_), \ .new_addr = new_addr_, \ .len_in = len_, \ } /* * The set of flags that only affect watermark checking and reclaim * behaviour. This is used by the MM to obey the caller constraints * about IO, FS and watermark checking while ignoring placement * hints such as HIGHMEM usage. */ #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ __GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\ __GFP_NOLOCKDEP) /* The GFP flags allowed during early boot */ #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) /* Control allocation cpuset and node placement constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) /* Do not use these with a slab allocator */ #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) /* * Different from WARN_ON_ONCE(), no warning will be issued * when we specify __GFP_NOWARN. */ #define WARN_ON_ONCE_GFP(cond, gfp) ({ \ static bool __section(".data..once") __warned; \ int __ret_warn_once = !!(cond); \ \ if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \ __warned = true; \ WARN_ON(1); \ } \ unlikely(__ret_warn_once); \ }) void page_writeback_init(void); /* * If a 16GB hugetlb folio were mapped by PTEs of all of its 4kB pages, * its nr_pages_mapped would be 0x400000: choose the ENTIRELY_MAPPED bit * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE). Hugetlb currently * leaves nr_pages_mapped at 0, but avoid surprise if it participates later. */ #define ENTIRELY_MAPPED 0x800000 #define FOLIO_PAGES_MAPPED (ENTIRELY_MAPPED - 1) /* * Flags passed to __show_mem() and show_free_areas() to suppress output in * various contexts. 
*/
#define SHOW_MEM_FILTER_NODES		(0x0001u)	/* disallowed nodes */

/*
 * How many individual pages have an elevated _mapcount.  Excludes
 * the folio's entire_mapcount.
 *
 * Returns -1 when CONFIG_NO_PAGE_MAPCOUNT is enabled.
 *
 * Don't use this function outside of debugging code.
 */
static inline int folio_nr_pages_mapped(const struct folio *folio)
{
	if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT))
		return -1;
	/* Keep only the bits covered by FOLIO_PAGES_MAPPED. */
	return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED;
}

/*
 * Retrieve the first entry of a folio based on a provided entry within the
 * folio. We cannot rely on folio->swap as there is no guarantee that it has
 * been initialized. Used for calling arch_swap_restore()
 *
 * The result is @entry with its value aligned down to a multiple of the
 * folio's page count.
 */
static inline swp_entry_t folio_swap(swp_entry_t entry,
		const struct folio *folio)
{
	swp_entry_t swap = {
		.val = ALIGN_DOWN(entry.val, folio_nr_pages(folio)),
	};

	return swap;
}

/* Return folio->mapping with the FOLIO_MAPPING_FLAGS bits masked off. */
static inline void *folio_raw_mapping(const struct folio *folio)
{
	unsigned long mapping = (unsigned long)folio->mapping;

	return (void *)(mapping & ~FOLIO_MAPPING_FLAGS);
}

/*
 * This is a file-backed mapping, and is about to be memory mapped - invoke its
 * mmap hook and safely handle error conditions. On error, VMA hooks will be
 * mutated.
 *
 * @file: File which backs the mapping.
 * @vma:  VMA which we are mapping.
 *
 * Returns: 0 if success, error otherwise.
 */
static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
{
	int err = vfs_mmap(file, vma);

	if (likely(!err))
		return 0;

	/*
	 * OK, we tried to call the file hook for mmap(), but an error
	 * arose. The mapping is in an inconsistent state and we must not invoke
	 * any further hooks on it.
	 */
	vma->vm_ops = &vma_dummy_vm_ops;

	return err;
}

/*
 * If the VMA has a close hook then close it, and since closing it might leave
 * it in an inconsistent state which makes the use of any hooks suspect, clear
 * them down by installing dummy empty hooks.
*/
static inline void vma_close(struct vm_area_struct *vma)
{
	/* Nothing to do when there are no ops or no close hook. */
	if (vma->vm_ops && vma->vm_ops->close) {
		vma->vm_ops->close(vma);

		/*
		 * The mapping is in an inconsistent state, and no further hooks
		 * may be invoked upon it.
		 */
		vma->vm_ops = &vma_dummy_vm_ops;
	}
}

#ifdef CONFIG_MMU

/* Flags for folio_pte_batch(). */
typedef int __bitwise fpb_t;

/* Compare PTEs respecting the dirty bit. */
#define FPB_RESPECT_DIRTY		((__force fpb_t)BIT(0))

/* Compare PTEs respecting the soft-dirty bit. */
#define FPB_RESPECT_SOFT_DIRTY		((__force fpb_t)BIT(1))

/* Compare PTEs respecting the writable bit. */
#define FPB_RESPECT_WRITE		((__force fpb_t)BIT(2))

/*
 * Merge PTE write bits: if any PTE in the batch is writable, modify the
 * PTE at @ptentp to be writable.
 */
#define FPB_MERGE_WRITE			((__force fpb_t)BIT(3))

/*
 * Merge PTE young and dirty bits: if any PTE in the batch is young or dirty,
 * modify the PTE at @ptentp to be young or dirty, respectively.
 */
#define FPB_MERGE_YOUNG_DIRTY		((__force fpb_t)BIT(4))

/*
 * Normalise @pte for batch comparison: the dirty, soft-dirty and writable
 * bits are cleared unless the matching FPB_RESPECT_* flag is set in @flags,
 * and the result is always made old.
 */
static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
{
	if (!(flags & FPB_RESPECT_DIRTY))
		pte = pte_mkclean(pte);
	if (likely(!(flags & FPB_RESPECT_SOFT_DIRTY)))
		pte = pte_clear_soft_dirty(pte);
	if (likely(!(flags & FPB_RESPECT_WRITE)))
		pte = pte_wrprotect(pte);
	return pte_mkold(pte);
}

/**
 * folio_pte_batch_flags - detect a PTE batch for a large folio
 * @folio: The large folio to detect a PTE batch for.
 * @vma: The VMA. Only relevant with FPB_MERGE_WRITE, otherwise can be NULL.
 * @ptep: Page table pointer for the first entry.
 * @ptentp: Pointer to a COPY of the first page table entry whose flags this
 *	    function updates based on @flags if appropriate.
 * @max_nr: The maximum number of table entries to consider.
 * @flags: Flags to modify the PTE batch semantics.
 *
 * Detect a PTE batch: consecutive (present) PTEs that map consecutive
 * pages of the same large folio in a single VMA and a single page table.
*
 * All PTEs inside a PTE batch have the same PTE bits set, excluding the PFN,
 * the accessed bit, writable bit, dirty bit (unless FPB_RESPECT_DIRTY is set)
 * and soft-dirty bit (unless FPB_RESPECT_SOFT_DIRTY is set).
 *
 * @ptep must map any page of the folio. max_nr must be at least one and
 * must be limited by the caller so scanning cannot exceed a single VMA and
 * a single page table.
 *
 * Depending on the FPB_MERGE_* flags, the pte stored at @ptentp will
 * be updated: it's crucial that a pointer to a COPY of the first
 * page table entry, obtained through ptep_get(), is provided as @ptentp.
 *
 * This function will be inlined to optimize based on the input parameters;
 * consider using folio_pte_batch() instead if applicable.
 *
 * Return: the number of table entries in the batch.
 */
static inline unsigned int folio_pte_batch_flags(struct folio *folio,
		struct vm_area_struct *vma, pte_t *ptep, pte_t *ptentp,
		unsigned int max_nr, fpb_t flags)
{
	bool any_writable = false, any_young = false, any_dirty = false;
	pte_t expected_pte, pte = *ptentp;
	unsigned int nr, cur_nr;

	VM_WARN_ON_FOLIO(!pte_present(pte), folio);
	VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio);
	VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio);
	/*
	 * Ensure this is a pointer to a copy not a pointer into a page table.
	 * If this is a stack value, it won't be a valid virtual address, but
	 * that's fine because it also cannot be pointing into the page table.
	 */
	VM_WARN_ON(virt_addr_valid(ptentp) && PageTable(virt_to_page(ptentp)));

	/* Limit max_nr to the actual remaining PFNs in the folio we could batch. */
	max_nr = min_t(unsigned long, max_nr,
		       folio_pfn(folio) + folio_nr_pages(folio) - pte_pfn(pte));

	/* The first entry (and whatever its hint covers) is taken as given. */
	nr = pte_batch_hint(ptep, pte);
	expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags);
	ptep = ptep + nr;

	while (nr < max_nr) {
		pte = ptep_get(ptep);

		/* Stop at the first entry that does not continue the run. */
		if (!pte_same(__pte_batch_clear_ignored(pte, flags), expected_pte))
			break;

		if (flags & FPB_MERGE_WRITE)
			any_writable |= pte_write(pte);
		if (flags & FPB_MERGE_YOUNG_DIRTY) {
			any_young |= pte_young(pte);
			any_dirty |= pte_dirty(pte);
		}

		cur_nr = pte_batch_hint(ptep, pte);
		expected_pte = pte_advance_pfn(expected_pte, cur_nr);
		ptep += cur_nr;
		nr += cur_nr;
	}

	/* Fold the bits collected from the batch into the caller's copy. */
	if (any_writable)
		*ptentp = pte_mkwrite(*ptentp, vma);
	if (any_young)
		*ptentp = pte_mkyoung(*ptentp);
	if (any_dirty)
		*ptentp = pte_mkdirty(*ptentp);

	/* nr advances in hint-sized steps, so clamp it to max_nr. */
	return min(nr, max_nr);
}

unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte,
		unsigned int max_nr);

/**
 * pte_move_swp_offset - Move the swap entry offset field of a swap pte
 *	 forward or backward by delta
 * @pte: The initial pte state; is_swap_pte(pte) must be true and
 *	 non_swap_entry() must be false.
 * @delta: The direction and the offset we are moving; forward if delta
 *	 is positive; backward if delta is negative
 *
 * Moves the swap offset, while maintaining all other fields, including
 * swap type, and any swp pte bits. The resulting pte is returned.
 */
static inline pte_t pte_move_swp_offset(pte_t pte, long delta)
{
	swp_entry_t entry = pte_to_swp_entry(pte);
	pte_t new = __swp_entry_to_pte(__swp_entry(swp_type(entry),
						   (swp_offset(entry) + delta)));

	/* Carry the soft-dirty, exclusive and uffd-wp bits over to the new pte. */
	if (pte_swp_soft_dirty(pte))
		new = pte_swp_mksoft_dirty(new);
	if (pte_swp_exclusive(pte))
		new = pte_swp_mkexclusive(new);
	if (pte_swp_uffd_wp(pte))
		new = pte_swp_mkuffd_wp(new);

	return new;
}

/**
 * pte_next_swp_offset - Increment the swap entry offset field of a swap pte.
 * @pte: The initial pte state; is_swap_pte(pte) must be true and
 *	 non_swap_entry() must be false.
*
 * Increments the swap offset, while maintaining all other fields, including
 * swap type, and any swp pte bits. The resulting pte is returned.
 */
static inline pte_t pte_next_swp_offset(pte_t pte)
{
	return pte_move_swp_offset(pte, 1);
}

/**
 * swap_pte_batch - detect a PTE batch for a set of contiguous swap entries
 * @start_ptep: Page table pointer for the first entry.
 * @max_nr: The maximum number of table entries to consider.
 * @pte: Page table entry for the first entry.
 *
 * Detect a batch of contiguous swap entries: consecutive (non-present) PTEs
 * containing swap entries all with consecutive offsets and targeting the same
 * swap type, all with matching swp pte bits.
 *
 * max_nr must be at least one and must be limited by the caller so scanning
 * cannot exceed a single page table.
 *
 * Return: the number of table entries in the batch.
 */
static inline int swap_pte_batch(pte_t *start_ptep, int max_nr, pte_t pte)
{
	pte_t expected_pte = pte_next_swp_offset(pte);
	const pte_t *end_ptep = start_ptep + max_nr;
	swp_entry_t entry = pte_to_swp_entry(pte);
	pte_t *ptep = start_ptep + 1;
	unsigned short cgroup_id;

	VM_WARN_ON(max_nr < 1);
	VM_WARN_ON(!is_swap_pte(pte));
	VM_WARN_ON(non_swap_entry(entry));

	cgroup_id = lookup_swap_cgroup_id(entry);
	while (ptep < end_ptep) {
		pte = ptep_get(ptep);

		if (!pte_same(pte, expected_pte))
			break;
		/* The batch also ends where the swap cgroup id changes. */
		if (lookup_swap_cgroup_id(pte_to_swp_entry(pte)) != cgroup_id)
			break;
		expected_pte = pte_next_swp_offset(expected_pte);
		ptep++;
	}

	return ptep - start_ptep;
}
#endif /* CONFIG_MMU */

void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
						int nr_throttled);

/*
 * Call __acct_reclaim_writeback() for @folio's node, but only when that node's
 * nr_writeback_throttled counter reads non-zero.
 */
static inline void acct_reclaim_writeback(struct folio *folio)
{
	pg_data_t *pgdat = folio_pgdat(folio);
	int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled);

	if (nr_throttled)
		__acct_reclaim_writeback(pgdat, folio, nr_throttled);
}

/* Wake the VMSCAN_THROTTLE_ISOLATED wait queue of @pgdat if it is active. */
static inline void wake_throttle_isolated(pg_data_t *pgdat)
{
	wait_queue_head_t *wqh;

	wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED];
	if (waitqueue_active(wqh))
		wake_up(wqh);
}

vm_fault_t __vmf_anon_prepare(struct vm_fault *vmf);

/*
 * Wrapper around __vmf_anon_prepare() that additionally calls vma_end_read()
 * on the faulting VMA when the result contains VM_FAULT_RETRY.
 */
static inline vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
{
	vm_fault_t ret = __vmf_anon_prepare(vmf);

	if (unlikely(ret & VM_FAULT_RETRY))
		vma_end_read(vmf->vma);
	return ret;
}

vm_fault_t do_swap_page(struct vm_fault *vmf);
void folio_rotate_reclaimable(struct folio *folio);
bool __folio_end_writeback(struct folio *folio);
void deactivate_file_folio(struct folio *folio);
void folio_activate(struct folio *folio);

void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
		   struct vm_area_struct *start_vma, unsigned long floor,
		   unsigned long ceiling, bool mm_wr_locked);
void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);

struct zap_details;
void unmap_page_range(struct mmu_gather *tlb,
			     struct vm_area_struct *vma,
			     unsigned long addr, unsigned long end,
			     struct zap_details *details);
void zap_page_range_single_batched(struct mmu_gather *tlb,
		struct vm_area_struct *vma, unsigned long addr,
		unsigned long size, struct zap_details *details);
int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio,
			   gfp_t gfp);

void page_cache_ra_order(struct readahead_control *, struct file_ra_state *);
void force_page_cache_ra(struct readahead_control *, unsigned long nr);

/*
 * Convenience wrapper: build a readahead_control on the stack from @file,
 * its f_ra state, @mapping and @index, then pass it to force_page_cache_ra().
 */
static inline void force_page_cache_readahead(struct address_space *mapping,
		struct file *file, pgoff_t index, unsigned long nr_to_read)
{
	DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index);
	force_page_cache_ra(&ractl, nr_to_read);
}

unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
		pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
		pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
void filemap_free_folio(struct address_space *mapping, struct folio *folio);
int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
bool truncate_inode_partial_folio(struct folio *folio, loff_t start,
		loff_t end);
long mapping_evict_folio(struct address_space *mapping, struct folio *folio);
unsigned long mapping_try_invalidate(struct address_space *mapping,
		pgoff_t start, pgoff_t end, unsigned long *nr_failed);

/**
 * folio_evictable - Test whether a folio is evictable.
 * @folio: The folio to test.
 *
 * Test whether @folio is evictable -- i.e., should be placed on
 * active/inactive lists vs unevictable list.
 *
 * Reasons folio might not be evictable:
 * 1. folio's mapping marked unevictable
 * 2. One of the pages in the folio is part of an mlocked VMA
 */
static inline bool folio_evictable(struct folio *folio)
{
	bool ret;

	/* Prevent address_space of inode and swap cache from being freed */
	rcu_read_lock();
	ret = !mapping_unevictable(folio_mapping(folio)) &&
			!folio_test_mlocked(folio);
	rcu_read_unlock();
	return ret;
}

/*
 * Turn a non-refcounted page (->_refcount == 0) into refcounted with
 * a count of one.
 */
static inline void set_page_refcounted(struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(page_ref_count(page), page);
	set_page_count(page, 1);
}

/*
 * Return true if a folio needs ->release_folio() calling upon it.
 *
 * That is the case when the folio has private data, or when it has a mapping
 * for which mapping_release_always() is true.
 */
static inline bool folio_needs_release(struct folio *folio)
{
	struct address_space *mapping = folio_mapping(folio);

	return folio_has_private(folio) ||
		(mapping && mapping_release_always(mapping));
}

extern unsigned long highest_memmap_pfn;

/*
 * Maximum number of reclaim retries without progress before the OOM
 * killer is considered the only way forward.
*/
#define MAX_RECLAIM_RETRIES 16

/*
 * in mm/vmscan.c:
 */
bool folio_isolate_lru(struct folio *folio);
void folio_putback_lru(struct folio *folio);
extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason);
#ifdef CONFIG_NUMA
int user_proactive_reclaim(char *buf,
			   struct mem_cgroup *memcg, pg_data_t *pgdat);
#else
/* Stub used when CONFIG_NUMA is not set: does nothing and returns 0. */
static inline int user_proactive_reclaim(char *buf,
			   struct mem_cgroup *memcg, pg_data_t *pgdat)
{
	return 0;
}
#endif

/*
 * in mm/rmap.c:
 */
pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);

/*
 * in mm/page_alloc.c
 */
/* Shift @x left by (PAGE_SHIFT - 10): a page count becomes a size in KiB. */
#define K(x) ((x) << (PAGE_SHIFT-10))

extern char * const zone_names[MAX_NR_ZONES];

/* perform sanity checks on struct pages being allocated or freed */
DECLARE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled);

extern int min_free_kbytes;
extern int defrag_mode;

void setup_per_zone_wmarks(void);
void calculate_min_free_kbytes(void);
int __meminit init_per_zone_wmark_min(void);
void page_alloc_sysctl_init(void);

/*
 * Structure for holding the mostly immutable allocation parameters passed
 * between functions involved in allocations, including the alloc_pages*
 * family of functions.
 *
 * nodemask, migratetype and highest_zoneidx are initialized only once in
 * __alloc_pages() and then never change.
 *
 * zonelist, preferred_zone and highest_zoneidx are set first in
 * __alloc_pages() for the fast path, and might be later changed
 * in __alloc_pages_slowpath(). All other functions pass the whole structure
 * by a const pointer.
 */
struct alloc_context {
	struct zonelist *zonelist;
	nodemask_t *nodemask;
	struct zoneref *preferred_zoneref;
	int migratetype;

	/*
	 * highest_zoneidx represents highest usable zone index of
	 * the allocation request. Due to the nature of the zone,
	 * memory on lower zone than the highest_zoneidx will be
	 * protected by lowmem_reserve[highest_zoneidx].
	 *
	 * highest_zoneidx is also used by reclaim/compaction to limit
	 * the target zone since higher zone than this index cannot be
	 * usable for this allocation request.
	 */
	enum zone_type highest_zoneidx;
	bool spread_dirty_pages;
};

/*
 * This function returns the order of a free page in the buddy system. In
 * general, page_zone(page)->lock must be held by the caller to prevent the
 * page from being allocated in parallel and returning garbage as the order.
 * If a caller does not hold page_zone(page)->lock, it must guarantee that the
 * page cannot be allocated or merged in parallel. Alternatively, it must
 * handle invalid values gracefully, and use buddy_order_unsafe() below.
 */
static inline unsigned int buddy_order(struct page *page)
{
	/* PageBuddy() must be checked by the caller */
	return page_private(page);
}

/*
 * Like buddy_order(), but for callers who cannot afford to hold the zone lock.
 * PageBuddy() should be checked first by the caller to minimize race window,
 * and invalid values must be handled gracefully.
 *
 * READ_ONCE is used so that if the caller assigns the result into a local
 * variable and e.g. tests it for valid range before using, the compiler cannot
 * decide to remove the variable and inline the page_private(page) multiple
 * times, potentially observing different values in the tests and the actual
 * use of the result.
 */
#define buddy_order_unsafe(page)	READ_ONCE(page_private(page))

/*
 * This function checks whether a page is free && is the buddy
 * we can coalesce a page and its buddy if
 * (a) the buddy is not in a hole (check before calling!) &&
 * (b) the buddy is in the buddy system &&
 * (c) a page and its buddy have the same order &&
 * (d) a page and its buddy are in the same zone.
 *
 * For recording whether a page is in the buddy system, we set PageBuddy.
 * Setting, clearing, and testing PageBuddy is serialized by zone->lock.
 *
 * For recording page's order, we use page_private(page).
*/
static inline bool page_is_buddy(struct page *page, struct page *buddy,
				 unsigned int order)
{
	if (!page_is_guard(buddy) && !PageBuddy(buddy))
		return false;

	if (buddy_order(buddy) != order)
		return false;

	/*
	 * zone check is done late to avoid uselessly calculating
	 * zone/node ids for pages that could never merge.
	 */
	if (page_zone_id(page) != page_zone_id(buddy))
		return false;

	VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);

	return true;
}

/*
 * Locate the struct page for both the matching buddy in our
 * pair (buddy1) and the combined O(n+1) page they form (page).
 *
 * 1) Any buddy B1 will have an order O twin B2 which satisfies
 * the following equation:
 *     B2 = B1 ^ (1 << O)
 * For example, if the starting buddy (buddy2) is #8 its order
 * 1 buddy is #10:
 *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
 *
 * 2) Any buddy B will have an order O+1 parent P which
 * satisfies the following equation:
 *     P = B & ~(1 << O)
 *
 * Assumption: *_mem_map is contiguous at least up to MAX_PAGE_ORDER
 *
 * The shift is done on an unsigned long so that the mask has the same width
 * as the pfn it is XORed with, instead of being computed as a signed int.
 */
static inline unsigned long
__find_buddy_pfn(unsigned long page_pfn, unsigned int order)
{
	return page_pfn ^ (1UL << order);
}

/*
 * Find the buddy of @page and validate it.
 * @page: The input page
 * @pfn: The pfn of the page, it saves a call to page_to_pfn() when the
 *       function is used in the performance-critical __free_one_page().
 * @order: The order of the page
 * @buddy_pfn: The output pointer to the buddy pfn, it also saves a call to
 *             page_to_pfn().
 *
 * The found buddy can be a non PageBuddy, out of @page's zone, or its order is
 * not the same as @page. The validation is necessary before using it.
 *
 * Return: the found buddy page or NULL if not found.
*/
static inline struct page *find_buddy_page_pfn(struct page *page,
			unsigned long pfn, unsigned int order, unsigned long *buddy_pfn)
{
	unsigned long __buddy_pfn = __find_buddy_pfn(pfn, order);
	struct page *buddy;

	buddy = page + (__buddy_pfn - pfn);
	/* @buddy_pfn is optional; it is written before the buddy is validated. */
	if (buddy_pfn)
		*buddy_pfn = __buddy_pfn;

	if (page_is_buddy(page, buddy, order))
		return buddy;
	return NULL;
}

extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
				unsigned long end_pfn, struct zone *zone);

/*
 * Fast path for __pageblock_pfn_to_page(): when zone->contiguous is set the
 * page for @start_pfn is returned directly, otherwise the full helper is
 * called.
 */
static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
				unsigned long end_pfn, struct zone *zone)
{
	if (zone->contiguous)
		return pfn_to_page(start_pfn);

	return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
}

void set_zone_contiguous(struct zone *zone);
bool pfn_range_intersects_zones(int nid, unsigned long start_pfn,
			   unsigned long nr_pages);

static inline void clear_zone_contiguous(struct zone *zone)
{
	zone->contiguous = false;
}

extern int __isolate_free_page(struct page *page, unsigned int order);
extern void __putback_isolated_page(struct page *page, unsigned int order,
				    int mt);
extern void memblock_free_pages(struct page *page, unsigned long pfn,
					unsigned int order);
extern void __free_pages_core(struct page *page, unsigned int order,
		enum meminit_context context);

/*
 * This will have no effect, other than possibly generating a warning, if the
 * caller passes in a non-large folio.
 */
static inline void folio_set_order(struct folio *folio, unsigned int order)
{
	if (WARN_ON_ONCE(!order || !folio_test_large(folio)))
		return;
	VM_WARN_ON_ONCE(order > MAX_FOLIO_ORDER);

	/* The order is stored in the low 8 bits of _flags_1. */
	folio->_flags_1 = (folio->_flags_1 & ~0xffUL) | order;
#ifdef NR_PAGES_IN_LARGE_FOLIO
	folio->_nr_pages = 1U << order;
#endif
}

bool __folio_unqueue_deferred_split(struct folio *folio);
static inline bool folio_unqueue_deferred_split(struct folio *folio)
{
	if (folio_order(folio) <= 1 || !folio_test_large_rmappable(folio))
		return false;

	/*
	 * At this point, there is no one trying to add the folio to
	 * deferred_list. If folio is not in deferred_list, it's safe
	 * to check without acquiring the split_queue_lock.
	 */
	if (data_race(list_empty(&folio->_deferred_list)))
		return false;

	return __folio_unqueue_deferred_split(folio);
}

/*
 * Cast @page to a folio and, if it is non-NULL and large, mark it
 * large-rmappable. Returns the folio (NULL if @page was NULL).
 */
static inline struct folio *page_rmappable_folio(struct page *page)
{
	struct folio *folio = (struct folio *)page;

	if (folio && folio_test_large(folio))
		folio_set_large_rmappable(folio);
	return folio;
}

/*
 * Initialise the head-page state of a compound page of the given @order:
 * the order itself, the mapcount/pincount fields and, for order > 1, the
 * deferred split list head.
 */
static inline void prep_compound_head(struct page *page, unsigned int order)
{
	struct folio *folio = (struct folio *)page;

	folio_set_order(folio, order);
	atomic_set(&folio->_large_mapcount, -1);
	if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
		atomic_set(&folio->_nr_pages_mapped, 0);
	if (IS_ENABLED(CONFIG_MM_ID)) {
		folio->_mm_ids = 0;
		folio->_mm_id_mapcount[0] = -1;
		folio->_mm_id_mapcount[1] = -1;
	}
	if (IS_ENABLED(CONFIG_64BIT) || order > 1) {
		atomic_set(&folio->_pincount, 0);
		atomic_set(&folio->_entire_mapcount, -1);
	}
	if (order > 1)
		INIT_LIST_HEAD(&folio->_deferred_list);
}

/*
 * Initialise tail page number @tail_idx of the compound page at @head:
 * mapping is set to TAIL_MAPPING, the compound head is recorded and
 * page_private is cleared.
 */
static inline void prep_compound_tail(struct page *head, int tail_idx)
{
	struct page *p = head + tail_idx;

	p->mapping = TAIL_MAPPING;
	set_compound_head(p, head);
	set_page_private(p, 0);
}

void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags);
extern bool free_pages_prepare(struct page *page, unsigned int order);

extern int user_min_free_kbytes;

struct page *__alloc_frozen_pages_noprof(gfp_t, unsigned int order, int nid,
		nodemask_t *);
#define __alloc_frozen_pages(...) \
	alloc_hooks(__alloc_frozen_pages_noprof(__VA_ARGS__))
void free_frozen_pages(struct page *page, unsigned int order);
void free_unref_folios(struct folio_batch *fbatch);

#ifdef CONFIG_NUMA
struct page *alloc_frozen_pages_noprof(gfp_t, unsigned int order);
#else
/* Without CONFIG_NUMA: allocate on the local node with no nodemask. */
static inline struct page *alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order)
{
	return __alloc_frozen_pages_noprof(gfp, order, numa_node_id(), NULL);
}
#endif

#define alloc_frozen_pages(...) \
	alloc_hooks(alloc_frozen_pages_noprof(__VA_ARGS__))

struct page *alloc_frozen_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order);
#define alloc_frozen_pages_nolock(...) \
	alloc_hooks(alloc_frozen_pages_nolock_noprof(__VA_ARGS__))

extern void zone_pcp_reset(struct zone *zone);
extern void zone_pcp_disable(struct zone *zone);
extern void zone_pcp_enable(struct zone *zone);
extern void zone_pcp_init(struct zone *zone);

extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
			  phys_addr_t min_addr,
			  int nid, bool exact_nid);

void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
		unsigned long, enum meminit_context, struct vmem_altmap *, int,
		bool);

#if defined CONFIG_COMPACTION || defined CONFIG_CMA

/*
 * in mm/compaction.c
 */
/*
 * compact_control is used to track pages being migrated and the free pages
 * they are being migrated to during memory compaction. The free_pfn starts
 * at the end of a zone and migrate_pfn begins at the start. Movable pages
 * are moved to the end of a zone during a compaction run and the run
 * completes when free_pfn <= migrate_pfn
 */
struct compact_control {
	struct list_head freepages[NR_PAGE_ORDERS];	/* List of free pages to migrate to */
	struct list_head migratepages;	/* List of pages being migrated */
	unsigned int nr_freepages;	/* Number of isolated free pages */
	unsigned int nr_migratepages;	/* Number of pages to migrate */
	unsigned long free_pfn;		/* isolate_freepages search base */
	/*
	 * Acts as an in/out parameter to page isolation for migration.
	 * isolate_migratepages uses it as a search base.
	 * isolate_migratepages_block will update the value to the next pfn
	 * after the last isolated one.
	 */
	unsigned long migrate_pfn;
	unsigned long fast_start_pfn;	/* a pfn to start linear scan from */
	struct zone *zone;
	unsigned long total_migrate_scanned;
	unsigned long total_free_scanned;
	unsigned short fast_search_fail;/* failures to use free list searches */
	short search_order;		/* order to start a fast search at */
	const gfp_t gfp_mask;		/* gfp mask of a direct compactor */
	int order;			/* order a direct compactor needs */
	int migratetype;		/* migratetype of direct compactor */
	const unsigned int alloc_flags;	/* alloc flags of a direct compactor */
	const int highest_zoneidx;	/* zone index of a direct compactor */
	enum migrate_mode mode;		/* Async or sync migration mode */
	bool ignore_skip_hint;		/* Scan blocks even if marked skip */
	bool no_set_skip_hint;		/* Don't mark blocks for skipping */
	bool ignore_block_suitable;	/* Scan blocks considered unsuitable */
	bool direct_compaction;		/* False from kcompactd or /proc/... */
	bool proactive_compaction;	/* kcompactd proactive compaction */
	bool whole_zone;		/* Whole zone should/has been scanned */
	bool contended;			/* Signal lock contention */
	bool finish_pageblock;		/* Scan the remainder of a pageblock. Used
					 * when there are potentially transient
					 * isolation or migration failures to
					 * ensure forward progress.
					 */
	bool alloc_contig;		/* alloc_contig_range allocation */
};

/*
 * Used in direct compaction when a page should be taken from the freelists
 * immediately when one is created during the free path.
 */
struct capture_control {
	struct compact_control *cc;
	struct page *page;
};

unsigned long
isolate_freepages_range(struct compact_control *cc,
			unsigned long start_pfn, unsigned long end_pfn);
int
isolate_migratepages_range(struct compact_control *cc,
			   unsigned long low_pfn, unsigned long end_pfn);

/* Free whole pageblock and set its migration type to MIGRATE_CMA.
*/
void init_cma_reserved_pageblock(struct page *page);

#endif /* CONFIG_COMPACTION || CONFIG_CMA */

struct cma;

#ifdef CONFIG_CMA
void *cma_reserve_early(struct cma *cma, unsigned long size);
void init_cma_pageblock(struct page *page);
#else
/* Stubs used when CONFIG_CMA is not set. */
static inline void *cma_reserve_early(struct cma *cma, unsigned long size)
{
	return NULL;
}
static inline void init_cma_pageblock(struct page *page)
{
}
#endif


int find_suitable_fallback(struct free_area *area, unsigned int order,
			   int migratetype, bool claimable);

/* Return true if @area's free list for @migratetype has no entries. */
static inline bool free_area_empty(struct free_area *area, int migratetype)
{
	return list_empty(&area->free_list[migratetype]);
}

/* mm/util.c */
struct anon_vma *folio_anon_vma(const struct folio *folio);

#ifdef CONFIG_MMU
void unmap_mapping_folio(struct folio *folio);
extern long populate_vma_page_range(struct vm_area_struct *vma,
		unsigned long start, unsigned long end, int *locked);
extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
		unsigned long end, bool write, int *locked);
bool mlock_future_ok(const struct mm_struct *mm, vm_flags_t vm_flags,
		unsigned long bytes);

/*
 * NOTE: This function can't tell whether the folio is "fully mapped" in the
 * range.
 * "fully mapped" means all the pages of folio is associated with the page
 * table of range while this function just check whether the folio range is
 * within the range [start, end). Function caller needs to do page table
 * check if it cares about the page table association.
 *
 * Typical usage (like mlock or madvise) is:
 * Caller knows at least 1 page of folio is associated with page table of VMA
 * and the range [start, end) is intersect with the VMA range. Caller wants
 * to know whether the folio is fully associated with the range. It calls
 * this function to check whether the folio is in the range first. Then checks
 * the page table to know whether the folio is fully mapped to the range.
*/ static inline bool folio_within_range(struct folio *folio, struct vm_area_struct *vma, unsigned long start, unsigned long end) { pgoff_t pgoff, addr; unsigned long vma_pglen = vma_pages(vma); VM_WARN_ON_FOLIO(folio_test_ksm(folio), folio); if (start > end) return false; if (start < vma->vm_start) start = vma->vm_start; if (end > vma->vm_end) end = vma->vm_end; pgoff = folio_pgoff(folio); /* if folio start address is not in vma range */ if (!in_range(pgoff, vma->vm_pgoff, vma_pglen)) return false; addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); return !(addr < start || end - addr < folio_size(folio)); } static inline bool folio_within_vma(struct folio *folio, struct vm_area_struct *vma) { return folio_within_range(folio, vma, vma->vm_start, vma->vm_end); } /* * mlock_vma_folio() and munlock_vma_folio(): * should be called with vma's mmap_lock held for read or write, * under page table lock for the pte/pmd being added or removed. * * mlock is usually called at the end of folio_add_*_rmap_*(), munlock at * the end of folio_remove_rmap_*(); but new anon folios are managed by * folio_add_lru_vma() calling mlock_new_folio(). */ void mlock_folio(struct folio *folio); static inline void mlock_vma_folio(struct folio *folio, struct vm_area_struct *vma) { /* * The VM_SPECIAL check here serves two purposes. * 1) VM_IO check prevents migration from double-counting during mlock. * 2) Although mmap_region() and mlock_fixup() take care that VM_LOCKED * is never left set on a VM_SPECIAL vma, there is an interval while * file->f_op->mmap() is using vm_insert_page(s), when VM_LOCKED may * still be set while VM_SPECIAL bits are added: so ignore it then. */ if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED)) mlock_folio(folio); } void munlock_folio(struct folio *folio); static inline void munlock_vma_folio(struct folio *folio, struct vm_area_struct *vma) { /* * munlock if the function is called. 
Ideally, we should only
	 * do munlock if any page of folio is unmapped from VMA and
	 * cause folio not fully mapped to VMA.
	 *
	 * But it's not easy to confirm that's the situation. So we
	 * always munlock the folio and page reclaim will correct it
	 * if it's wrong.
	 */
	if (unlikely(vma->vm_flags & VM_LOCKED))
		munlock_folio(folio);
}

void mlock_new_folio(struct folio *folio);
bool need_mlock_drain(int cpu);
void mlock_drain_local(void);
void mlock_drain_remote(int cpu);

extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);

/**
 * vma_address - Find the virtual address a page range is mapped at
 * @vma: The vma which maps this object.
 * @pgoff: The page offset within its object.
 * @nr_pages: The number of pages to consider.
 *
 * If any page in this range is mapped by this VMA, return the first address
 * where any of these pages appear.  Otherwise, return -EFAULT.
 */
static inline unsigned long vma_address(const struct vm_area_struct *vma,
		pgoff_t pgoff, unsigned long nr_pages)
{
	unsigned long address;

	if (pgoff >= vma->vm_pgoff) {
		/* Range starts at or after the vma's first page offset. */
		address = vma->vm_start +
			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
		/* Check for address beyond vma (or wrapped through 0?) */
		if (address < vma->vm_start || address >= vma->vm_end)
			address = -EFAULT;
	} else if (pgoff + nr_pages - 1 >= vma->vm_pgoff) {
		/*
		 * Range starts before the vma but its last page reaches
		 * vm_pgoff or beyond, so report the vma's start address.
		 */
		/* Test above avoids possibility of wrap to 0 on 32-bit */
		address = vma->vm_start;
	} else {
		/* The whole range lies before the vma's first page offset. */
		address = -EFAULT;
	}
	return address;
}

/*
 * Then at what user virtual address will none of the range be found in vma?
 * Assumes that vma_address() already returned a good starting address.
*/ static inline unsigned long vma_address_end(struct page_vma_mapped_walk *pvmw) { struct vm_area_struct *vma = pvmw->vma; pgoff_t pgoff; unsigned long address; /* Common case, plus ->pgoff is invalid for KSM */ if (pvmw->nr_pages == 1) return pvmw->address + PAGE_SIZE; pgoff = pvmw->pgoff + pvmw->nr_pages; address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); /* Check for address beyond vma (or wrapped through 0?) */ if (address < vma->vm_start || address > vma->vm_end) address = vma->vm_end; return address; } static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, struct file *fpin) { int flags = vmf->flags; if (fpin) return fpin; /* * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or * anything, so we only pin the file and drop the mmap_lock if only * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt. */ if (fault_flag_allow_retry_first(flags) && !(flags & FAULT_FLAG_RETRY_NOWAIT)) { fpin = get_file(vmf->vma->vm_file); release_fault_lock(vmf); } return fpin; } #else /* !CONFIG_MMU */ static inline void unmap_mapping_folio(struct folio *folio) { } static inline void mlock_new_folio(struct folio *folio) { } static inline bool need_mlock_drain(int cpu) { return false; } static inline void mlock_drain_local(void) { } static inline void mlock_drain_remote(int cpu) { } static inline void vunmap_range_noflush(unsigned long start, unsigned long end) { } #endif /* !CONFIG_MMU */ /* Memory initialisation debug and verification */ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT DECLARE_STATIC_KEY_TRUE(deferred_pages); bool __init deferred_grow_zone(struct zone *zone, unsigned int order); #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ void init_deferred_page(unsigned long pfn, int nid); enum mminit_level { MMINIT_WARNING, MMINIT_VERIFY, MMINIT_TRACE }; #ifdef CONFIG_DEBUG_MEMORY_INIT extern int mminit_loglevel; #define mminit_dprintk(level, prefix, fmt, arg...) 
\ do { \ if (level < mminit_loglevel) { \ if (level <= MMINIT_WARNING) \ pr_warn("mminit::" prefix " " fmt, ##arg); \ else \ printk(KERN_DEBUG "mminit::" prefix " " fmt, ##arg); \ } \ } while (0) extern void mminit_verify_pageflags_layout(void); extern void mminit_verify_zonelist(void); #else static inline void mminit_dprintk(enum mminit_level level, const char *prefix, const char *fmt, ...) { } static inline void mminit_verify_pageflags_layout(void) { } static inline void mminit_verify_zonelist(void) { } #endif /* CONFIG_DEBUG_MEMORY_INIT */ #define NODE_RECLAIM_NOSCAN -2 #define NODE_RECLAIM_FULL -1 #define NODE_RECLAIM_SOME 0 #define NODE_RECLAIM_SUCCESS 1 #ifdef CONFIG_NUMA extern int node_reclaim_mode; extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); extern int find_next_best_node(int node, nodemask_t *used_node_mask); #else #define node_reclaim_mode 0 static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, unsigned int order) { return NODE_RECLAIM_NOSCAN; } static inline int find_next_best_node(int node, nodemask_t *used_node_mask) { return NUMA_NO_NODE; } #endif static inline bool node_reclaim_enabled(void) { /* Is any node_reclaim_mode bit set? 
*/ return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP); } /* * mm/memory-failure.c */ #ifdef CONFIG_MEMORY_FAILURE int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill); void shake_folio(struct folio *folio); typedef int hwpoison_filter_func_t(struct page *p); void hwpoison_filter_register(hwpoison_filter_func_t *filter); void hwpoison_filter_unregister(void); #define MAGIC_HWPOISON 0x48575053U /* HWPS */ void SetPageHWPoisonTakenOff(struct page *page); void ClearPageHWPoisonTakenOff(struct page *page); bool take_page_off_buddy(struct page *page); bool put_page_back_buddy(struct page *page); struct task_struct *task_early_kill(struct task_struct *tsk, int force_early); void add_to_kill_ksm(struct task_struct *tsk, const struct page *p, struct vm_area_struct *vma, struct list_head *to_kill, unsigned long ksm_addr); unsigned long page_mapped_in_vma(const struct page *page, struct vm_area_struct *vma); #else static inline int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill) { return -EBUSY; } #endif extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern void set_pageblock_order(void); unsigned long reclaim_pages(struct list_head *folio_list); unsigned int reclaim_clean_pages_from_list(struct zone *zone, struct list_head *folio_list); /* The ALLOC_WMARK bits are used as an index to zone->watermark */ #define ALLOC_WMARK_MIN WMARK_MIN #define ALLOC_WMARK_LOW WMARK_LOW #define ALLOC_WMARK_HIGH WMARK_HIGH #define ALLOC_NO_WATERMARKS 0x04 /* don't check watermarks at all */ /* Mask to get the watermark bits */ #define ALLOC_WMARK_MASK (ALLOC_NO_WATERMARKS-1) /* * Only MMU archs have async oom victim reclaim - aka oom_reaper so we * cannot assume a reduced access to memory reserves is sufficient for * !MMU */ #ifdef CONFIG_MMU #define ALLOC_OOM 0x08 #else #define ALLOC_OOM ALLOC_NO_WATERMARKS #endif #define 
ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access * to 25% of the min watermark or * 62.5% if __GFP_HIGH is set. */ #define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50% * of the min watermark. */ #define ALLOC_CPUSET 0x40 /* check for correct cpuset */ #define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ #ifdef CONFIG_ZONE_DMA32 #define ALLOC_NOFRAGMENT 0x100 /* avoid mixing pageblock types */ #else #define ALLOC_NOFRAGMENT 0x0 #endif #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ #define ALLOC_TRYLOCK 0x400 /* Only use spin_trylock in allocation path */ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ /* Flags that allow allocations below the min watermark. */ #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) enum ttu_flags; struct tlbflush_unmap_batch; /* * only for MM internal work items which do not depend on * any allocations or locks which might depend on allocations */ extern struct workqueue_struct *mm_percpu_wq; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH void try_to_unmap_flush(void); void try_to_unmap_flush_dirty(void); void flush_tlb_batched_pending(struct mm_struct *mm); #else static inline void try_to_unmap_flush(void) { } static inline void try_to_unmap_flush_dirty(void) { } static inline void flush_tlb_batched_pending(struct mm_struct *mm) { } #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ extern const struct trace_print_flags pageflag_names[]; extern const struct trace_print_flags vmaflag_names[]; extern const struct trace_print_flags gfpflag_names[]; void setup_zone_pageset(struct zone *zone); struct migration_target_control { int nid; /* preferred node id */ nodemask_t *nmask; gfp_t gfp_mask; enum migrate_reason reason; }; /* * mm/filemap.c */ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe, struct folio *folio, loff_t fpos, size_t size); /* * mm/vmalloc.c */ #ifdef CONFIG_MMU void __init 
vmalloc_init(void); int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift); unsigned int get_vm_area_page_order(struct vm_struct *vm); #else static inline void vmalloc_init(void) { } static inline int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift) { return -EINVAL; } #endif int __must_check __vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift); void vunmap_range_noflush(unsigned long start, unsigned long end); void __vunmap_range_noflush(unsigned long start, unsigned long end); int numa_migrate_check(struct folio *folio, struct vm_fault *vmf, unsigned long addr, int *flags, bool writable, int *last_cpupid); void free_zone_device_folio(struct folio *folio); int migrate_device_coherent_folio(struct folio *folio); struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long align, unsigned long shift, unsigned long vm_flags, unsigned long start, unsigned long end, int node, gfp_t gfp_mask, const void *caller); /* * mm/gup.c */ int __must_check try_grab_folio(struct folio *folio, int refs, unsigned int flags); /* * mm/huge_memory.c */ void touch_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud, bool write); void touch_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, bool write); /* * Parses a string with mem suffixes into its order. Useful to parse kernel * parameters. 
*/
static inline int get_order_from_str(const char *size_str,
				     unsigned long valid_orders)
{
	unsigned long size;
	char *endptr;
	int order;

	/*
	 * NOTE(review): endptr is filled in by memparse() but never examined
	 * afterwards, so trailing characters are presumably ignored -- confirm.
	 */
	size = memparse(size_str, &endptr);

	/* Sizes that are not a power of two are rejected. */
	if (!is_power_of_2(size))
		return -EINVAL;
	order = get_order(size);
	/* Reject an order whose bit is not set in the @valid_orders mask. */
	if (BIT(order) & ~valid_orders)
		return -EINVAL;

	return order;
}

/* Flag bits 16..22; collected into INTERNAL_GUP_FLAGS below. */
enum {
	/* mark page accessed */
	FOLL_TOUCH = 1 << 16,
	/* a retry, previous pass started an IO */
	FOLL_TRIED = 1 << 17,
	/* we are working on non-current tsk/mm */
	FOLL_REMOTE = 1 << 18,
	/* pages must be released via unpin_user_page */
	FOLL_PIN = 1 << 19,
	/* gup_fast: prevent fall-back to slow gup */
	FOLL_FAST_ONLY = 1 << 20,
	/* allow unlocking the mmap lock */
	FOLL_UNLOCKABLE = 1 << 21,
	/* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */
	FOLL_MADV_POPULATE = 1 << 22,
};

/* Mask of every flag defined in the enum above. */
#define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \
			    FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \
			    FOLL_MADV_POPULATE)

/*
 * Indicates for which pages that are write-protected in the page table,
 * whether GUP has to trigger unsharing via FAULT_FLAG_UNSHARE such that the
 * GUP pin will remain consistent with the pages mapped into the page tables
 * of the MM.
 *
 * Temporary unmapping of PageAnonExclusive() pages or clearing of
 * PageAnonExclusive() has to protect against concurrent GUP:
 * * Ordinary GUP: Using the PT lock
 * * GUP-fast and fork(): mm->write_protect_seq
 * * GUP-fast and KSM or temporary unmapping (swap, migration): see
 *    folio_try_share_anon_rmap_*()
 *
 * Must be called with the (sub)page that's actually referenced via the
 * page table entry, which might not necessarily be the head page for a
 * PTE-mapped THP.
 *
 * If the vma is NULL, we're coming from the GUP-fast path and might have
 * to fallback to the slow path just to lookup the vma.
*/
static inline bool gup_must_unshare(struct vm_area_struct *vma,
				    unsigned int flags, struct page *page)
{
	/*
	 * FOLL_WRITE is implicitly handled correctly as the page table entry
	 * has to be writable -- and if it references (part of) an anonymous
	 * folio, that part is required to be marked exclusive.
	 */
	/* Only FOLL_PIN without FOLL_WRITE proceeds past this point. */
	if ((flags & (FOLL_WRITE | FOLL_PIN)) != FOLL_PIN)
		return false;
	/*
	 * Note: PageAnon(page) is stable until the page is actually getting
	 * freed.
	 */
	if (!PageAnon(page)) {
		/*
		 * We only care about R/O long-term pinning: R/O short-term
		 * pinning does not have the semantics to observe successive
		 * changes through the process page tables.
		 */
		if (!(flags & FOLL_LONGTERM))
			return false;

		/* We really need the vma ... */
		if (!vma)
			return true;

		/*
		 * ... because we only care about writable private ("COW")
		 * mappings where we have to break COW early.
		 */
		return is_cow_mapping(vma->vm_flags);
	}

	/* Paired with a memory barrier in folio_try_share_anon_rmap_*(). */
	if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
		smp_rmb();

	/*
	 * Note that KSM pages cannot be exclusive, and consequently,
	 * cannot get pinned.
	 */
	/* Anonymous page: unsharing is needed unless it is marked exclusive. */
	return !PageAnonExclusive(page);
}

extern bool mirrored_kernelcore;
bool memblock_has_mirror(void);
void memblock_free_all(void);

/* Store @start, @end and @pgoff into the vma's range fields. */
static __always_inline void vma_set_range(struct vm_area_struct *vma,
					  unsigned long start, unsigned long end,
					  pgoff_t pgoff)
{
	vma->vm_start = start;
	vma->vm_end = end;
	vma->vm_pgoff = pgoff;
}

static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
{
	/*
	 * NOTE: we must check this before VM_SOFTDIRTY on soft-dirty
	 * enablements, because when without soft-dirty being compiled in,
	 * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY)
	 * will be constantly true.
	 */
	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
		return false;

	/*
	 * Soft-dirty is kind of special: its tracking is enabled when the
	 * vma flags not set.
*/ return !(vma->vm_flags & VM_SOFTDIRTY); } static inline bool pmd_needs_soft_dirty_wp(struct vm_area_struct *vma, pmd_t pmd) { return vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd); } static inline bool pte_needs_soft_dirty_wp(struct vm_area_struct *vma, pte_t pte) { return vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte); } void __meminit __init_single_page(struct page *page, unsigned long pfn, unsigned long zone, int nid); void __meminit __init_page_from_nid(unsigned long pfn, int nid); /* shrinker related functions */ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority); #ifdef CONFIG_SHRINKER_DEBUG static inline __printf(2, 0) int shrinker_debugfs_name_alloc( struct shrinker *shrinker, const char *fmt, va_list ap) { shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); return shrinker->name ? 0 : -ENOMEM; } static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) { kfree_const(shrinker->name); shrinker->name = NULL; } extern int shrinker_debugfs_add(struct shrinker *shrinker); extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id); extern void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id); #else /* CONFIG_SHRINKER_DEBUG */ static inline int shrinker_debugfs_add(struct shrinker *shrinker) { return 0; } static inline int shrinker_debugfs_name_alloc(struct shrinker *shrinker, const char *fmt, va_list ap) { return 0; } static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) { } static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id) { *debugfs_id = -1; return NULL; } static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id) { } #endif /* CONFIG_SHRINKER_DEBUG */ /* Only track the nodes of mappings with shadow entries */ void workingset_update_node(struct xa_node *node); extern struct list_lru shadow_nodes; #define mapping_set_update(xas, mapping) do { \ 
if (!dax_mapping(mapping) && !shmem_mapping(mapping)) { \ xas_set_update(xas, workingset_update_node); \ xas_set_lru(xas, &shadow_nodes); \ } \ } while (0) /* mremap.c */ unsigned long move_page_tables(struct pagetable_move_control *pmc); #ifdef CONFIG_UNACCEPTED_MEMORY void accept_page(struct page *page); #else /* CONFIG_UNACCEPTED_MEMORY */ static inline void accept_page(struct page *page) { } #endif /* CONFIG_UNACCEPTED_MEMORY */ /* pagewalk.c */ int walk_page_range_mm(struct mm_struct *mm, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, void *private); int walk_page_range_debug(struct mm_struct *mm, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, pgd_t *pgd, void *private); /* pt_reclaim.c */ bool try_get_and_clear_pmd(struct mm_struct *mm, pmd_t *pmd, pmd_t *pmdval); void free_pte(struct mm_struct *mm, unsigned long addr, struct mmu_gather *tlb, pmd_t pmdval); void try_to_free_pte(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, struct mmu_gather *tlb); #ifdef CONFIG_PT_RECLAIM bool reclaim_pt_is_enabled(unsigned long start, unsigned long end, struct zap_details *details); #else static inline bool reclaim_pt_is_enabled(unsigned long start, unsigned long end, struct zap_details *details) { return false; } #endif /* CONFIG_PT_RECLAIM */ void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm); int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm); #endif /* __MM_INTERNAL_H */ |
| 5 5 5 5 5 4 5 5 5 6 6 6 5 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 
519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 | // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/hfsplus/super.c * * Copyright (C) 2001 * Brad Boyer (flar@allandria.com) * (C) 2003 Ardis Technologies <roman@ardistech.com> * */ #include <linux/module.h> #include <linux/init.h> #include <linux/pagemap.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/slab.h> #include <linux/vfs.h> #include <linux/nls.h> static struct inode *hfsplus_alloc_inode(struct super_block *sb); static void hfsplus_free_inode(struct inode *inode); #include "hfsplus_fs.h" #include "xattr.h" static int hfsplus_system_read_inode(struct inode *inode) { struct hfsplus_vh *vhdr = HFSPLUS_SB(inode->i_sb)->s_vhdr; switch (inode->i_ino) { case HFSPLUS_EXT_CNID: hfsplus_inode_read_fork(inode, &vhdr->ext_file); inode->i_mapping->a_ops = &hfsplus_btree_aops; break; case HFSPLUS_CAT_CNID: hfsplus_inode_read_fork(inode, &vhdr->cat_file); inode->i_mapping->a_ops = &hfsplus_btree_aops; break; case HFSPLUS_ALLOC_CNID: hfsplus_inode_read_fork(inode, &vhdr->alloc_file); 
inode->i_mapping->a_ops = &hfsplus_aops;
		break;
	case HFSPLUS_START_CNID:
		/* Only the fork is read; no address_space ops are assigned here. */
		hfsplus_inode_read_fork(inode, &vhdr->start_file);
		break;
	case HFSPLUS_ATTR_CNID:
		hfsplus_inode_read_fork(inode, &vhdr->attr_file);
		inode->i_mapping->a_ops = &hfsplus_btree_aops;
		break;
	default:
		/* Any other inode number is not handled by this switch. */
		return -EIO;
	}

	return 0;
}

/*
 * hfsplus_iget - look up inode @ino on @sb, reading it in when it is new.
 *
 * Returns the inode from iget_locked() as-is when it is not I_NEW.
 * Otherwise resets the HFS+ private fields and fills the inode either
 * from the catalog tree or via hfsplus_system_read_inode().
 *
 * Returns ERR_PTR(-ENOMEM) when iget_locked() returns NULL, or
 * ERR_PTR(err) with the error from the read step (after iget_failed()).
 */
struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
{
	struct hfs_find_data fd;
	struct inode *inode;
	int err;

	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	/* Not new: hand it back without touching any field. */
	if (!(inode->i_state & I_NEW))
		return inode;

	/* New inode: reset every HFS+ private field before reading it in. */
	atomic_set(&HFSPLUS_I(inode)->opencnt, 0);
	HFSPLUS_I(inode)->first_blocks = 0;
	HFSPLUS_I(inode)->clump_blocks = 0;
	HFSPLUS_I(inode)->alloc_blocks = 0;
	HFSPLUS_I(inode)->cached_start = U32_MAX;
	HFSPLUS_I(inode)->cached_blocks = 0;
	memset(HFSPLUS_I(inode)->first_extents, 0, sizeof(hfsplus_extent_rec));
	memset(HFSPLUS_I(inode)->cached_extents, 0, sizeof(hfsplus_extent_rec));
	HFSPLUS_I(inode)->extent_state = 0;
	mutex_init(&HFSPLUS_I(inode)->extents_lock);
	HFSPLUS_I(inode)->rsrc_inode = NULL;
	HFSPLUS_I(inode)->create_date = 0;
	HFSPLUS_I(inode)->linkid = 0;
	HFSPLUS_I(inode)->flags = 0;
	HFSPLUS_I(inode)->fs_blocks = 0;
	HFSPLUS_I(inode)->userflags = 0;
	HFSPLUS_I(inode)->subfolders = 0;
	INIT_LIST_HEAD(&HFSPLUS_I(inode)->open_dir_list);
	spin_lock_init(&HFSPLUS_I(inode)->open_dir_lock);
	HFSPLUS_I(inode)->phys_size = 0;

	if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID ||
	    inode->i_ino == HFSPLUS_ROOT_CNID) {
		/* Root and user CNIDs are looked up in the catalog tree. */
		err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
		if (!err) {
			err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
			if (!err)
				err = hfsplus_cat_read_inode(inode, &fd);
			/* Runs whenever hfs_find_init() succeeded. */
			hfs_find_exit(&fd);
		}
	} else {
		/* Every other inode number goes through the system-file path. */
		err = hfsplus_system_read_inode(inode);
	}

	if (err) {
		iget_failed(inode);
		return ERR_PTR(err);
	}

	unlock_new_inode(inode);
	return inode;
}

static int hfsplus_system_write_inode(struct inode *inode)
{
	struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
	struct hfsplus_vh *vhdr = sbi->s_vhdr;
	struct hfsplus_fork_raw *fork;
	struct hfs_btree *tree =
NULL;

	/*
	 * Select the volume-header fork for this inode number, plus the
	 * b-tree for the three cases that have one (tree stays NULL otherwise).
	 */
	switch (inode->i_ino) {
	case HFSPLUS_EXT_CNID:
		fork = &vhdr->ext_file;
		tree = sbi->ext_tree;
		break;
	case HFSPLUS_CAT_CNID:
		fork = &vhdr->cat_file;
		tree = sbi->cat_tree;
		break;
	case HFSPLUS_ALLOC_CNID:
		fork = &vhdr->alloc_file;
		break;
	case HFSPLUS_START_CNID:
		fork = &vhdr->start_file;
		break;
	case HFSPLUS_ATTR_CNID:
		fork = &vhdr->attr_file;
		tree = sbi->attr_tree;
		break;
	default:
		return -EIO;
	}

	/*
	 * When the size stored in the fork differs from the inode size, set
	 * HFSPLUS_SB_WRITEBACKUP and mark the MDB dirty.
	 */
	if (fork->total_size != cpu_to_be64(inode->i_size)) {
		set_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags);
		hfsplus_mark_mdb_dirty(inode->i_sb);
	}
	hfsplus_inode_write_fork(inode, fork);
	if (tree) {
		int err = hfs_btree_write(tree);

		if (err) {
			pr_err("b-tree write err: %d, ino %lu\n",
			       err, inode->i_ino);
			return err;
		}
	}
	return 0;
}

/*
 * Write the inode's extent first, then dispatch on the inode number:
 * root and user CNIDs go to hfsplus_cat_write_inode(), everything else
 * to hfsplus_system_write_inode().  @wbc is not used in this function.
 */
static int hfsplus_write_inode(struct inode *inode,
		struct writeback_control *wbc)
{
	int err;

	hfs_dbg("ino %lu\n", inode->i_ino);

	err = hfsplus_ext_write_extent(inode);
	if (err)
		return err;

	if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID ||
	    inode->i_ino == HFSPLUS_ROOT_CNID)
		return hfsplus_cat_write_inode(inode);
	else
		return hfsplus_system_write_inode(inode);
}

/*
 * Drop the inode's page cache and clear the inode.  When
 * HFSPLUS_IS_RSRC(inode) holds, also reset the rsrc_inode pointer of the
 * inode it is linked to and iput() that linked inode.
 */
static void hfsplus_evict_inode(struct inode *inode)
{
	hfs_dbg("ino %lu\n", inode->i_ino);
	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);
	if (HFSPLUS_IS_RSRC(inode)) {
		HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
		iput(HFSPLUS_I(inode)->rsrc_inode);
	}
}

static int hfsplus_sync_fs(struct super_block *sb, int wait)
{
	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
	struct hfsplus_vh *vhdr = sbi->s_vhdr;
	int write_backup = 0;
	int error, error2;

	/* A non-waiting call does nothing and reports success. */
	if (!wait)
		return 0;

	hfs_dbg("starting...\n");

	/*
	 * Explicitly write out the special metadata inodes.
	 *
	 * While these special inodes are marked as hashed and written
	 * out periodically by the flusher threads we redirty them
	 * during writeout of normal inodes, and thus the live lock
	 * prevents us from getting the latest state to disk.
*/ error = filemap_write_and_wait(sbi->cat_tree->inode->i_mapping); error2 = filemap_write_and_wait(sbi->ext_tree->inode->i_mapping); if (!error) error = error2; if (sbi->attr_tree) { error2 = filemap_write_and_wait(sbi->attr_tree->inode->i_mapping); if (!error) error = error2; } error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping); if (!error) error = error2; mutex_lock(&sbi->vh_mutex); mutex_lock(&sbi->alloc_mutex); vhdr->free_blocks = cpu_to_be32(sbi->free_blocks); vhdr->next_cnid = cpu_to_be32(sbi->next_cnid); vhdr->folder_count = cpu_to_be32(sbi->folder_count); vhdr->file_count = cpu_to_be32(sbi->file_count); hfs_dbg("free_blocks %u, next_cnid %u, folder_count %u, file_count %u\n", sbi->free_blocks, sbi->next_cnid, sbi->folder_count, sbi->file_count); if (test_and_clear_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags)) { memcpy(sbi->s_backup_vhdr, sbi->s_vhdr, sizeof(*sbi->s_vhdr)); write_backup = 1; } error2 = hfsplus_submit_bio(sb, sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, sbi->s_vhdr_buf, NULL, REQ_OP_WRITE); if (!error) error = error2; if (!write_backup) goto out; error2 = hfsplus_submit_bio(sb, sbi->part_start + sbi->sect_count - 2, sbi->s_backup_vhdr_buf, NULL, REQ_OP_WRITE); if (!error) error2 = error; out: mutex_unlock(&sbi->alloc_mutex); mutex_unlock(&sbi->vh_mutex); if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags)) blkdev_issue_flush(sb->s_bdev); hfs_dbg("finished: err %d\n", error); return error; } static void delayed_sync_fs(struct work_struct *work) { int err; struct hfsplus_sb_info *sbi; sbi = container_of(work, struct hfsplus_sb_info, sync_work.work); spin_lock(&sbi->work_lock); sbi->work_queued = 0; spin_unlock(&sbi->work_lock); err = hfsplus_sync_fs(sbi->alloc_file->i_sb, 1); if (err) pr_err("delayed sync fs err %d\n", err); } void hfsplus_mark_mdb_dirty(struct super_block *sb) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); unsigned long delay; if (sb_rdonly(sb)) return; spin_lock(&sbi->work_lock); if (!sbi->work_queued) { delay = 
msecs_to_jiffies(dirty_writeback_interval * 10); queue_delayed_work(system_long_wq, &sbi->sync_work, delay); sbi->work_queued = 1; } spin_unlock(&sbi->work_lock); } static void delayed_free(struct rcu_head *p) { struct hfsplus_sb_info *sbi = container_of(p, struct hfsplus_sb_info, rcu); unload_nls(sbi->nls); kfree(sbi); } static void hfsplus_put_super(struct super_block *sb) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); hfs_dbg("starting...\n"); cancel_delayed_work_sync(&sbi->sync_work); if (!sb_rdonly(sb) && sbi->s_vhdr) { struct hfsplus_vh *vhdr = sbi->s_vhdr; vhdr->modify_date = hfsp_now2mt(); vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); hfsplus_sync_fs(sb, 1); } iput(sbi->alloc_file); iput(sbi->hidden_dir); hfs_btree_close(sbi->attr_tree); hfs_btree_close(sbi->cat_tree); hfs_btree_close(sbi->ext_tree); kfree(sbi->s_vhdr_buf); kfree(sbi->s_backup_vhdr_buf); call_rcu(&sbi->rcu, delayed_free); hfs_dbg("finished\n"); } static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); u64 id = huge_encode_dev(sb->s_bdev->bd_dev); buf->f_type = HFSPLUS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = sbi->total_blocks << sbi->fs_shift; buf->f_bfree = sbi->free_blocks << sbi->fs_shift; buf->f_bavail = buf->f_bfree; buf->f_files = 0xFFFFFFFF; buf->f_ffree = 0xFFFFFFFF - sbi->next_cnid; buf->f_fsid = u64_to_fsid(id); buf->f_namelen = HFSPLUS_MAX_STRLEN; return 0; } static int hfsplus_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; sync_filesystem(sb); if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb)) return 0; if (!(fc->sb_flags & SB_RDONLY)) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct hfsplus_vh *vhdr = sbi->s_vhdr; if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { pr_warn("filesystem was not cleanly unmounted, running fsck.hfsplus is recommended. 
leaving read-only.\n"); sb->s_flags |= SB_RDONLY; fc->sb_flags |= SB_RDONLY; } else if (test_bit(HFSPLUS_SB_FORCE, &sbi->flags)) { /* nothing */ } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { pr_warn("filesystem is marked locked, leaving read-only.\n"); sb->s_flags |= SB_RDONLY; fc->sb_flags |= SB_RDONLY; } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { pr_warn("filesystem is marked journaled, leaving read-only.\n"); sb->s_flags |= SB_RDONLY; fc->sb_flags |= SB_RDONLY; } } return 0; } static const struct super_operations hfsplus_sops = { .alloc_inode = hfsplus_alloc_inode, .free_inode = hfsplus_free_inode, .write_inode = hfsplus_write_inode, .evict_inode = hfsplus_evict_inode, .put_super = hfsplus_put_super, .sync_fs = hfsplus_sync_fs, .statfs = hfsplus_statfs, .show_options = hfsplus_show_options, }; static int hfsplus_fill_super(struct super_block *sb, struct fs_context *fc) { struct hfsplus_vh *vhdr; struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); hfsplus_cat_entry entry; struct hfs_find_data fd; struct inode *root, *inode; struct qstr str; struct nls_table *nls; u64 last_fs_block, last_fs_page; int silent = fc->sb_flags & SB_SILENT; int err; mutex_init(&sbi->alloc_mutex); mutex_init(&sbi->vh_mutex); spin_lock_init(&sbi->work_lock); INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); err = -EINVAL; if (!sbi->nls) { /* try utf8 first, as this is the old default behaviour */ sbi->nls = load_nls("utf8"); if (!sbi->nls) sbi->nls = load_nls_default(); } /* temporarily use utf8 to correctly find the hidden dir below */ nls = sbi->nls; sbi->nls = load_nls("utf8"); if (!sbi->nls) { pr_err("unable to load nls for utf8\n"); goto out_unload_nls; } /* Grab the volume header */ if (hfsplus_read_wrapper(sb)) { if (!silent) pr_warn("unable to find HFS+ superblock\n"); goto out_unload_nls; } vhdr = sbi->s_vhdr; /* Copy parts of the volume header into the superblock */ sb->s_magic = HFSPLUS_VOLHEAD_SIG; if (be16_to_cpu(vhdr->version) < 
HFSPLUS_MIN_VERSION || be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) { pr_err("wrong filesystem version\n"); goto out_free_vhdr; } sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); sbi->free_blocks = be32_to_cpu(vhdr->free_blocks); sbi->next_cnid = be32_to_cpu(vhdr->next_cnid); sbi->file_count = be32_to_cpu(vhdr->file_count); sbi->folder_count = be32_to_cpu(vhdr->folder_count); sbi->data_clump_blocks = be32_to_cpu(vhdr->data_clump_sz) >> sbi->alloc_blksz_shift; if (!sbi->data_clump_blocks) sbi->data_clump_blocks = 1; sbi->rsrc_clump_blocks = be32_to_cpu(vhdr->rsrc_clump_sz) >> sbi->alloc_blksz_shift; if (!sbi->rsrc_clump_blocks) sbi->rsrc_clump_blocks = 1; err = -EFBIG; last_fs_block = sbi->total_blocks - 1; last_fs_page = (last_fs_block << sbi->alloc_blksz_shift) >> PAGE_SHIFT; if ((last_fs_block > (sector_t)(~0ULL) >> (sbi->alloc_blksz_shift - 9)) || (last_fs_page > (pgoff_t)(~0ULL))) { pr_err("filesystem size too large\n"); goto out_free_vhdr; } /* Set up operations so we can load metadata */ sb->s_op = &hfsplus_sops; sb->s_maxbytes = MAX_LFS_FILESIZE; if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { pr_warn("Filesystem was not cleanly unmounted, running fsck.hfsplus is recommended. 
mounting read-only.\n"); sb->s_flags |= SB_RDONLY; } else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) { /* nothing */ } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { pr_warn("Filesystem is marked locked, mounting read-only.\n"); sb->s_flags |= SB_RDONLY; } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && !sb_rdonly(sb)) { pr_warn("write access to a journaled filesystem is not supported, use the force option at your own risk, mounting read-only.\n"); sb->s_flags |= SB_RDONLY; } err = -EINVAL; /* Load metadata objects (B*Trees) */ sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); if (!sbi->ext_tree) { pr_err("failed to load extents file\n"); goto out_free_vhdr; } sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); if (!sbi->cat_tree) { pr_err("failed to load catalog file\n"); goto out_close_ext_tree; } atomic_set(&sbi->attr_tree_state, HFSPLUS_EMPTY_ATTR_TREE); if (vhdr->attr_file.total_blocks != 0) { sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID); if (!sbi->attr_tree) { pr_err("failed to load attributes file\n"); goto out_close_cat_tree; } atomic_set(&sbi->attr_tree_state, HFSPLUS_VALID_ATTR_TREE); } sb->s_xattr = hfsplus_xattr_handlers; inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); if (IS_ERR(inode)) { pr_err("failed to load allocation file\n"); err = PTR_ERR(inode); goto out_close_attr_tree; } sbi->alloc_file = inode; /* Load the root directory */ root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); if (IS_ERR(root)) { pr_err("failed to load root directory\n"); err = PTR_ERR(root); goto out_put_alloc_file; } set_default_d_op(sb, &hfsplus_dentry_operations); sb->s_root = d_make_root(root); if (!sb->s_root) { err = -ENOMEM; goto out_put_alloc_file; } str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; str.name = HFSP_HIDDENDIR_NAME; err = hfs_find_init(sbi->cat_tree, &fd); if (err) goto out_put_root; err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); if (unlikely(err < 0)) goto out_put_root; if 
(!hfs_brec_read(&fd, &entry, sizeof(entry))) { hfs_find_exit(&fd); if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) { err = -EIO; goto out_put_root; } inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id)); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_put_root; } sbi->hidden_dir = inode; } else hfs_find_exit(&fd); if (!sb_rdonly(sb)) { /* * H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused * all three are registered with Apple for our use */ vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); vhdr->modify_date = hfsp_now2mt(); be32_add_cpu(&vhdr->write_count, 1); vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); hfsplus_sync_fs(sb, 1); if (!sbi->hidden_dir) { mutex_lock(&sbi->vh_mutex); sbi->hidden_dir = hfsplus_new_inode(sb, root, S_IFDIR); if (!sbi->hidden_dir) { mutex_unlock(&sbi->vh_mutex); err = -ENOMEM; goto out_put_root; } err = hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str, sbi->hidden_dir); if (err) { mutex_unlock(&sbi->vh_mutex); goto out_put_hidden_dir; } err = hfsplus_init_security(sbi->hidden_dir, root, &str); if (err == -EOPNOTSUPP) err = 0; /* Operation is not supported. */ else if (err) { /* * Try to delete anyway without * error analysis. 
*/ hfsplus_delete_cat(sbi->hidden_dir->i_ino, root, &str); mutex_unlock(&sbi->vh_mutex); goto out_put_hidden_dir; } mutex_unlock(&sbi->vh_mutex); hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY); } } unload_nls(sbi->nls); sbi->nls = nls; return 0; out_put_hidden_dir: cancel_delayed_work_sync(&sbi->sync_work); iput(sbi->hidden_dir); out_put_root: dput(sb->s_root); sb->s_root = NULL; out_put_alloc_file: iput(sbi->alloc_file); out_close_attr_tree: hfs_btree_close(sbi->attr_tree); out_close_cat_tree: hfs_btree_close(sbi->cat_tree); out_close_ext_tree: hfs_btree_close(sbi->ext_tree); out_free_vhdr: kfree(sbi->s_vhdr_buf); kfree(sbi->s_backup_vhdr_buf); out_unload_nls: unload_nls(sbi->nls); unload_nls(nls); kfree(sbi); return err; } MODULE_AUTHOR("Brad Boyer"); MODULE_DESCRIPTION("Extended Macintosh Filesystem"); MODULE_LICENSE("GPL"); static struct kmem_cache *hfsplus_inode_cachep; static struct inode *hfsplus_alloc_inode(struct super_block *sb) { struct hfsplus_inode_info *i; i = alloc_inode_sb(sb, hfsplus_inode_cachep, GFP_KERNEL); return i ? 
&i->vfs_inode : NULL; } static void hfsplus_free_inode(struct inode *inode) { kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); } #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) static int hfsplus_get_tree(struct fs_context *fc) { return get_tree_bdev(fc, hfsplus_fill_super); } static void hfsplus_free_fc(struct fs_context *fc) { kfree(fc->s_fs_info); } static const struct fs_context_operations hfsplus_context_ops = { .parse_param = hfsplus_parse_param, .get_tree = hfsplus_get_tree, .reconfigure = hfsplus_reconfigure, .free = hfsplus_free_fc, }; static int hfsplus_init_fs_context(struct fs_context *fc) { struct hfsplus_sb_info *sbi; sbi = kzalloc(sizeof(struct hfsplus_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; if (fc->purpose != FS_CONTEXT_FOR_RECONFIGURE) hfsplus_fill_defaults(sbi); fc->s_fs_info = sbi; fc->ops = &hfsplus_context_ops; return 0; } static struct file_system_type hfsplus_fs_type = { .owner = THIS_MODULE, .name = "hfsplus", .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, .init_fs_context = hfsplus_init_fs_context, }; MODULE_ALIAS_FS("hfsplus"); static void hfsplus_init_once(void *p) { struct hfsplus_inode_info *i = p; inode_init_once(&i->vfs_inode); } static int __init init_hfsplus_fs(void) { int err; hfsplus_inode_cachep = kmem_cache_create("hfsplus_icache", HFSPLUS_INODE_SIZE, 0, SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, hfsplus_init_once); if (!hfsplus_inode_cachep) return -ENOMEM; err = hfsplus_create_attr_tree_cache(); if (err) goto destroy_inode_cache; err = register_filesystem(&hfsplus_fs_type); if (err) goto destroy_attr_tree_cache; return 0; destroy_attr_tree_cache: hfsplus_destroy_attr_tree_cache(); destroy_inode_cache: kmem_cache_destroy(hfsplus_inode_cachep); return err; } static void __exit exit_hfsplus_fs(void) { unregister_filesystem(&hfsplus_fs_type); /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. 
*/ rcu_barrier(); hfsplus_destroy_attr_tree_cache(); kmem_cache_destroy(hfsplus_inode_cachep); } module_init(init_hfsplus_fs) module_exit(exit_hfsplus_fs) |
| 4 3 4 4 1 3 3 3 3 4 3 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 | // SPDX-License-Identifier: GPL-2.0+ /* * comedi/drivers/8255.c * Driver for 8255 * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 1998 David A. Schleef <ds@schleef.org> */ /* * Driver: 8255 * Description: generic 8255 support * Devices: [standard] 8255 (8255) * Author: ds * Status: works * Updated: Fri, 7 Jun 2002 12:56:45 -0700 * * The classic in digital I/O. The 8255 appears in Comedi as a single * digital I/O subdevice with 24 channels. The channel 0 corresponds * to the 8255's port A, bit 0; channel 23 corresponds to port C, bit * 7. Direction configuration is done in blocks, with channels 0-7, * 8-15, 16-19, and 20-23 making up the 4 blocks. The only 8255 mode * supported is mode 0. * * You should enable compilation this driver if you plan to use a board * that has an 8255 chip. For multifunction boards, the main driver will * configure the 8255 subdevice automatically. * * This driver also works independently with ISA and PCI cards that * directly map the 8255 registers to I/O ports, including cards with * multiple 8255 chips. To configure the driver for such a card, the * option list should be a list of the I/O port bases for each of the * 8255 chips. For example, * * comedi_config /dev/comedi0 8255 0x200,0x204,0x208,0x20c * * Note that most PCI 8255 boards do NOT work with this driver, and * need a separate driver as a wrapper. For those that do work, the * I/O port base address can be found in the output of 'lspci -v'. 
*/ #include <linux/module.h> #include <linux/comedi/comedidev.h> #include <linux/comedi/comedi_8255.h> static int dev_8255_attach(struct comedi_device *dev, struct comedi_devconfig *it) { struct comedi_subdevice *s; unsigned long iobase; int ret; int i; for (i = 0; i < COMEDI_NDEVCONFOPTS; i++) { iobase = it->options[i]; if (!iobase) break; } if (i == 0) { dev_warn(dev->class_dev, "no devices specified\n"); return -EINVAL; } ret = comedi_alloc_subdevices(dev, i); if (ret) return ret; for (i = 0; i < dev->n_subdevices; i++) { s = &dev->subdevices[i]; iobase = it->options[i]; /* * __comedi_request_region() does not set dev->iobase. * * For 8255 devices that are manually attached using * comedi_config, the 'iobase' is the actual I/O port * base address of the chip. */ ret = __comedi_request_region(dev, iobase, I8255_SIZE); if (ret) { s->type = COMEDI_SUBD_UNUSED; } else { ret = subdev_8255_io_init(dev, s, iobase); if (ret) { /* * Release the I/O port region here, as the * "detach" handler cannot find it. */ release_region(iobase, I8255_SIZE); s->type = COMEDI_SUBD_UNUSED; return ret; } } } return 0; } static void dev_8255_detach(struct comedi_device *dev) { struct comedi_subdevice *s; int i; for (i = 0; i < dev->n_subdevices; i++) { s = &dev->subdevices[i]; if (s->type != COMEDI_SUBD_UNUSED) { unsigned long regbase = subdev_8255_regbase(s); release_region(regbase, I8255_SIZE); } } } static struct comedi_driver dev_8255_driver = { .driver_name = "8255", .module = THIS_MODULE, .attach = dev_8255_attach, .detach = dev_8255_detach, }; module_comedi_driver(dev_8255_driver); MODULE_AUTHOR("Comedi https://www.comedi.org"); MODULE_DESCRIPTION("Comedi driver for standalone 8255 devices"); MODULE_LICENSE("GPL"); |
| 2516 1925 2512 2503 892 2222 1620 1617 1551 1533 1360 948 430 9 659 339 438 437 436 438 438 437 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 | // SPDX-License-Identifier: GPL-2.0-only /* * IPv6 library code, needed by static components when full IPv6 support is * not configured or static. 
*/ #include <linux/export.h> #include <net/ipv6.h> #include <net/ipv6_stubs.h> #include <net/addrconf.h> #include <net/ip.h> /* if ipv6 module registers this function is used by xfrm to force all * sockets to relookup their nodes - this is fairly expensive, be * careful */ void (*__fib6_flush_trees)(struct net *); EXPORT_SYMBOL(__fib6_flush_trees); #define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) static inline unsigned int ipv6_addr_scope2type(unsigned int scope) { switch (scope) { case IPV6_ADDR_SCOPE_NODELOCAL: return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_NODELOCAL) | IPV6_ADDR_LOOPBACK); case IPV6_ADDR_SCOPE_LINKLOCAL: return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL) | IPV6_ADDR_LINKLOCAL); case IPV6_ADDR_SCOPE_SITELOCAL: return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL) | IPV6_ADDR_SITELOCAL); } return IPV6_ADDR_SCOPE_TYPE(scope); } int __ipv6_addr_type(const struct in6_addr *addr) { __be32 st; st = addr->s6_addr32[0]; /* Consider all addresses with the first three bits different of 000 and 111 as unicasts. 
*/ if ((st & htonl(0xE0000000)) != htonl(0x00000000) && (st & htonl(0xE0000000)) != htonl(0xE0000000)) return (IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) { /* multicast */ /* addr-select 3.1 */ return (IPV6_ADDR_MULTICAST | ipv6_addr_scope2type(IPV6_ADDR_MC_SCOPE(addr))); } if ((st & htonl(0xFFC00000)) == htonl(0xFE800000)) return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.1 */ if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000)) return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL)); /* addr-select 3.1 */ if ((st & htonl(0xFE000000)) == htonl(0xFC000000)) return (IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* RFC 4193 */ if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) { if (addr->s6_addr32[2] == 0) { if (addr->s6_addr32[3] == 0) return IPV6_ADDR_ANY; if (addr->s6_addr32[3] == htonl(0x00000001)) return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.4 */ return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */ } if (addr->s6_addr32[2] == htonl(0x0000ffff)) return (IPV6_ADDR_MAPPED | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */ } return (IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.4 */ } EXPORT_SYMBOL(__ipv6_addr_type); static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); static BLOCKING_NOTIFIER_HEAD(inet6addr_validator_chain); int register_inet6addr_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&inet6addr_chain, nb); } EXPORT_SYMBOL(register_inet6addr_notifier); int unregister_inet6addr_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(&inet6addr_chain, nb); } EXPORT_SYMBOL(unregister_inet6addr_notifier); int 
inet6addr_notifier_call_chain(unsigned long val, void *v) { return atomic_notifier_call_chain(&inet6addr_chain, val, v); } EXPORT_SYMBOL(inet6addr_notifier_call_chain); int register_inet6addr_validator_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&inet6addr_validator_chain, nb); } EXPORT_SYMBOL(register_inet6addr_validator_notifier); int unregister_inet6addr_validator_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&inet6addr_validator_chain, nb); } EXPORT_SYMBOL(unregister_inet6addr_validator_notifier); int inet6addr_validator_notifier_call_chain(unsigned long val, void *v) { return blocking_notifier_call_chain(&inet6addr_validator_chain, val, v); } EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain); static struct dst_entry *eafnosupport_ipv6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { return ERR_PTR(-EAFNOSUPPORT); } static int eafnosupport_ipv6_route_input(struct sk_buff *skb) { return -EAFNOSUPPORT; } static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id) { return NULL; } static int eafnosupport_fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, struct fib6_result *res, int flags) { return -EAFNOSUPPORT; } static int eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, struct fib6_result *res, int flags) { return -EAFNOSUPPORT; } static void eafnosupport_fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict) { } static u32 eafnosupport_ip6_mtu_from_fib6(const struct fib6_result *res, const struct in6_addr *daddr, const struct in6_addr *saddr) { return 0; } static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { NL_SET_ERR_MSG(extack, "IPv6 support not 
enabled in kernel"); return -EAFNOSUPPORT; } static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify) { return -EAFNOSUPPORT; } static int eafnosupport_ipv6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { kfree_skb(skb); return -EAFNOSUPPORT; } static struct net_device *eafnosupport_ipv6_dev_find(struct net *net, const struct in6_addr *addr, struct net_device *dev) { return ERR_PTR(-EAFNOSUPPORT); } const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow, .ipv6_route_input = eafnosupport_ipv6_route_input, .fib6_get_table = eafnosupport_fib6_get_table, .fib6_table_lookup = eafnosupport_fib6_table_lookup, .fib6_lookup = eafnosupport_fib6_lookup, .fib6_select_path = eafnosupport_fib6_select_path, .ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6, .fib6_nh_init = eafnosupport_fib6_nh_init, .ip6_del_rt = eafnosupport_ip6_del_rt, .ipv6_fragment = eafnosupport_ipv6_fragment, .ipv6_dev_find = eafnosupport_ipv6_dev_find, }; EXPORT_SYMBOL_GPL(ipv6_stub); /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8) = IN6ADDR_LOOPBACK_INIT; EXPORT_SYMBOL(in6addr_loopback); const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8) = IN6ADDR_ANY_INIT; EXPORT_SYMBOL(in6addr_any); const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8) = IN6ADDR_LINKLOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_linklocal_allnodes); const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8) = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_linklocal_allrouters); const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8) = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allnodes); const struct in6_addr in6addr_interfacelocal_allrouters 
__aligned(BITS_PER_LONG/8) = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allrouters); const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8) = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_sitelocal_allrouters); static void snmp6_free_dev(struct inet6_dev *idev) { kfree(idev->stats.icmpv6msgdev); kfree(idev->stats.icmpv6dev); free_percpu(idev->stats.ipv6); } static void in6_dev_finish_destroy_rcu(struct rcu_head *head) { struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu); snmp6_free_dev(idev); kfree(idev); } /* Nobody refers to this device, we may destroy it. */ void in6_dev_finish_destroy(struct inet6_dev *idev) { struct net_device *dev = idev->dev; WARN_ON(!list_empty(&idev->addr_list)); WARN_ON(rcu_access_pointer(idev->mc_list)); WARN_ON(timer_pending(&idev->rs_timer)); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL"); #endif netdev_put(dev, &idev->dev_tracker); if (!idev->dead) { pr_warn("Freeing alive inet6 device %p\n", idev); return; } call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); } EXPORT_SYMBOL(in6_dev_finish_destroy); |
| 433 433 8 8 5 432 5 416 8 8 5 422 425 257 258 258 240 238 233 388 425 425 440 14 426 7 2 425 425 424 5 430 430 429 430 428 430 429 428 429 430 430 429 430 430 8 8 8 8 8 8 8 8 8 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 
481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2005,2006,2007,2008 IBM Corporation * * Authors: * Mimi Zohar <zohar@us.ibm.com> * Kylene Hall <kjhall@us.ibm.com> * * File: ima_crypto.c * Calculates md5/sha1 file hash, template hash, boot-aggreate hash */ #include <linux/kernel.h> #include <linux/moduleparam.h> #include <linux/ratelimit.h> #include <linux/file.h> #include <linux/crypto.h> 
#include <linux/scatterlist.h> #include <linux/err.h> #include <linux/slab.h> #include <crypto/hash.h> #include "ima.h" /* minimum file size for ahash use */ static unsigned long ima_ahash_minsize; module_param_named(ahash_minsize, ima_ahash_minsize, ulong, 0644); MODULE_PARM_DESC(ahash_minsize, "Minimum file size for ahash use"); /* default is 0 - 1 page. */ static int ima_maxorder; static unsigned int ima_bufsize = PAGE_SIZE; static int param_set_bufsize(const char *val, const struct kernel_param *kp) { unsigned long long size; int order; size = memparse(val, NULL); order = get_order(size); if (order > MAX_PAGE_ORDER) return -EINVAL; ima_maxorder = order; ima_bufsize = PAGE_SIZE << order; return 0; } static const struct kernel_param_ops param_ops_bufsize = { .set = param_set_bufsize, .get = param_get_uint, }; #define param_check_bufsize(name, p) __param_check(name, p, unsigned int) module_param_named(ahash_bufsize, ima_bufsize, bufsize, 0644); MODULE_PARM_DESC(ahash_bufsize, "Maximum ahash buffer size"); static struct crypto_shash *ima_shash_tfm; static struct crypto_ahash *ima_ahash_tfm; int ima_sha1_idx __ro_after_init; int ima_hash_algo_idx __ro_after_init; /* * Additional number of slots reserved, as needed, for SHA1 * and IMA default algo. 
*/ int ima_extra_slots __ro_after_init; struct ima_algo_desc *ima_algo_array __ro_after_init; static int __init ima_init_ima_crypto(void) { long rc; ima_shash_tfm = crypto_alloc_shash(hash_algo_name[ima_hash_algo], 0, 0); if (IS_ERR(ima_shash_tfm)) { rc = PTR_ERR(ima_shash_tfm); pr_err("Can not allocate %s (reason: %ld)\n", hash_algo_name[ima_hash_algo], rc); return rc; } pr_info("Allocated hash algorithm: %s\n", hash_algo_name[ima_hash_algo]); return 0; } static struct crypto_shash *ima_alloc_tfm(enum hash_algo algo) { struct crypto_shash *tfm = ima_shash_tfm; int rc, i; if (algo < 0 || algo >= HASH_ALGO__LAST) algo = ima_hash_algo; if (algo == ima_hash_algo) return tfm; for (i = 0; i < NR_BANKS(ima_tpm_chip) + ima_extra_slots; i++) if (ima_algo_array[i].tfm && ima_algo_array[i].algo == algo) return ima_algo_array[i].tfm; tfm = crypto_alloc_shash(hash_algo_name[algo], 0, 0); if (IS_ERR(tfm)) { rc = PTR_ERR(tfm); pr_err("Can not allocate %s (reason: %d)\n", hash_algo_name[algo], rc); } return tfm; } int __init ima_init_crypto(void) { enum hash_algo algo; long rc; int i; rc = ima_init_ima_crypto(); if (rc) return rc; ima_sha1_idx = -1; ima_hash_algo_idx = -1; for (i = 0; i < NR_BANKS(ima_tpm_chip); i++) { algo = ima_tpm_chip->allocated_banks[i].crypto_id; if (algo == HASH_ALGO_SHA1) ima_sha1_idx = i; if (algo == ima_hash_algo) ima_hash_algo_idx = i; } if (ima_sha1_idx < 0) { ima_sha1_idx = NR_BANKS(ima_tpm_chip) + ima_extra_slots++; if (ima_hash_algo == HASH_ALGO_SHA1) ima_hash_algo_idx = ima_sha1_idx; } if (ima_hash_algo_idx < 0) ima_hash_algo_idx = NR_BANKS(ima_tpm_chip) + ima_extra_slots++; ima_algo_array = kcalloc(NR_BANKS(ima_tpm_chip) + ima_extra_slots, sizeof(*ima_algo_array), GFP_KERNEL); if (!ima_algo_array) { rc = -ENOMEM; goto out; } for (i = 0; i < NR_BANKS(ima_tpm_chip); i++) { algo = ima_tpm_chip->allocated_banks[i].crypto_id; ima_algo_array[i].algo = algo; /* unknown TPM algorithm */ if (algo == HASH_ALGO__LAST) continue; if (algo == ima_hash_algo) { 
ima_algo_array[i].tfm = ima_shash_tfm; continue; } ima_algo_array[i].tfm = ima_alloc_tfm(algo); if (IS_ERR(ima_algo_array[i].tfm)) { if (algo == HASH_ALGO_SHA1) { rc = PTR_ERR(ima_algo_array[i].tfm); ima_algo_array[i].tfm = NULL; goto out_array; } ima_algo_array[i].tfm = NULL; } } if (ima_sha1_idx >= NR_BANKS(ima_tpm_chip)) { if (ima_hash_algo == HASH_ALGO_SHA1) { ima_algo_array[ima_sha1_idx].tfm = ima_shash_tfm; } else { ima_algo_array[ima_sha1_idx].tfm = ima_alloc_tfm(HASH_ALGO_SHA1); if (IS_ERR(ima_algo_array[ima_sha1_idx].tfm)) { rc = PTR_ERR(ima_algo_array[ima_sha1_idx].tfm); goto out_array; } } ima_algo_array[ima_sha1_idx].algo = HASH_ALGO_SHA1; } if (ima_hash_algo_idx >= NR_BANKS(ima_tpm_chip) && ima_hash_algo_idx != ima_sha1_idx) { ima_algo_array[ima_hash_algo_idx].tfm = ima_shash_tfm; ima_algo_array[ima_hash_algo_idx].algo = ima_hash_algo; } return 0; out_array: for (i = 0; i < NR_BANKS(ima_tpm_chip) + ima_extra_slots; i++) { if (!ima_algo_array[i].tfm || ima_algo_array[i].tfm == ima_shash_tfm) continue; crypto_free_shash(ima_algo_array[i].tfm); } kfree(ima_algo_array); out: crypto_free_shash(ima_shash_tfm); return rc; } static void ima_free_tfm(struct crypto_shash *tfm) { int i; if (tfm == ima_shash_tfm) return; for (i = 0; i < NR_BANKS(ima_tpm_chip) + ima_extra_slots; i++) if (ima_algo_array[i].tfm == tfm) return; crypto_free_shash(tfm); } /** * ima_alloc_pages() - Allocate contiguous pages. * @max_size: Maximum amount of memory to allocate. * @allocated_size: Returned size of actual allocation. * @last_warn: Should the min_size allocation warn or not. * * Tries to do opportunistic allocation for memory first trying to allocate * max_size amount of memory and then splitting that until zero order is * reached. Allocation is tried without generating allocation warnings unless * last_warn is set. Last_warn set affects only last allocation of zero order. 
*
 * By default, ima_maxorder is 0 and it is equivalent to kmalloc(GFP_KERNEL)
 *
 * Return pointer to allocated memory, or NULL on failure.
 */
static void *ima_alloc_pages(loff_t max_size, size_t *allocated_size,
			     int last_warn)
{
	void *ptr;
	int order = ima_maxorder;
	gfp_t gfp_mask = __GFP_RECLAIM | __GFP_NOWARN | __GFP_NORETRY;

	/* Never start above the order that max_size itself requires. */
	if (order)
		order = min(get_order(max_size), order);

	/* Quiet, no-retry attempts from the starting order down to order 1. */
	for (; order; order--) {
		ptr = (void *)__get_free_pages(gfp_mask, order);
		if (ptr) {
			*allocated_size = PAGE_SIZE << order;
			return ptr;
		}
	}

	/* order is zero - one page */

	gfp_mask = GFP_KERNEL;

	/* Only the final single-page attempt may warn, and only on request. */
	if (!last_warn)
		gfp_mask |= __GFP_NOWARN;

	ptr = (void *)__get_free_pages(gfp_mask, 0);
	if (ptr) {
		*allocated_size = PAGE_SIZE;
		return ptr;
	}

	*allocated_size = 0;
	return NULL;
}

/**
 * ima_free_pages() - Free pages allocated by ima_alloc_pages().
 * @ptr:  Pointer to allocated pages.
 * @size: Size of allocated buffer.
 */
static void ima_free_pages(void *ptr, size_t size)
{
	/* A NULL @ptr is accepted; @size is not looked at in that case. */
	if (!ptr)
		return;
	free_pages((unsigned long)ptr, get_order(size));
}

/*
 * Return an ahash transform for @algo.
 *
 * An out-of-range @algo is replaced by ima_hash_algo.  For ima_hash_algo the
 * transform is allocated once and remembered in ima_ahash_tfm; for any other
 * algorithm a new transform is allocated on every call.  On allocation
 * failure the error is logged and the ERR_PTR value from
 * crypto_alloc_ahash() is returned unchanged.
 */
static struct crypto_ahash *ima_alloc_atfm(enum hash_algo algo)
{
	struct crypto_ahash *tfm = ima_ahash_tfm;
	int rc;

	if (algo < 0 || algo >= HASH_ALGO__LAST)
		algo = ima_hash_algo;

	if (algo != ima_hash_algo || !tfm) {
		tfm = crypto_alloc_ahash(hash_algo_name[algo], 0, 0);
		if (!IS_ERR(tfm)) {
			/* Remember only the default algorithm's transform. */
			if (algo == ima_hash_algo)
				ima_ahash_tfm = tfm;
		} else {
			rc = PTR_ERR(tfm);
			pr_err("Can not allocate %s (reason: %d)\n",
			       hash_algo_name[algo], rc);
		}
	}
	return tfm;
}

/* Free @tfm unless it is the remembered ima_ahash_tfm. */
static void ima_free_atfm(struct crypto_ahash *tfm)
{
	if (tfm != ima_ahash_tfm)
		crypto_free_ahash(tfm);
}

/*
 * Pass @err through crypto_wait_req() and log a rate-limited critical
 * message if the result is non-zero.  Returns that result.
 */
static inline int ahash_wait(int err, struct crypto_wait *wait)
{

	err = crypto_wait_req(err, wait);

	if (err)
		pr_crit_ratelimited("ahash calculation failed: err: %d\n", err);

	return err;
}

/*
 * Hash the contents of @file with the ahash transform @tfm and store the
 * digest in @hash->digest (and its size in @hash->length).
 *
 * The file is read in chunks with integrity_kernel_read().  When a second
 * buffer could be allocated, the two buffers alternate: the next chunk is
 * read before waiting for the previous crypto_ahash_update() to complete.
 * With a single buffer, the previous update is waited for before the buffer
 * is overwritten by the next read.
 *
 * Returns 0 on success or a negative error code.
 */
static int ima_calc_file_hash_atfm(struct file *file,
				   struct ima_digest_data *hash,
				   struct crypto_ahash *tfm)
{
	loff_t i_size, offset;
	char *rbuf[2] = { NULL, };
	int rc, rbuf_len, active = 0, ahash_rc = 0;
	struct ahash_request *req;
	struct scatterlist sg[1];
	struct crypto_wait wait;
	size_t rbuf_size[2];

	hash->length = crypto_ahash_digestsize(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	crypto_init_wait(&wait);
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
				   CRYPTO_TFM_REQ_MAY_SLEEP,
				   crypto_req_done, &wait);

	rc = ahash_wait(crypto_ahash_init(req), &wait);
	if (rc)
		goto out1;

	i_size = i_size_read(file_inode(file));

	/* Empty file: skip buffer allocation and go straight to final(). */
	if (i_size == 0)
		goto out2;

	/*
	 * Try to allocate maximum size of memory.
	 * Fail if even a single page cannot be allocated.
	 */
	rbuf[0] = ima_alloc_pages(i_size, &rbuf_size[0], 1);
	if (!rbuf[0]) {
		rc = -ENOMEM;
		goto out1;
	}

	/* Only allocate one buffer if that is enough. */
	if (i_size > rbuf_size[0]) {
		/*
		 * Try to allocate secondary buffer. If that fails fallback to
		 * using single buffering. Use previous memory allocation size
		 * as baseline for possible allocation size.
		 */
		rbuf[1] = ima_alloc_pages(i_size - rbuf_size[0],
					  &rbuf_size[1], 0);
	}

	for (offset = 0; offset < i_size; offset += rbuf_len) {
		if (!rbuf[1] && offset) {
			/* Not using two buffers, and it is not the first
			 * read/request, wait for the completion of the
			 * previous ahash_update() request.
			 */
			rc = ahash_wait(ahash_rc, &wait);
			if (rc)
				goto out3;
		}
		/* read buffer */
		rbuf_len = min_t(loff_t, i_size - offset, rbuf_size[active]);
		rc = integrity_kernel_read(file, offset, rbuf[active],
					   rbuf_len);
		if (rc != rbuf_len) {
			/* A short read is reported as -EINVAL. */
			if (rc >= 0)
				rc = -EINVAL;
			/*
			 * Forward current rc, do not overwrite with return value
			 * from ahash_wait()
			 */
			ahash_wait(ahash_rc, &wait);
			goto out3;
		}

		if (rbuf[1] && offset) {
			/* Using two buffers, and it is not the first
			 * read/request, wait for the completion of the
			 * previous ahash_update() request.
			 */
			rc = ahash_wait(ahash_rc, &wait);
			if (rc)
				goto out3;
		}

		sg_init_one(&sg[0], rbuf[active], rbuf_len);
		ahash_request_set_crypt(req, sg, NULL, rbuf_len);

		/* Result is collected by a later ahash_wait() call. */
		ahash_rc = crypto_ahash_update(req);

		if (rbuf[1])
			active = !active; /* swap buffers, if we use two */
	}
	/* wait for the last update request to complete */
	rc = ahash_wait(ahash_rc, &wait);
out3:
	ima_free_pages(rbuf[0], rbuf_size[0]);
	ima_free_pages(rbuf[1], rbuf_size[1]);
out2:
	/* Finalise only if everything so far succeeded. */
	if (!rc) {
		ahash_request_set_crypt(req, NULL, hash->digest, 0);
		rc = ahash_wait(crypto_ahash_final(req), &wait);
	}
out1:
	ahash_request_free(req);
	return rc;
}

/*
 * Hash @file with an ahash transform obtained for @hash->algo.
 * Returns 0 on success or a negative error code.
 */
static int ima_calc_file_ahash(struct file *file, struct ima_digest_data *hash)
{
	struct crypto_ahash *tfm;
	int rc;

	tfm = ima_alloc_atfm(hash->algo);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	rc = ima_calc_file_hash_atfm(file, hash, tfm);

	ima_free_atfm(tfm);

	return rc;
}

/*
 * Hash the contents of @file with the shash transform @tfm, reading at most
 * PAGE_SIZE bytes per integrity_kernel_read() call, and store the digest in
 * @hash->digest (and its size in @hash->length).
 *
 * Returns 0 on success or a negative error code; a zero-length read before
 * the inode size has been consumed is reported as -EINVAL.
 */
static int ima_calc_file_hash_tfm(struct file *file,
				  struct ima_digest_data *hash,
				  struct crypto_shash *tfm)
{
	loff_t i_size, offset = 0;
	char *rbuf;
	int rc;
	SHASH_DESC_ON_STACK(shash, tfm);

	shash->tfm = tfm;

	hash->length = crypto_shash_digestsize(tfm);

	rc = crypto_shash_init(shash);
	if (rc != 0)
		return rc;

	i_size = i_size_read(file_inode(file));

	/* Empty file: nothing to read, finalise the freshly initialised hash. */
	if (i_size == 0)
		goto out;

	rbuf = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!rbuf)
		return -ENOMEM;

	while (offset < i_size) {
		int rbuf_len;

		rbuf_len = integrity_kernel_read(file, offset, rbuf, PAGE_SIZE);
		if (rbuf_len < 0) {
			rc = rbuf_len;
			break;
		}
		if (rbuf_len == 0) {	/* unexpected EOF */
			rc = -EINVAL;
			break;
		}
		offset += rbuf_len;

		rc = crypto_shash_update(shash, rbuf, rbuf_len);
		if (rc)
			break;
	}
	kfree(rbuf);
out:
	if (!rc)
		rc = crypto_shash_final(shash, hash->digest);
	return rc;
}

/*
 * Hash @file with a shash transform obtained for @hash->algo.
 * Returns 0 on success or a negative error code.
 */
static int ima_calc_file_shash(struct file *file, struct ima_digest_data *hash)
{
	struct crypto_shash *tfm;
	int rc;

	tfm = ima_alloc_tfm(hash->algo);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	rc = ima_calc_file_hash_tfm(file, hash, tfm);

	ima_free_tfm(tfm);

	return rc;
}

/*
 * ima_calc_file_hash - calculate file hash
 *
* Asynchronous hash (ahash) allows using HW acceleration for calculating
 * a hash. ahash performance varies for different data sizes on different
 * crypto accelerators. shash performance might be better for smaller files.
 * The 'ima.ahash_minsize' module parameter allows specifying the best
 * minimum file size for using ahash on the system.
 *
 * If the ima.ahash_minsize parameter is not specified, this function uses
 * shash for the hash calculation.  If ahash fails, it falls back to using
 * shash.
 */
int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
{
	loff_t i_size;
	int rc;
	struct file *f = file;
	bool new_file_instance = false;

	/*
	 * For consistency, fail files opened with the O_DIRECT flag on
	 * filesystems mounted with/without DAX option.
	 */
	if (file->f_flags & O_DIRECT) {
		hash->length = hash_digest_size[ima_hash_algo];
		hash->algo = ima_hash_algo;
		return -EINVAL;
	}

	/* Open a new file instance in O_RDONLY if we cannot read */
	if (!(file->f_mode & FMODE_READ)) {
		int flags = file->f_flags & ~(O_WRONLY | O_APPEND |
				O_TRUNC | O_CREAT | O_NOCTTY | O_EXCL);
		flags |= O_RDONLY;
		f = dentry_open(&file->f_path, flags, file->f_cred);
		if (IS_ERR(f))
			return PTR_ERR(f);

		new_file_instance = true;
	}

	i_size = i_size_read(file_inode(f));

	/* ahash is tried only when a minimum size is set and reached. */
	if (ima_ahash_minsize && i_size >= ima_ahash_minsize) {
		rc = ima_calc_file_ahash(f, hash);
		if (!rc)
			goto out;
	}

	/* Either ahash was not attempted or it failed: use shash. */
	rc = ima_calc_file_shash(f, hash);
out:
	/* Drop only the reference taken by dentry_open() above. */
	if (new_file_instance)
		fput(f);
	return rc;
}

/*
 * Calculate the hash of template data
 *
 * Hashes the @entry->template_desc->num_fields entries of @field_data with
 * ima_algo_array[@tfm_idx].tfm and stores the result in
 * @entry->digests[@tfm_idx].digest.  Returns 0 or a negative error code.
 */
static int ima_calc_field_array_hash_tfm(struct ima_field_data *field_data,
					 struct ima_template_entry *entry,
					 int tfm_idx)
{
	SHASH_DESC_ON_STACK(shash, ima_algo_array[tfm_idx].tfm);
	struct ima_template_desc *td = entry->template_desc;
	int num_fields = entry->template_desc->num_fields;
	int rc, i;

	shash->tfm = ima_algo_array[tfm_idx].tfm;

	rc = crypto_shash_init(shash);
	if (rc != 0)
		return rc;

	for (i = 0; i < num_fields; i++) {
		u8 buffer[IMA_EVENT_NAME_LEN_MAX + 1] = { 0 };
		u8 *data_to_hash = field_data[i].data;
		u32 datalen = field_data[i].len;
		u32 datalen_to_hash = !ima_canonical_fmt ?
				datalen : (__force u32)cpu_to_le32(datalen);

		if (strcmp(td->name, IMA_TEMPLATE_IMA_NAME) != 0) {
			/*
			 * Every template other than IMA_TEMPLATE_IMA_NAME
			 * hashes the field length ahead of the field data.
			 */
			rc = crypto_shash_update(shash,
						(const u8 *) &datalen_to_hash,
						sizeof(datalen_to_hash));
			if (rc)
				break;
		} else if (strcmp(td->fields[i]->field_id, "n") == 0) {
			/*
			 * For the "n" field of IMA_TEMPLATE_IMA_NAME, hash a
			 * fixed-size, zero-padded copy of the data.
			 * NOTE(review): assumes datalen never exceeds
			 * sizeof(buffer) here - confirm against the callers.
			 */
			memcpy(buffer, data_to_hash, datalen);
			data_to_hash = buffer;
			datalen = IMA_EVENT_NAME_LEN_MAX + 1;
		}
		rc = crypto_shash_update(shash, data_to_hash, datalen);
		if (rc)
			break;
	}

	if (!rc)
		rc = crypto_shash_final(shash,
					entry->digests[tfm_idx].digest);

	return rc;
}

/*
 * Fill @entry->digests[] for every slot of ima_algo_array.
 *
 * The SHA1 slot is computed first.  Slots without a transform receive a copy
 * of the first TPM_DIGEST_SIZE bytes of the SHA1 digest; every other slot is
 * computed with its own transform.  Returns 0 or the first error hit.
 */
int ima_calc_field_array_hash(struct ima_field_data *field_data,
			      struct ima_template_entry *entry)
{
	u16 alg_id;
	int rc, i;

	rc = ima_calc_field_array_hash_tfm(field_data, entry, ima_sha1_idx);
	if (rc)
		return rc;

	entry->digests[ima_sha1_idx].alg_id = TPM_ALG_SHA1;

	for (i = 0; i < NR_BANKS(ima_tpm_chip) + ima_extra_slots; i++) {
		/* SHA1 was already handled above. */
		if (i == ima_sha1_idx)
			continue;

		/* Only slots backed by a TPM bank get that bank's alg_id. */
		if (i < NR_BANKS(ima_tpm_chip)) {
			alg_id = ima_tpm_chip->allocated_banks[i].alg_id;
			entry->digests[i].alg_id = alg_id;
		}

		/* for unmapped TPM algorithms digest is still a padded SHA1 */
		if (!ima_algo_array[i].tfm) {
			memcpy(entry->digests[i].digest,
			       entry->digests[ima_sha1_idx].digest,
			       TPM_DIGEST_SIZE);
			continue;
		}

		rc = ima_calc_field_array_hash_tfm(field_data, entry, i);
		if (rc)
			return rc;
	}
	return rc;
}

/*
 * Hash the @len bytes at @buf with the ahash transform @tfm in a single
 * update request and store the digest in @hash->digest (and its size in
 * @hash->length).  Returns 0 or a negative error code.
 */
static int calc_buffer_ahash_atfm(const void *buf, loff_t len,
				  struct ima_digest_data *hash,
				  struct crypto_ahash *tfm)
{
	struct ahash_request *req;
	struct scatterlist sg;
	struct crypto_wait wait;
	int rc, ahash_rc = 0;

	hash->length = crypto_ahash_digestsize(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	crypto_init_wait(&wait);
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
				   CRYPTO_TFM_REQ_MAY_SLEEP,
				   crypto_req_done, &wait);

	rc = ahash_wait(crypto_ahash_init(req), &wait);
	if (rc)
		goto out;

	sg_init_one(&sg, buf, len);
	ahash_request_set_crypt(req, &sg, NULL, len);

	ahash_rc = crypto_ahash_update(req);

	/* wait for the update request to complete */
	rc = ahash_wait(ahash_rc, &wait);
	if (!rc) {
		ahash_request_set_crypt(req, NULL, hash->digest, 0);
		rc = ahash_wait(crypto_ahash_final(req), &wait);
	}
out:
	ahash_request_free(req);
	return rc;
}

/*
 * Hash a buffer with an ahash transform obtained for @hash->algo.
 * Returns 0 on success or a negative error code.
 */
static int calc_buffer_ahash(const void *buf, loff_t len,
			     struct ima_digest_data *hash)
{
	struct crypto_ahash *tfm;
	int rc;

	tfm = ima_alloc_atfm(hash->algo);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	rc = calc_buffer_ahash_atfm(buf, len, hash, tfm);

	ima_free_atfm(tfm);

	return rc;
}

/*
 * Hash the @size bytes at @buf with the shash transform @tfm, feeding at
 * most PAGE_SIZE bytes per crypto_shash_update() call, and store the digest
 * in @hash->digest (and its size in @hash->length).
 * Returns 0 or a negative error code.
 */
static int calc_buffer_shash_tfm(const void *buf, loff_t size,
				struct ima_digest_data *hash,
				struct crypto_shash *tfm)
{
	SHASH_DESC_ON_STACK(shash, tfm);
	unsigned int len;
	int rc;

	shash->tfm = tfm;

	hash->length = crypto_shash_digestsize(tfm);

	rc = crypto_shash_init(shash);
	if (rc != 0)
		return rc;

	while (size) {
		len = size < PAGE_SIZE ? size : PAGE_SIZE;
		rc = crypto_shash_update(shash, buf, len);
		if (rc)
			break;
		buf += len;
		size -= len;
	}

	if (!rc)
		rc = crypto_shash_final(shash, hash->digest);
	return rc;
}

/*
 * Hash a buffer with a shash transform obtained for @hash->algo.
 * Returns 0 on success or a negative error code.
 */
static int calc_buffer_shash(const void *buf, loff_t len,
			     struct ima_digest_data *hash)
{
	struct crypto_shash *tfm;
	int rc;

	tfm = ima_alloc_tfm(hash->algo);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	rc = calc_buffer_shash_tfm(buf, len, hash, tfm);

	ima_free_tfm(tfm);
	return rc;
}

/*
 * Hash a buffer.  ahash is tried first when ima_ahash_minsize is set and
 * @len reaches it; if ahash is not tried or fails, shash is used.
 */
int ima_calc_buffer_hash(const void *buf, loff_t len,
			 struct ima_digest_data *hash)
{
	int rc;

	if (ima_ahash_minsize && len >= ima_ahash_minsize) {
		rc = calc_buffer_ahash(buf, len, hash);
		if (!rc)
			return 0;
	}

	return calc_buffer_shash(buf, len, hash);
}

/*
 * Read PCR @idx into @d.  Does nothing when there is no TPM chip; a failed
 * read is only logged, so @d is left as the caller passed it in that case.
 */
static void ima_pcrread(u32 idx, struct tpm_digest *d)
{
	if (!ima_tpm_chip)
		return;

	if (tpm_pcr_read(ima_tpm_chip, idx, d) != 0)
		pr_err("Error Communicating to TPM chip\n");
}

/*
 * The boot_aggregate is a cumulative hash over TPM registers 0 - 7.
With * TPM 1.2 the boot_aggregate was based on reading the SHA1 PCRs, but with * TPM 2.0 hash agility, TPM chips could support multiple TPM PCR banks, * allowing firmware to configure and enable different banks. * * Knowing which TPM bank is read to calculate the boot_aggregate digest * needs to be conveyed to a verifier. For this reason, use the same * hash algorithm for reading the TPM PCRs as for calculating the boot * aggregate digest as stored in the measurement list. */ static int ima_calc_boot_aggregate_tfm(char *digest, u16 alg_id, struct crypto_shash *tfm) { struct tpm_digest d = { .alg_id = alg_id, .digest = {0} }; int rc; u32 i; SHASH_DESC_ON_STACK(shash, tfm); shash->tfm = tfm; pr_devel("calculating the boot-aggregate based on TPM bank: %04x\n", d.alg_id); rc = crypto_shash_init(shash); if (rc != 0) return rc; /* cumulative digest over TPM registers 0-7 */ for (i = TPM_PCR0; i < TPM_PCR8; i++) { ima_pcrread(i, &d); /* now accumulate with current aggregate */ rc = crypto_shash_update(shash, d.digest, crypto_shash_digestsize(tfm)); if (rc != 0) return rc; } /* * Extend cumulative digest over TPM registers 8-9, which contain * measurement for the kernel command line (reg. 8) and image (reg. 9) * in a typical PCR allocation. Registers 8-9 are only included in * non-SHA1 boot_aggregate digests to avoid ambiguity. 
*/ if (alg_id != TPM_ALG_SHA1) { for (i = TPM_PCR8; i < TPM_PCR10; i++) { ima_pcrread(i, &d); rc = crypto_shash_update(shash, d.digest, crypto_shash_digestsize(tfm)); } } if (!rc) crypto_shash_final(shash, digest); return rc; } int ima_calc_boot_aggregate(struct ima_digest_data *hash) { struct crypto_shash *tfm; u16 crypto_id, alg_id; int rc, i, bank_idx = -1; for (i = 0; i < ima_tpm_chip->nr_allocated_banks; i++) { crypto_id = ima_tpm_chip->allocated_banks[i].crypto_id; if (crypto_id == hash->algo) { bank_idx = i; break; } if (crypto_id == HASH_ALGO_SHA256) bank_idx = i; if (bank_idx == -1 && crypto_id == HASH_ALGO_SHA1) bank_idx = i; } if (bank_idx == -1) { pr_err("No suitable TPM algorithm for boot aggregate\n"); return 0; } hash->algo = ima_tpm_chip->allocated_banks[bank_idx].crypto_id; tfm = ima_alloc_tfm(hash->algo); if (IS_ERR(tfm)) return PTR_ERR(tfm); hash->length = crypto_shash_digestsize(tfm); alg_id = ima_tpm_chip->allocated_banks[bank_idx].alg_id; rc = ima_calc_boot_aggregate_tfm(hash->digest, alg_id, tfm); ima_free_tfm(tfm); return rc; } |
| 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Landlock LSM - Object management
 *
 * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
 * Copyright © 2018-2020 ANSSI
 */

#ifndef _SECURITY_LANDLOCK_OBJECT_H
#define _SECURITY_LANDLOCK_OBJECT_H

#include <linux/compiler_types.h>
#include <linux/refcount.h>
#include <linux/spinlock.h>

struct landlock_object;

/**
 * struct landlock_object_underops - Operations on an underlying object
 */
struct landlock_object_underops {
	/**
	 * @release: Releases the underlying object (e.g. iput() for an inode).
	 * The callback is annotated as releasing @object->lock.
	 */
	void (*release)(struct landlock_object *const object)
		__releases(object->lock);
};

/**
 * struct landlock_object - Security blob tied to a kernel object
 *
 * The goal of this structure is to enable to tie a set of ephemeral access
 * rights (pertaining to different domains) to a kernel object (e.g an inode)
 * in a safe way.  This implies to handle concurrent use and modification.
 *
 * The lifetime of a &struct landlock_object depends on the rules referring to
 * it.
 */
struct landlock_object {
	/**
	 * @usage: This counter is used to tie an object to the rules matching
	 * it or to keep it alive while adding a new rule.  If this counter
	 * reaches zero, this struct must not be modified, but this counter can
	 * still be read from within an RCU read-side critical section.  When
	 * adding a new rule to an object with a usage counter of zero, we must
	 * wait until the pointer to this object is set to NULL (or recycled).
	 */
	refcount_t usage;
	/**
	 * @lock: Protects against concurrent modifications.  This lock must be
	 * held from the time @usage drops to zero until any weak references
	 * from @underobj to this object have been cleaned up.
	 *
	 * Lock ordering: inode->i_lock nests inside this.
	 */
	spinlock_t lock;
	/**
	 * @underobj: Used when cleaning up an object and to mark an object as
	 * tied to its underlying kernel structure.  This pointer is protected
	 * by @lock.  Cf. landlock_release_inodes() and release_inode().
	 */
	void *underobj;
	/* @rcu_free and @underops share storage. */
	union {
		/**
		 * @rcu_free: Enables lockless use of @usage, @lock and
		 * @underobj from within an RCU read-side critical section.
		 * @rcu_free and @underops are only used by
		 * landlock_put_object().
		 */
		struct rcu_head rcu_free;
		/**
		 * @underops: Enables landlock_put_object() to release the
		 * underlying object (e.g. inode).
		 */
		const struct landlock_object_underops *underops;
	};
};

/*
 * Defined outside this header.
 * NOTE(review): presumably returns a new object tied to @underobj, or an
 * error pointer on failure - confirm against the definition.
 */
struct landlock_object *
landlock_create_object(const struct landlock_object_underops *const underops,
		       void *const underobj);

/*
 * Defined outside this header; per the field documentation above, this is
 * the only user of @rcu_free and @underops.
 */
void landlock_put_object(struct landlock_object *const object);

/* Take a reference on @object by incrementing @usage; a NULL @object is a no-op. */
static inline void landlock_get_object(struct landlock_object *const object)
{
	if (object)
		refcount_inc(&object->usage);
}

#endif /* _SECURITY_LANDLOCK_OBJECT_H */
|
| 327 316 5702 3365 1901 2456 2456 1348 1348 12236 849 1339 2489 204 468 469 469 93 93 93 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 | /* SPDX-License-Identifier: GPL-2.0+ */ #ifndef _LINUX_MAPLE_TREE_H #define _LINUX_MAPLE_TREE_H /* * Maple Tree - An RCU-safe adaptive tree for storing ranges * Copyright (c) 2018-2022 Oracle * Authors: Liam R. 
Howlett <Liam.Howlett@Oracle.com>
 *          Matthew Wilcox <willy@infradead.org>
 */

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
/* #define CONFIG_MAPLE_RCU_DISABLED */

/*
 * Allocated nodes are mutable until they have been inserted into the tree,
 * at which time they cannot change their type until they have been removed
 * from the tree and an RCU grace period has passed.
 *
 * Removed nodes have their ->parent set to point to themselves.  RCU readers
 * check ->parent before relying on the value that they loaded from the
 * slots array.  This lets us reuse the slots array for the RCU head.
 *
 * Nodes in the tree point to their parent unless bit 0 is set.
 */
/*
 * Slot counts per node layout.  The 64-bit and 32-bit variants differ so
 * that the byte sizes noted next to each constant stay the same.
 */
#if defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64)
/* 64bit sizes */
#define MAPLE_NODE_SLOTS	31	/* 256 bytes including ->parent */
#define MAPLE_RANGE64_SLOTS	16	/* 256 bytes */
#define MAPLE_ARANGE64_SLOTS	10	/* 240 bytes */
#define MAPLE_ALLOC_SLOTS	(MAPLE_NODE_SLOTS - 1)
#else
/* 32bit sizes */
#define MAPLE_NODE_SLOTS	63	/* 256 bytes including ->parent */
#define MAPLE_RANGE64_SLOTS	32	/* 256 bytes */
#define MAPLE_ARANGE64_SLOTS	21	/* 240 bytes */
#define MAPLE_ALLOC_SLOTS	(MAPLE_NODE_SLOTS - 2)
#endif /* defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) */

/* Covers the low 8 bits; presumably the sub-alignment bits of a 256B-aligned node pointer. */
#define MAPLE_NODE_MASK		255UL

/*
 * The node->parent of the root node has bit 0 set and the rest of the pointer
 * is a pointer to the tree itself.  No more bits are available in this pointer
 * (on m68k, the data structure may only be 2-byte aligned).
 *
 * Internal non-root nodes can only have maple_range_* nodes as parents.  The
 * parent pointer is 256B aligned like all other tree nodes.  When storing a 32
 * or 64 bit values, the offset can fit into 4 bits.  The 16 bit values need an
 * extra bit to store the offset.  This extra bit comes from a reuse of the last
 * bit in the node type.  This is possible by using bit 1 to indicate if bit 2
 * is part of the type or the slot.
 *
 * Once the type is decided, the decision of an allocation range type or a
 * range type is done by examining the immutable tree flag for the
 * MT_FLAGS_ALLOC_RANGE flag.
 *
 *  Node types:
 *   0b??1 = Root
 *   0b?00 = 16 bit nodes
 *   0b010 = 32 bit nodes
 *   0b110 = 64 bit nodes
 *
 *  Slot size and location in the parent pointer:
 *   type  : slot location
 *   0b??1 : Root
 *   0b?00 : 16 bit values, type in 0-1, slot in 2-6
 *   0b010 : 32 bit values, type in 0-2, slot in 3-6
 *   0b110 : 64 bit values, type in 0-2, slot in 3-6
 */

/*
 * This metadata is used to optimize the gap updating code and in reverse
 * searching for gaps or any other code that needs to find the end of the data.
 */
struct maple_metadata {
	unsigned char end;	/* end of data */
	unsigned char gap;	/* offset of largest gap */
};

/*
 * Leaf nodes do not store pointers to nodes, they store user data.  Users may
 * store almost any bit pattern.  As noted above, the optimisation of storing an
 * entry at 0 in the root pointer cannot be done for data which have the bottom
 * two bits set to '10'.  We also reserve values with the bottom two bits set to
 * '10' which are below 4096 (ie 2, 6, 10 .. 4094) for internal use.  Some APIs
 * return errnos as a negative errno shifted right by two bits and the bottom
 * two bits set to '10', and while choosing to store these values in the array
 * is not an error, it may lead to confusion if you're testing for an error with
 * mas_is_err().
 *
 * Non-leaf nodes store the type of the node pointed to (enum maple_type in bits
 * 3-6), bit 2 is reserved.  That leaves bits 0-1 unused for now.
 *
 * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
 * indicate that the tree is specifying ranges,  Pivots may appear in the
 * subtree with an entry attached to the value whereas keys are unique to a
 * specific position of a B-tree.  Pivot values are inclusive of the slot with
 * the same index.
*/

struct maple_range_64 {
	struct maple_pnode *parent;
	unsigned long pivot[MAPLE_RANGE64_SLOTS - 1];
	union {
		void __rcu *slot[MAPLE_RANGE64_SLOTS];
		/* The metadata overlays the last slot of the array above. */
		struct {
			void __rcu *pad[MAPLE_RANGE64_SLOTS - 1];
			struct maple_metadata meta;
		};
	};
};

/*
 * At tree creation time, the user can specify that they're willing to trade off
 * storing fewer entries in a tree in return for storing more information in
 * each node.
 *
 * The maple tree supports recording the largest range of NULL entries available
 * in this node, also called gaps.  This optimises the tree for allocating a
 * range.
 */
struct maple_arange_64 {
	struct maple_pnode *parent;
	unsigned long pivot[MAPLE_ARANGE64_SLOTS - 1];
	void __rcu *slot[MAPLE_ARANGE64_SLOTS];
	unsigned long gap[MAPLE_ARANGE64_SLOTS];
	struct maple_metadata meta;
};

/* Node layout used while a node serves as a container of allocated nodes. */
struct maple_alloc {
	unsigned long total;
	unsigned char node_count;
	unsigned int request_count;
	struct maple_alloc *slot[MAPLE_ALLOC_SLOTS];
};

struct maple_topiary {
	struct maple_pnode *parent;
	struct maple_enode *next; /* Overlaps the pivot */
};

/* Node layouts; the value is what gets encoded into node pointers. */
enum maple_type {
	maple_dense,
	maple_leaf_64,
	maple_range_64,
	maple_arange_64,
};

/* Kinds of write recorded in ma_state.store_type; wr_invalid means "not set". */
enum store_type {
	wr_invalid,
	wr_new_root,
	wr_store_root,
	wr_exact_fit,
	wr_spanning_store,
	wr_split_store,
	wr_rebalance,
	wr_append,
	wr_node_store,
	wr_slot_store,
};

/**
 * DOC: Maple tree flags
 *
 * * MT_FLAGS_ALLOC_RANGE	- Track gaps in this tree
 * * MT_FLAGS_USE_RCU		- Operate in RCU mode
 * * MT_FLAGS_HEIGHT_OFFSET	- The position of the tree height in the flags
 * * MT_FLAGS_HEIGHT_MASK	- The mask for the maple tree height value
 * * MT_FLAGS_LOCK_MASK		- How the mt_lock is used
 * * MT_FLAGS_LOCK_IRQ		- Acquired irq-safe
 * * MT_FLAGS_LOCK_BH		- Acquired bh-safe
 * * MT_FLAGS_LOCK_EXTERN	- mt_lock is not used
 *
 * MAPLE_HEIGHT_MAX	The largest height that can be stored
 */
#define MT_FLAGS_ALLOC_RANGE	0x01
#define MT_FLAGS_USE_RCU	0x02
#define MT_FLAGS_HEIGHT_OFFSET	0x02
#define MT_FLAGS_HEIGHT_MASK	0x7C
#define MT_FLAGS_LOCK_MASK	0x300
#define MT_FLAGS_LOCK_IRQ	0x100
#define MT_FLAGS_LOCK_BH	0x200
#define MT_FLAGS_LOCK_EXTERN	0x300
#define MT_FLAGS_ALLOC_WRAPPED	0x0800

#define MAPLE_HEIGHT_MAX	31


#define MAPLE_NODE_TYPE_MASK	0x0F
#define MAPLE_NODE_TYPE_SHIFT	0x03

#define MAPLE_RESERVED_RANGE	4096

/*
 * Lock assertions.  With lockdep, a tree without an external lock map always
 * passes; without lockdep, the checks are constant-true and the setters are
 * no-ops.
 */
#ifdef CONFIG_LOCKDEP
#define mt_lock_is_held(mt)                                             \
	(!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock))

#define mt_write_lock_is_held(mt)					\
	(!(mt)->ma_external_lock ||					\
	 lock_is_held_type((mt)->ma_external_lock, 0))

#define mt_set_external_lock(mt, lock)					\
	(mt)->ma_external_lock = &(lock)->dep_map

#define mt_on_stack(mt)			(mt).ma_external_lock = NULL
#else
#define mt_lock_is_held(mt)		1
#define mt_write_lock_is_held(mt)	1
#define mt_set_external_lock(mt, lock)	do { } while (0)
#define mt_on_stack(mt)			do { } while (0)
#endif

/*
 * If the tree contains a single entry at index 0, it is usually stored in
 * tree->ma_root.  To optimise for the page cache, an entry which ends in '00',
 * '01' or '11' is stored in the root, but an entry which ends in '10' will be
 * stored in a node.  Bits 3-6 are used to store enum maple_type.
 *
 * The flags are used both to store some immutable information about this tree
 * (set at tree creation time) and dynamic information set under the spinlock.
 *
 * Another use of flags are to indicate global states of the tree.  This is the
 * case with the MT_FLAGS_USE_RCU flag, which indicates the tree is currently in
 * RCU mode.  This mode was added to allow the tree to reuse nodes instead of
 * re-allocating and RCU freeing nodes when there is a single user.
*/ struct maple_tree { union { spinlock_t ma_lock; #ifdef CONFIG_LOCKDEP struct lockdep_map *ma_external_lock; #endif }; unsigned int ma_flags; void __rcu *ma_root; }; /** * MTREE_INIT() - Initialize a maple tree * @name: The maple tree name * @__flags: The maple tree flags * */ #define MTREE_INIT(name, __flags) { \ .ma_lock = __SPIN_LOCK_UNLOCKED((name).ma_lock), \ .ma_flags = __flags, \ .ma_root = NULL, \ } /** * MTREE_INIT_EXT() - Initialize a maple tree with an external lock. * @name: The tree name * @__flags: The maple tree flags * @__lock: The external lock */ #ifdef CONFIG_LOCKDEP #define MTREE_INIT_EXT(name, __flags, __lock) { \ .ma_external_lock = &(__lock).dep_map, \ .ma_flags = (__flags), \ .ma_root = NULL, \ } #else #define MTREE_INIT_EXT(name, __flags, __lock) MTREE_INIT(name, __flags) #endif #define DEFINE_MTREE(name) \ struct maple_tree name = MTREE_INIT(name, 0) #define mtree_lock(mt) spin_lock((&(mt)->ma_lock)) #define mtree_lock_nested(mas, subclass) \ spin_lock_nested((&(mt)->ma_lock), subclass) #define mtree_unlock(mt) spin_unlock((&(mt)->ma_lock)) /* * The Maple Tree squeezes various bits in at various points which aren't * necessarily obvious. Usually, this is done by observing that pointers are * N-byte aligned and thus the bottom log_2(N) bits are available for use. We * don't use the high bits of pointers to store additional information because * we don't know what bits are unused on any given architecture. * * Nodes are 256 bytes in size and are also aligned to 256 bytes, giving us 8 * low bits for our own purposes. Nodes are currently of 4 types: * 1. Single pointer (Range is 0-0) * 2. Non-leaf Allocation Range nodes * 3. Non-leaf Range nodes * 4. Leaf Range nodes All nodes consist of a number of node slots, * pivots, and a parent pointer. 
*/

struct maple_node {
	/* Every view below overlays the same node storage. */
	union {
		/* Generic view: parent pointer followed by raw slots. */
		struct {
			struct maple_pnode *parent;
			void __rcu *slot[MAPLE_NODE_SLOTS];
		};
		/* View holding an RCU head; @pad overlays ->parent. */
		struct {
			void *pad;
			struct rcu_head rcu;
			struct maple_enode *piv_parent;
			unsigned char parent_slot;
			enum maple_type type;
			unsigned char slot_len;
			unsigned int ma_flags;
		};
		struct maple_range_64 mr64;
		struct maple_arange_64 ma64;
		struct maple_alloc alloc;
	};
};

/*
 * More complicated stores can cause two nodes to become one or three and
 * potentially alter the height of the tree.  Either half of the tree may need
 * to be rebalanced against the other.  The ma_topiary struct is used to track
 * which nodes have been 'cut' from the tree so that the change can be done
 * safely at a later date.  This is done to support RCU.
 */
struct ma_topiary {
	struct maple_enode *head;
	struct maple_enode *tail;
	struct maple_tree *mtree;
};

/* Operations that take a struct maple_tree directly; defined outside this header. */
void *mtree_load(struct maple_tree *mt, unsigned long index);

int mtree_insert(struct maple_tree *mt, unsigned long index,
		void *entry, gfp_t gfp);
int mtree_insert_range(struct maple_tree *mt, unsigned long first,
		unsigned long last, void *entry, gfp_t gfp);
int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
		void *entry, unsigned long size, unsigned long min,
		unsigned long max, gfp_t gfp);
int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
		void *entry, unsigned long range_lo, unsigned long range_hi,
		unsigned long *next, gfp_t gfp);
int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
		void *entry, unsigned long size, unsigned long min,
		unsigned long max, gfp_t gfp);

int mtree_store_range(struct maple_tree *mt, unsigned long first,
		      unsigned long last, void *entry, gfp_t gfp);
int mtree_store(struct maple_tree *mt, unsigned long index,
		void *entry, gfp_t gfp);
void *mtree_erase(struct maple_tree *mt, unsigned long index);

int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);
int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);

void mtree_destroy(struct maple_tree *mt);
void __mt_destroy(struct maple_tree *mt);

/**
 * mtree_empty() - Determine if a tree has any present entries.
 * @mt: Maple Tree.
 *
 * Context: Any context.
 * Return: %true if the tree contains only NULL pointers.
 */
static inline bool mtree_empty(const struct maple_tree *mt)
{
	/* The test is simply whether the root pointer is NULL. */
	return mt->ma_root == NULL;
}

/* Advanced API */

/*
 * Maple State Status
 * ma_active means the maple state is pointing to a node and offset and can
 * continue operating on the tree.
 * ma_start means we have not searched the tree.
 * ma_root means we have searched the tree and the entry we found lives in
 * the root of the tree (ie it has index 0, length 1 and is the only entry in
 * the tree).
 * ma_none means we have searched the tree and there is no node in the
 * tree for this entry.  For example, we searched for index 1 in an empty
 * tree.  Or we have a tree which points to a full leaf node and we
 * searched for an entry which is larger than can be contained in that
 * leaf node.
 * ma_pause means the data within the maple state may be stale, restart the
 * operation
 * ma_overflow means the search has reached the upper limit of the search
 * ma_underflow means the search has reached the lower limit of the search
 * ma_error means there was an error, check the node for the error number.
 */
enum maple_status {
	ma_active,
	ma_start,
	ma_root,
	ma_none,
	ma_pause,
	ma_overflow,
	ma_underflow,
	ma_error,
};

/*
 * The maple state is defined in the struct ma_state and is used to keep track
 * of information during operations, and even between operations when using the
 * advanced API.
 *
 * If state->node has bit 0 set then it references a tree location which is not
 * a node (eg the root).  If bit 1 is set, the rest of the bits are a negative
 * errno.  Bit 2 (the 'unallocated slots' bit) is clear.  Bits 3-6 indicate the
 * node type.
 *
 * state->alloc either has a request number of nodes or an allocated node.
If
 * stat->alloc has a requested number of nodes, the first bit will be set (0x1)
 * and the remaining bits are the value.  If state->alloc is a node, then the
 * node will be of type maple_alloc.  maple_alloc has MAPLE_NODE_SLOTS - 1 for
 * storing more allocated nodes, a total number of nodes allocated, and the
 * node_count in this node.  node_count is the number of allocated nodes in this
 * node.  The scaling beyond MAPLE_NODE_SLOTS - 1 is handled by storing further
 * nodes into state->alloc->slot[0]'s node.  Nodes are taken from state->alloc
 * by removing a node from the state->alloc node until state->alloc->node_count
 * is 1, when state->alloc is returned and the state->alloc->slot[0] is promoted
 * to state->alloc.  Nodes are pushed onto state->alloc by putting the current
 * state->alloc into the pushed node's slot[0].
 *
 * The state also contains the implied min/max of the state->node, the depth of
 * this search, and the offset. The implied min/max are either from the parent
 * node or are 0-oo for the root node.  The depth is incremented or decremented
 * every time a node is walked down or up.  The offset is the slot/pivot of
 * interest in the node - either for reading or writing.
 *
 * When returning a value the maple state index and last respectively contain
 * the start and end of the range for the entry.  Ranges are inclusive in the
 * Maple Tree.
 *
 * The status of the state is used to determine how the next action should treat
 * the state.  For instance, if the status is ma_start then the next action
 * should start at the root of the tree and walk down.  If the status is
 * ma_pause then the node may be stale data and should be discarded.  If the
 * status is ma_overflow, then the last action hit the upper limit.
 *
 */
struct ma_state {
	struct maple_tree *tree;	/* The tree we're operating in */
	unsigned long index;		/* The index we're operating on - range start */
	unsigned long last;		/* The last index we're operating on - range end */
	struct maple_enode *node;	/* The node containing this entry */
	unsigned long min;		/* The minimum index of this node - implied pivot min */
	unsigned long max;		/* The maximum index of this node - implied pivot max */
	struct slab_sheaf *sheaf;	/* Allocated nodes for this operation */
	struct maple_node *alloc;	/* A single allocated node for fast path writes */
	unsigned long node_request;	/* The number of nodes to allocate for this operation */
	enum maple_status status;	/* The status of the state (active, start, none, etc) */
	unsigned char depth;		/* depth of tree descent during write */
	unsigned char offset;		/* The slot/pivot of interest in the node */
	unsigned char mas_flags;
	unsigned char end;		/* The end of the node */
	enum store_type store_type;	/* The type of store needed for this operation */
};

/* Per-write working state that wraps a struct ma_state. */
struct ma_wr_state {
	struct ma_state *mas;
	struct maple_node *node;	/* Decoded mas->node */
	unsigned long r_min;		/* range min */
	unsigned long r_max;		/* range max */
	enum maple_type type;		/* mas->node type */
	unsigned char offset_end;	/* The offset where the write ends */
	unsigned long *pivots;		/* mas->node->pivots pointer */
	unsigned long end_piv;		/* The pivot at the offset end */
	void __rcu **slots;		/* mas->node->slots pointer */
	void *entry;			/* The entry to write */
	void *content;			/* The existing entry that is being overwritten */
	unsigned char vacant_height;	/* Height of lowest node with free space */
	unsigned char sufficient_height;/* Height of lowest node with min sufficiency + 1 nodes */
};

/* Take and release the internal spinlock of the tree that @mas operates on. */
#define mas_lock(mas)           spin_lock(&((mas)->tree->ma_lock))
#define mas_lock_nested(mas, subclass) \
		spin_lock_nested(&((mas)->tree->ma_lock), subclass)
#define mas_unlock(mas)         spin_unlock(&((mas)->tree->ma_lock))

/*
 * Special values for ma_state.node.
 * MA_ERROR represents an errno.
After dropping the lock and attempting * to resolve the error, the walk would have to be restarted from the * top of the tree as the tree may have been modified. */ #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) /* * When changing MA_STATE, remember to also change rust/kernel/maple_tree.rs */ #define MA_STATE(name, mt, first, end) \ struct ma_state name = { \ .tree = mt, \ .index = first, \ .last = end, \ .node = NULL, \ .status = ma_start, \ .min = 0, \ .max = ULONG_MAX, \ .sheaf = NULL, \ .alloc = NULL, \ .node_request = 0, \ .mas_flags = 0, \ .store_type = wr_invalid, \ } #define MA_WR_STATE(name, ma_state, wr_entry) \ struct ma_wr_state name = { \ .mas = ma_state, \ .content = NULL, \ .entry = wr_entry, \ .vacant_height = 0, \ .sufficient_height = 0 \ } #define MA_TOPIARY(name, tree) \ struct ma_topiary name = { \ .head = NULL, \ .tail = NULL, \ .mtree = tree, \ } void *mas_walk(struct ma_state *mas); void *mas_store(struct ma_state *mas, void *entry); void *mas_erase(struct ma_state *mas); int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp); void mas_store_prealloc(struct ma_state *mas, void *entry); void *mas_find(struct ma_state *mas, unsigned long max); void *mas_find_range(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); void *mas_find_range_rev(struct ma_state *mas, unsigned long max); int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, void *entry, unsigned long range_lo, unsigned long range_hi, unsigned long *next, gfp_t gfp); bool mas_nomem(struct ma_state *mas, gfp_t gfp); void mas_pause(struct ma_state *mas); void maple_tree_init(void); void mas_destroy(struct ma_state *mas); int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries); void *mas_prev(struct ma_state *mas, unsigned long min); void *mas_prev_range(struct ma_state *mas, unsigned long max); void 
*mas_next(struct ma_state *mas, unsigned long max); void *mas_next_range(struct ma_state *mas, unsigned long max); int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size); /* * This finds an empty area from the highest address to the lowest. * AKA "Topdown" version, */ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size); static inline void mas_init(struct ma_state *mas, struct maple_tree *tree, unsigned long addr) { memset(mas, 0, sizeof(struct ma_state)); mas->tree = tree; mas->index = mas->last = addr; mas->max = ULONG_MAX; mas->status = ma_start; mas->node = NULL; } static inline bool mas_is_active(struct ma_state *mas) { return mas->status == ma_active; } static inline bool mas_is_err(struct ma_state *mas) { return mas->status == ma_error; } /** * mas_reset() - Reset a Maple Tree operation state. * @mas: Maple Tree operation state. * * Resets the error or walk state of the @mas so future walks of the * array will start from the root. Use this if you have dropped the * lock and want to reuse the ma_state. * * Context: Any context. */ static __always_inline void mas_reset(struct ma_state *mas) { mas->status = ma_start; mas->node = NULL; } /** * mas_for_each() - Iterate over a range of the maple tree. * @__mas: Maple Tree operation state (maple_state) * @__entry: Entry retrieved from the tree * @__max: maximum index to retrieve from the tree * * When returned, mas->index and mas->last will hold the entire range for the * entry. * * Note: may return the zero entry. */ #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) /** * mas_for_each_rev() - Iterate over a range of the maple tree in reverse order. 
* @__mas: Maple Tree operation state (maple_state) * @__entry: Entry retrieved from the tree * @__min: minimum index to retrieve from the tree * * When returned, mas->index and mas->last will hold the entire range for the * entry. * * Note: may return the zero entry. */ #define mas_for_each_rev(__mas, __entry, __min) \ while (((__entry) = mas_find_rev((__mas), (__min))) != NULL) #ifdef CONFIG_DEBUG_MAPLE_TREE enum mt_dump_format { mt_dump_dec, mt_dump_hex, }; extern atomic_t maple_tree_tests_run; extern atomic_t maple_tree_tests_passed; void mt_dump(const struct maple_tree *mt, enum mt_dump_format format); void mas_dump(const struct ma_state *mas); void mas_wr_dump(const struct ma_wr_state *wr_mas); void mt_validate(struct maple_tree *mt); void mt_cache_shrink(void); #define MT_BUG_ON(__tree, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ pr_info("BUG at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mt_dump(__tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ } while (0) #define MAS_BUG_ON(__mas, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ pr_info("BUG at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_dump(__mas); \ mt_dump((__mas)->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ } while (0) #define MAS_WR_BUG_ON(__wrmas, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ pr_info("BUG at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_wr_dump(__wrmas); \ mas_dump((__wrmas)->mas); \ mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ } while (0) 
#define MT_WARN_ON(__tree, __x) ({ \ int ret = !!(__x); \ atomic_inc(&maple_tree_tests_run); \ if (ret) { \ pr_info("WARN at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mt_dump(__tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ unlikely(ret); \ }) #define MAS_WARN_ON(__mas, __x) ({ \ int ret = !!(__x); \ atomic_inc(&maple_tree_tests_run); \ if (ret) { \ pr_info("WARN at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_dump(__mas); \ mt_dump((__mas)->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ unlikely(ret); \ }) #define MAS_WR_WARN_ON(__wrmas, __x) ({ \ int ret = !!(__x); \ atomic_inc(&maple_tree_tests_run); \ if (ret) { \ pr_info("WARN at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_wr_dump(__wrmas); \ mas_dump((__wrmas)->mas); \ mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ unlikely(ret); \ }) #else #define MT_BUG_ON(__tree, __x) BUG_ON(__x) #define MAS_BUG_ON(__mas, __x) BUG_ON(__x) #define MAS_WR_BUG_ON(__mas, __x) BUG_ON(__x) #define MT_WARN_ON(__tree, __x) WARN_ON(__x) #define MAS_WARN_ON(__mas, __x) WARN_ON(__x) #define MAS_WR_WARN_ON(__mas, __x) WARN_ON(__x) #endif /* CONFIG_DEBUG_MAPLE_TREE */ /** * __mas_set_range() - Set up Maple Tree operation state to a sub-range of the * current location. * @mas: Maple Tree operation state. * @start: New start of range in the Maple Tree. * @last: New end of range in the Maple Tree. * * set the internal maple state values to a sub-range. * Please use mas_set_range() if you do not know where you are in the tree. 
*/ static inline void __mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { /* Ensure the range starts within the current slot */ MAS_WARN_ON(mas, mas_is_active(mas) && (mas->index > start || mas->last < start)); mas->index = start; mas->last = last; } /** * mas_set_range() - Set up Maple Tree operation state for a different index. * @mas: Maple Tree operation state. * @start: New start of range in the Maple Tree. * @last: New end of range in the Maple Tree. * * Move the operation state to refer to a different range. This will * have the effect of starting a walk from the top; see mas_next() * to move to an adjacent index. */ static inline void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { mas_reset(mas); __mas_set_range(mas, start, last); } /** * mas_set() - Set up Maple Tree operation state for a different index. * @mas: Maple Tree operation state. * @index: New index into the Maple Tree. * * Move the operation state to refer to a different index. This will * have the effect of starting a walk from the top; see mas_next() * to move to an adjacent index. */ static inline void mas_set(struct ma_state *mas, unsigned long index) { mas_set_range(mas, index, index); } static inline bool mt_external_lock(const struct maple_tree *mt) { return (mt->ma_flags & MT_FLAGS_LOCK_MASK) == MT_FLAGS_LOCK_EXTERN; } /** * mt_init_flags() - Initialise an empty maple tree with flags. * @mt: Maple Tree * @flags: maple tree flags. * * If you need to initialise a Maple Tree with special flags (eg, an * allocation tree), use this function. * * Context: Any context. */ static inline void mt_init_flags(struct maple_tree *mt, unsigned int flags) { mt->ma_flags = flags; if (!mt_external_lock(mt)) spin_lock_init(&mt->ma_lock); rcu_assign_pointer(mt->ma_root, NULL); } /** * mt_init() - Initialise an empty maple tree. * @mt: Maple Tree * * An empty Maple Tree. * * Context: Any context. 
*/ static inline void mt_init(struct maple_tree *mt) { mt_init_flags(mt, 0); } static inline bool mt_in_rcu(struct maple_tree *mt) { #ifdef CONFIG_MAPLE_RCU_DISABLED return false; #endif return mt->ma_flags & MT_FLAGS_USE_RCU; } /** * mt_clear_in_rcu() - Switch the tree to non-RCU mode. * @mt: The Maple Tree */ static inline void mt_clear_in_rcu(struct maple_tree *mt) { if (!mt_in_rcu(mt)) return; if (mt_external_lock(mt)) { WARN_ON(!mt_lock_is_held(mt)); mt->ma_flags &= ~MT_FLAGS_USE_RCU; } else { mtree_lock(mt); mt->ma_flags &= ~MT_FLAGS_USE_RCU; mtree_unlock(mt); } } /** * mt_set_in_rcu() - Switch the tree to RCU safe mode. * @mt: The Maple Tree */ static inline void mt_set_in_rcu(struct maple_tree *mt) { if (mt_in_rcu(mt)) return; if (mt_external_lock(mt)) { WARN_ON(!mt_lock_is_held(mt)); mt->ma_flags |= MT_FLAGS_USE_RCU; } else { mtree_lock(mt); mt->ma_flags |= MT_FLAGS_USE_RCU; mtree_unlock(mt); } } static inline unsigned int mt_height(const struct maple_tree *mt) { return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET; } void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max); void *mt_find_after(struct maple_tree *mt, unsigned long *index, unsigned long max); void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min); void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max); /** * mt_for_each - Iterate over each entry starting at index until max. * @__tree: The Maple Tree * @__entry: The current entry * @__index: The index to start the search from. Subsequently used as iterator. * @__max: The maximum limit for @index * * This iterator skips all entries, which resolve to a NULL pointer, * e.g. entries which has been reserved with XA_ZERO_ENTRY. */ #define mt_for_each(__tree, __entry, __index, __max) \ for (__entry = mt_find(__tree, &(__index), __max); \ __entry; __entry = mt_find_after(__tree, &(__index), __max)) #endif /*_LINUX_MAPLE_TREE_H */ |
| 16 48 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * syscalls.h - Linux syscall interfaces (non-arch-specific) * * Copyright (c) 2004 Randy Dunlap * Copyright (c) 2004 Open Source Development Labs */ #ifndef _LINUX_SYSCALLS_H #define _LINUX_SYSCALLS_H struct __aio_sigset; struct epoll_event; struct iattr; struct inode; struct iocb; struct io_event; struct iovec; struct __kernel_old_itimerval; struct kexec_segment; struct 
linux_dirent; struct linux_dirent64; struct list_head; struct mmap_arg_struct; struct msgbuf; struct user_msghdr; struct mmsghdr; struct msqid_ds; struct new_utsname; struct nfsctl_arg; struct __old_kernel_stat; struct oldold_utsname; struct old_utsname; struct pollfd; struct rlimit; struct rlimit64; struct rusage; struct sched_param; struct sched_attr; struct sel_arg_struct; struct semaphore; struct sembuf; struct shmid_ds; struct sockaddr; struct stat; struct stat64; struct statfs; struct statfs64; struct statx; struct sysinfo; struct timespec; struct __kernel_old_timeval; struct __kernel_timex; struct timezone; struct tms; struct utimbuf; struct mq_attr; struct compat_stat; struct old_timeval32; struct robust_list_head; struct futex_waitv; struct getcpu_cache; struct old_linux_dirent; struct perf_event_attr; struct file_handle; struct sigaltstack; struct rseq; union bpf_attr; struct io_uring_params; struct clone_args; struct open_how; struct mount_attr; struct landlock_ruleset_attr; struct lsm_ctx; enum landlock_rule_type; struct cachestat_range; struct cachestat; struct statmount; struct mnt_id_req; struct xattr_args; struct file_attr; #include <linux/types.h> #include <linux/aio_abi.h> #include <linux/capability.h> #include <linux/signal.h> #include <linux/list.h> #include <linux/bug.h> #include <linux/sem.h> #include <asm/siginfo.h> #include <linux/unistd.h> #include <linux/quota.h> #include <linux/key.h> #include <linux/personality.h> #include <trace/syscall.h> #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER /* * It may be useful for an architecture to override the definitions of the * SYSCALL_DEFINE0() and __SYSCALL_DEFINEx() macros, in particular to use a * different calling convention for syscalls. To allow for that, the prototypes * for the sys_*() functions below will *not* be included if * CONFIG_ARCH_HAS_SYSCALL_WRAPPER is enabled. 
*/ #include <asm/syscall_wrapper.h> #endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */ /* * __MAP - apply a macro to syscall arguments * __MAP(n, m, t1, a1, t2, a2, ..., tn, an) will expand to * m(t1, a1), m(t2, a2), ..., m(tn, an) * The first argument must be equal to the amount of type/name * pairs given. Note that this list of pairs (i.e. the arguments * of __MAP starting at the third one) is in the same format as * for SYSCALL_DEFINE<n>/COMPAT_SYSCALL_DEFINE<n> */ #define __MAP0(m,...) #define __MAP1(m,t,a,...) m(t,a) #define __MAP2(m,t,a,...) m(t,a), __MAP1(m,__VA_ARGS__) #define __MAP3(m,t,a,...) m(t,a), __MAP2(m,__VA_ARGS__) #define __MAP4(m,t,a,...) m(t,a), __MAP3(m,__VA_ARGS__) #define __MAP5(m,t,a,...) m(t,a), __MAP4(m,__VA_ARGS__) #define __MAP6(m,t,a,...) m(t,a), __MAP5(m,__VA_ARGS__) #define __MAP(n,...) __MAP##n(__VA_ARGS__) #define __SC_DECL(t, a) t a #define __TYPE_AS(t, v) __same_type((__force t)0, v) #define __TYPE_IS_L(t) (__TYPE_AS(t, 0L)) #define __TYPE_IS_UL(t) (__TYPE_AS(t, 0UL)) #define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a #define __SC_CAST(t, a) (__force t) a #define __SC_TYPE(t, a) t #define __SC_ARGS(t, a) a #define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long)) #ifdef CONFIG_FTRACE_SYSCALLS #define __SC_STR_ADECL(t, a) #a #define __SC_STR_TDECL(t, a) #t extern struct trace_event_class event_class_syscall_enter; extern struct trace_event_class event_class_syscall_exit; extern struct trace_event_functions enter_syscall_print_funcs; extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct syscall_metadata __syscall_meta_##sname; \ static struct trace_event_call __used \ event_enter_##sname = { \ .class = &event_class_syscall_enter, \ { \ .name = "sys_enter"#sname, \ }, \ .event.funcs = &enter_syscall_print_funcs, \ .data = (void 
*)&__syscall_meta_##sname,\ .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ static struct trace_event_call __used \ __section("_ftrace_events") \ *__event_enter_##sname = &event_enter_##sname; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata __syscall_meta_##sname; \ static struct trace_event_call __used \ event_exit_##sname = { \ .class = &event_class_syscall_exit, \ { \ .name = "sys_exit"#sname, \ }, \ .event.funcs = &exit_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ static struct trace_event_call __used \ __section("_ftrace_events") \ *__event_exit_##sname = &event_exit_##sname; #define SYSCALL_METADATA(sname, nb, ...) \ static const char *types_##sname[] = { \ __MAP(nb,__SC_STR_TDECL,__VA_ARGS__) \ }; \ static const char *args_##sname[] = { \ __MAP(nb,__SC_STR_ADECL,__VA_ARGS__) \ }; \ SYSCALL_TRACE_ENTER_EVENT(sname); \ SYSCALL_TRACE_EXIT_EVENT(sname); \ static struct syscall_metadata __used \ __syscall_meta_##sname = { \ .name = "sys"#sname, \ .syscall_nr = -1, /* Filled in at boot */ \ .nb_args = nb, \ .types = nb ? types_##sname : NULL, \ .args = nb ? args_##sname : NULL, \ .enter_event = &event_enter_##sname, \ .exit_event = &event_exit_##sname, \ .enter_fields = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \ }; \ static struct syscall_metadata __used \ __section("__syscalls_metadata") \ *__p_syscall_meta_##sname = &__syscall_meta_##sname; static inline int is_syscall_trace_event(struct trace_event_call *tp_event) { return tp_event->class == &event_class_syscall_enter || tp_event->class == &event_class_syscall_exit; } #else #define SYSCALL_METADATA(sname, nb, ...) 
static inline int is_syscall_trace_event(struct trace_event_call *tp_event) { return 0; } #endif #ifndef SYSCALL_DEFINE0 #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ asmlinkage long sys_##sname(void); \ ALLOW_ERROR_INJECTION(sys_##sname, ERRNO); \ asmlinkage long sys_##sname(void) #endif /* SYSCALL_DEFINE0 */ #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) #define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) #define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__) #define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__) #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) #define SYSCALL_DEFINE_MAXARGS 6 #define SYSCALL_DEFINEx(x, sname, ...) \ SYSCALL_METADATA(sname, x, __VA_ARGS__) \ __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__) /* * The asmlinkage stub is aliased to a function named __se_sys_*() which * sign-extends 32-bit ints to longs whenever needed. The actual work is * done within __do_sys_*(). */ #ifndef __SYSCALL_DEFINEx #define __SYSCALL_DEFINEx(x, name, ...) 
\ __diag_push(); \ __diag_ignore(GCC, 8, "-Wattribute-alias", \ "Type aliasing is used to sanitize syscall arguments");\ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_sys##name)))); \ ALLOW_ERROR_INJECTION(sys##name, ERRNO); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\ __MAP(x,__SC_TEST,__VA_ARGS__); \ __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ return ret; \ } \ __diag_pop(); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* __SYSCALL_DEFINEx */ /* For split 64-bit arguments on 32-bit architectures */ #ifdef __LITTLE_ENDIAN #define SC_ARG64(name) u32, name##_lo, u32, name##_hi #else #define SC_ARG64(name) u32, name##_hi, u32, name##_lo #endif #define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo) #ifdef CONFIG_COMPAT #define SYSCALL32_DEFINE0 COMPAT_SYSCALL_DEFINE0 #define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1 #define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2 #define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3 #define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4 #define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5 #define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6 #else #define SYSCALL32_DEFINE0 SYSCALL_DEFINE0 #define SYSCALL32_DEFINE1 SYSCALL_DEFINE1 #define SYSCALL32_DEFINE2 SYSCALL_DEFINE2 #define SYSCALL32_DEFINE3 SYSCALL_DEFINE3 #define SYSCALL32_DEFINE4 SYSCALL_DEFINE4 #define SYSCALL32_DEFINE5 SYSCALL_DEFINE5 #define SYSCALL32_DEFINE6 SYSCALL_DEFINE6 #endif /* * These syscall function prototypes are kept in the same order as * include/uapi/asm-generic/unistd.h. Architecture specific entries go below, * followed by deprecated or obsolete system calls. 
* * Please note that these prototypes here are only provided for information * purposes, for static analysis, and for linking from the syscall table. * These functions should not be called elsewhere from kernel code. * * As the syscall calling convention may be different from the default * for architectures overriding the syscall calling convention, do not * include the prototypes if CONFIG_ARCH_HAS_SYSCALL_WRAPPER is enabled. */ #ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx); asmlinkage long sys_io_destroy(aio_context_t ctx); asmlinkage long sys_io_submit(aio_context_t, long, struct iocb __user * __user *); asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, struct io_event __user *result); asmlinkage long sys_io_getevents(aio_context_t ctx_id, long min_nr, long nr, struct io_event __user *events, struct __kernel_timespec __user *timeout); asmlinkage long sys_io_getevents_time32(__u32 ctx_id, __s32 min_nr, __s32 nr, struct io_event __user *events, struct old_timespec32 __user *timeout); asmlinkage long sys_io_pgetevents(aio_context_t ctx_id, long min_nr, long nr, struct io_event __user *events, struct __kernel_timespec __user *timeout, const struct __aio_sigset __user *sig); asmlinkage long sys_io_pgetevents_time32(aio_context_t ctx_id, long min_nr, long nr, struct io_event __user *events, struct old_timespec32 __user *timeout, const struct __aio_sigset __user *sig); asmlinkage long sys_io_uring_setup(u32 entries, struct io_uring_params __user *p); asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit, u32 min_complete, u32 flags, const void __user *argp, size_t argsz); asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op, void __user *arg, unsigned int nr_args); asmlinkage long sys_setxattr(const char __user *path, const char __user *name, const void __user *value, size_t size, int flags); asmlinkage long sys_setxattrat(int dfd, const char 
__user *path, unsigned int at_flags, const char __user *name, const struct xattr_args __user *args, size_t size); asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name, const void __user *value, size_t size, int flags); asmlinkage long sys_fsetxattr(int fd, const char __user *name, const void __user *value, size_t size, int flags); asmlinkage long sys_getxattr(const char __user *path, const char __user *name, void __user *value, size_t size); asmlinkage long sys_getxattrat(int dfd, const char __user *path, unsigned int at_flags, const char __user *name, struct xattr_args __user *args, size_t size); asmlinkage long sys_lgetxattr(const char __user *path, const char __user *name, void __user *value, size_t size); asmlinkage long sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size); asmlinkage long sys_listxattr(const char __user *path, char __user *list, size_t size); asmlinkage long sys_listxattrat(int dfd, const char __user *path, unsigned int at_flags, char __user *list, size_t size); asmlinkage long sys_llistxattr(const char __user *path, char __user *list, size_t size); asmlinkage long sys_flistxattr(int fd, char __user *list, size_t size); asmlinkage long sys_removexattr(const char __user *path, const char __user *name); asmlinkage long sys_removexattrat(int dfd, const char __user *path, unsigned int at_flags, const char __user *name); asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); asmlinkage long sys_file_getattr(int dfd, const char __user *filename, struct file_attr __user *attr, size_t usize, unsigned int at_flags); asmlinkage long sys_file_setattr(int dfd, const char __user *filename, struct file_attr __user *attr, size_t usize, unsigned int at_flags); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); asmlinkage long sys_eventfd2(unsigned int count, int flags); asmlinkage long 
sys_epoll_create1(int flags); asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event); asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, int timeout, const sigset_t __user *sigmask, size_t sigsetsize); asmlinkage long sys_epoll_pwait2(int epfd, struct epoll_event __user *events, int maxevents, const struct __kernel_timespec __user *timeout, const sigset_t __user *sigmask, size_t sigsetsize); asmlinkage long sys_dup(unsigned int fildes); asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags); asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg); #if BITS_PER_LONG == 32 asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg); #endif asmlinkage long sys_inotify_init1(int flags); asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask); asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd); asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); asmlinkage long sys_ioprio_set(int which, int who, int ioprio); asmlinkage long sys_ioprio_get(int which, int who); asmlinkage long sys_flock(unsigned int fd, unsigned int cmd); asmlinkage long sys_mknodat(int dfd, const char __user * filename, umode_t mode, unsigned dev); asmlinkage long sys_mkdirat(int dfd, const char __user * pathname, umode_t mode); asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag); asmlinkage long sys_symlinkat(const char __user * oldname, int newdfd, const char __user * newname); asmlinkage long sys_linkat(int olddfd, const char __user *oldname, int newdfd, const char __user *newname, int flags); asmlinkage long sys_renameat(int olddfd, const char __user * oldname, int newdfd, const char __user * newname); asmlinkage long sys_umount(char __user *name, int flags); asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name, char __user *type, unsigned long flags, void 
__user *data); asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *put_old); asmlinkage long sys_statfs(const char __user * path, struct statfs __user *buf); asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf); asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user *buf); asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf); asmlinkage long sys_statmount(const struct mnt_id_req __user *req, struct statmount __user *buf, size_t bufsize, unsigned int flags); asmlinkage long sys_listmount(const struct mnt_id_req __user *req, u64 __user *mnt_ids, size_t nr_mnt_ids, unsigned int flags); asmlinkage long sys_truncate(const char __user *path, long length); asmlinkage long sys_ftruncate(unsigned int fd, off_t length); #if BITS_PER_LONG == 32 asmlinkage long sys_truncate64(const char __user *path, loff_t length); asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); #endif asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode); asmlinkage long sys_faccessat2(int dfd, const char __user *filename, int mode, int flags); asmlinkage long sys_chdir(const char __user *filename); asmlinkage long sys_fchdir(unsigned int fd); asmlinkage long sys_chroot(const char __user *filename); asmlinkage long sys_fchmod(unsigned int fd, umode_t mode); asmlinkage long sys_fchmodat(int dfd, const char __user *filename, umode_t mode); asmlinkage long sys_fchmodat2(int dfd, const char __user *filename, umode_t mode, unsigned int flags); asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, umode_t mode); asmlinkage long sys_openat2(int dfd, const char __user *filename, struct open_how 
__user *how, size_t size); asmlinkage long sys_close(unsigned int fd); asmlinkage long sys_close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); asmlinkage long sys_vhangup(void); asmlinkage long sys_pipe2(int __user *fildes, int flags); asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t id, void __user *addr); asmlinkage long sys_quotactl_fd(unsigned int fd, unsigned int cmd, qid_t id, void __user *addr); asmlinkage long sys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, unsigned int count); asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, unsigned long offset_low, loff_t __user *result, unsigned int whence); asmlinkage long sys_lseek(unsigned int fd, off_t offset, unsigned int whence); asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count); asmlinkage long sys_write(unsigned int fd, const char __user *buf, size_t count); asmlinkage long sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen); asmlinkage long sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen); asmlinkage long sys_pread64(unsigned int fd, char __user *buf, size_t count, loff_t pos); asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos); asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); asmlinkage long sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count); asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, fd_set __user *, struct __kernel_timespec __user *, void __user *); asmlinkage long sys_pselect6_time32(int, fd_set __user *, fd_set __user *, fd_set __user *, struct old_timespec32 __user *, void __user *); asmlinkage long 
sys_ppoll(struct pollfd __user *, unsigned int, struct __kernel_timespec __user *, const sigset_t __user *, size_t); asmlinkage long sys_ppoll_time32(struct pollfd __user *, unsigned int, struct old_timespec32 __user *, const sigset_t __user *, size_t); asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, int flags); asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, unsigned long nr_segs, unsigned int flags); asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, int fd_out, loff_t __user *off_out, size_t len, unsigned int flags); asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags); asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf, int bufsiz); asmlinkage long sys_newfstatat(int dfd, const char __user *filename, struct stat __user *statbuf, int flag); asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf); #if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf); asmlinkage long sys_fstatat64(int dfd, const char __user *filename, struct stat64 __user *statbuf, int flag); #endif asmlinkage long sys_sync(void); asmlinkage long sys_fsync(unsigned int fd); asmlinkage long sys_fdatasync(unsigned int fd); asmlinkage long sys_sync_file_range2(int fd, unsigned int flags, loff_t offset, loff_t nbytes); asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, unsigned int flags); asmlinkage long sys_timerfd_create(int clockid, int flags); asmlinkage long sys_timerfd_settime(int ufd, int flags, const struct __kernel_itimerspec __user *utmr, struct __kernel_itimerspec __user *otmr); asmlinkage long sys_timerfd_gettime(int ufd, struct __kernel_itimerspec __user *otmr); asmlinkage long sys_timerfd_gettime32(int ufd, struct old_itimerspec32 __user *otmr); asmlinkage long sys_timerfd_settime32(int ufd, int flags, const struct old_itimerspec32 
__user *utmr, struct old_itimerspec32 __user *otmr); asmlinkage long sys_utimensat(int dfd, const char __user *filename, struct __kernel_timespec __user *utimes, int flags); asmlinkage long sys_utimensat_time32(unsigned int dfd, const char __user *filename, struct old_timespec32 __user *t, int flags); asmlinkage long sys_acct(const char __user *name); asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr); asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data); asmlinkage long sys_personality(unsigned int personality); asmlinkage long sys_exit(int error_code); asmlinkage long sys_exit_group(int error_code); asmlinkage long sys_waitid(int which, pid_t pid, struct siginfo __user *infop, int options, struct rusage __user *ru); asmlinkage long sys_set_tid_address(int __user *tidptr); asmlinkage long sys_unshare(unsigned long unshare_flags); asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, const struct __kernel_timespec __user *utime, u32 __user *uaddr2, u32 val3); asmlinkage long sys_futex_time32(u32 __user *uaddr, int op, u32 val, const struct old_timespec32 __user *utime, u32 __user *uaddr2, u32 val3); asmlinkage long sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, size_t __user *len_ptr); asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, size_t len); asmlinkage long sys_futex_waitv(struct futex_waitv __user *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timeout, clockid_t clockid); asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask, unsigned int flags, struct __kernel_timespec __user *timespec, clockid_t clockid); asmlinkage long sys_futex_requeue(struct futex_waitv __user *waiters, unsigned int flags, int nr_wake, int nr_requeue); asmlinkage long sys_nanosleep(struct 
__kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, struct old_timespec32 __user *rmtp); asmlinkage long sys_getitimer(int which, struct __kernel_old_itimerval __user *value); asmlinkage long sys_setitimer(int which, struct __kernel_old_itimerval __user *value, struct __kernel_old_itimerval __user *ovalue); asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, struct kexec_segment __user *segments, unsigned long flags); asmlinkage long sys_init_module(void __user *umod, unsigned long len, const char __user *uargs); asmlinkage long sys_delete_module(const char __user *name_user, unsigned int flags); asmlinkage long sys_timer_create(clockid_t which_clock, struct sigevent __user *timer_event_spec, timer_t __user * created_timer_id); asmlinkage long sys_timer_gettime(timer_t timer_id, struct __kernel_itimerspec __user *setting); asmlinkage long sys_timer_getoverrun(timer_t timer_id); asmlinkage long sys_timer_settime(timer_t timer_id, int flags, const struct __kernel_itimerspec __user *new_setting, struct __kernel_itimerspec __user *old_setting); asmlinkage long sys_timer_delete(timer_t timer_id); asmlinkage long sys_clock_settime(clockid_t which_clock, const struct __kernel_timespec __user *tp); asmlinkage long sys_clock_gettime(clockid_t which_clock, struct __kernel_timespec __user *tp); asmlinkage long sys_clock_getres(clockid_t which_clock, struct __kernel_timespec __user *tp); asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags, const struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_timer_gettime32(timer_t timer_id, struct old_itimerspec32 __user *setting); asmlinkage long sys_timer_settime32(timer_t timer_id, int flags, struct old_itimerspec32 __user *new, struct old_itimerspec32 __user *old); asmlinkage long sys_clock_settime32(clockid_t which_clock, struct old_timespec32 __user 
*tp); asmlinkage long sys_clock_gettime32(clockid_t which_clock, struct old_timespec32 __user *tp); asmlinkage long sys_clock_getres_time32(clockid_t which_clock, struct old_timespec32 __user *tp); asmlinkage long sys_clock_nanosleep_time32(clockid_t which_clock, int flags, struct old_timespec32 __user *rqtp, struct old_timespec32 __user *rmtp); asmlinkage long sys_syslog(int type, char __user *buf, int len); asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, unsigned long data); asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param); asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param); asmlinkage long sys_sched_getscheduler(pid_t pid); asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param); asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr); asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr); asmlinkage long sys_sched_yield(void); asmlinkage long sys_sched_get_priority_max(int policy); asmlinkage long sys_sched_get_priority_min(int policy); asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct __kernel_timespec __user *interval); asmlinkage long sys_sched_rr_get_interval_time32(pid_t pid, struct old_timespec32 __user *interval); asmlinkage long sys_restart_syscall(void); asmlinkage long sys_kill(pid_t pid, int sig); asmlinkage long sys_tkill(pid_t pid, int sig); asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig); asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss, struct sigaltstack __user *uoss); asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize); #ifndef CONFIG_ODD_RT_SIGACTION asmlinkage long sys_rt_sigaction(int, const struct sigaction __user *, struct sigaction __user *, size_t); #endif asmlinkage long sys_rt_sigprocmask(int how, sigset_t __user *set, sigset_t __user *oset, size_t 
sigsetsize); asmlinkage long sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize); asmlinkage long sys_rt_sigtimedwait(const sigset_t __user *uthese, siginfo_t __user *uinfo, const struct __kernel_timespec __user *uts, size_t sigsetsize); asmlinkage long sys_rt_sigtimedwait_time32(const sigset_t __user *uthese, siginfo_t __user *uinfo, const struct old_timespec32 __user *uts, size_t sigsetsize); asmlinkage long sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo); asmlinkage long sys_setpriority(int which, int who, int niceval); asmlinkage long sys_getpriority(int which, int who); asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user *arg); asmlinkage long sys_setregid(gid_t rgid, gid_t egid); asmlinkage long sys_setgid(gid_t gid); asmlinkage long sys_setreuid(uid_t ruid, uid_t euid); asmlinkage long sys_setuid(uid_t uid); asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid); asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid); asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid); asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid); asmlinkage long sys_setfsuid(uid_t uid); asmlinkage long sys_setfsgid(gid_t gid); asmlinkage long sys_times(struct tms __user *tbuf); asmlinkage long sys_setpgid(pid_t pid, pid_t pgid); asmlinkage long sys_getpgid(pid_t pid); asmlinkage long sys_getsid(pid_t pid); asmlinkage long sys_setsid(void); asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist); asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist); asmlinkage long sys_newuname(struct new_utsname __user *name); asmlinkage long sys_sethostname(char __user *name, int len); asmlinkage long sys_setdomainname(char __user *name, int len); asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim); asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim); 
asmlinkage long sys_getrusage(int who, struct rusage __user *ru); asmlinkage long sys_umask(int mask); asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache); asmlinkage long sys_gettimeofday(struct __kernel_old_timeval __user *tv, struct timezone __user *tz); asmlinkage long sys_settimeofday(struct __kernel_old_timeval __user *tv, struct timezone __user *tz); asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p); asmlinkage long sys_adjtimex_time32(struct old_timex32 __user *txc_p); asmlinkage long sys_getpid(void); asmlinkage long sys_getppid(void); asmlinkage long sys_getuid(void); asmlinkage long sys_geteuid(void); asmlinkage long sys_getgid(void); asmlinkage long sys_getegid(void); asmlinkage long sys_gettid(void); asmlinkage long sys_sysinfo(struct sysinfo __user *info); asmlinkage long sys_mq_open(const char __user *name, int oflag, umode_t mode, struct mq_attr __user *attr); asmlinkage long sys_mq_unlink(const char __user *name); asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct __kernel_timespec __user *abs_timeout); asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct __kernel_timespec __user *abs_timeout); asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *notification); asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat); asmlinkage long sys_mq_timedreceive_time32(mqd_t mqdes, char __user *u_msg_ptr, unsigned int msg_len, unsigned int __user *u_msg_prio, const struct old_timespec32 __user *u_abs_timeout); asmlinkage long sys_mq_timedsend_time32(mqd_t mqdes, const char __user *u_msg_ptr, unsigned int msg_len, unsigned int msg_prio, const 
struct old_timespec32 __user *u_abs_timeout); asmlinkage long sys_msgget(key_t key, int msgflg); asmlinkage long sys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, long msgtyp, int msgflg); asmlinkage long sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg); asmlinkage long sys_semget(key_t key, int nsems, int semflg); asmlinkage long sys_semctl(int semid, int semnum, int cmd, unsigned long arg); asmlinkage long sys_old_semctl(int semid, int semnum, int cmd, unsigned long arg); asmlinkage long sys_semtimedop(int semid, struct sembuf __user *sops, unsigned nsops, const struct __kernel_timespec __user *timeout); asmlinkage long sys_semtimedop_time32(int semid, struct sembuf __user *sops, unsigned nsops, const struct old_timespec32 __user *timeout); asmlinkage long sys_semop(int semid, struct sembuf __user *sops, unsigned nsops); asmlinkage long sys_shmget(key_t key, size_t size, int flag); asmlinkage long sys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg); asmlinkage long sys_shmdt(char __user *shmaddr); asmlinkage long sys_socket(int, int, int); asmlinkage long sys_socketpair(int, int, int, int __user *); asmlinkage long sys_bind(int, struct sockaddr __user *, int); asmlinkage long sys_listen(int, int); asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *); asmlinkage long sys_connect(int, struct sockaddr __user *, int); asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *); asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *); asmlinkage long sys_sendto(int, void __user *, size_t, unsigned, struct sockaddr __user *, int); asmlinkage long 
sys_recvfrom(int, void __user *, size_t, unsigned, struct sockaddr __user *, int __user *); asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen); asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen); asmlinkage long sys_shutdown(int, int); asmlinkage long sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags); asmlinkage long sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags); asmlinkage long sys_readahead(int fd, loff_t offset, size_t count); asmlinkage long sys_brk(unsigned long brk); asmlinkage long sys_munmap(unsigned long addr, size_t len); asmlinkage long sys_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr); asmlinkage long sys_add_key(const char __user *_type, const char __user *_description, const void __user *_payload, size_t plen, key_serial_t destringid); asmlinkage long sys_request_key(const char __user *_type, const char __user *_description, const char __user *_callout_info, key_serial_t destringid); asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); #ifdef CONFIG_CLONE_BACKWARDS asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, unsigned long, int __user *); #else #ifdef CONFIG_CLONE_BACKWARDS3 asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *, int __user *, unsigned long); #else asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int __user *, unsigned long); #endif #endif asmlinkage long sys_clone3(struct clone_args __user *uargs, size_t size); asmlinkage long sys_execve(const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp); asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice); /* CONFIG_MMU only */ asmlinkage long sys_swapon(const char __user 
*specialfile, int swap_flags); asmlinkage long sys_swapoff(const char __user *specialfile); asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot); asmlinkage long sys_msync(unsigned long start, size_t len, int flags); asmlinkage long sys_mlock(unsigned long start, size_t len); asmlinkage long sys_munlock(unsigned long start, size_t len); asmlinkage long sys_mlockall(int flags); asmlinkage long sys_munlockall(void); asmlinkage long sys_mincore(unsigned long start, size_t len, unsigned char __user * vec); asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec, size_t vlen, int behavior, unsigned int flags); asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags); asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); asmlinkage long sys_mseal(unsigned long start, size_t len, unsigned long flags); asmlinkage long sys_mbind(unsigned long start, unsigned long len, unsigned long mode, const unsigned long __user *nmask, unsigned long maxnode, unsigned flags); asmlinkage long sys_get_mempolicy(int __user *policy, unsigned long __user *nmask, unsigned long maxnode, unsigned long addr, unsigned long flags); asmlinkage long sys_set_mempolicy(int mode, const unsigned long __user *nmask, unsigned long maxnode); asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *from, const unsigned long __user *to); asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, const void __user * __user *pages, const int __user *nodes, int __user *status, int flags); asmlinkage long sys_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t __user *uinfo); asmlinkage long sys_perf_event_open( struct perf_event_attr __user *attr_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); asmlinkage long sys_accept4(int, 
struct sockaddr __user *, int __user *, int); asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg, unsigned int vlen, unsigned flags, struct __kernel_timespec __user *timeout); asmlinkage long sys_recvmmsg_time32(int fd, struct mmsghdr __user *msg, unsigned int vlen, unsigned flags, struct old_timespec32 __user *timeout); asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr, int options, struct rusage __user *ru); asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource, const struct rlimit64 __user *new_rlim, struct rlimit64 __user *old_rlim); asmlinkage long sys_fanotify_init(unsigned int flags, unsigned int event_f_flags); #if defined(CONFIG_ARCH_SPLIT_ARG64) asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, unsigned int mask_1, unsigned int mask_2, int dfd, const char __user * pathname); #else asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, u64 mask, int fd, const char __user *pathname); #endif asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, struct file_handle __user *handle, void __user *mnt_id, int flag); asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); asmlinkage long sys_clock_adjtime(clockid_t which_clock, struct __kernel_timex __user *tx); asmlinkage long sys_clock_adjtime32(clockid_t which_clock, struct old_timex32 __user *tx); asmlinkage long sys_syncfs(int fd); asmlinkage long sys_setns(int fd, int nstype); asmlinkage long sys_pidfd_open(pid_t pid, unsigned int flags); asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, unsigned int vlen, unsigned flags); asmlinkage long sys_process_vm_readv(pid_t pid, const struct iovec __user *lvec, unsigned long liovcnt, const struct iovec __user *rvec, unsigned long riovcnt, unsigned long flags); asmlinkage long sys_process_vm_writev(pid_t pid, const struct iovec __user *lvec, unsigned long liovcnt, const struct iovec __user *rvec, unsigned long riovcnt, 
unsigned long flags); asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); asmlinkage long sys_sched_setattr(pid_t pid, struct sched_attr __user *attr, unsigned int flags); asmlinkage long sys_sched_getattr(pid_t pid, struct sched_attr __user *attr, unsigned int size, unsigned int flags); asmlinkage long sys_renameat2(int olddfd, const char __user *oldname, int newdfd, const char __user *newname, unsigned int flags); asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, void __user *uargs); asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size); asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); asmlinkage long sys_userfaultfd(int flags); asmlinkage long sys_membarrier(int cmd, unsigned int flags, int cpu_id); asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); asmlinkage long sys_copy_file_range(int fd_in, loff_t __user *off_in, int fd_out, loff_t __user *off_out, size_t len, unsigned int flags); asmlinkage long sys_preadv2(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h, rwf_t flags); asmlinkage long sys_pwritev2(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h, rwf_t flags); asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len, unsigned long prot, int pkey); asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); asmlinkage long sys_pkey_free(int pkey); asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, unsigned mask, struct statx 
__user *buffer); asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, int flags, uint32_t sig); asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags); asmlinkage long sys_open_tree_attr(int dfd, const char __user *path, unsigned flags, struct mount_attr __user *uattr, size_t usize); asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path, int to_dfd, const char __user *to_path, unsigned int ms_flags); asmlinkage long sys_mount_setattr(int dfd, const char __user *path, unsigned int flags, struct mount_attr __user *uattr, size_t usize); asmlinkage long sys_fsopen(const char __user *fs_name, unsigned int flags); asmlinkage long sys_fsconfig(int fs_fd, unsigned int cmd, const char __user *key, const void __user *value, int aux); asmlinkage long sys_fsmount(int fs_fd, unsigned int flags, unsigned int ms_flags); asmlinkage long sys_fspick(int dfd, const char __user *path, unsigned int flags); asmlinkage long sys_pidfd_send_signal(int pidfd, int sig, siginfo_t __user *info, unsigned int flags); asmlinkage long sys_pidfd_getfd(int pidfd, int fd, unsigned int flags); asmlinkage long sys_landlock_create_ruleset(const struct landlock_ruleset_attr __user *attr, size_t size, __u32 flags); asmlinkage long sys_landlock_add_rule(int ruleset_fd, enum landlock_rule_type rule_type, const void __user *rule_attr, __u32 flags); asmlinkage long sys_landlock_restrict_self(int ruleset_fd, __u32 flags); asmlinkage long sys_memfd_secret(unsigned int flags); asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long len, unsigned long home_node, unsigned long flags); asmlinkage long sys_cachestat(unsigned int fd, struct cachestat_range __user *cstat_range, struct cachestat __user *cstat, unsigned int flags); asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags); asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx __user *ctx, u32 __user *size, u32 
flags); asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx __user *ctx, u32 size, u32 flags); asmlinkage long sys_lsm_list_modules(u64 __user *ids, u32 __user *size, u32 flags); /* * Architecture-specific system calls */ /* x86 */ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on); asmlinkage long sys_uretprobe(void); asmlinkage long sys_uprobe(void); /* pciconfig: alpha, arm, arm64, ia64, sparc */ asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, void __user *buf); asmlinkage long sys_pciconfig_write(unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, void __user *buf); asmlinkage long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn); /* powerpc */ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus); asmlinkage long sys_spu_create(const char __user *name, unsigned int flags, umode_t mode, int fd); /* * Deprecated system calls which are still defined in * include/uapi/asm-generic/unistd.h and wanted by >= 1 arch */ /* __ARCH_WANT_SYSCALL_NO_AT */ asmlinkage long sys_open(const char __user *filename, int flags, umode_t mode); asmlinkage long sys_link(const char __user *oldname, const char __user *newname); asmlinkage long sys_unlink(const char __user *pathname); asmlinkage long sys_mknod(const char __user *filename, umode_t mode, unsigned dev); asmlinkage long sys_chmod(const char __user *filename, umode_t mode); asmlinkage long sys_chown(const char __user *filename, uid_t user, gid_t group); asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode); asmlinkage long sys_rmdir(const char __user *pathname); asmlinkage long sys_lchown(const char __user *filename, uid_t user, gid_t group); asmlinkage long sys_access(const char __user *filename, int mode); asmlinkage long sys_rename(const char __user *oldname, const char __user *newname); asmlinkage long sys_symlink(const char __user 
*old, const char __user *new); #if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) asmlinkage long sys_stat64(const char __user *filename, struct stat64 __user *statbuf); asmlinkage long sys_lstat64(const char __user *filename, struct stat64 __user *statbuf); #endif /* __ARCH_WANT_SYSCALL_NO_FLAGS */ asmlinkage long sys_pipe(int __user *fildes); asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd); asmlinkage long sys_epoll_create(int size); asmlinkage long sys_inotify_init(void); asmlinkage long sys_eventfd(unsigned int count); asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask); /* __ARCH_WANT_SYSCALL_OFF_T */ asmlinkage long sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count); asmlinkage long sys_newstat(const char __user *filename, struct stat __user *statbuf); asmlinkage long sys_newlstat(const char __user *filename, struct stat __user *statbuf); asmlinkage long sys_fadvise64(int fd, loff_t offset, size_t len, int advice); /* __ARCH_WANT_SYSCALL_DEPRECATED */ asmlinkage long sys_alarm(unsigned int seconds); asmlinkage long sys_getpgrp(void); asmlinkage long sys_pause(void); asmlinkage long sys_time(__kernel_old_time_t __user *tloc); asmlinkage long sys_time32(old_time32_t __user *tloc); #ifdef __ARCH_WANT_SYS_UTIME asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times); asmlinkage long sys_utimes(char __user *filename, struct __kernel_old_timeval __user *utimes); asmlinkage long sys_futimesat(int dfd, const char __user *filename, struct __kernel_old_timeval __user *utimes); #endif asmlinkage long sys_futimesat_time32(unsigned int dfd, const char __user *filename, struct old_timeval32 __user *t); asmlinkage long sys_utime32(const char __user *filename, struct old_utimbuf32 __user *t); asmlinkage long sys_utimes_time32(const char __user *filename, struct old_timeval32 __user *t); asmlinkage long sys_creat(const char __user *pathname, umode_t mode); asmlinkage 
long sys_getdents(unsigned int fd, struct linux_dirent __user *dirent, unsigned int count); asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp); asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, int timeout); asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, int maxevents, int timeout); asmlinkage long sys_ustat(unsigned dev, struct ustat __user *ubuf); asmlinkage long sys_vfork(void); asmlinkage long sys_recv(int, void __user *, size_t, unsigned); asmlinkage long sys_send(int, void __user *, size_t, unsigned); asmlinkage long sys_oldumount(char __user *name); asmlinkage long sys_uselib(const char __user *library); asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2); asmlinkage long sys_fork(void); /* obsolete */ asmlinkage long sys_stime(__kernel_old_time_t __user *tptr); asmlinkage long sys_stime32(old_time32_t __user *tptr); /* obsolete */ asmlinkage long sys_sigpending(old_sigset_t __user *uset); asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set, old_sigset_t __user *oset); #ifdef CONFIG_OLD_SIGSUSPEND asmlinkage long sys_sigsuspend(old_sigset_t mask); #endif #ifdef CONFIG_OLD_SIGSUSPEND3 asmlinkage long sys_sigsuspend(int unused1, int unused2, old_sigset_t mask); #endif #ifdef CONFIG_OLD_SIGACTION asmlinkage long sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); #endif asmlinkage long sys_sgetmask(void); asmlinkage long sys_ssetmask(int newmask); asmlinkage long sys_signal(int sig, __sighandler_t handler); /* obsolete */ asmlinkage long sys_nice(int increment); /* obsolete */ asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd, unsigned long cmdline_len, const char __user *cmdline_ptr, unsigned long flags); /* obsolete */ asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options); /* obsolete */ #ifdef CONFIG_HAVE_UID16 
asmlinkage long sys_chown16(const char __user *filename, old_uid_t user, old_gid_t group); asmlinkage long sys_lchown16(const char __user *filename, old_uid_t user, old_gid_t group); asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group); asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid); asmlinkage long sys_setgid16(old_gid_t gid); asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid); asmlinkage long sys_setuid16(old_uid_t uid); asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid); asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid); asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid); asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid); asmlinkage long sys_setfsuid16(old_uid_t uid); asmlinkage long sys_setfsgid16(old_gid_t gid); asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist); asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist); asmlinkage long sys_getuid16(void); asmlinkage long sys_geteuid16(void); asmlinkage long sys_getgid16(void); asmlinkage long sys_getegid16(void); #endif /* obsolete */ asmlinkage long sys_socketcall(int call, unsigned long __user *args); /* obsolete */ asmlinkage long sys_stat(const char __user *filename, struct __old_kernel_stat __user *statbuf); asmlinkage long sys_lstat(const char __user *filename, struct __old_kernel_stat __user *statbuf); asmlinkage long sys_fstat(unsigned int fd, struct __old_kernel_stat __user *statbuf); asmlinkage long sys_readlink(const char __user *path, char __user *buf, int bufsiz); /* obsolete */ asmlinkage long sys_old_select(struct sel_arg_struct __user *arg); /* obsolete */ asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); /* obsolete */ asmlinkage long sys_gethostname(char __user *name, int len); 
asmlinkage long sys_uname(struct old_utsname __user *); asmlinkage long sys_olduname(struct oldold_utsname __user *); #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim); #endif /* obsolete */ asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second, unsigned long third, void __user *ptr, long fifth); /* obsolete */ asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); /* * Not a real system call, but a placeholder for syscalls which are * not implemented -- see kernel/sys_ni.c */ asmlinkage long sys_ni_syscall(void); #endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */ asmlinkage long sys_ni_posix_timers(void); /* * Kernel code should not call syscalls (i.e., sys_xyzyyz()) directly. * Instead, use one of the functions which work equivalently, such as * the ksys_xyzyyz() functions prototyped below. 
*/ ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_fchown(unsigned int fd, uid_t user, gid_t group); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); void ksys_sync(void); int ksys_unshare(unsigned long unshare_flags); int ksys_setsid(void); int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes, unsigned int flags); ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, loff_t pos); ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos); int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len); #ifdef CONFIG_ADVISE_SYSCALLS int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice); #else static inline int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) { return -EINVAL; } #endif unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); ssize_t ksys_readahead(int fd, loff_t offset, size_t count); int ksys_ipc(unsigned int call, int first, unsigned long second, unsigned long third, void __user * ptr, long fifth); int compat_ksys_ipc(u32 call, int first, int second, u32 third, u32 ptr, u32 fifth); /* * The following kernel syscall equivalents are just wrappers to fs-internal * functions. Therefore, provide stubs to be inlined at the callsites. 
*/ extern int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); static inline long ksys_chown(const char __user *filename, uid_t user, gid_t group) { return do_fchownat(AT_FDCWD, filename, user, group, 0); } static inline long ksys_lchown(const char __user *filename, uid_t user, gid_t group) { return do_fchownat(AT_FDCWD, filename, user, group, AT_SYMLINK_NOFOLLOW); } int do_sys_ftruncate(unsigned int fd, loff_t length, int small); static inline long ksys_ftruncate(unsigned int fd, loff_t length) { return do_sys_ftruncate(fd, length, 1); } int do_sys_truncate(const char __user *pathname, loff_t length); static inline long ksys_truncate(const char __user *pathname, loff_t length) { return do_sys_truncate(pathname, length); } static inline unsigned int ksys_personality(unsigned int personality) { unsigned int old = current->personality; if (personality != 0xffffffff) set_personality(personality); return old; } /* for __ARCH_WANT_SYS_IPC */ long ksys_semtimedop(int semid, struct sembuf __user *tsops, unsigned int nsops, const struct __kernel_timespec __user *timeout); long ksys_semget(key_t key, int nsems, int semflg); long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg); long ksys_msgget(key_t key, int msgflg); long ksys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, long msgtyp, int msgflg); long ksys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg); long ksys_shmget(key_t key, size_t size, int shmflg); long ksys_shmdt(char __user *shmaddr); long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, unsigned int nsops, const struct old_timespec32 __user *timeout); long __do_semtimedop(int semid, struct sembuf *tsems, unsigned int nsops, const struct timespec64 *timeout, struct ipc_namespace *ns); int __sys_getsockopt(int fd, int 
level, int optname, char __user *optval, int __user *optlen); int __sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen); #endif |
| 5 5 5 7 6 7 7 7 6 6 7 3 2 7 10 10 10 10 62 54 62 62 14 5 3 3 2 2 4 4 4 2 2 1 2 2 9 8 2 1 3 3 5 4 5 3 8 10 1 4 1 7 1 6 2 2 3 1 2 1 1 4 4 2 1 1 3 22 2 60 2 1 1 60 3 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 
485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 | // SPDX-License-Identifier: GPL-2.0 /* * RTC subsystem, dev interface * * Copyright (C) 2005 Tower Technologies * Author: Alessandro Zummo <a.zummo@towertech.it> * * based on arch/arm/common/rtctime.c */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/compat.h> #include <linux/module.h> #include <linux/rtc.h> #include <linux/sched/signal.h> #include "rtc-core.h" static dev_t rtc_devt; #define RTC_DEV_MAX 16 /* 16 RTCs should be enough for everyone... */ static int rtc_dev_open(struct inode *inode, struct file *file) { struct rtc_device *rtc = container_of(inode->i_cdev, struct rtc_device, char_dev); if (test_and_set_bit_lock(RTC_DEV_BUSY, &rtc->flags)) return -EBUSY; file->private_data = rtc; spin_lock_irq(&rtc->irq_lock); rtc->irq_data = 0; spin_unlock_irq(&rtc->irq_lock); return 0; } #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL /* * Routine to poll RTC seconds field for change as often as possible, * after first RTC_UIE use timer to reduce polling */ static void rtc_uie_task(struct work_struct *work) { struct rtc_device *rtc = container_of(work, struct rtc_device, uie_task); struct rtc_time tm; int num = 0; int err; err = rtc_read_time(rtc, &tm); spin_lock_irq(&rtc->irq_lock); if (rtc->stop_uie_polling || err) { rtc->uie_task_active = 0; } else if (rtc->oldsecs != tm.tm_sec) { num = (tm.tm_sec + 60 - rtc->oldsecs) % 60; rtc->oldsecs = tm.tm_sec; rtc->uie_timer.expires = jiffies + HZ - (HZ / 10); rtc->uie_timer_active = 1; rtc->uie_task_active = 0; add_timer(&rtc->uie_timer); } else if (schedule_work(&rtc->uie_task) == 0) { rtc->uie_task_active = 0; } spin_unlock_irq(&rtc->irq_lock); if (num) rtc_handle_legacy_irq(rtc, num, RTC_UF); 
} static void rtc_uie_timer(struct timer_list *t) { struct rtc_device *rtc = timer_container_of(rtc, t, uie_timer); unsigned long flags; spin_lock_irqsave(&rtc->irq_lock, flags); rtc->uie_timer_active = 0; rtc->uie_task_active = 1; if ((schedule_work(&rtc->uie_task) == 0)) rtc->uie_task_active = 0; spin_unlock_irqrestore(&rtc->irq_lock, flags); } static int clear_uie(struct rtc_device *rtc) { spin_lock_irq(&rtc->irq_lock); if (rtc->uie_irq_active) { rtc->stop_uie_polling = 1; if (rtc->uie_timer_active) { spin_unlock_irq(&rtc->irq_lock); timer_delete_sync(&rtc->uie_timer); spin_lock_irq(&rtc->irq_lock); rtc->uie_timer_active = 0; } if (rtc->uie_task_active) { spin_unlock_irq(&rtc->irq_lock); flush_work(&rtc->uie_task); spin_lock_irq(&rtc->irq_lock); } rtc->uie_irq_active = 0; } spin_unlock_irq(&rtc->irq_lock); return 0; } static int set_uie(struct rtc_device *rtc) { struct rtc_time tm; int err; err = rtc_read_time(rtc, &tm); if (err) return err; spin_lock_irq(&rtc->irq_lock); if (!rtc->uie_irq_active) { rtc->uie_irq_active = 1; rtc->stop_uie_polling = 0; rtc->oldsecs = tm.tm_sec; rtc->uie_task_active = 1; if (schedule_work(&rtc->uie_task) == 0) rtc->uie_task_active = 0; } rtc->irq_data = 0; spin_unlock_irq(&rtc->irq_lock); return 0; } int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc, unsigned int enabled) { if (enabled) return set_uie(rtc); else return clear_uie(rtc); } EXPORT_SYMBOL(rtc_dev_update_irq_enable_emul); #endif /* CONFIG_RTC_INTF_DEV_UIE_EMUL */ static ssize_t rtc_dev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct rtc_device *rtc = file->private_data; DECLARE_WAITQUEUE(wait, current); unsigned long data; ssize_t ret; if (count != sizeof(unsigned int) && count < sizeof(unsigned long)) return -EINVAL; add_wait_queue(&rtc->irq_queue, &wait); do { __set_current_state(TASK_INTERRUPTIBLE); spin_lock_irq(&rtc->irq_lock); data = rtc->irq_data; rtc->irq_data = 0; spin_unlock_irq(&rtc->irq_lock); if (data != 0) { ret = 0; 
break; } if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; break; } if (signal_pending(current)) { ret = -ERESTARTSYS; break; } schedule(); } while (1); set_current_state(TASK_RUNNING); remove_wait_queue(&rtc->irq_queue, &wait); if (ret == 0) { if (sizeof(int) != sizeof(long) && count == sizeof(unsigned int)) ret = put_user(data, (unsigned int __user *)buf) ?: sizeof(unsigned int); else ret = put_user(data, (unsigned long __user *)buf) ?: sizeof(unsigned long); } return ret; } static __poll_t rtc_dev_poll(struct file *file, poll_table *wait) { struct rtc_device *rtc = file->private_data; unsigned long data; poll_wait(file, &rtc->irq_queue, wait); data = rtc->irq_data; return (data != 0) ? (EPOLLIN | EPOLLRDNORM) : 0; } static long rtc_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int err = 0; struct rtc_device *rtc = file->private_data; const struct rtc_class_ops *ops = rtc->ops; struct rtc_time tm; struct rtc_wkalrm alarm; struct rtc_param param; void __user *uarg = (void __user *)arg; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; /* check that the calling task has appropriate permissions * for certain ioctls. doing this check here is useful * to avoid duplicate code in each driver. */ switch (cmd) { case RTC_EPOCH_SET: case RTC_SET_TIME: case RTC_PARAM_SET: if (!capable(CAP_SYS_TIME)) err = -EACCES; break; case RTC_IRQP_SET: if (arg > rtc->max_user_freq && !capable(CAP_SYS_RESOURCE)) err = -EACCES; break; case RTC_PIE_ON: if (rtc->irq_freq > rtc->max_user_freq && !capable(CAP_SYS_RESOURCE)) err = -EACCES; break; } if (err) goto done; /* * Drivers *SHOULD NOT* provide ioctl implementations * for these requests. Instead, provide methods to * support the following code, so that the RTC's main * features are accessible without using ioctls. * * RTC and alarm times will be in UTC, by preference, * but dual-booting with MS-Windows implies RTCs must * use the local wall clock time. 
*/ switch (cmd) { case RTC_ALM_READ: mutex_unlock(&rtc->ops_lock); err = rtc_read_alarm(rtc, &alarm); if (err < 0) return err; if (copy_to_user(uarg, &alarm.time, sizeof(tm))) err = -EFAULT; return err; case RTC_ALM_SET: mutex_unlock(&rtc->ops_lock); if (copy_from_user(&alarm.time, uarg, sizeof(tm))) return -EFAULT; alarm.enabled = 0; alarm.pending = 0; alarm.time.tm_wday = -1; alarm.time.tm_yday = -1; alarm.time.tm_isdst = -1; /* RTC_ALM_SET alarms may be up to 24 hours in the future. * Rather than expecting every RTC to implement "don't care" * for day/month/year fields, just force the alarm to have * the right values for those fields. * * RTC_WKALM_SET should be used instead. Not only does it * eliminate the need for a separate RTC_AIE_ON call, it * doesn't have the "alarm 23:59:59 in the future" race. * * NOTE: some legacy code may have used invalid fields as * wildcards, exposing hardware "periodic alarm" capabilities. * Not supported here. */ { time64_t now, then; err = rtc_read_time(rtc, &tm); if (err < 0) return err; now = rtc_tm_to_time64(&tm); alarm.time.tm_mday = tm.tm_mday; alarm.time.tm_mon = tm.tm_mon; alarm.time.tm_year = tm.tm_year; err = rtc_valid_tm(&alarm.time); if (err < 0) return err; then = rtc_tm_to_time64(&alarm.time); /* alarm may need to wrap into tomorrow */ if (then < now) { rtc_time64_to_tm(now + 24 * 60 * 60, &tm); alarm.time.tm_mday = tm.tm_mday; alarm.time.tm_mon = tm.tm_mon; alarm.time.tm_year = tm.tm_year; } } return rtc_set_alarm(rtc, &alarm); case RTC_RD_TIME: mutex_unlock(&rtc->ops_lock); err = rtc_read_time(rtc, &tm); if (err < 0) return err; if (copy_to_user(uarg, &tm, sizeof(tm))) err = -EFAULT; return err; case RTC_SET_TIME: mutex_unlock(&rtc->ops_lock); if (copy_from_user(&tm, uarg, sizeof(tm))) return -EFAULT; return rtc_set_time(rtc, &tm); case RTC_PIE_ON: err = rtc_irq_set_state(rtc, 1); break; case RTC_PIE_OFF: err = rtc_irq_set_state(rtc, 0); break; case RTC_AIE_ON: mutex_unlock(&rtc->ops_lock); return 
rtc_alarm_irq_enable(rtc, 1); case RTC_AIE_OFF: mutex_unlock(&rtc->ops_lock); return rtc_alarm_irq_enable(rtc, 0); case RTC_UIE_ON: mutex_unlock(&rtc->ops_lock); return rtc_update_irq_enable(rtc, 1); case RTC_UIE_OFF: mutex_unlock(&rtc->ops_lock); return rtc_update_irq_enable(rtc, 0); case RTC_IRQP_SET: err = rtc_irq_set_freq(rtc, arg); break; case RTC_IRQP_READ: err = put_user(rtc->irq_freq, (unsigned long __user *)uarg); break; case RTC_WKALM_SET: mutex_unlock(&rtc->ops_lock); if (copy_from_user(&alarm, uarg, sizeof(alarm))) return -EFAULT; return rtc_set_alarm(rtc, &alarm); case RTC_WKALM_RD: mutex_unlock(&rtc->ops_lock); err = rtc_read_alarm(rtc, &alarm); if (err < 0) return err; if (copy_to_user(uarg, &alarm, sizeof(alarm))) err = -EFAULT; return err; case RTC_PARAM_GET: if (copy_from_user(¶m, uarg, sizeof(param))) { mutex_unlock(&rtc->ops_lock); return -EFAULT; } switch(param.param) { case RTC_PARAM_FEATURES: if (param.index != 0) err = -EINVAL; param.uvalue = rtc->features[0]; break; case RTC_PARAM_CORRECTION: { long offset; mutex_unlock(&rtc->ops_lock); if (param.index != 0) return -EINVAL; err = rtc_read_offset(rtc, &offset); mutex_lock(&rtc->ops_lock); if (err == 0) param.svalue = offset; break; } default: if (rtc->ops->param_get) err = rtc->ops->param_get(rtc->dev.parent, ¶m); else err = -EINVAL; } if (!err) if (copy_to_user(uarg, ¶m, sizeof(param))) err = -EFAULT; break; case RTC_PARAM_SET: if (copy_from_user(¶m, uarg, sizeof(param))) { mutex_unlock(&rtc->ops_lock); return -EFAULT; } switch(param.param) { case RTC_PARAM_FEATURES: err = -EINVAL; break; case RTC_PARAM_CORRECTION: mutex_unlock(&rtc->ops_lock); if (param.index != 0) return -EINVAL; return rtc_set_offset(rtc, param.svalue); default: if (rtc->ops->param_set) err = rtc->ops->param_set(rtc->dev.parent, ¶m); else err = -EINVAL; } break; default: /* Finally try the driver's ioctl interface */ if (ops->ioctl) { err = ops->ioctl(rtc->dev.parent, cmd, arg); if (err == -ENOIOCTLCMD) err = -ENOTTY; } 
else { err = -ENOTTY; } break; } done: mutex_unlock(&rtc->ops_lock); return err; } #ifdef CONFIG_COMPAT #define RTC_IRQP_SET32 _IOW('p', 0x0c, __u32) #define RTC_IRQP_READ32 _IOR('p', 0x0b, __u32) #define RTC_EPOCH_SET32 _IOW('p', 0x0e, __u32) static long rtc_dev_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct rtc_device *rtc = file->private_data; void __user *uarg = compat_ptr(arg); switch (cmd) { case RTC_IRQP_READ32: return put_user(rtc->irq_freq, (__u32 __user *)uarg); case RTC_IRQP_SET32: /* arg is a plain integer, not pointer */ return rtc_dev_ioctl(file, RTC_IRQP_SET, arg); case RTC_EPOCH_SET32: /* arg is a plain integer, not pointer */ return rtc_dev_ioctl(file, RTC_EPOCH_SET, arg); } return rtc_dev_ioctl(file, cmd, (unsigned long)uarg); } #endif static int rtc_dev_fasync(int fd, struct file *file, int on) { struct rtc_device *rtc = file->private_data; return fasync_helper(fd, file, on, &rtc->async_queue); } static int rtc_dev_release(struct inode *inode, struct file *file) { struct rtc_device *rtc = file->private_data; /* We shut down the repeating IRQs that userspace enabled, * since nothing is listening to them. * - Update (UIE) ... currently only managed through ioctls * - Periodic (PIE) ... also used through rtc_*() interface calls * * Leave the alarm alone; it may be set to trigger a system wakeup * later, or be used by kernel code, and is a one-shot event anyway. 
*/ /* Keep ioctl until all drivers are converted */ rtc_dev_ioctl(file, RTC_UIE_OFF, 0); rtc_update_irq_enable(rtc, 0); rtc_irq_set_state(rtc, 0); clear_bit_unlock(RTC_DEV_BUSY, &rtc->flags); return 0; } static const struct file_operations rtc_dev_fops = { .owner = THIS_MODULE, .read = rtc_dev_read, .poll = rtc_dev_poll, .unlocked_ioctl = rtc_dev_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = rtc_dev_compat_ioctl, #endif .open = rtc_dev_open, .release = rtc_dev_release, .fasync = rtc_dev_fasync, }; /* insertion/removal hooks */ void rtc_dev_prepare(struct rtc_device *rtc) { if (!rtc_devt) return; if (rtc->id >= RTC_DEV_MAX) { dev_dbg(&rtc->dev, "too many RTC devices\n"); return; } rtc->dev.devt = MKDEV(MAJOR(rtc_devt), rtc->id); #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL INIT_WORK(&rtc->uie_task, rtc_uie_task); timer_setup(&rtc->uie_timer, rtc_uie_timer, 0); #endif cdev_init(&rtc->char_dev, &rtc_dev_fops); rtc->char_dev.owner = rtc->owner; } void __init rtc_dev_init(void) { int err; err = alloc_chrdev_region(&rtc_devt, 0, RTC_DEV_MAX, "rtc"); if (err < 0) pr_err("failed to allocate char dev region\n"); } |
| 7 5 7 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 35 6 6 6 30 30 10 10 10 10 10 10 9 9 8 8 2 2 2 2 2 8 8 8 6 8 9 4 4 4 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 
999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 
1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 | // SPDX-License-Identifier: GPL-2.0+ /* * Enhanced Host Controller Interface (EHCI) driver for USB. * * Maintainer: Alan Stern <stern@rowland.harvard.edu> * * Copyright (c) 2000-2004 by David Brownell */ #include <linux/module.h> #include <linux/pci.h> #include <linux/dmapool.h> #include <linux/kernel.h> #include <linux/delay.h> #include <linux/ioport.h> #include <linux/sched.h> #include <linux/vmalloc.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/hrtimer.h> #include <linux/list.h> #include <linux/interrupt.h> #include <linux/usb.h> #include <linux/usb/hcd.h> #include <linux/usb/otg.h> #include <linux/moduleparam.h> #include <linux/dma-mapping.h> #include <linux/debugfs.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <asm/byteorder.h> #include <asm/io.h> #include <asm/irq.h> #include <linux/unaligned.h> #if defined(CONFIG_PPC_PS3) #include <asm/firmware.h> #endif /*-------------------------------------------------------------------------*/ /* * EHCI hc_driver implementation ... experimental, incomplete. * Based on the final 1.0 register interface specification. * * USB 2.0 shows up in upcoming www.pcmcia.org technology. * First was PCMCIA, like ISA; then CardBus, which is PCI. * Next comes "CardBay", using USB 2.0 signals. * * Contains additional contributions by Brad Hards, Rory Bolt, and others. * Special thanks to Intel and VIA for providing host controllers to * test this driver on, and Cypress (including In-System Design) for * providing early devices for those host controllers to talk to! 
*/ #define DRIVER_AUTHOR "David Brownell" #define DRIVER_DESC "USB 2.0 'Enhanced' Host Controller (EHCI) Driver" static const char hcd_name [] = "ehci_hcd"; #undef EHCI_URB_TRACE /* magic numbers that can affect system performance */ #define EHCI_TUNE_CERR 3 /* 0-3 qtd retries; 0 == don't stop */ #define EHCI_TUNE_RL_HS 4 /* nak throttle; see 4.9 */ #define EHCI_TUNE_RL_TT 0 #define EHCI_TUNE_MULT_HS 1 /* 1-3 transactions/uframe; 4.10.3 */ #define EHCI_TUNE_MULT_TT 1 /* * Some drivers think it's safe to schedule isochronous transfers more than * 256 ms into the future (partly as a result of an old bug in the scheduling * code). In an attempt to avoid trouble, we will use a minimum scheduling * length of 512 frames instead of 256. */ #define EHCI_TUNE_FLS 1 /* (medium) 512-frame schedule */ /* Initial IRQ latency: faster than hw default */ static int log2_irq_thresh; // 0 to 6 module_param (log2_irq_thresh, int, S_IRUGO); MODULE_PARM_DESC (log2_irq_thresh, "log2 IRQ latency, 1-64 microframes"); /* initial park setting: slower than hw default */ static unsigned park; module_param (park, uint, S_IRUGO); MODULE_PARM_DESC (park, "park setting; 1-3 back-to-back async packets"); /* for flakey hardware, ignore overcurrent indicators */ static bool ignore_oc; module_param (ignore_oc, bool, S_IRUGO); MODULE_PARM_DESC (ignore_oc, "ignore bogus hardware overcurrent indications"); #define INTR_MASK (STS_IAA | STS_FATAL | STS_PCD | STS_ERR | STS_INT) /*-------------------------------------------------------------------------*/ #include "ehci.h" #include "pci-quirks.h" static void compute_tt_budget(u8 budget_table[EHCI_BANDWIDTH_SIZE], struct ehci_tt *tt); /* * The MosChip MCS9990 controller updates its microframe counter * a little before the frame counter, and occasionally we will read * the invalid intermediate value. Avoid problems by checking the * microframe number (the low-order 3 bits); if they are 0 then * re-read the register to get the correct value. 
*/ static unsigned ehci_moschip_read_frame_index(struct ehci_hcd *ehci) { unsigned uf; uf = ehci_readl(ehci, &ehci->regs->frame_index); if (unlikely((uf & 7) == 0)) uf = ehci_readl(ehci, &ehci->regs->frame_index); return uf; } static inline unsigned ehci_read_frame_index(struct ehci_hcd *ehci) { if (ehci->frame_index_bug) return ehci_moschip_read_frame_index(ehci); return ehci_readl(ehci, &ehci->regs->frame_index); } #include "ehci-dbg.c" /*-------------------------------------------------------------------------*/ /* * ehci_handshake - spin reading hc until handshake completes or fails * @ptr: address of hc register to be read * @mask: bits to look at in result of read * @done: value of those bits when handshake succeeds * @usec: timeout in microseconds * * Returns negative errno, or zero on success * * Success happens when the "mask" bits have the specified value (hardware * handshake done). There are two failure modes: "usec" have passed (major * hardware flakeout), or the register reads as all-ones (hardware removed). * * That last failure should_only happen in cases like physical cardbus eject * before driver shutdown. But it also seems to be caused by bugs in cardbus * bridge shutdown: shutting down the bridge before the devices using it. */ int ehci_handshake(struct ehci_hcd *ehci, void __iomem *ptr, u32 mask, u32 done, int usec) { u32 result; do { result = ehci_readl(ehci, ptr); if (result == ~(u32)0) /* card removed */ return -ENODEV; result &= mask; if (result == done) return 0; udelay (1); usec--; } while (usec > 0); return -ETIMEDOUT; } EXPORT_SYMBOL_GPL(ehci_handshake); /* check TDI/ARC silicon is in host mode */ static int tdi_in_host_mode (struct ehci_hcd *ehci) { u32 tmp; tmp = ehci_readl(ehci, &ehci->regs->usbmode); return (tmp & 3) == USBMODE_CM_HC; } /* * Force HC to halt state from unknown (EHCI spec section 2.3). * Must be called with interrupts enabled and the lock not held. 
*/ static int ehci_halt (struct ehci_hcd *ehci) { u32 temp; spin_lock_irq(&ehci->lock); /* disable any irqs left enabled by previous code */ ehci_writel(ehci, 0, &ehci->regs->intr_enable); if (ehci_is_TDI(ehci) && !tdi_in_host_mode(ehci)) { spin_unlock_irq(&ehci->lock); return 0; } /* * This routine gets called during probe before ehci->command * has been initialized, so we can't rely on its value. */ ehci->command &= ~CMD_RUN; temp = ehci_readl(ehci, &ehci->regs->command); temp &= ~(CMD_RUN | CMD_IAAD); ehci_writel(ehci, temp, &ehci->regs->command); spin_unlock_irq(&ehci->lock); synchronize_irq(ehci_to_hcd(ehci)->irq); return ehci_handshake(ehci, &ehci->regs->status, STS_HALT, STS_HALT, 16 * 125); } /* put TDI/ARC silicon into EHCI mode */ static void tdi_reset (struct ehci_hcd *ehci) { u32 tmp; tmp = ehci_readl(ehci, &ehci->regs->usbmode); tmp |= USBMODE_CM_HC; /* The default byte access to MMR space is LE after * controller reset. Set the required endian mode * for transfer buffers to match the host microprocessor */ if (ehci_big_endian_mmio(ehci)) tmp |= USBMODE_BE; ehci_writel(ehci, tmp, &ehci->regs->usbmode); } /* * Reset a non-running (STS_HALT == 1) controller. * Must be called with interrupts enabled and the lock not held. 
 *
 * Returns zero on success, or the negative errno reported by
 * ehci_handshake() when CMD_RESET does not clear within 250 ms.
 */
int ehci_reset(struct ehci_hcd *ehci)
{
	int	retval;
	u32	command = ehci_readl(ehci, &ehci->regs->command);

	/* If the EHCI debug controller is active, special care must be
	 * taken before and after a host controller reset */
	if (ehci->debug && !dbgp_reset_prep(ehci_to_hcd(ehci)))
		ehci->debug = NULL;

	command |= CMD_RESET;
	dbg_cmd (ehci, "reset", command);
	ehci_writel(ehci, command, &ehci->regs->command);
	ehci->rh_state = EHCI_RH_HALTED;
	ehci->next_statechange = jiffies;
	/* wait for the CMD_RESET bit to read back as zero */
	retval = ehci_handshake(ehci, &ehci->regs->command,
			    CMD_RESET, 0, 250 * 1000);

	/* these two registers are rewritten even if the handshake failed */
	if (ehci->has_hostpc) {
		ehci_writel(ehci, USBMODE_EX_HC | USBMODE_EX_VBPS,
				&ehci->regs->usbmode_ex);
		ehci_writel(ehci, TXFIFO_DEFAULT, &ehci->regs->txfill_tuning);
	}
	if (retval)
		return retval;

	if (ehci_is_TDI(ehci))
		tdi_reset (ehci);

	if (ehci->debug)
		dbgp_external_startup(ehci_to_hcd(ehci));

	/* forget all cached per-port suspend/resume bookkeeping */
	ehci->port_c_suspend = ehci->suspended_ports =
			ehci->resuming_ports = 0;
	return retval;
}
EXPORT_SYMBOL_GPL(ehci_reset);

/*
 * Idle the controller (turn off the schedules).
 * Must be called with interrupts enabled and the lock not held.
 *
 * Does nothing unless the root hub state is EHCI_RH_RUNNING.  The results
 * of both handshakes are ignored.
 */
static void ehci_quiesce (struct ehci_hcd *ehci)
{
	u32	temp;

	if (ehci->rh_state != EHCI_RH_RUNNING)
		return;

	/* wait for any schedule enables/disables to take effect */
	/*
	 * NOTE(review): the << 10 presumably lines the CMD_ASE/CMD_PSE enable
	 * bits up with the STS_ASS/STS_PSS status bits -- confirm in ehci.h.
	 */
	temp = (ehci->command << 10) & (STS_ASS | STS_PSS);
	ehci_handshake(ehci, &ehci->regs->status, STS_ASS | STS_PSS, temp,
			16 * 125);

	/* then disable anything that's still active */
	spin_lock_irq(&ehci->lock);
	ehci->command &= ~(CMD_ASE | CMD_PSE);
	ehci_writel(ehci, ehci->command, &ehci->regs->command);
	spin_unlock_irq(&ehci->lock);

	/* hardware can take 16 microframes to turn off ...
	 */
	ehci_handshake(ehci, &ehci->regs->status, STS_ASS | STS_PSS, 0,
			16 * 125);
}

/*-------------------------------------------------------------------------*/

/*
 * Forward declarations.  ehci_work() is defined later in this file; the
 * others are presumably defined in the .c files included just below.
 */
static void end_iaa_cycle(struct ehci_hcd *ehci);
static void end_unlink_async(struct ehci_hcd *ehci);
static void unlink_empty_async(struct ehci_hcd *ehci);
static void ehci_work(struct ehci_hcd *ehci);
static void start_unlink_intr(struct ehci_hcd *ehci, struct ehci_qh *qh);
static void end_unlink_intr(struct ehci_hcd *ehci, struct ehci_qh *qh);
static int ehci_port_power(struct ehci_hcd *ehci, int portnum, bool enable);

#include "ehci-timer.c"
#include "ehci-hub.c"
#include "ehci-mem.c"
#include "ehci-q.c"
#include "ehci-sched.c"
#include "ehci-sysfs.c"

/*-------------------------------------------------------------------------*/

/* On some systems, leaving remote wakeup enabled prevents system shutdown.
 * The firmware seems to think that powering off is a wakeup event!
 * This routine turns off remote wakeup and everything else, on all ports.
 *
 * Entered and left with ehci->lock held; the lock is dropped around each
 * ehci_port_power() call.
 */
static void ehci_turn_off_all_ports(struct ehci_hcd *ehci)
{
	int	port = HCS_N_PORTS(ehci->hcs_params);

	/* walk the ports from the highest index down to 0 */
	while (port--) {
		spin_unlock_irq(&ehci->lock);
		ehci_port_power(ehci, port, false);
		spin_lock_irq(&ehci->lock);
		ehci_writel(ehci, PORT_RWC_BITS,
				&ehci->regs->port_status[port]);
	}
}

/*
 * Halt HC, turn off all ports, and let the BIOS use the companion controllers.
 * Must be called with interrupts enabled and the lock not held.
 */
static void ehci_silence_controller(struct ehci_hcd *ehci)
{
	/* the return value of ehci_halt() is not checked */
	ehci_halt(ehci);

	spin_lock_irq(&ehci->lock);
	ehci->rh_state = EHCI_RH_HALTED;
	ehci_turn_off_all_ports(ehci);

	/* make BIOS/etc use companion controller during reboot */
	ehci_writel(ehci, 0, &ehci->regs->configured_flag);

	/* unblock posted writes */
	ehci_readl(ehci, &ehci->regs->configured_flag);
	spin_unlock_irq(&ehci->lock);
}

/* ehci_shutdown kick in for silicon on any bus (not just pci, etc).
 * This forcibly disables dma and IRQs, helping kexec and other cases
 * where the next system software may expect clean state.
 */
static void ehci_shutdown(struct usb_hcd *hcd)
{
	struct ehci_hcd	*ehci = hcd_to_ehci(hcd);

	/**
	 * Protect the system from crashing at system shutdown in cases where
	 * usb host is not added yet from OTG controller driver.
	 * As ehci_setup() not done yet, so stop accessing registers or
	 * variables initialized in ehci_setup()
	 */
	if (!ehci->sbrn)
		return;

	/* mark the controller as shutting down and disarm timer events */
	spin_lock_irq(&ehci->lock);
	ehci->shutdown = true;
	ehci->rh_state = EHCI_RH_STOPPING;
	ehci->enabled_hrtimer_events = 0;
	spin_unlock_irq(&ehci->lock);

	ehci_silence_controller(ehci);

	hrtimer_cancel(&ehci->hrtimer);
}

/*-------------------------------------------------------------------------*/

/*
 * ehci_work is called from some interrupts, timers, and so on.
 * it calls driver completion functions, after dropping ehci->lock.
 */
static void ehci_work (struct ehci_hcd *ehci)
{
	/* another CPU may drop ehci->lock during a schedule scan while
	 * it reports urb completions.  this flag guards against bogus
	 * attempts at re-entrant schedule scanning.
	 */
	if (ehci->scanning) {
		/* ask the scan already in progress to make another pass */
		ehci->need_rescan = true;
		return;
	}
	ehci->scanning = true;

 rescan:
	ehci->need_rescan = false;
	if (ehci->async_count)
		scan_async(ehci);
	if (ehci->intr_count > 0)
		scan_intr(ehci);
	if (ehci->isoc_count > 0)
		scan_isoc(ehci);
	/* set by a nested ehci_work() call that found a scan in progress */
	if (ehci->need_rescan)
		goto rescan;
	ehci->scanning = false;

	/* the IO watchdog guards against hardware or driver bugs that
	 * misplace IRQs, and should let us run completely without IRQs.
	 * such lossage has been observed on both VT6202 and VT8235.
	 */
	turn_on_io_watchdog(ehci);
}

/*
 * Called when the ehci_hcd module is removed.
 *
 * Disarms timer events, quiesces, silences and resets the controller, then
 * removes the sysfs/debug files and frees the driver's schedule memory.
 */
static void ehci_stop (struct usb_hcd *hcd)
{
	struct ehci_hcd		*ehci = hcd_to_ehci (hcd);

	ehci_dbg (ehci, "stop\n");

	/* no more interrupts ...
	 */
	spin_lock_irq(&ehci->lock);
	ehci->enabled_hrtimer_events = 0;
	spin_unlock_irq(&ehci->lock);

	/* return values of the next three calls are not checked */
	ehci_quiesce(ehci);
	ehci_silence_controller(ehci);
	ehci_reset (ehci);

	hrtimer_cancel(&ehci->hrtimer);
	remove_sysfs_files(ehci);
	remove_debug_files (ehci);

	/* root hub is shut down separately (first, when possible) */
	spin_lock_irq (&ehci->lock);
	end_free_itds(ehci);
	spin_unlock_irq (&ehci->lock);
	ehci_mem_cleanup (ehci);

	if (ehci->amd_pll_fix == 1)
		usb_amd_dev_put();

	dbg_status (ehci, "ehci_stop completed",
		    ehci_readl(ehci, &ehci->regs->status));
}

/*
 * one-time init, only for memory state
 *
 * The only register access is a read of hcc_params; the computed command
 * value is stored in ehci->command, not written to the hardware here.
 * Returns zero on success, or the negative value returned by
 * ehci_mem_init().
 */
static int ehci_init(struct usb_hcd *hcd)
{
	struct ehci_hcd		*ehci = hcd_to_ehci(hcd);
	u32			temp;
	int			retval;
	u32			hcc_params;
	struct ehci_qh_hw	*hw;

	spin_lock_init(&ehci->lock);

	/*
	 * keep io watchdog by default, those good HCDs could turn off it later
	 */
	ehci->need_io_watchdog = 1;

	hrtimer_setup(&ehci->hrtimer, ehci_hrtimer_func, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	ehci->next_hrtimer_event = EHCI_HRTIMER_NO_EVENT;

	hcc_params = ehci_readl(ehci, &ehci->caps->hcc_params);

	/*
	 * by default set standard 80% (== 100 usec/uframe) max periodic
	 * bandwidth as required by USB 2.0
	 */
	ehci->uframe_periodic_max = 100;

	/*
	 * hw default: 1K periodic list heads, one per frame.
	 * periodic_size can shrink by USBCMD update if hcc_params allows.
	 */
	ehci->periodic_size = DEFAULT_I_TDPS;
	INIT_LIST_HEAD(&ehci->async_unlink);
	INIT_LIST_HEAD(&ehci->async_idle);
	INIT_LIST_HEAD(&ehci->intr_unlink_wait);
	INIT_LIST_HEAD(&ehci->intr_unlink);
	INIT_LIST_HEAD(&ehci->intr_qh_list);
	INIT_LIST_HEAD(&ehci->cached_itd_list);
	INIT_LIST_HEAD(&ehci->cached_sitd_list);
	INIT_LIST_HEAD(&ehci->tt_list);

	if (HCC_PGM_FRAMELISTLEN(hcc_params)) {
		/* periodic schedule size can be smaller than default */
		/* EHCI_TUNE_FLS is a compile-time constant (1 in this file) */
		switch (EHCI_TUNE_FLS) {
		case 0: ehci->periodic_size = 1024; break;
		case 1: ehci->periodic_size = 512; break;
		case 2: ehci->periodic_size = 256; break;
		default:	BUG();
		}
	}
	if ((retval = ehci_mem_init(ehci, GFP_KERNEL)) < 0)
		return retval;

	/* controllers may cache some of the periodic schedule ... */
	if (HCC_ISOC_CACHE(hcc_params))		// full frame cache
		ehci->i_thresh = 0;
	else					// N microframes cached
		ehci->i_thresh = 2 + HCC_ISOC_THRES(hcc_params);

	/*
	 * dedicate a qh for the async ring head, since we couldn't unlink
	 * a 'real' qh without stopping the async schedule [4.8].  use it
	 * as the 'reclamation list head' too.
	 * its dummy is used in hw_alt_next of many tds, to prevent the qh
	 * from automatically advancing to the next td after short reads.
	 */
	ehci->async->qh_next.qh = NULL;
	hw = ehci->async->hw;
	/* the head's hardware next pointer refers back to the head itself */
	hw->hw_next = QH_NEXT(ehci, ehci->async->qh_dma);
	hw->hw_info1 = cpu_to_hc32(ehci, QH_HEAD);
#if defined(CONFIG_PPC_PS3)
	hw->hw_info1 |= cpu_to_hc32(ehci, QH_INACTIVATE);
#endif
	hw->hw_token = cpu_to_hc32(ehci, QTD_STS_HALT);
	hw->hw_qtd_next = EHCI_LIST_END(ehci);
	ehci->async->qh_state = QH_STATE_LINKED;
	hw->hw_alt_next = QTD_NEXT(ehci, ehci->async->dummy->qtd_dma);

	/* clear interrupt enables, set irq latency */
	/* an out-of-range module parameter falls back to 0 */
	if (log2_irq_thresh < 0 || log2_irq_thresh > 6)
		log2_irq_thresh = 0;
	/*
	 * NOTE(review): bit 16 and up is presumably the USBCMD interrupt
	 * threshold field -- confirm against ehci.h / the EHCI spec.
	 */
	temp = 1 << (16 + log2_irq_thresh);
	if (HCC_PER_PORT_CHANGE_EVENT(hcc_params)) {
		ehci->has_ppcd = 1;
		ehci_dbg(ehci, "enable per-port change event\n");
		temp |= CMD_PPCEE;
	}
	if (HCC_CANPARK(hcc_params)) {
		/* HW default park == 3, on hardware that supports it (like
		 * NVidia and ALI silicon), maximizes throughput on the async
		 * schedule by avoiding QH fetches between transfers.
		 *
		 * With fast usb storage devices and NForce2, "park" seems to
		 * make problems:  throughput reduction (!), data errors...
		 */
		if (park) {
			/* cap the module parameter at 3 */
			park = min_t(unsigned int, park, 3);
			temp |= CMD_PARK;
			temp |= park << 8;
		}
		/* NOTE(review): park is unsigned but is printed with %d */
		ehci_dbg(ehci, "park %d\n", park);
	}
	if (HCC_PGM_FRAMELISTLEN(hcc_params)) {
		/* periodic schedule size can be smaller than default */
		temp &= ~(3 << 2);
		temp |= (EHCI_TUNE_FLS << 2);
	}
	/* cached only; ehci_run() later writes ehci->command to the hardware */
	ehci->command = temp;

	/* Accept arbitrarily long scatter-gather lists */
	if (!hcd->localmem_pool)
		hcd->self.sg_tablesize = ~0;

	/* Prepare for unlinking active QHs */
	ehci->old_current = ~0;
	return 0;
}

/*
 * start HC running; it's halted, ehci_init() has been run (once)
 *
 * Returns zero on success, or the negative errno from ehci_handshake()
 * when the controller does not report that it started.
 */
static int ehci_run (struct usb_hcd *hcd)
{
	struct ehci_hcd		*ehci = hcd_to_ehci (hcd);
	u32			temp;
	u32			hcc_params;
	int			rc;

	hcd->uses_new_polling = 1;

	/* EHCI spec section 4.1 */

	ehci_writel(ehci, ehci->periodic_dma, &ehci->regs->frame_list);
	ehci_writel(ehci, (u32)ehci->async->qh_dma, &ehci->regs->async_next);

	/*
	 * hcc_params controls whether ehci->regs->segment must (!!!)
	 * be used; it constrains QH/ITD/SITD and QTD locations.
	 * dma_pool consistent memory always uses segment zero.
	 * streaming mappings for I/O buffers, like dma_map_single(),
	 * can return segments above 4GB, if the device allows.
	 *
	 * NOTE:  the dma mask is visible through dev->dma_mask, so
	 * drivers can pass this info along ... like NETIF_F_HIGHDMA,
	 * Scsi_Host.highmem_io, and so forth.  It's readonly to all
	 * host side drivers though.
	 */
	hcc_params = ehci_readl(ehci, &ehci->caps->hcc_params);
	if (HCC_64BIT_ADDR(hcc_params)) {
		ehci_writel(ehci, 0, &ehci->regs->segment);
#if 0
// this is deeply broken on almost all architectures
		if (!dma_set_mask(hcd->self.controller, DMA_BIT_MASK(64)))
			ehci_info(ehci, "enabled 64bit DMA\n");
#endif
	}


	// Philips, Intel, and maybe others need CMD_RUN before the
	// root hub will detect new devices (why?); NEC doesn't
	ehci->command &= ~(CMD_LRESET|CMD_IAAD|CMD_PSE|CMD_ASE|CMD_RESET);
	ehci->command |= CMD_RUN;
	ehci_writel(ehci, ehci->command, &ehci->regs->command);
	dbg_cmd (ehci, "init", ehci->command);

	/*
	 * Start, enabling full USB 2.0 functionality ... usb 1.1 devices
	 * are explicitly handed to companion controller(s), so no TT is
	 * involved with the root hub.  (Except where one is integrated,
	 * and there's no companion controller unless maybe for USB OTG.)
	 *
	 * Turning on the CF flag will transfer ownership of all ports
	 * from the companions to the EHCI controller.  If any of the
	 * companions are in the middle of a port reset at the time, it
	 * could cause trouble.  Write-locking ehci_cf_port_reset_rwsem
	 * guarantees that no resets are in progress.  After we set CF,
	 * a short delay lets the hardware catch up; new resets shouldn't
	 * be started before the port switching actions could complete.
	 */
	down_write(&ehci_cf_port_reset_rwsem);
	ehci->rh_state = EHCI_RH_RUNNING;
	ehci_writel(ehci, FLAG_CF, &ehci->regs->configured_flag);

	/* Wait until HC become operational */
	ehci_readl(ehci, &ehci->regs->command);	/* unblock posted writes */
	msleep(5);

	/* For Aspeed, STS_HALT also depends on ASS/PSS status.
	 * Check CMD_RUN instead.
	 */
	if (ehci->is_aspeed)
		rc = ehci_handshake(ehci, &ehci->regs->command, CMD_RUN,
				    1, 100 * 1000);
	else
		rc = ehci_handshake(ehci, &ehci->regs->status, STS_HALT,
				    0, 100 * 1000);

	up_write(&ehci_cf_port_reset_rwsem);

	if (rc) {
		ehci_err(ehci, "USB %x.%x, controller refused to start: %d\n",
			 ((ehci->sbrn & 0xf0)>>4), (ehci->sbrn & 0x0f), rc);
		return rc;
	}

	ehci->last_periodic_enable = ktime_get_real();

	temp = HC_VERSION(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase));
	ehci_info (ehci,
		"USB %x.%x started, EHCI %x.%02x%s\n",
		((ehci->sbrn & 0xf0)>>4), (ehci->sbrn & 0x0f),
		temp >> 8, temp & 0xff,
		(ignore_oc || ehci->spurious_oc) ? ", overcurrent ignored" : "");

	ehci_writel(ehci, INTR_MASK,
		    &ehci->regs->intr_enable); /* Turn On Interrupts */

	/* GRR this is run-once init(), being done every time the HC starts.
	 * So long as they're part of class devices, we can't do it init()
	 * since the class device isn't created that early.
	 */
	create_debug_files(ehci);
	create_sysfs_files(ehci);

	return 0;
}

int ehci_setup(struct usb_hcd *hcd)
{
	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
	int retval;

	ehci->regs = (void __iomem *)ehci->caps +
	    HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase));
	dbg_hcs_params(ehci, "reset");
	dbg_hcc_params(ehci, "reset");

	/* cache this readonly data; minimize chip reads */
	ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params);

	ehci->sbrn = HCD_US |