Total coverage: 220640 of 1823725 (13%)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Private definitions for the generic associative array implementation.
 *
 * See Documentation/core-api/assoc_array.rst for information.
 *
 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#ifndef _LINUX_ASSOC_ARRAY_PRIV_H
#define _LINUX_ASSOC_ARRAY_PRIV_H

#ifdef CONFIG_ASSOCIATIVE_ARRAY

#include <linux/assoc_array.h>

#define ASSOC_ARRAY_FAN_OUT		16	/* Number of slots per node */
#define ASSOC_ARRAY_FAN_MASK		(ASSOC_ARRAY_FAN_OUT - 1)
#define ASSOC_ARRAY_LEVEL_STEP		(ilog2(ASSOC_ARRAY_FAN_OUT))
#define ASSOC_ARRAY_LEVEL_STEP_MASK	(ASSOC_ARRAY_LEVEL_STEP - 1)
#define ASSOC_ARRAY_KEY_CHUNK_MASK	(ASSOC_ARRAY_KEY_CHUNK_SIZE - 1)
#define ASSOC_ARRAY_KEY_CHUNK_SHIFT	(ilog2(BITS_PER_LONG))

/*
 * Undefined type representing a pointer with type information in the bottom
 * two bits.
 */
struct assoc_array_ptr;

/*
 * An N-way node in the tree.
 *
 * Each slot contains one of four things:
 *
 * (1) Nothing (NULL).
 *
 * (2) A leaf object (pointer type 0).
 *
 * (3) A next-level node (pointer type 1, subtype 0).
 *
 * (4) A shortcut (pointer type 1, subtype 1).
 *
 * The tree is optimised for search-by-ID, but permits reasonable iteration
 * also.
 *
 * The tree is navigated by constructing an index key consisting of an array of
 * segments, where each segment is ilog2(ASSOC_ARRAY_FAN_OUT) bits in size.
 *
 * The segments correspond to levels of the tree (the first segment is used at
 * level 0, the second at level 1, etc.).
 */
struct assoc_array_node {
	struct assoc_array_ptr	*back_pointer;
	u8			parent_slot;
	struct assoc_array_ptr	*slots[ASSOC_ARRAY_FAN_OUT];
	unsigned long		nr_leaves_on_branch;
};

/*
 * A shortcut through the index space out to where a collection of nodes/leaves
 * with the same IDs live.
 */
struct assoc_array_shortcut {
	struct assoc_array_ptr	*back_pointer;
	int			parent_slot;
	int			skip_to_level;
	struct assoc_array_ptr	*next_node;
	unsigned long		index_key[];
};

/*
 * Preallocation cache.
 */
struct assoc_array_edit {
	struct rcu_head			rcu;
	struct assoc_array		*array;
	const struct assoc_array_ops	*ops;
	const struct assoc_array_ops	*ops_for_excised_subtree;
	struct assoc_array_ptr		*leaf;
	struct assoc_array_ptr		**leaf_p;
	struct assoc_array_ptr		*dead_leaf;
	struct assoc_array_ptr		*new_meta[3];
	struct assoc_array_ptr		*excised_meta[1];
	struct assoc_array_ptr		*excised_subtree;
	struct assoc_array_ptr		**set_backpointers[ASSOC_ARRAY_FAN_OUT];
	struct assoc_array_ptr		*set_backpointers_to;
	struct assoc_array_node		*adjust_count_on;
	long				adjust_count_by;
	struct {
		struct assoc_array_ptr	**ptr;
		struct assoc_array_ptr	*to;
	} set[2];
	struct {
		u8			*p;
		u8			to;
	} set_parent_slot[1];
	u8				segment_cache[ASSOC_ARRAY_FAN_OUT + 1];
};

/*
 * Internal tree member pointers are marked in the bottom one or two bits to
 * indicate what type they are so that we don't have to look behind every
 * pointer to see what it points to.
 *
 * We provide functions to test type annotations and to create and translate
 * the annotated pointers.
 */
#define ASSOC_ARRAY_PTR_TYPE_MASK	0x1UL
#define ASSOC_ARRAY_PTR_LEAF_TYPE	0x0UL	/* Points to leaf (or nowhere) */
#define ASSOC_ARRAY_PTR_META_TYPE	0x1UL	/* Points to node or shortcut */
#define ASSOC_ARRAY_PTR_SUBTYPE_MASK	0x2UL
#define ASSOC_ARRAY_PTR_NODE_SUBTYPE	0x0UL
#define ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE 0x2UL

static inline bool assoc_array_ptr_is_meta(const struct assoc_array_ptr *x)
{
	return (unsigned long)x & ASSOC_ARRAY_PTR_TYPE_MASK;
}
static inline bool assoc_array_ptr_is_leaf(const struct assoc_array_ptr *x)
{
	return !assoc_array_ptr_is_meta(x);
}
static inline bool assoc_array_ptr_is_shortcut(const struct assoc_array_ptr *x)
{
	return (unsigned long)x & ASSOC_ARRAY_PTR_SUBTYPE_MASK;
}
static inline bool assoc_array_ptr_is_node(const struct assoc_array_ptr *x)
{
	return !assoc_array_ptr_is_shortcut(x);
}

static inline void *assoc_array_ptr_to_leaf(const struct assoc_array_ptr *x)
{
	return (void *)((unsigned long)x & ~ASSOC_ARRAY_PTR_TYPE_MASK);
}

static inline
unsigned long __assoc_array_ptr_to_meta(const struct assoc_array_ptr *x)
{
	return (unsigned long)x &
		~(ASSOC_ARRAY_PTR_SUBTYPE_MASK | ASSOC_ARRAY_PTR_TYPE_MASK);
}
static inline
struct assoc_array_node *assoc_array_ptr_to_node(const struct assoc_array_ptr *x)
{
	return (struct assoc_array_node *)__assoc_array_ptr_to_meta(x);
}
static inline
struct assoc_array_shortcut *assoc_array_ptr_to_shortcut(const struct assoc_array_ptr *x)
{
	return (struct assoc_array_shortcut *)__assoc_array_ptr_to_meta(x);
}

static inline
struct assoc_array_ptr *__assoc_array_x_to_ptr(const void *p, unsigned long t)
{
	return (struct assoc_array_ptr *)((unsigned long)p | t);
}
static inline
struct assoc_array_ptr *assoc_array_leaf_to_ptr(const void *p)
{
	return __assoc_array_x_to_ptr(p, ASSOC_ARRAY_PTR_LEAF_TYPE);
}
static inline
struct assoc_array_ptr *assoc_array_node_to_ptr(const struct assoc_array_node *p)
{
	return __assoc_array_x_to_ptr(
		p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_NODE_SUBTYPE);
}
static inline
struct assoc_array_ptr *assoc_array_shortcut_to_ptr(const struct assoc_array_shortcut *p)
{
	return __assoc_array_x_to_ptr(
		p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE);
}

#endif /* CONFIG_ASSOCIATIVE_ARRAY */
#endif /* _LINUX_ASSOC_ARRAY_PRIV_H */
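The pointer-tagging helpers above rely on the metadata structures being at least 4-byte aligned, so the bottom two bits of their addresses are free to carry type information. The stand-alone sketch below (hypothetical, not part of the kernel header; the names tag_meta() and untag() are invented) shows the same encode/decode round trip in plain user-space C, assuming malloc() returns suitably aligned memory.

/* Minimal user-space illustration of the low-bit tagging scheme above. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PTR_TYPE_MASK		0x1UL	/* mirrors ASSOC_ARRAY_PTR_TYPE_MASK */
#define PTR_META_TYPE		0x1UL	/* mirrors ASSOC_ARRAY_PTR_META_TYPE */
#define PTR_SUBTYPE_MASK	0x2UL	/* mirrors ASSOC_ARRAY_PTR_SUBTYPE_MASK */
#define PTR_SHORTCUT_SUBTYPE	0x2UL	/* mirrors ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE */

struct node { int payload; };

/* Encode: stash the "meta" type bit and a subtype bit in the low bits. */
static void *tag_meta(struct node *n, unsigned long subtype)
{
	return (void *)((uintptr_t)n | PTR_META_TYPE | subtype);
}

/* Decode: mask the low bits off again to recover the real address. */
static struct node *untag(void *p)
{
	return (struct node *)((uintptr_t)p & ~(PTR_TYPE_MASK | PTR_SUBTYPE_MASK));
}

int main(void)
{
	struct node *n = malloc(sizeof(*n));	/* malloc() alignment leaves bits 0-1 clear */
	void *shortcut = tag_meta(n, PTR_SHORTCUT_SUBTYPE);

	assert(((uintptr_t)shortcut & PTR_TYPE_MASK) == PTR_META_TYPE);
	assert(((uintptr_t)shortcut & PTR_SUBTYPE_MASK) == PTR_SHORTCUT_SUBTYPE);
	assert(untag(shortcut) == n);

	printf("tagged %p -> untagged %p\n", shortcut, (void *)untag(shortcut));
	free(n);
	return 0;
}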
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2011 Instituto Nokia de Tecnologia
 * Copyright (C) 2014 Marvell International Ltd.
 *
 * Authors:
 *    Lauro Ramos Venancio <lauro.venancio@openbossa.org>
 *    Aloisio Almeida Jr <aloisio.almeida@openbossa.org>
 */

#ifndef __NET_NFC_H
#define __NET_NFC_H

#include <linux/nfc.h>
#include <linux/device.h>
#include <linux/skbuff.h>

#define nfc_dbg(dev, fmt, ...) dev_dbg((dev), "NFC: " fmt, ##__VA_ARGS__)
#define nfc_info(dev, fmt, ...) dev_info((dev), "NFC: " fmt, ##__VA_ARGS__)
#define nfc_err(dev, fmt, ...) dev_err((dev), "NFC: " fmt, ##__VA_ARGS__)

struct nfc_phy_ops {
	int (*write)(void *dev_id, struct sk_buff *skb);
	int (*enable)(void *dev_id);
	void (*disable)(void *dev_id);
};

struct nfc_dev;

/**
 * data_exchange_cb_t - Definition of nfc_data_exchange callback
 *
 * @context: nfc_data_exchange cb_context parameter
 * @skb: response data
 * @err: If an error has occurred during data exchange, it is the
 *	error number. Zero means no error.
 *
 * When a rx or tx package is lost or corrupted or the target gets out
 * of the operating field, err is -EIO.
*/ typedef void (*data_exchange_cb_t)(void *context, struct sk_buff *skb, int err); typedef void (*se_io_cb_t)(void *context, u8 *apdu, size_t apdu_len, int err); struct nfc_target; struct nfc_ops { int (*dev_up)(struct nfc_dev *dev); int (*dev_down)(struct nfc_dev *dev); int (*start_poll)(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols); void (*stop_poll)(struct nfc_dev *dev); int (*dep_link_up)(struct nfc_dev *dev, struct nfc_target *target, u8 comm_mode, u8 *gb, size_t gb_len); int (*dep_link_down)(struct nfc_dev *dev); int (*activate_target)(struct nfc_dev *dev, struct nfc_target *target, u32 protocol); void (*deactivate_target)(struct nfc_dev *dev, struct nfc_target *target, u8 mode); int (*im_transceive)(struct nfc_dev *dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context); int (*tm_send)(struct nfc_dev *dev, struct sk_buff *skb); int (*check_presence)(struct nfc_dev *dev, struct nfc_target *target); int (*fw_download)(struct nfc_dev *dev, const char *firmware_name); /* Secure Element API */ int (*discover_se)(struct nfc_dev *dev); int (*enable_se)(struct nfc_dev *dev, u32 se_idx); int (*disable_se)(struct nfc_dev *dev, u32 se_idx); int (*se_io) (struct nfc_dev *dev, u32 se_idx, u8 *apdu, size_t apdu_length, se_io_cb_t cb, void *cb_context); }; #define NFC_TARGET_IDX_ANY -1 #define NFC_MAX_GT_LEN 48 #define NFC_ATR_RES_GT_OFFSET 15 #define NFC_ATR_REQ_GT_OFFSET 14 /** * struct nfc_target - NFC target description * * @sens_res: 2 bytes describing the target SENS_RES response, if the target * is a type A one. The %sens_res most significant byte must be byte 2 * as described by the NFC Forum digital specification (i.e. the platform * configuration one) while %sens_res least significant byte is byte 1. */ struct nfc_target { u32 idx; u32 supported_protocols; u16 sens_res; u8 sel_res; u8 nfcid1_len; u8 nfcid1[NFC_NFCID1_MAXSIZE]; u8 nfcid2_len; u8 nfcid2[NFC_NFCID2_MAXSIZE]; u8 sensb_res_len; u8 sensb_res[NFC_SENSB_RES_MAXSIZE]; u8 sensf_res_len; u8 sensf_res[NFC_SENSF_RES_MAXSIZE]; u8 hci_reader_gate; u8 logical_idx; u8 is_iso15693; u8 iso15693_dsfid; u8 iso15693_uid[NFC_ISO15693_UID_MAXSIZE]; }; /** * nfc_se - A structure for NFC accessible secure elements. * * @idx: The secure element index. User space will enable or * disable a secure element by its index. * @type: The secure element type. It can be SE_UICC or * SE_EMBEDDED. * @state: The secure element state, either enabled or disabled. * */ struct nfc_se { struct list_head list; u32 idx; u16 type; u16 state; }; /** * nfc_evt_transaction - A struct for NFC secure element event transaction. 
* * @aid: The application identifier triggering the event * * @aid_len: The application identifier length [5:16] * * @params: The application parameters transmitted during the transaction * * @params_len: The applications parameters length [0:255] * */ #define NFC_MIN_AID_LENGTH 5 #define NFC_MAX_AID_LENGTH 16 #define NFC_MAX_PARAMS_LENGTH 255 #define NFC_EVT_TRANSACTION_AID_TAG 0x81 #define NFC_EVT_TRANSACTION_PARAMS_TAG 0x82 struct nfc_evt_transaction { u32 aid_len; u8 aid[NFC_MAX_AID_LENGTH]; u8 params_len; u8 params[]; } __packed; struct nfc_genl_data { u32 poll_req_portid; struct mutex genl_data_mutex; }; struct nfc_vendor_cmd { __u32 vendor_id; __u32 subcmd; int (*doit)(struct nfc_dev *dev, void *data, size_t data_len); }; struct nfc_dev { int idx; u32 target_next_idx; struct nfc_target *targets; int n_targets; int targets_generation; struct device dev; bool dev_up; bool fw_download_in_progress; u8 rf_mode; bool polling; struct nfc_target *active_target; bool dep_link_up; struct nfc_genl_data genl_data; u32 supported_protocols; struct list_head secure_elements; int tx_headroom; int tx_tailroom; struct timer_list check_pres_timer; struct work_struct check_pres_work; bool shutting_down; struct rfkill *rfkill; const struct nfc_vendor_cmd *vendor_cmds; int n_vendor_cmds; const struct nfc_ops *ops; struct genl_info *cur_cmd_info; }; #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) extern const struct class nfc_class; struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, u32 supported_protocols, int tx_headroom, int tx_tailroom); /** * nfc_free_device - free nfc device * * @dev: The nfc device to free */ static inline void nfc_free_device(struct nfc_dev *dev) { put_device(&dev->dev); } int nfc_register_device(struct nfc_dev *dev); void nfc_unregister_device(struct nfc_dev *dev); /** * nfc_set_parent_dev - set the parent device * * @nfc_dev: The nfc device whose parent is being set * @dev: The parent device */ static inline void nfc_set_parent_dev(struct nfc_dev *nfc_dev, struct device *dev) { nfc_dev->dev.parent = dev; } /** * nfc_set_drvdata - set driver specific data * * @dev: The nfc device * @data: Pointer to driver specific data */ static inline void nfc_set_drvdata(struct nfc_dev *dev, void *data) { dev_set_drvdata(&dev->dev, data); } /** * nfc_get_drvdata - get driver specific data * * @dev: The nfc device */ static inline void *nfc_get_drvdata(const struct nfc_dev *dev) { return dev_get_drvdata(&dev->dev); } /** * nfc_device_name - get the nfc device name * * @dev: The nfc device whose name to return */ static inline const char *nfc_device_name(const struct nfc_dev *dev) { return dev_name(&dev->dev); } struct sk_buff *nfc_alloc_send_skb(struct nfc_dev *dev, struct sock *sk, unsigned int flags, unsigned int size, unsigned int *err); struct sk_buff *nfc_alloc_recv_skb(unsigned int size, gfp_t gfp); int nfc_set_remote_general_bytes(struct nfc_dev *dev, const u8 *gt, u8 gt_len); u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len); int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name, u32 result); int nfc_targets_found(struct nfc_dev *dev, struct nfc_target *targets, int ntargets); int nfc_target_lost(struct nfc_dev *dev, u32 target_idx); int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, const u8 *gb, size_t gb_len); int nfc_tm_deactivated(struct nfc_dev *dev); int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff 
*skb); void nfc_driver_failure(struct nfc_dev *dev, int err); int nfc_se_transaction(struct nfc_dev *dev, u8 se_idx, struct nfc_evt_transaction *evt_transaction); int nfc_se_connectivity(struct nfc_dev *dev, u8 se_idx); int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_remove_se(struct nfc_dev *dev, u32 se_idx); struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx); void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb, u8 payload_type, u8 direction); static inline int nfc_set_vendor_cmds(struct nfc_dev *dev, const struct nfc_vendor_cmd *cmds, int n_cmds) { if (dev->vendor_cmds || dev->n_vendor_cmds) return -EINVAL; dev->vendor_cmds = cmds; dev->n_vendor_cmds = n_cmds; return 0; } struct sk_buff *__nfc_alloc_vendor_cmd_reply_skb(struct nfc_dev *dev, enum nfc_attrs attr, u32 oui, u32 subcmd, int approxlen); int nfc_vendor_cmd_reply(struct sk_buff *skb); /** * nfc_vendor_cmd_alloc_reply_skb - allocate vendor command reply * @dev: nfc device * @oui: vendor oui * @approxlen: an upper bound of the length of the data that will * be put into the skb * * This function allocates and pre-fills an skb for a reply to * a vendor command. Since it is intended for a reply, calling * it outside of a vendor command's doit() operation is invalid. * * The returned skb is pre-filled with some identifying data in * a way that any data that is put into the skb (with skb_put(), * nla_put() or similar) will end up being within the * %NFC_ATTR_VENDOR_DATA attribute, so all that needs to be done * with the skb is adding data for the corresponding userspace tool * which can then read that data out of the vendor data attribute. * You must not modify the skb in any other way. * * When done, call nfc_vendor_cmd_reply() with the skb and return * its error code as the result of the doit() operation. * * Return: An allocated and pre-filled skb. %NULL if any errors happen. */ static inline struct sk_buff * nfc_vendor_cmd_alloc_reply_skb(struct nfc_dev *dev, u32 oui, u32 subcmd, int approxlen) { return __nfc_alloc_vendor_cmd_reply_skb(dev, NFC_ATTR_VENDOR_DATA, oui, subcmd, approxlen); } #endif /* __NET_NFC_H */
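The kernel-doc above for nfc_vendor_cmd_alloc_reply_skb() describes the intended call pattern inside a vendor command's doit(). The hedged sketch below shows one way a driver might use it together with nfc_set_vendor_cmds(), nfc_vendor_cmd_reply() and struct nfc_vendor_cmd, all declared in this header; the OUI, subcommand number, attribute id and revision value are invented for illustration and not taken from any real driver.

/*
 * Hypothetical driver-side sketch: MY_OUI, MY_SUBCMD_GET_REV, the attribute
 * id 1 and the 0x0100 revision are assumptions made up for this example.
 */
#include <net/nfc/nfc.h>
#include <net/netlink.h>

#define MY_OUI			0x123456	/* assumed vendor OUI */
#define MY_SUBCMD_GET_REV	0x01		/* assumed subcommand */

static int my_vendor_get_rev(struct nfc_dev *dev, void *data, size_t data_len)
{
	struct sk_buff *skb;

	skb = nfc_vendor_cmd_alloc_reply_skb(dev, MY_OUI, MY_SUBCMD_GET_REV,
					     sizeof(u32));
	if (!skb)
		return -ENOMEM;

	/* Anything added here lands inside %NFC_ATTR_VENDOR_DATA. */
	if (nla_put_u32(skb, 1 /* assumed driver-private attr */, 0x0100)) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	return nfc_vendor_cmd_reply(skb);
}

static const struct nfc_vendor_cmd my_vendor_cmds[] = {
	{
		.vendor_id	= MY_OUI,
		.subcmd		= MY_SUBCMD_GET_REV,
		.doit		= my_vendor_get_rev,
	},
};

/*
 * Typically wired up once after nfc_allocate_device(), e.g. from probe():
 *	nfc_set_vendor_cmds(dev, my_vendor_cmds, ARRAY_SIZE(my_vendor_cmds));
 */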
// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/kconfig.h>
#include <linux/list.h>
#include <linux/security.h>
#include <linux/umh.h>
#include <linux/sysctl.h>
#include <linux/module.h>

#include "fallback.h"
#include "firmware.h"

/*
 * firmware fallback mechanism
 */

/*
 * use small loading timeout for caching devices' firmware because all these
 * firmware images have been loaded successfully at least once, also system is
 * ready for completing firmware loading now. The maximum size of firmware in
 * current distributions is about 2M bytes, so 10 secs should be enough.
 */
void fw_fallback_set_cache_timeout(void)
{
	fw_fallback_config.old_timeout = __firmware_loading_timeout();
	__fw_fallback_set_timeout(10);
}

/* Restores the timeout to the value last configured during normal operation */
void fw_fallback_set_default_timeout(void)
{
	__fw_fallback_set_timeout(fw_fallback_config.old_timeout);
}

static long firmware_loading_timeout(void)
{
	return __firmware_loading_timeout() > 0 ?
		__firmware_loading_timeout() * HZ : MAX_JIFFY_OFFSET;
}

static inline int fw_sysfs_wait_timeout(struct fw_priv *fw_priv, long timeout)
{
	return __fw_state_wait_common(fw_priv, timeout);
}

static LIST_HEAD(pending_fw_head);

void kill_pending_fw_fallback_reqs(bool kill_all)
{
	struct fw_priv *fw_priv;
	struct fw_priv *next;

	mutex_lock(&fw_lock);
	list_for_each_entry_safe(fw_priv, next, &pending_fw_head,
				 pending_list) {
		if (kill_all || !fw_priv->need_uevent)
			__fw_load_abort(fw_priv);
	}

	if (kill_all)
		fw_load_abort_all = true;

	mutex_unlock(&fw_lock);
}

/**
 * fw_load_sysfs_fallback() - load a firmware via the sysfs fallback mechanism
 * @fw_sysfs: firmware sysfs information for the firmware to load
 * @timeout: timeout to wait for the load
 *
 * In charge of constructing a sysfs fallback interface for firmware loading.
**/ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout) { int retval = 0; struct device *f_dev = &fw_sysfs->dev; struct fw_priv *fw_priv = fw_sysfs->fw_priv; /* fall back on userspace loading */ if (!fw_priv->data) fw_priv->is_paged_buf = true; dev_set_uevent_suppress(f_dev, true); retval = device_add(f_dev); if (retval) { dev_err(f_dev, "%s: device_register failed\n", __func__); goto err_put_dev; } mutex_lock(&fw_lock); if (fw_load_abort_all || fw_state_is_aborted(fw_priv)) { mutex_unlock(&fw_lock); retval = -EINTR; goto out; } list_add(&fw_priv->pending_list, &pending_fw_head); mutex_unlock(&fw_lock); if (fw_priv->opt_flags & FW_OPT_UEVENT) { fw_priv->need_uevent = true; dev_set_uevent_suppress(f_dev, false); dev_dbg(f_dev, "firmware: requesting %s\n", fw_priv->fw_name); kobject_uevent(&fw_sysfs->dev.kobj, KOBJ_ADD); } else { timeout = MAX_JIFFY_OFFSET; } retval = fw_sysfs_wait_timeout(fw_priv, timeout); if (retval < 0 && retval != -ENOENT) { mutex_lock(&fw_lock); fw_load_abort(fw_sysfs); mutex_unlock(&fw_lock); } if (fw_state_is_aborted(fw_priv)) { if (retval == -ERESTARTSYS) retval = -EINTR; } else if (fw_priv->is_paged_buf && !fw_priv->data) retval = -ENOMEM; out: device_del(f_dev); err_put_dev: put_device(f_dev); return retval; } static int fw_load_from_user_helper(struct firmware *firmware, const char *name, struct device *device, u32 opt_flags) { struct fw_sysfs *fw_sysfs; long timeout; int ret; timeout = firmware_loading_timeout(); if (opt_flags & FW_OPT_NOWAIT) { timeout = usermodehelper_read_lock_wait(timeout); if (!timeout) { dev_dbg(device, "firmware: %s loading timed out\n", name); return -EBUSY; } } else { ret = usermodehelper_read_trylock(); if (WARN_ON(ret)) { dev_err(device, "firmware: %s will not be loaded\n", name); return ret; } } fw_sysfs = fw_create_instance(firmware, name, device, opt_flags); if (IS_ERR(fw_sysfs)) { ret = PTR_ERR(fw_sysfs); goto out_unlock; } fw_sysfs->fw_priv = firmware->priv; ret = fw_load_sysfs_fallback(fw_sysfs, timeout); if (!ret) ret = assign_fw(firmware, device); out_unlock: usermodehelper_read_unlock(); return ret; } static bool fw_force_sysfs_fallback(u32 opt_flags) { if (fw_fallback_config.force_sysfs_fallback) return true; if (!(opt_flags & FW_OPT_USERHELPER)) return false; return true; } static bool fw_run_sysfs_fallback(u32 opt_flags) { int ret; if (fw_fallback_config.ignore_sysfs_fallback) { pr_info_once("Ignoring firmware sysfs fallback due to sysctl knob\n"); return false; } if ((opt_flags & FW_OPT_NOFALLBACK_SYSFS)) return false; /* Also permit LSMs and IMA to fail firmware sysfs fallback */ ret = security_kernel_load_data(LOADING_FIRMWARE, true); if (ret < 0) return false; return fw_force_sysfs_fallback(opt_flags); } /** * firmware_fallback_sysfs() - use the fallback mechanism to find firmware * @fw: pointer to firmware image * @name: name of firmware file to look for * @device: device for which firmware is being loaded * @opt_flags: options to control firmware loading behaviour, as defined by * &enum fw_opt * @ret: return value from direct lookup which triggered the fallback mechanism * * This function is called if direct lookup for the firmware failed, it enables * a fallback mechanism through userspace by exposing a sysfs loading * interface. Userspace is in charge of loading the firmware through the sysfs * loading interface. This sysfs fallback mechanism may be disabled completely * on a system by setting the proc sysctl value ignore_sysfs_fallback to true. 
* If this is false we check if the internal API caller set the * @FW_OPT_NOFALLBACK_SYSFS flag, if so it would also disable the fallback * mechanism. A system may want to enforce the sysfs fallback mechanism at all * times, it can do this by setting ignore_sysfs_fallback to false and * force_sysfs_fallback to true. * Enabling force_sysfs_fallback is functionally equivalent to build a kernel * with CONFIG_FW_LOADER_USER_HELPER_FALLBACK. **/ int firmware_fallback_sysfs(struct firmware *fw, const char *name, struct device *device, u32 opt_flags, int ret) { if (!fw_run_sysfs_fallback(opt_flags)) return ret; if (!(opt_flags & FW_OPT_NO_WARN)) dev_warn(device, "Falling back to sysfs fallback for: %s\n", name); else dev_dbg(device, "Falling back to sysfs fallback for: %s\n", name); return fw_load_from_user_helper(fw, name, device, opt_flags); }
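For context on what the code above waits for, here is a hedged user-space sketch of the sysfs handshake the fallback exposes: write 1 to the device's "loading" file, stream the image into "data", then write 0 to commit (or -1 to abort). The /sys/class/firmware/example.bin and /lib/firmware/example.bin paths are example assumptions; a real helper takes the sysfs directory from the firmware uevent.

/* Hypothetical user-space loader sketch for the sysfs fallback interface. */
#include <stdio.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

static int copy_file(const char *src, const char *dst)
{
	FILE *in = fopen(src, "rb");
	FILE *out = fopen(dst, "wb");
	char buf[4096];
	size_t n;
	int ret = -1;

	if (in && out) {
		while ((n = fread(buf, 1, sizeof(buf), in)) > 0)
			fwrite(buf, 1, n, out);
		ret = 0;
	}
	if (in)
		fclose(in);
	if (out)
		ret |= fclose(out);
	return ret;
}

int main(void)
{
	const char *dir = "/sys/class/firmware/example.bin";	/* assumed path */
	const char *img = "/lib/firmware/example.bin";		/* assumed image */
	char loading[256], data[256];

	snprintf(loading, sizeof(loading), "%s/loading", dir);
	snprintf(data, sizeof(data), "%s/data", dir);

	if (write_str(loading, "1"))		/* announce that a transfer starts */
		return 1;
	if (copy_file(img, data)) {		/* stream the image into the paged buffer */
		write_str(loading, "-1");	/* abort: wakes the kernel-side waiter with an error */
		return 1;
	}
	return write_str(loading, "0") ? 1 : 0;	/* commit: completes fw_sysfs_wait_timeout() */
}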
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * (C) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
 *
 * This software has been sponsored by Vyatta Inc.
<http://www.vyatta.com> */ #include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/rculist.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/list.h> #include <linux/errno.h> #include <linux/capability.h> #include <net/netlink.h> #include <net/sock.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> #include <linux/netfilter/nfnetlink_cthelper.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers"); struct nfnl_cthelper { struct list_head list; struct nf_conntrack_helper helper; }; static LIST_HEAD(nfnl_cthelper_list); static int nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { const struct nf_conn_help *help; struct nf_conntrack_helper *helper; help = nfct_help(ct); if (help == NULL) return NF_DROP; /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); if (helper == NULL) return NF_DROP; /* This is a user-space helper not yet configured, skip. */ if ((helper->flags & (NF_CT_HELPER_F_USERSPACE | NF_CT_HELPER_F_CONFIGURED)) == NF_CT_HELPER_F_USERSPACE) return NF_ACCEPT; /* If the user-space helper is not available, don't block traffic. */ return NF_QUEUE_NR(helper->queue_num) | NF_VERDICT_FLAG_QUEUE_BYPASS; } static const struct nla_policy nfnl_cthelper_tuple_pol[NFCTH_TUPLE_MAX+1] = { [NFCTH_TUPLE_L3PROTONUM] = { .type = NLA_U16, }, [NFCTH_TUPLE_L4PROTONUM] = { .type = NLA_U8, }, }; static int nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, const struct nlattr *attr) { int err; struct nlattr *tb[NFCTH_TUPLE_MAX+1]; err = nla_parse_nested_deprecated(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol, NULL); if (err < 0) return err; if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; /* Not all fields are initialized so first zero the tuple */ memset(tuple, 0, sizeof(struct nf_conntrack_tuple)); tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); return 0; } static int nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) { struct nf_conn_help *help = nfct_help(ct); const struct nf_conntrack_helper *helper; if (attr == NULL) return -EINVAL; helper = rcu_dereference(help->helper); if (!helper || helper->data_len == 0) return -EINVAL; nla_memcpy(help->data, attr, sizeof(help->data)); return 0; } static int nfnl_cthelper_to_nlattr(struct sk_buff *skb, const struct nf_conn *ct) { const struct nf_conn_help *help = nfct_help(ct); const struct nf_conntrack_helper *helper; helper = rcu_dereference(help->helper); if (helper && helper->data_len && nla_put(skb, CTA_HELP_INFO, helper->data_len, &help->data)) goto nla_put_failure; return 0; nla_put_failure: return -ENOSPC; } static const struct nla_policy nfnl_cthelper_expect_pol[NFCTH_POLICY_MAX+1] = { [NFCTH_POLICY_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN-1 }, [NFCTH_POLICY_EXPECT_MAX] = { .type = NLA_U32, }, [NFCTH_POLICY_EXPECT_TIMEOUT] = { .type = NLA_U32, }, }; static int nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, const struct nlattr *attr) { int err; struct nlattr 
*tb[NFCTH_POLICY_MAX+1]; err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol, NULL); if (err < 0) return err; if (!tb[NFCTH_POLICY_NAME] || !tb[NFCTH_POLICY_EXPECT_MAX] || !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) return -EINVAL; nla_strscpy(expect_policy->name, tb[NFCTH_POLICY_NAME], NF_CT_HELPER_NAME_LEN); expect_policy->max_expected = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) return -EINVAL; expect_policy->timeout = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); return 0; } static const struct nla_policy nfnl_cthelper_expect_policy_set[NFCTH_POLICY_SET_MAX+1] = { [NFCTH_POLICY_SET_NUM] = { .type = NLA_U32, }, }; static int nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, const struct nlattr *attr) { int i, ret; struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; unsigned int class_max; ret = nla_parse_nested_deprecated(tb, NFCTH_POLICY_SET_MAX, attr, nfnl_cthelper_expect_policy_set, NULL); if (ret < 0) return ret; if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); if (class_max == 0) return -EINVAL; if (class_max > NF_CT_MAX_EXPECT_CLASSES) return -EOVERFLOW; expect_policy = kcalloc(class_max, sizeof(struct nf_conntrack_expect_policy), GFP_KERNEL); if (expect_policy == NULL) return -ENOMEM; for (i = 0; i < class_max; i++) { if (!tb[NFCTH_POLICY_SET+i]) goto err; ret = nfnl_cthelper_expect_policy(&expect_policy[i], tb[NFCTH_POLICY_SET+i]); if (ret < 0) goto err; } helper->expect_class_max = class_max - 1; helper->expect_policy = expect_policy; return 0; err: kfree(expect_policy); return -EINVAL; } static int nfnl_cthelper_create(const struct nlattr * const tb[], struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; struct nfnl_cthelper *nfcth; unsigned int size; int ret; if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) return -EINVAL; nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL); if (nfcth == NULL) return -ENOMEM; helper = &nfcth->helper; ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) goto err1; nla_strscpy(helper->name, tb[NFCTH_NAME], NF_CT_HELPER_NAME_LEN); size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); if (size > sizeof_field(struct nf_conn_help, data)) { ret = -ENOMEM; goto err2; } helper->data_len = size; helper->flags |= NF_CT_HELPER_F_USERSPACE; memcpy(&helper->tuple, tuple, sizeof(struct nf_conntrack_tuple)); helper->me = THIS_MODULE; helper->help = nfnl_userspace_cthelper; helper->from_nlattr = nfnl_cthelper_from_nlattr; helper->to_nlattr = nfnl_cthelper_to_nlattr; /* Default to queue number zero, this can be updated at any time. 
*/ if (tb[NFCTH_QUEUE_NUM]) helper->queue_num = ntohl(nla_get_be32(tb[NFCTH_QUEUE_NUM])); if (tb[NFCTH_STATUS]) { int status = ntohl(nla_get_be32(tb[NFCTH_STATUS])); switch(status) { case NFCT_HELPER_STATUS_ENABLED: helper->flags |= NF_CT_HELPER_F_CONFIGURED; break; case NFCT_HELPER_STATUS_DISABLED: helper->flags &= ~NF_CT_HELPER_F_CONFIGURED; break; } } ret = nf_conntrack_helper_register(helper); if (ret < 0) goto err2; list_add_tail(&nfcth->list, &nfnl_cthelper_list); return 0; err2: kfree(helper->expect_policy); err1: kfree(nfcth); return ret; } static int nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, struct nf_conntrack_expect_policy *new_policy, const struct nlattr *attr) { struct nlattr *tb[NFCTH_POLICY_MAX + 1]; int err; err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol, NULL); if (err < 0) return err; if (!tb[NFCTH_POLICY_NAME] || !tb[NFCTH_POLICY_EXPECT_MAX] || !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) return -EINVAL; if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name)) return -EBUSY; new_policy->max_expected = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); if (new_policy->max_expected > NF_CT_EXPECT_MAX_CNT) return -EINVAL; new_policy->timeout = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); return 0; } static int nfnl_cthelper_update_policy_all(struct nlattr *tb[], struct nf_conntrack_helper *helper) { struct nf_conntrack_expect_policy *new_policy; struct nf_conntrack_expect_policy *policy; int i, ret = 0; new_policy = kmalloc_array(helper->expect_class_max + 1, sizeof(*new_policy), GFP_KERNEL); if (!new_policy) return -ENOMEM; /* Check first that all policy attributes are well-formed, so we don't * leave things in inconsistent state on errors. */ for (i = 0; i < helper->expect_class_max + 1; i++) { if (!tb[NFCTH_POLICY_SET + i]) { ret = -EINVAL; goto err; } ret = nfnl_cthelper_update_policy_one(&helper->expect_policy[i], &new_policy[i], tb[NFCTH_POLICY_SET + i]); if (ret < 0) goto err; } /* Now we can safely update them. 
*/ for (i = 0; i < helper->expect_class_max + 1; i++) { policy = (struct nf_conntrack_expect_policy *) &helper->expect_policy[i]; policy->max_expected = new_policy->max_expected; policy->timeout = new_policy->timeout; } err: kfree(new_policy); return ret; } static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper, const struct nlattr *attr) { struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1]; unsigned int class_max; int err; err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_SET_MAX, attr, nfnl_cthelper_expect_policy_set, NULL); if (err < 0) return err; if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); if (helper->expect_class_max + 1 != class_max) return -EBUSY; return nfnl_cthelper_update_policy_all(tb, helper); } static int nfnl_cthelper_update(const struct nlattr * const tb[], struct nf_conntrack_helper *helper) { u32 size; int ret; if (tb[NFCTH_PRIV_DATA_LEN]) { size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); if (size != helper->data_len) return -EBUSY; } if (tb[NFCTH_POLICY]) { ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) return ret; } if (tb[NFCTH_QUEUE_NUM]) helper->queue_num = ntohl(nla_get_be32(tb[NFCTH_QUEUE_NUM])); if (tb[NFCTH_STATUS]) { int status = ntohl(nla_get_be32(tb[NFCTH_STATUS])); switch(status) { case NFCT_HELPER_STATUS_ENABLED: helper->flags |= NF_CT_HELPER_F_CONFIGURED; break; case NFCT_HELPER_STATUS_DISABLED: helper->flags &= ~NF_CT_HELPER_F_CONFIGURED; break; } } return 0; } static int nfnl_cthelper_new(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const tb[]) { const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; struct nf_conntrack_tuple tuple; struct nfnl_cthelper *nlcth; int ret = 0; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) return -EINVAL; helper_name = nla_data(tb[NFCTH_NAME]); ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); if (ret < 0) return ret; list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { cur = &nlcth->helper; if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) continue; if ((tuple.src.l3num != cur->tuple.src.l3num || tuple.dst.protonum != cur->tuple.dst.protonum)) continue; if (info->nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; helper = cur; break; } if (helper == NULL) ret = nfnl_cthelper_create(tb, &tuple); else ret = nfnl_cthelper_update(tb, helper); return ret; } static int nfnl_cthelper_dump_tuple(struct sk_buff *skb, struct nf_conntrack_helper *helper) { struct nlattr *nest_parms; nest_parms = nla_nest_start(skb, NFCTH_TUPLE); if (nest_parms == NULL) goto nla_put_failure; if (nla_put_be16(skb, NFCTH_TUPLE_L3PROTONUM, htons(helper->tuple.src.l3num))) goto nla_put_failure; if (nla_put_u8(skb, NFCTH_TUPLE_L4PROTONUM, helper->tuple.dst.protonum)) goto nla_put_failure; nla_nest_end(skb, nest_parms); return 0; nla_put_failure: return -1; } static int nfnl_cthelper_dump_policy(struct sk_buff *skb, struct nf_conntrack_helper *helper) { int i; struct nlattr *nest_parms1, *nest_parms2; nest_parms1 = nla_nest_start(skb, NFCTH_POLICY); if (nest_parms1 == NULL) goto nla_put_failure; if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM, htonl(helper->expect_class_max + 1))) goto nla_put_failure; for (i = 0; i < helper->expect_class_max + 1; i++) { nest_parms2 = nla_nest_start(skb, (NFCTH_POLICY_SET + i)); if (nest_parms2 == NULL) goto nla_put_failure; if (nla_put_string(skb, NFCTH_POLICY_NAME, helper->expect_policy[i].name)) goto nla_put_failure; if 
(nla_put_be32(skb, NFCTH_POLICY_EXPECT_MAX, htonl(helper->expect_policy[i].max_expected))) goto nla_put_failure; if (nla_put_be32(skb, NFCTH_POLICY_EXPECT_TIMEOUT, htonl(helper->expect_policy[i].timeout))) goto nla_put_failure; nla_nest_end(skb, nest_parms2); } nla_nest_end(skb, nest_parms1); return 0; nla_put_failure: return -1; } static int nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_conntrack_helper *helper) { struct nlmsghdr *nlh; unsigned int flags = portid ? NLM_F_MULTI : 0; int status; event = nfnl_msg_type(NFNL_SUBSYS_CTHELPER, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, NFNETLINK_V0, 0); if (!nlh) goto nlmsg_failure; if (nla_put_string(skb, NFCTH_NAME, helper->name)) goto nla_put_failure; if (nla_put_be32(skb, NFCTH_QUEUE_NUM, htonl(helper->queue_num))) goto nla_put_failure; if (nfnl_cthelper_dump_tuple(skb, helper) < 0) goto nla_put_failure; if (nfnl_cthelper_dump_policy(skb, helper) < 0) goto nla_put_failure; if (nla_put_be32(skb, NFCTH_PRIV_DATA_LEN, htonl(helper->data_len))) goto nla_put_failure; if (helper->flags & NF_CT_HELPER_F_CONFIGURED) status = NFCT_HELPER_STATUS_ENABLED; else status = NFCT_HELPER_STATUS_DISABLED; if (nla_put_be32(skb, NFCTH_STATUS, htonl(status))) goto nla_put_failure; nlmsg_end(skb, nlh); return skb->len; nlmsg_failure: nla_put_failure: nlmsg_cancel(skb, nlh); return -1; } static int nfnl_cthelper_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct nf_conntrack_helper *cur, *last; rcu_read_lock(); last = (struct nf_conntrack_helper *)cb->args[1]; for (; cb->args[0] < nf_ct_helper_hsize; cb->args[0]++) { restart: hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[cb->args[0]], hnode) { /* skip non-userspace conntrack helpers. 
*/ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) continue; if (cb->args[1]) { if (cur != last) continue; cb->args[1] = 0; } if (nfnl_cthelper_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur) < 0) { cb->args[1] = (unsigned long)cur; goto out; } } } if (cb->args[1]) { cb->args[1] = 0; goto restart; } out: rcu_read_unlock(); return skb->len; } static int nfnl_cthelper_get(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const tb[]) { int ret = -ENOENT; struct nf_conntrack_helper *cur; struct sk_buff *skb2; char *helper_name = NULL; struct nf_conntrack_tuple tuple; struct nfnl_cthelper *nlcth; bool tuple_set = false; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nfnl_cthelper_dump_table, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); if (tb[NFCTH_TUPLE]) { ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); if (ret < 0) return ret; tuple_set = true; } list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { cur = &nlcth->helper; if (helper_name && strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) continue; if (tuple_set && (tuple.src.l3num != cur->tuple.src.l3num || tuple.dst.protonum != cur->tuple.dst.protonum)) continue; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (skb2 == NULL) { ret = -ENOMEM; break; } ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFNL_MSG_TYPE(info->nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur); if (ret <= 0) { kfree_skb(skb2); break; } ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); break; } return ret; } static int nfnl_cthelper_del(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const tb[]) { char *helper_name = NULL; struct nf_conntrack_helper *cur; struct nf_conntrack_tuple tuple; bool tuple_set = false, found = false; struct nfnl_cthelper *nlcth, *n; int j = 0, ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); if (tb[NFCTH_TUPLE]) { ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); if (ret < 0) return ret; tuple_set = true; } ret = -ENOENT; list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { cur = &nlcth->helper; j++; if (helper_name && strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) continue; if (tuple_set && (tuple.src.l3num != cur->tuple.src.l3num || tuple.dst.protonum != cur->tuple.dst.protonum)) continue; if (refcount_dec_if_one(&cur->refcnt)) { found = true; nf_conntrack_helper_unregister(cur); kfree(cur->expect_policy); list_del(&nlcth->list); kfree(nlcth); } else { ret = -EBUSY; } } /* Make sure we return success if we flush and there is no helpers */ return (found || j == 0) ? 
0 : ret; } static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { [NFCTH_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN-1 }, [NFCTH_QUEUE_NUM] = { .type = NLA_U32, }, [NFCTH_PRIV_DATA_LEN] = { .type = NLA_U32, }, [NFCTH_STATUS] = { .type = NLA_U32, }, }; static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = { [NFNL_MSG_CTHELPER_NEW] = { .call = nfnl_cthelper_new, .type = NFNL_CB_MUTEX, .attr_count = NFCTH_MAX, .policy = nfnl_cthelper_policy }, [NFNL_MSG_CTHELPER_GET] = { .call = nfnl_cthelper_get, .type = NFNL_CB_MUTEX, .attr_count = NFCTH_MAX, .policy = nfnl_cthelper_policy }, [NFNL_MSG_CTHELPER_DEL] = { .call = nfnl_cthelper_del, .type = NFNL_CB_MUTEX, .attr_count = NFCTH_MAX, .policy = nfnl_cthelper_policy }, }; static const struct nfnetlink_subsystem nfnl_cthelper_subsys = { .name = "cthelper", .subsys_id = NFNL_SUBSYS_CTHELPER, .cb_count = NFNL_MSG_CTHELPER_MAX, .cb = nfnl_cthelper_cb, }; MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTHELPER); static int __init nfnl_cthelper_init(void) { int ret; ret = nfnetlink_subsys_register(&nfnl_cthelper_subsys); if (ret < 0) { pr_err("nfnl_cthelper: cannot register with nfnetlink.\n"); goto err_out; } return 0; err_out: return ret; } static void __exit nfnl_cthelper_exit(void) { struct nf_conntrack_helper *cur; struct nfnl_cthelper *nlcth, *n; nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { cur = &nlcth->helper; nf_conntrack_helper_unregister(cur); kfree(cur->expect_policy); kfree(nlcth); } } module_init(nfnl_cthelper_init); module_exit(nfnl_cthelper_exit);
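To make the netlink interface above concrete, here is a hedged user-space sketch of the NFNL_MSG_CTHELPER_NEW request that nfnl_cthelper_new() parses, written with libmnl. The helper name "sample", queue number, private data length and the single expect-policy class are invented example values, the request needs CAP_NET_ADMIN, and real deployments normally go through libnetfilter_cthelper or conntrack-tools rather than hand-rolling the message.

/* Hypothetical libmnl sketch; values are illustrative only. */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <libmnl/libmnl.h>
#include <linux/netlink.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_cthelper.h>

int main(void)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct mnl_socket *nl = mnl_socket_open(NETLINK_NETFILTER);
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct nfgenmsg *nfg;
	struct nlattr *tuple, *policy, *class0;

	if (!nl || mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0)
		return 1;

	nlh->nlmsg_type = (NFNL_SUBSYS_CTHELPER << 8) | NFNL_MSG_CTHELPER_NEW;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK;
	nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
	nfg->nfgen_family = AF_UNSPEC;
	nfg->version = NFNETLINK_V0;

	mnl_attr_put_strz(nlh, NFCTH_NAME, "sample");		/* example name */
	mnl_attr_put_u32(nlh, NFCTH_QUEUE_NUM, htonl(0));	/* nfqueue 0 */
	mnl_attr_put_u32(nlh, NFCTH_PRIV_DATA_LEN, htonl(4));	/* example size */
	mnl_attr_put_u32(nlh, NFCTH_STATUS, htonl(NFCT_HELPER_STATUS_ENABLED));

	tuple = mnl_attr_nest_start(nlh, NFCTH_TUPLE);
	mnl_attr_put_u16(nlh, NFCTH_TUPLE_L3PROTONUM, htons(AF_INET));
	mnl_attr_put_u8(nlh, NFCTH_TUPLE_L4PROTONUM, IPPROTO_TCP);
	mnl_attr_nest_end(nlh, tuple);

	policy = mnl_attr_nest_start(nlh, NFCTH_POLICY);
	mnl_attr_put_u32(nlh, NFCTH_POLICY_SET_NUM, htonl(1));	/* one class */
	class0 = mnl_attr_nest_start(nlh, NFCTH_POLICY_SET);
	mnl_attr_put_strz(nlh, NFCTH_POLICY_NAME, "default");
	mnl_attr_put_u32(nlh, NFCTH_POLICY_EXPECT_MAX, htonl(1));
	mnl_attr_put_u32(nlh, NFCTH_POLICY_EXPECT_TIMEOUT, htonl(300));
	mnl_attr_nest_end(nlh, class0);
	mnl_attr_nest_end(nlh, policy);

	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
		return 1;
	mnl_socket_close(nl);
	return 0;
}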
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2006, Johannes Berg <johannes@sipsolutions.net>
 */

/* just for IFNAMSIZ */
#include <linux/if.h>
#include <linux/slab.h>
#include <linux/export.h>
#include "led.h"

void ieee80211_led_assoc(struct ieee80211_local *local, bool associated)
{
	if (!atomic_read(&local->assoc_led_active))
		return;
	if (associated)
		led_trigger_event(&local->assoc_led, LED_FULL);
	else
		led_trigger_event(&local->assoc_led, LED_OFF);
}

void ieee80211_led_radio(struct ieee80211_local *local, bool enabled)
{
	if (!atomic_read(&local->radio_led_active))
		return;
	if (enabled)
		led_trigger_event(&local->radio_led, LED_FULL);
	else
		led_trigger_event(&local->radio_led, LED_OFF);
}

void ieee80211_alloc_led_names(struct ieee80211_local *local)
{
	local->rx_led.name = kasprintf(GFP_KERNEL, "%srx",
				       wiphy_name(local->hw.wiphy));
	local->tx_led.name = kasprintf(GFP_KERNEL, "%stx",
				       wiphy_name(local->hw.wiphy));
	local->assoc_led.name = kasprintf(GFP_KERNEL, "%sassoc",
					  wiphy_name(local->hw.wiphy));
	local->radio_led.name = kasprintf(GFP_KERNEL, "%sradio",
					  wiphy_name(local->hw.wiphy));
}

void ieee80211_free_led_names(struct ieee80211_local *local)
{
	kfree(local->rx_led.name);
	kfree(local->tx_led.name);
	kfree(local->assoc_led.name);
	kfree(local->radio_led.name);
}

static int ieee80211_tx_led_activate(struct led_classdev *led_cdev)
{
	struct ieee80211_local *local = container_of(led_cdev->trigger,
						     struct ieee80211_local,
						     tx_led);

	atomic_inc(&local->tx_led_active);

	return 0;
}

static void ieee80211_tx_led_deactivate(struct led_classdev *led_cdev)
{
	struct ieee80211_local *local = container_of(led_cdev->trigger,
						     struct ieee80211_local,
						     tx_led);

	atomic_dec(&local->tx_led_active);
}

static int ieee80211_rx_led_activate(struct led_classdev *led_cdev)
{
	struct ieee80211_local *local = container_of(led_cdev->trigger,
						     struct ieee80211_local,
						     rx_led);

	atomic_inc(&local->rx_led_active);

	return 0;
}

static void ieee80211_rx_led_deactivate(struct led_classdev *led_cdev)
{
	struct ieee80211_local *local = container_of(led_cdev->trigger,
						     struct ieee80211_local,
rx_led); atomic_dec(&local->rx_led_active); } static int ieee80211_assoc_led_activate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, assoc_led); atomic_inc(&local->assoc_led_active); return 0; } static void ieee80211_assoc_led_deactivate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, assoc_led); atomic_dec(&local->assoc_led_active); } static int ieee80211_radio_led_activate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, radio_led); atomic_inc(&local->radio_led_active); return 0; } static void ieee80211_radio_led_deactivate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, radio_led); atomic_dec(&local->radio_led_active); } static int ieee80211_tpt_led_activate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, tpt_led); atomic_inc(&local->tpt_led_active); return 0; } static void ieee80211_tpt_led_deactivate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, tpt_led); atomic_dec(&local->tpt_led_active); } void ieee80211_led_init(struct ieee80211_local *local) { atomic_set(&local->rx_led_active, 0); local->rx_led.activate = ieee80211_rx_led_activate; local->rx_led.deactivate = ieee80211_rx_led_deactivate; if (local->rx_led.name && led_trigger_register(&local->rx_led)) { kfree(local->rx_led.name); local->rx_led.name = NULL; } atomic_set(&local->tx_led_active, 0); local->tx_led.activate = ieee80211_tx_led_activate; local->tx_led.deactivate = ieee80211_tx_led_deactivate; if (local->tx_led.name && led_trigger_register(&local->tx_led)) { kfree(local->tx_led.name); local->tx_led.name = NULL; } atomic_set(&local->assoc_led_active, 0); local->assoc_led.activate = ieee80211_assoc_led_activate; local->assoc_led.deactivate = ieee80211_assoc_led_deactivate; if (local->assoc_led.name && led_trigger_register(&local->assoc_led)) { kfree(local->assoc_led.name); local->assoc_led.name = NULL; } atomic_set(&local->radio_led_active, 0); local->radio_led.activate = ieee80211_radio_led_activate; local->radio_led.deactivate = ieee80211_radio_led_deactivate; if (local->radio_led.name && led_trigger_register(&local->radio_led)) { kfree(local->radio_led.name); local->radio_led.name = NULL; } atomic_set(&local->tpt_led_active, 0); if (local->tpt_led_trigger) { local->tpt_led.activate = ieee80211_tpt_led_activate; local->tpt_led.deactivate = ieee80211_tpt_led_deactivate; if (led_trigger_register(&local->tpt_led)) { kfree(local->tpt_led_trigger); local->tpt_led_trigger = NULL; } } } void ieee80211_led_exit(struct ieee80211_local *local) { if (local->radio_led.name) led_trigger_unregister(&local->radio_led); if (local->assoc_led.name) led_trigger_unregister(&local->assoc_led); if (local->tx_led.name) led_trigger_unregister(&local->tx_led); if (local->rx_led.name) led_trigger_unregister(&local->rx_led); if (local->tpt_led_trigger) { led_trigger_unregister(&local->tpt_led); kfree(local->tpt_led_trigger); } } const char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); return local->radio_led.name; } EXPORT_SYMBOL(__ieee80211_get_radio_led_name); const char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) { struct ieee80211_local 
*local = hw_to_local(hw); return local->assoc_led.name; } EXPORT_SYMBOL(__ieee80211_get_assoc_led_name); const char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); return local->tx_led.name; } EXPORT_SYMBOL(__ieee80211_get_tx_led_name); const char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); return local->rx_led.name; } EXPORT_SYMBOL(__ieee80211_get_rx_led_name); static unsigned long tpt_trig_traffic(struct ieee80211_local *local, struct tpt_led_trigger *tpt_trig) { unsigned long traffic, delta; traffic = tpt_trig->tx_bytes + tpt_trig->rx_bytes; delta = traffic - tpt_trig->prev_traffic; tpt_trig->prev_traffic = traffic; return DIV_ROUND_UP(delta, 1024 / 8); } static void tpt_trig_timer(struct timer_list *t) { struct tpt_led_trigger *tpt_trig = from_timer(tpt_trig, t, timer); struct ieee80211_local *local = tpt_trig->local; unsigned long on, off, tpt; int i; if (!tpt_trig->running) return; mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ)); tpt = tpt_trig_traffic(local, tpt_trig); /* default to just solid on */ on = 1; off = 0; for (i = tpt_trig->blink_table_len - 1; i >= 0; i--) { if (tpt_trig->blink_table[i].throughput < 0 || tpt > tpt_trig->blink_table[i].throughput) { off = tpt_trig->blink_table[i].blink_time / 2; on = tpt_trig->blink_table[i].blink_time - off; break; } } led_trigger_blink(&local->tpt_led, on, off); } const char * __ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, unsigned int flags, const struct ieee80211_tpt_blink *blink_table, unsigned int blink_table_len) { struct ieee80211_local *local = hw_to_local(hw); struct tpt_led_trigger *tpt_trig; if (WARN_ON(local->tpt_led_trigger)) return NULL; tpt_trig = kzalloc(sizeof(struct tpt_led_trigger), GFP_KERNEL); if (!tpt_trig) return NULL; snprintf(tpt_trig->name, sizeof(tpt_trig->name), "%stpt", wiphy_name(local->hw.wiphy)); local->tpt_led.name = tpt_trig->name; tpt_trig->blink_table = blink_table; tpt_trig->blink_table_len = blink_table_len; tpt_trig->want = flags; tpt_trig->local = local; timer_setup(&tpt_trig->timer, tpt_trig_timer, 0); local->tpt_led_trigger = tpt_trig; return tpt_trig->name; } EXPORT_SYMBOL(__ieee80211_create_tpt_led_trigger); static void ieee80211_start_tpt_led_trig(struct ieee80211_local *local) { struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; if (tpt_trig->running) return; /* reset traffic */ tpt_trig_traffic(local, tpt_trig); tpt_trig->running = true; tpt_trig_timer(&tpt_trig->timer); mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ)); } static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) { struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; if (!tpt_trig->running) return; tpt_trig->running = false; del_timer_sync(&tpt_trig->timer); led_trigger_event(&local->tpt_led, LED_OFF); } void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, unsigned int types_on, unsigned int types_off) { struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; bool allowed; WARN_ON(types_on & types_off); if (!tpt_trig) return; tpt_trig->active &= ~types_off; tpt_trig->active |= types_on; /* * Regardless of wanted state, we shouldn't blink when * the radio is disabled -- this can happen due to some * code ordering issues with __ieee80211_recalc_idle() * being called before the radio is started. 
*/ allowed = tpt_trig->active & IEEE80211_TPT_LEDTRIG_FL_RADIO; if (!allowed || !(tpt_trig->active & tpt_trig->want)) ieee80211_stop_tpt_led_trig(local); else ieee80211_start_tpt_led_trig(local); }
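As a usage note for the throughput trigger code above, the sketch below shows how a driver might hand mac80211 a blink table through the ieee80211_create_tpt_led_trigger() wrapper (which calls __ieee80211_create_tpt_led_trigger() when CONFIG_MAC80211_LEDS is set) and point an LED at the returned trigger name. The "mydrv" naming, the table values and the probe-time hook are illustrative assumptions, not taken from any particular driver; throughput thresholds are in Kbit/s, matching what tpt_trig_traffic() computes.

/* Hypothetical driver-side sketch of wiring up the tpt LED trigger. */
#include <net/mac80211.h>
#include <linux/leds.h>

static const struct ieee80211_tpt_blink mydrv_blink_table[] = {
	{ .throughput = 0,          .blink_time = 334 },	/* idle: slow blink */
	{ .throughput = 5 * 1024,   .blink_time = 190 },	/* above 5 Mbit/s */
	{ .throughput = 50 * 1024,  .blink_time = 80 },		/* above 50 Mbit/s */
	{ .throughput = 300 * 1024, .blink_time = 50 },		/* above 300 Mbit/s */
};

/* Called from the driver's probe path, before ieee80211_register_hw(). */
static void mydrv_init_leds(struct ieee80211_hw *hw, struct led_classdev *led)
{
	const char *name;

	name = ieee80211_create_tpt_led_trigger(hw,
			IEEE80211_TPT_LEDTRIG_FL_RADIO,	/* only blink while the radio is on */
			mydrv_blink_table, ARRAY_SIZE(mydrv_blink_table));

	/* Point an LED at the trigger; mac80211 registers it in ieee80211_led_init(). */
	if (name)
		led->default_trigger = name;
}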
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
 *              Network name space (netns) aware.
 */

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/in.h>
#include <linux/ip.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/udp.h>
#include <linux/indirect_call_wrapper.h>

#include <net/ip_vs.h>
#include <net/ip.h>
#include <net/ip6_checksum.h>

static int
udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);

static int
udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
		  struct ip_vs_proto_data *pd,
		  int *verdict, struct ip_vs_conn **cpp,
		  struct ip_vs_iphdr *iph)
{
	struct ip_vs_service *svc;
	struct udphdr _udph, *uh;
	__be16 _ports[2], *ports = NULL;

	if (likely(!ip_vs_iph_icmp(iph))) {
		/* IPv6 fragments, only first fragment will hit this */
		uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
		if (uh)
			ports = &uh->source;
	} else {
		ports = skb_header_pointer(
			skb, iph->len, sizeof(_ports), &_ports);
	}

	if (!ports) {
		*verdict = NF_DROP;
		return 0;
	}

	if (likely(!ip_vs_iph_inverse(iph)))
		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
					 &iph->daddr, ports[1]);
	else
		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
					 &iph->saddr, ports[0]);

	if (svc) {
		int ignored;

		if (ip_vs_todrop(ipvs)) {
			/*
			 * It seems that we are very loaded.
* We have to drop this packet :( */ *verdict = NF_DROP; return 0; } /* * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); else *verdict = NF_DROP; return 0; } } /* NF_ACCEPT */ return 1; } static inline void udp_fast_csum_update(int af, struct udphdr *uhdr, const union nf_inet_addr *oldip, const union nf_inet_addr *newip, __be16 oldport, __be16 newport) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) uhdr->check = csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, ip_vs_check_diff2(oldport, newport, ~csum_unfold(uhdr->check)))); else #endif uhdr->check = csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, ip_vs_check_diff2(oldport, newport, ~csum_unfold(uhdr->check)))); if (!uhdr->check) uhdr->check = CSUM_MANGLED_0; } static inline void udp_partial_csum_update(int af, struct udphdr *uhdr, const union nf_inet_addr *oldip, const union nf_inet_addr *newip, __be16 oldlen, __be16 newlen) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) uhdr->check = ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, ip_vs_check_diff2(oldlen, newlen, csum_unfold(uhdr->check)))); else #endif uhdr->check = ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, ip_vs_check_diff2(oldlen, newlen, csum_unfold(uhdr->check)))); } INDIRECT_CALLABLE_SCOPE int udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct udphdr *udph; unsigned int udphoff = iph->len; bool payload_csum = false; int oldlen; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) return 1; #endif oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ if (skb_ensure_writable(skb, udphoff + sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { int ret; /* Some checks before mangling */ if (!udp_csum_check(cp->af, skb, pp)) return 0; /* * Call application helper if needed */ if (!(ret = ip_vs_app_pkt_out(cp, skb, iph))) return 0; /* ret=2: csum update is needed after payload mangling */ if (ret == 1) oldlen = skb->len - udphoff; else payload_csum = true; } udph = (void *)skb_network_header(skb) + udphoff; udph->source = cp->vport; /* * Adjust UDP checksums */ if (skb->ip_summed == CHECKSUM_PARTIAL) { udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, htons(oldlen), htons(skb->len - udphoff)); } else if (!payload_csum && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = cp->app ? 
CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) udph->check = csum_ipv6_magic(&cp->vaddr.in6, &cp->caddr.in6, skb->len - udphoff, cp->protocol, skb->csum); else #endif udph->check = csum_tcpudp_magic(cp->vaddr.ip, cp->caddr.ip, skb->len - udphoff, cp->protocol, skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; skb->ip_summed = CHECKSUM_UNNECESSARY; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, udph->check, (char*)&(udph->check) - (char*)udph); } return 1; } static int udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct udphdr *udph; unsigned int udphoff = iph->len; bool payload_csum = false; int oldlen; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) return 1; #endif oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ if (skb_ensure_writable(skb, udphoff + sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { int ret; /* Some checks before mangling */ if (!udp_csum_check(cp->af, skb, pp)) return 0; /* * Attempt ip_vs_app call. * It will fix ip_vs_conn */ if (!(ret = ip_vs_app_pkt_in(cp, skb, iph))) return 0; /* ret=2: csum update is needed after payload mangling */ if (ret == 1) oldlen = skb->len - udphoff; else payload_csum = true; } udph = (void *)skb_network_header(skb) + udphoff; udph->dest = cp->dport; /* * Adjust UDP checksums */ if (skb->ip_summed == CHECKSUM_PARTIAL) { udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, htons(oldlen), htons(skb->len - udphoff)); } else if (!payload_csum && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = cp->app ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) udph->check = csum_ipv6_magic(&cp->caddr.in6, &cp->daddr.in6, skb->len - udphoff, cp->protocol, skb->csum); else #endif udph->check = csum_tcpudp_magic(cp->caddr.ip, cp->daddr.ip, skb->len - udphoff, cp->protocol, skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; skb->ip_summed = CHECKSUM_UNNECESSARY; } return 1; } static int udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) { struct udphdr _udph, *uh; unsigned int udphoff; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) udphoff = sizeof(struct ipv6hdr); else #endif udphoff = ip_hdrlen(skb); uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); if (uh == NULL) return 0; if (uh->check != 0) { switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); fallthrough; case CHECKSUM_COMPLETE: #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len - udphoff, ipv6_hdr(skb)->nexthdr, skb->csum)) { IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, "Failed checksum for"); return 0; } } else #endif if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, skb->len - udphoff, ip_hdr(skb)->protocol, skb->csum)) { IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, "Failed checksum for"); return 0; } break; default: /* No need to checksum. 
*/ break; } } return 1; } static inline __u16 udp_app_hashkey(__be16 port) { return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) & UDP_APP_TAB_MASK; } static int udp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); hash = udp_app_hashkey(port); list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]); atomic_inc(&pd->appcnt); out: return ret; } static void udp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); atomic_dec(&pd->appcnt); list_del_rcu(&inc->p_list); } static int udp_app_conn_bind(struct ip_vs_conn *cp) { struct netns_ipvs *ipvs = cp->ipvs; int hash; struct ip_vs_app *inc; int result = 0; /* Default binding: bind app only for NAT */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) return 0; /* Lookup application incarnations and bind the right one */ hash = udp_app_hashkey(cp->vport); list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", __func__, IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), inc->name, ntohs(inc->port)); cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); break; } } return result; } static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_NORMAL] = 5*60*HZ, [IP_VS_UDP_S_LAST] = 2*HZ, }; static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_NORMAL] = "UDP", [IP_VS_UDP_S_LAST] = "BUG!", }; static const char * udp_state_name(int state) { if (state >= IP_VS_UDP_S_LAST) return "ERR!"; return udp_state_name_table[state] ? udp_state_name_table[state] : "?"; } static void udp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_proto_data *pd) { if (unlikely(!pd)) { pr_err("UDP no ns data\n"); return; } cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; if (direction == IP_VS_DIR_OUTPUT) ip_vs_control_assure_ct(cp); } static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); if (!pd->timeout_table) return -ENOMEM; return 0; } static void __udp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { kfree(pd->timeout_table); } struct ip_vs_protocol ip_vs_protocol_udp = { .name = "UDP", .protocol = IPPROTO_UDP, .num_states = IP_VS_UDP_S_LAST, .dont_defrag = 0, .init = NULL, .exit = NULL, .init_netns = __udp_init, .exit_netns = __udp_exit, .conn_schedule = udp_conn_schedule, .conn_in_get = ip_vs_conn_in_get_proto, .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = udp_snat_handler, .dnat_handler = udp_dnat_handler, .state_transition = udp_state_transition, .state_name = udp_state_name, .register_app = udp_register_app, .unregister_app = udp_unregister_app, .app_conn_bind = udp_app_conn_bind, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = NULL, };
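The fast path above (udp_fast_csum_update() via ip_vs_check_diff4()/ip_vs_check_diff2()) relies on incremental ones'-complement checksum updates instead of re-summing the whole datagram. The following is a minimal, self-contained user-space sketch of that update rule (RFC 1624, HC' = ~(~HC + ~m + m')); the function name and the numeric values are hypothetical and only illustrate the arithmetic, they are not part of IPVS.

#include <stdint.h>
#include <stdio.h>

/* Patch a 16-bit ones'-complement checksum when a single 16-bit word of the
 * checksummed data changes from old_w to new_w, without touching the rest
 * of the packet (RFC 1624, eqn. 3).
 */
static uint16_t csum16_replace_word(uint16_t check, uint16_t old_w, uint16_t new_w)
{
	uint32_t sum = (uint16_t)~check;

	sum += (uint16_t)~old_w;
	sum += new_w;
	/* Fold carries back into 16 bits; two folds always suffice. */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* Hypothetical values: the destination port word changes from 80
	 * (0x0050) to 8080 (0x1f90) while the stored UDP checksum is 0x1c46.
	 */
	printf("patched checksum: 0x%04x\n",
	       csum16_replace_word(0x1c46, 0x0050, 0x1f90));
	return 0;
}

As in udp_snat_handler()/udp_dnat_handler() above, a UDP result of zero must then be replaced with CSUM_MANGLED_0, because an on-the-wire UDP checksum of zero means "no checksum".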
76 76 106 67 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 // SPDX-License-Identifier: GPL-2.0-or-later /****************************************************************************** * vlanproc.c VLAN Module. /proc filesystem interface. * * This module is completely hardware-independent and provides * access to the router using Linux /proc filesystem. * * Author: Ben Greear, <greearb@candelatech.com> coppied from wanproc.c * by: Gene Kozin <genek@compuserve.com> * * Copyright: (c) 1998 Ben Greear * * ============================================================================ * Jan 20, 1998 Ben Greear Initial Version *****************************************************************************/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/fs.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include "vlanproc.h" #include "vlan.h" /****** Function Prototypes *************************************************/ /* Methods for preparing data for reading proc entries */ static int vlan_seq_show(struct seq_file *seq, void *v); static void *vlan_seq_start(struct seq_file *seq, loff_t *pos); static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos); static void vlan_seq_stop(struct seq_file *seq, void *); static int vlandev_seq_show(struct seq_file *seq, void *v); /* * Global Data */ /* * Names of the proc directory entries */ static const char name_root[] = "vlan"; static const char name_conf[] = "config"; /* * Structures for interfacing with the /proc filesystem. * VLAN creates its own directory /proc/net/vlan with the following * entries: * config device status/configuration * <device> entry for each device */ /* * Generic /proc/net/vlan/<file> file and inode operations */ static const struct seq_operations vlan_seq_ops = { .start = vlan_seq_start, .next = vlan_seq_next, .stop = vlan_seq_stop, .show = vlan_seq_show, }; /* * Proc filesystem directory entries. 
*/ /* Strings */ static const char *const vlan_name_type_str[VLAN_NAME_TYPE_HIGHEST] = { [VLAN_NAME_TYPE_RAW_PLUS_VID] = "VLAN_NAME_TYPE_RAW_PLUS_VID", [VLAN_NAME_TYPE_PLUS_VID_NO_PAD] = "VLAN_NAME_TYPE_PLUS_VID_NO_PAD", [VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD] = "VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD", [VLAN_NAME_TYPE_PLUS_VID] = "VLAN_NAME_TYPE_PLUS_VID", }; /* * Interface functions */ /* * Clean up /proc/net/vlan entries */ void vlan_proc_cleanup(struct net *net) { struct vlan_net *vn = net_generic(net, vlan_net_id); if (vn->proc_vlan_conf) remove_proc_entry(name_conf, vn->proc_vlan_dir); if (vn->proc_vlan_dir) remove_proc_entry(name_root, net->proc_net); /* Dynamically added entries should be cleaned up as their vlan_device * is removed, so we should not have to take care of it here... */ } /* * Create /proc/net/vlan entries */ int __net_init vlan_proc_init(struct net *net) { struct vlan_net *vn = net_generic(net, vlan_net_id); vn->proc_vlan_dir = proc_net_mkdir(net, name_root, net->proc_net); if (!vn->proc_vlan_dir) goto err; vn->proc_vlan_conf = proc_create_net(name_conf, S_IFREG | 0600, vn->proc_vlan_dir, &vlan_seq_ops, sizeof(struct seq_net_private)); if (!vn->proc_vlan_conf) goto err; return 0; err: pr_err("can't create entry in proc filesystem!\n"); vlan_proc_cleanup(net); return -ENOBUFS; } /* * Add directory entry for VLAN device. */ int vlan_proc_add_dev(struct net_device *vlandev) { struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id); if (!strcmp(vlandev->name, name_conf)) return -EINVAL; vlan->dent = proc_create_single_data(vlandev->name, S_IFREG | 0600, vn->proc_vlan_dir, vlandev_seq_show, vlandev); if (!vlan->dent) return -ENOBUFS; return 0; } /* * Delete directory entry for VLAN device. */ void vlan_proc_rem_dev(struct net_device *vlandev) { /** NOTE: This will consume the memory pointed to by dent, it seems. */ proc_remove(vlan_dev_priv(vlandev)->dent); vlan_dev_priv(vlandev)->dent = NULL; } /****** Proc filesystem entry points ****************************************/ /* * The following few functions build the content of /proc/net/vlan/config */ static void *vlan_seq_from_index(struct seq_file *seq, loff_t *pos) { unsigned long ifindex = *pos; struct net_device *dev; for_each_netdev_dump(seq_file_net(seq), dev, ifindex) { if (!is_vlan_dev(dev)) continue; *pos = dev->ifindex; return dev; } return NULL; } static void *vlan_seq_start(struct seq_file *seq, loff_t *pos) __acquires(rcu) { rcu_read_lock(); if (*pos == 0) return SEQ_START_TOKEN; return vlan_seq_from_index(seq, pos); } static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return vlan_seq_from_index(seq, pos); } static void vlan_seq_stop(struct seq_file *seq, void *v) __releases(rcu) { rcu_read_unlock(); } static int vlan_seq_show(struct seq_file *seq, void *v) { struct net *net = seq_file_net(seq); struct vlan_net *vn = net_generic(net, vlan_net_id); if (v == SEQ_START_TOKEN) { const char *nmtype = NULL; seq_puts(seq, "VLAN Dev name | VLAN ID\n"); if (vn->name_type < ARRAY_SIZE(vlan_name_type_str)) nmtype = vlan_name_type_str[vn->name_type]; seq_printf(seq, "Name-Type: %s\n", nmtype ? 
nmtype : "UNKNOWN"); } else { const struct net_device *vlandev = v; const struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); seq_printf(seq, "%-15s| %d | %s\n", vlandev->name, vlan->vlan_id, vlan->real_dev->name); } return 0; } static int vlandev_seq_show(struct seq_file *seq, void *offset) { struct net_device *vlandev = (struct net_device *) seq->private; const struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats; static const char fmt64[] = "%30s %12llu\n"; int i; if (!is_vlan_dev(vlandev)) return 0; stats = dev_get_stats(vlandev, &temp); seq_printf(seq, "%s VID: %d REORDER_HDR: %i dev->priv_flags: %x\n", vlandev->name, vlan->vlan_id, (int)(vlan->flags & 1), (u32)vlandev->priv_flags); seq_printf(seq, fmt64, "total frames received", stats->rx_packets); seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes); seq_printf(seq, fmt64, "Broadcast/Multicast Rcvd", stats->multicast); seq_puts(seq, "\n"); seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets); seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes); seq_printf(seq, "Device: %s", vlan->real_dev->name); /* now show all PRIORITY mappings relating to this VLAN */ seq_printf(seq, "\nINGRESS priority mappings: " "0:%u 1:%u 2:%u 3:%u 4:%u 5:%u 6:%u 7:%u\n", vlan->ingress_priority_map[0], vlan->ingress_priority_map[1], vlan->ingress_priority_map[2], vlan->ingress_priority_map[3], vlan->ingress_priority_map[4], vlan->ingress_priority_map[5], vlan->ingress_priority_map[6], vlan->ingress_priority_map[7]); seq_printf(seq, " EGRESS priority mappings: "); for (i = 0; i < 16; i++) { const struct vlan_priority_tci_mapping *mp = vlan->egress_priority_map[i]; while (mp) { seq_printf(seq, "%u:%d ", mp->priority, ((mp->vlan_qos >> 13) & 0x7)); mp = mp->next; } } seq_puts(seq, "\n"); return 0; }
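For reference, the seq_printf() format strings above produce output of roughly the following shape in /proc/net/vlan/config; the interface names and VLAN IDs shown here are hypothetical.

VLAN Dev name | VLAN ID
Name-Type: VLAN_NAME_TYPE_RAW_PLUS_VID
eth0.100       | 100 | eth0
eth0.200       | 200 | eth0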
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef LINUX_MLD_H #define LINUX_MLD_H #include <linux/in6.h> #include <linux/icmpv6.h> /* MLDv1 Query/Report/Done */ struct mld_msg { struct icmp6hdr mld_hdr; struct in6_addr mld_mca; }; #define mld_type mld_hdr.icmp6_type #define mld_code mld_hdr.icmp6_code #define mld_cksum mld_hdr.icmp6_cksum #define mld_maxdelay mld_hdr.icmp6_maxdelay #define mld_reserved mld_hdr.icmp6_dataun.un_data16[1] /* Multicast Listener Discovery version 2 headers */ /* MLDv2 Report */ struct mld2_grec { __u8 grec_type; __u8 grec_auxwords; __be16 grec_nsrcs; struct in6_addr grec_mca; struct in6_addr grec_src[]; }; struct mld2_report { struct icmp6hdr mld2r_hdr; struct mld2_grec mld2r_grec[]; }; #define mld2r_type mld2r_hdr.icmp6_type #define mld2r_resv1 mld2r_hdr.icmp6_code #define mld2r_cksum mld2r_hdr.icmp6_cksum #define mld2r_resv2 mld2r_hdr.icmp6_dataun.un_data16[0] #define mld2r_ngrec mld2r_hdr.icmp6_dataun.un_data16[1] /* MLDv2 Query */ struct mld2_query { struct icmp6hdr mld2q_hdr; struct in6_addr mld2q_mca; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 mld2q_qrv:3, mld2q_suppress:1, mld2q_resv2:4; #elif defined(__BIG_ENDIAN_BITFIELD) __u8 mld2q_resv2:4, mld2q_suppress:1, mld2q_qrv:3; #else #error "Please fix <asm/byteorder.h>" #endif __u8 mld2q_qqic; __be16 mld2q_nsrcs; struct in6_addr mld2q_srcs[]; }; #define mld2q_type mld2q_hdr.icmp6_type #define mld2q_code mld2q_hdr.icmp6_code #define mld2q_cksum mld2q_hdr.icmp6_cksum #define mld2q_mrc mld2q_hdr.icmp6_maxdelay #define mld2q_resv1 mld2q_hdr.icmp6_dataun.un_data16[1] /* RFC3810, 5.1.3. Maximum Response Code: * * If Maximum Response Code >= 32768, Maximum Response Code represents a * floating-point value as follows: * * 0 1 2 3 4 5 6 7 8 9 A B C D E F * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |1| exp | mant | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ #define MLDV2_MRC_EXP(value) (((value) >> 12) & 0x0007) #define MLDV2_MRC_MAN(value) ((value) & 0x0fff) /* RFC3810, 5.1.9. QQIC (Querier's Query Interval Code): * * If QQIC >= 128, QQIC represents a floating-point value as follows: * * 0 1 2 3 4 5 6 7 * +-+-+-+-+-+-+-+-+ * |1| exp | mant | * +-+-+-+-+-+-+-+-+ */ #define MLDV2_QQIC_EXP(value) (((value) >> 4) & 0x07) #define MLDV2_QQIC_MAN(value) ((value) & 0x0f) #define MLD_EXP_MIN_LIMIT 32768UL #define MLDV1_MRD_MAX_COMPAT (MLD_EXP_MIN_LIMIT - 1) #define MLD_MAX_QUEUE 8 #define MLD_MAX_SKBS 32 static inline unsigned long mldv2_mrc(const struct mld2_query *mlh2) { /* RFC3810, 5.1.3. Maximum Response Code */ unsigned long ret, mc_mrc = ntohs(mlh2->mld2q_mrc); if (mc_mrc < MLD_EXP_MIN_LIMIT) { ret = mc_mrc; } else { unsigned long mc_man, mc_exp; mc_exp = MLDV2_MRC_EXP(mc_mrc); mc_man = MLDV2_MRC_MAN(mc_mrc); ret = (mc_man | 0x1000) << (mc_exp + 3); } return ret; } #endif
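mld.h defines the QQIC exponent/mantissa macros but, unlike the Maximum Response Code, leaves the decode to the caller. As a hedged illustration, the analogous decode per RFC 3810, 5.1.9 would look like the sketch below; the function is not part of this header, and the worked mldv2_mrc() numbers in the trailing comment are chosen only as an example.

/* Illustrative only -- not part of this header. Decode a Querier's Query
 * Interval Code into seconds (RFC 3810, 5.1.9), mirroring how mldv2_mrc()
 * above decodes the Maximum Response Code.
 */
static inline unsigned long mldv2_qqi_example(u8 qqic)
{
	if (qqic < 128)
		return qqic;

	/* QQI = (mant | 0x10) << (exp + 3) */
	return (MLDV2_QQIC_MAN(qqic) | 0x10) << (MLDV2_QQIC_EXP(qqic) + 3);
}

/* Worked example for mldv2_mrc(): mld2q_mrc = 0x8100 (33024 >= 32768), so
 * exp = (0x8100 >> 12) & 0x7 = 0 and mant = 0x8100 & 0x0fff = 0x100, giving
 * (0x100 | 0x1000) << (0 + 3) = 0x8800 = 34816 milliseconds of maximum
 * response delay.
 */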
1375 4221 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 /* SPDX-License-Identifier: GPL-2.0 */ /* * Kernel Electric-Fence (KFENCE). For more info please see * Documentation/dev-tools/kfence.rst. * * Copyright (C) 2020, Google LLC. */ #ifndef MM_KFENCE_KFENCE_H #define MM_KFENCE_KFENCE_H #include <linux/mm.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/types.h> #include "../slab.h" /* for struct kmem_cache */ /* * Get the canary byte pattern for @addr. Use a pattern that varies based on the * lower 3 bits of the address, to detect memory corruptions with higher * probability, where similar constants are used. */ #define KFENCE_CANARY_PATTERN_U8(addr) ((u8)0xaa ^ (u8)((unsigned long)(addr) & 0x7)) /* * Define a continuous 8-byte canary starting from a multiple of 8. The canary * of each byte is only related to the lowest three bits of its address, so the * canary of every 8 bytes is the same. 64-bit memory can be filled and checked * at a time instead of byte by byte to improve performance. */ #define KFENCE_CANARY_PATTERN_U64 ((u64)0xaaaaaaaaaaaaaaaa ^ (u64)(le64_to_cpu(0x0706050403020100))) /* Maximum stack depth for reports. */ #define KFENCE_STACK_DEPTH 64 /* KFENCE object states. */ enum kfence_object_state { KFENCE_OBJECT_UNUSED, /* Object is unused. */ KFENCE_OBJECT_ALLOCATED, /* Object is currently allocated. */ KFENCE_OBJECT_RCU_FREEING, /* Object was allocated, and then being freed by rcu. */ KFENCE_OBJECT_FREED, /* Object was allocated, and then freed. */ }; /* Alloc/free tracking information. */ struct kfence_track { pid_t pid; int cpu; u64 ts_nsec; int num_stack_entries; unsigned long stack_entries[KFENCE_STACK_DEPTH]; }; /* KFENCE metadata per guarded allocation. */ struct kfence_metadata { struct list_head list; /* Freelist node; access under kfence_freelist_lock. */ struct rcu_head rcu_head; /* For delayed freeing. */ /* * Lock protecting below data; to ensure consistency of the below data, * since the following may execute concurrently: __kfence_alloc(), * __kfence_free(), kfence_handle_page_fault(). However, note that we * cannot grab the same metadata off the freelist twice, and multiple * __kfence_alloc() cannot run concurrently on the same metadata. */ raw_spinlock_t lock; /* The current state of the object; see above. */ enum kfence_object_state state; /* * Allocated object address; cannot be calculated from size, because of * alignment requirements. * * Invariant: ALIGN_DOWN(addr, PAGE_SIZE) is constant. */ unsigned long addr; /* * The size of the original allocation. */ size_t size; /* * The kmem_cache cache of the last allocation; NULL if never allocated * or the cache has already been destroyed. */ struct kmem_cache *cache; /* * In case of an invalid access, the page that was unprotected; we * optimistically only store one address. */ unsigned long unprotected_page; /* Allocation and free stack information. */ struct kfence_track alloc_track; struct kfence_track free_track; /* For updating alloc_covered on frees. 
*/ u32 alloc_stack_hash; #ifdef CONFIG_MEMCG struct slabobj_ext obj_exts; #endif }; #define KFENCE_METADATA_SIZE PAGE_ALIGN(sizeof(struct kfence_metadata) * \ CONFIG_KFENCE_NUM_OBJECTS) extern struct kfence_metadata *kfence_metadata; static inline struct kfence_metadata *addr_to_metadata(unsigned long addr) { long index; /* The checks do not affect performance; only called from slow-paths. */ if (!is_kfence_address((void *)addr)) return NULL; /* * May be an invalid index if called with an address at the edge of * __kfence_pool, in which case we would report an "invalid access" * error. */ index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1; if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS) return NULL; return &kfence_metadata[index]; } /* KFENCE error types for report generation. */ enum kfence_error_type { KFENCE_ERROR_OOB, /* Detected a out-of-bounds access. */ KFENCE_ERROR_UAF, /* Detected a use-after-free access. */ KFENCE_ERROR_CORRUPTION, /* Detected a memory corruption on free. */ KFENCE_ERROR_INVALID, /* Invalid access of unknown type. */ KFENCE_ERROR_INVALID_FREE, /* Invalid free. */ }; void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *regs, const struct kfence_metadata *meta, enum kfence_error_type type); void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta); #endif /* MM_KFENCE_KFENCE_H */
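To make the pointer arithmetic in addr_to_metadata() and the canary scheme concrete, here is a small worked example. It assumes a hypothetical 4 KiB PAGE_SIZE and expresses addresses as offsets from __kfence_pool; it is illustration only, not part of the header.

/* addr_to_metadata(): index = offset / (PAGE_SIZE * 2) - 1, with 2 * 4 KiB = 0x2000:
 *
 *   offset 0x0800: 0x0800 / 0x2000 - 1 = -1  -> rejected (index < 0), returns NULL
 *   offset 0x2100: 0x2100 / 0x2000 - 1 =  0  -> &kfence_metadata[0]
 *   offset 0x4010: 0x4010 / 0x2000 - 1 =  1  -> &kfence_metadata[1]
 *
 * Canary bytes vary with the low three address bits, e.g. for an address
 * ending in 0x5: KFENCE_CANARY_PATTERN_U8(addr) = 0xaa ^ 0x5 = 0xaf.
 */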
76 76 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_skbmod.c skb data modifier * * Copyright (c) 2016 Jamal Hadi Salim <jhs@mojatatu.com> */ #include <linux/module.h> #include <linux/if_arp.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <net/inet_ecn.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/tc_wrapper.h> #include <linux/tc_act/tc_skbmod.h> #include <net/tc_act/tc_skbmod.h> static struct tc_action_ops act_skbmod_ops; TC_INDIRECT_SCOPE int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_skbmod *d = to_skbmod(a); int action, max_edit_len, err; struct tcf_skbmod_params *p; u64 flags; tcf_lastuse_update(&d->tcf_tm); bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb); action = READ_ONCE(d->tcf_action); if (unlikely(action == TC_ACT_SHOT)) goto drop; max_edit_len = skb_mac_header_len(skb); p = rcu_dereference_bh(d->skbmod_p); flags = p->flags; /* tcf_skbmod_init() guarantees "flags" to be one of the following: * 1. a combination of SKBMOD_F_{DMAC,SMAC,ETYPE} * 2. SKBMOD_F_SWAPMAC * 3. SKBMOD_F_ECN * SKBMOD_F_ECN only works with IP packets; all other flags only work with Ethernet * packets. 
*/ if (flags == SKBMOD_F_ECN) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): case cpu_to_be16(ETH_P_IPV6): max_edit_len += skb_network_header_len(skb); break; default: goto out; } } else if (!skb->dev || skb->dev->type != ARPHRD_ETHER) { goto out; } err = skb_ensure_writable(skb, max_edit_len); if (unlikely(err)) /* best policy is to drop on the floor */ goto drop; if (flags & SKBMOD_F_DMAC) ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst); if (flags & SKBMOD_F_SMAC) ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src); if (flags & SKBMOD_F_ETYPE) eth_hdr(skb)->h_proto = p->eth_type; if (flags & SKBMOD_F_SWAPMAC) { u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ /*XXX: I am sure we can come up with more efficient swapping*/ ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest); ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr); } if (flags & SKBMOD_F_ECN) INET_ECN_set_ce(skb); out: return action; drop: qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); return TC_ACT_SHOT; } static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { [TCA_SKBMOD_PARMS] = { .len = sizeof(struct tc_skbmod) }, [TCA_SKBMOD_DMAC] = { .len = ETH_ALEN }, [TCA_SKBMOD_SMAC] = { .len = ETH_ALEN }, [TCA_SKBMOD_ETYPE] = { .type = NLA_U16 }, }; static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id); bool ovr = flags & TCA_ACT_FLAGS_REPLACE; bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SKBMOD_MAX + 1]; struct tcf_skbmod_params *p, *p_old; struct tcf_chain *goto_ch = NULL; struct tc_skbmod *parm; u32 lflags = 0, index; struct tcf_skbmod *d; bool exists = false; u8 *daddr = NULL; u8 *saddr = NULL; u16 eth_type = 0; int ret = 0, err; if (!nla) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_SKBMOD_MAX, nla, skbmod_policy, NULL); if (err < 0) return err; if (!tb[TCA_SKBMOD_PARMS]) return -EINVAL; if (tb[TCA_SKBMOD_DMAC]) { daddr = nla_data(tb[TCA_SKBMOD_DMAC]); lflags |= SKBMOD_F_DMAC; } if (tb[TCA_SKBMOD_SMAC]) { saddr = nla_data(tb[TCA_SKBMOD_SMAC]); lflags |= SKBMOD_F_SMAC; } if (tb[TCA_SKBMOD_ETYPE]) { eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]); lflags |= SKBMOD_F_ETYPE; } parm = nla_data(tb[TCA_SKBMOD_PARMS]); index = parm->index; if (parm->flags & SKBMOD_F_SWAPMAC) lflags = SKBMOD_F_SWAPMAC; if (parm->flags & SKBMOD_F_ECN) lflags = SKBMOD_F_ECN; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; if (exists && bind) return ACT_P_BOUND; if (!lflags) { if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); return -EINVAL; } if (!exists) { ret = tcf_idr_create(tn, index, est, a, &act_skbmod_ops, bind, true, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; } else if (!ovr) { tcf_idr_release(*a, bind); return -EEXIST; } err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; d = to_skbmod(*a); p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { err = -ENOMEM; goto put_chain; } p->flags = lflags; if (ovr) spin_lock_bh(&d->tcf_lock); /* Protected by tcf_lock if overwriting existing action. 
*/ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); p_old = rcu_dereference_protected(d->skbmod_p, 1); if (lflags & SKBMOD_F_DMAC) ether_addr_copy(p->eth_dst, daddr); if (lflags & SKBMOD_F_SMAC) ether_addr_copy(p->eth_src, saddr); if (lflags & SKBMOD_F_ETYPE) p->eth_type = htons(eth_type); rcu_assign_pointer(d->skbmod_p, p); if (ovr) spin_unlock_bh(&d->tcf_lock); if (p_old) kfree_rcu(p_old, rcu); if (goto_ch) tcf_chain_put_by_act(goto_ch); return ret; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*a, bind); return err; } static void tcf_skbmod_cleanup(struct tc_action *a) { struct tcf_skbmod *d = to_skbmod(a); struct tcf_skbmod_params *p; p = rcu_dereference_protected(d->skbmod_p, 1); if (p) kfree_rcu(p, rcu); } static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { struct tcf_skbmod *d = to_skbmod(a); unsigned char *b = skb_tail_pointer(skb); struct tcf_skbmod_params *p; struct tc_skbmod opt; struct tcf_t t; memset(&opt, 0, sizeof(opt)); opt.index = d->tcf_index; opt.refcnt = refcount_read(&d->tcf_refcnt) - ref; opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind; spin_lock_bh(&d->tcf_lock); opt.action = d->tcf_action; p = rcu_dereference_protected(d->skbmod_p, lockdep_is_held(&d->tcf_lock)); opt.flags = p->flags; if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if ((p->flags & SKBMOD_F_DMAC) && nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst)) goto nla_put_failure; if ((p->flags & SKBMOD_F_SMAC) && nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src)) goto nla_put_failure; if ((p->flags & SKBMOD_F_ETYPE) && nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type))) goto nla_put_failure; tcf_tm_dump(&t, &d->tcf_tm); if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD)) goto nla_put_failure; spin_unlock_bh(&d->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&d->tcf_lock); nlmsg_trim(skb, b); return -1; } static struct tc_action_ops act_skbmod_ops = { .kind = "skbmod", .id = TCA_ACT_SKBMOD, .owner = THIS_MODULE, .act = tcf_skbmod_act, .dump = tcf_skbmod_dump, .init = tcf_skbmod_init, .cleanup = tcf_skbmod_cleanup, .size = sizeof(struct tcf_skbmod), }; MODULE_ALIAS_NET_ACT("skbmod"); static __net_init int skbmod_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id); return tc_action_net_init(net, tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_skbmod_ops.net_id); } static struct pernet_operations skbmod_net_ops = { .init = skbmod_init_net, .exit_batch = skbmod_exit_net, .id = &act_skbmod_ops.net_id, .size = sizeof(struct tc_action_net), }; MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>"); MODULE_DESCRIPTION("SKB data mod-ing"); MODULE_LICENSE("GPL"); static int __init skbmod_init_module(void) { return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops); } static void __exit skbmod_cleanup_module(void) { tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops); } module_init(skbmod_init_module); module_exit(skbmod_cleanup_module);
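In practice an skbmod action is configured from user space through iproute2; a typical invocation (syntax as documented in tc-skbmod(8), quoted here from memory, so verify against your iproute2 version) is something like: tc filter add dev eth0 parent 1: protocol ip prio 10 u32 match u32 0 0 flowid 1:1 action skbmod set dmac 02:12:13:14:15:16. Note also that in tcf_skbmod_init() the SKBMOD_F_SWAPMAC and SKBMOD_F_ECN cases assign lflags outright rather than OR-ing into it, which is what guarantees the three mutually exclusive flag combinations documented above tcf_skbmod_act().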
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* internal.h: mm/ internal definitions * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef __MM_INTERNAL_H #define __MM_INTERNAL_H #include <linux/fs.h> #include <linux/khugepaged.h> #include <linux/mm.h> #include <linux/mm_inline.h> #include <linux/pagemap.h> #include <linux/rmap.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/swap_cgroup.h> #include <linux/tracepoint-defs.h> /* Internal core VMA manipulation functions. */ #include "vma.h" struct folio_batch; /* * The set of flags that only affect watermark checking and reclaim * behaviour.
This is used by the MM to obey the caller constraints * about IO, FS and watermark checking while ignoring placement * hints such as HIGHMEM usage. */ #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ __GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\ __GFP_NOLOCKDEP) /* The GFP flags allowed during early boot */ #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) /* Control allocation cpuset and node placement constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) /* Do not use these with a slab allocator */ #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) /* * Different from WARN_ON_ONCE(), no warning will be issued * when we specify __GFP_NOWARN. */ #define WARN_ON_ONCE_GFP(cond, gfp) ({ \ static bool __section(".data.once") __warned; \ int __ret_warn_once = !!(cond); \ \ if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \ __warned = true; \ WARN_ON(1); \ } \ unlikely(__ret_warn_once); \ }) void page_writeback_init(void); /* * If a 16GB hugetlb folio were mapped by PTEs of all of its 4kB pages, * its nr_pages_mapped would be 0x400000: choose the ENTIRELY_MAPPED bit * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE). Hugetlb currently * leaves nr_pages_mapped at 0, but avoid surprise if it participates later. */ #define ENTIRELY_MAPPED 0x800000 #define FOLIO_PAGES_MAPPED (ENTIRELY_MAPPED - 1) /* * Flags passed to __show_mem() and show_free_areas() to suppress output in * various contexts. */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ /* * How many individual pages have an elevated _mapcount. Excludes * the folio's entire_mapcount. * * Don't use this function outside of debugging code. */ static inline int folio_nr_pages_mapped(const struct folio *folio) { return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED; } /* * Retrieve the first entry of a folio based on a provided entry within the * folio. We cannot rely on folio->swap as there is no guarantee that it has * been initialized. Used for calling arch_swap_restore() */ static inline swp_entry_t folio_swap(swp_entry_t entry, const struct folio *folio) { swp_entry_t swap = { .val = ALIGN_DOWN(entry.val, folio_nr_pages(folio)), }; return swap; } static inline void *folio_raw_mapping(const struct folio *folio) { unsigned long mapping = (unsigned long)folio->mapping; return (void *)(mapping & ~PAGE_MAPPING_FLAGS); } #ifdef CONFIG_MMU /* Flags for folio_pte_batch(). */ typedef int __bitwise fpb_t; /* Compare PTEs after pte_mkclean(), ignoring the dirty bit. */ #define FPB_IGNORE_DIRTY ((__force fpb_t)BIT(0)) /* Compare PTEs after pte_clear_soft_dirty(), ignoring the soft-dirty bit. */ #define FPB_IGNORE_SOFT_DIRTY ((__force fpb_t)BIT(1)) static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags) { if (flags & FPB_IGNORE_DIRTY) pte = pte_mkclean(pte); if (likely(flags & FPB_IGNORE_SOFT_DIRTY)) pte = pte_clear_soft_dirty(pte); return pte_wrprotect(pte_mkold(pte)); } /** * folio_pte_batch - detect a PTE batch for a large folio * @folio: The large folio to detect a PTE batch for. * @addr: The user virtual address the first page is mapped at. * @start_ptep: Page table pointer for the first entry. * @pte: Page table entry for the first page. * @max_nr: The maximum number of table entries to consider. * @flags: Flags to modify the PTE batch semantics. * @any_writable: Optional pointer to indicate whether any entry except the * first one is writable. 
* @any_young: Optional pointer to indicate whether any entry except the * first one is young. * @any_dirty: Optional pointer to indicate whether any entry except the * first one is dirty. * * Detect a PTE batch: consecutive (present) PTEs that map consecutive * pages of the same large folio. * * All PTEs inside a PTE batch have the same PTE bits set, excluding the PFN, * the accessed bit, writable bit, dirty bit (with FPB_IGNORE_DIRTY) and * soft-dirty bit (with FPB_IGNORE_SOFT_DIRTY). * * start_ptep must map any page of the folio. max_nr must be at least one and * must be limited by the caller so scanning cannot exceed a single page table. * * Return: the number of table entries in the batch. */ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags, bool *any_writable, bool *any_young, bool *any_dirty) { unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio); const pte_t *end_ptep = start_ptep + max_nr; pte_t expected_pte, *ptep; bool writable, young, dirty; int nr; if (any_writable) *any_writable = false; if (any_young) *any_young = false; if (any_dirty) *any_dirty = false; VM_WARN_ON_FOLIO(!pte_present(pte), folio); VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio); VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio); nr = pte_batch_hint(start_ptep, pte); expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags); ptep = start_ptep + nr; while (ptep < end_ptep) { pte = ptep_get(ptep); if (any_writable) writable = !!pte_write(pte); if (any_young) young = !!pte_young(pte); if (any_dirty) dirty = !!pte_dirty(pte); pte = __pte_batch_clear_ignored(pte, flags); if (!pte_same(pte, expected_pte)) break; /* * Stop immediately once we reached the end of the folio. In * corner cases the next PFN might fall into a different * folio. */ if (pte_pfn(pte) >= folio_end_pfn) break; if (any_writable) *any_writable |= writable; if (any_young) *any_young |= young; if (any_dirty) *any_dirty |= dirty; nr = pte_batch_hint(ptep, pte); expected_pte = pte_advance_pfn(expected_pte, nr); ptep += nr; } return min(ptep - start_ptep, max_nr); } /** * pte_move_swp_offset - Move the swap entry offset field of a swap pte * forward or backward by delta * @pte: The initial pte state; is_swap_pte(pte) must be true and * non_swap_entry() must be false. * @delta: The direction and the offset we are moving; forward if delta * is positive; backward if delta is negative * * Moves the swap offset, while maintaining all other fields, including * swap type, and any swp pte bits. The resulting pte is returned. */ static inline pte_t pte_move_swp_offset(pte_t pte, long delta) { swp_entry_t entry = pte_to_swp_entry(pte); pte_t new = __swp_entry_to_pte(__swp_entry(swp_type(entry), (swp_offset(entry) + delta))); if (pte_swp_soft_dirty(pte)) new = pte_swp_mksoft_dirty(new); if (pte_swp_exclusive(pte)) new = pte_swp_mkexclusive(new); if (pte_swp_uffd_wp(pte)) new = pte_swp_mkuffd_wp(new); return new; } /** * pte_next_swp_offset - Increment the swap entry offset field of a swap pte. * @pte: The initial pte state; is_swap_pte(pte) must be true and * non_swap_entry() must be false. * * Increments the swap offset, while maintaining all other fields, including * swap type, and any swp pte bits. The resulting pte is returned. 
*/ static inline pte_t pte_next_swp_offset(pte_t pte) { return pte_move_swp_offset(pte, 1); } /** * swap_pte_batch - detect a PTE batch for a set of contiguous swap entries * @start_ptep: Page table pointer for the first entry. * @max_nr: The maximum number of table entries to consider. * @pte: Page table entry for the first entry. * * Detect a batch of contiguous swap entries: consecutive (non-present) PTEs * containing swap entries all with consecutive offsets and targeting the same * swap type, all with matching swp pte bits. * * max_nr must be at least one and must be limited by the caller so scanning * cannot exceed a single page table. * * Return: the number of table entries in the batch. */ static inline int swap_pte_batch(pte_t *start_ptep, int max_nr, pte_t pte) { pte_t expected_pte = pte_next_swp_offset(pte); const pte_t *end_ptep = start_ptep + max_nr; swp_entry_t entry = pte_to_swp_entry(pte); pte_t *ptep = start_ptep + 1; unsigned short cgroup_id; VM_WARN_ON(max_nr < 1); VM_WARN_ON(!is_swap_pte(pte)); VM_WARN_ON(non_swap_entry(entry)); cgroup_id = lookup_swap_cgroup_id(entry); while (ptep < end_ptep) { pte = ptep_get(ptep); if (!pte_same(pte, expected_pte)) break; if (lookup_swap_cgroup_id(pte_to_swp_entry(pte)) != cgroup_id) break; expected_pte = pte_next_swp_offset(expected_pte); ptep++; } return ptep - start_ptep; } #endif /* CONFIG_MMU */ void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio, int nr_throttled); static inline void acct_reclaim_writeback(struct folio *folio) { pg_data_t *pgdat = folio_pgdat(folio); int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled); if (nr_throttled) __acct_reclaim_writeback(pgdat, folio, nr_throttled); } static inline void wake_throttle_isolated(pg_data_t *pgdat) { wait_queue_head_t *wqh; wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED]; if (waitqueue_active(wqh)) wake_up(wqh); } vm_fault_t __vmf_anon_prepare(struct vm_fault *vmf); static inline vm_fault_t vmf_anon_prepare(struct vm_fault *vmf) { vm_fault_t ret = __vmf_anon_prepare(vmf); if (unlikely(ret & VM_FAULT_RETRY)) vma_end_read(vmf->vma); return ret; } vm_fault_t do_swap_page(struct vm_fault *vmf); void folio_rotate_reclaimable(struct folio *folio); bool __folio_end_writeback(struct folio *folio); void deactivate_file_folio(struct folio *folio); void folio_activate(struct folio *folio); void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling, bool mm_wr_locked); void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte); struct zap_details; void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, struct zap_details *details); void page_cache_ra_order(struct readahead_control *, struct file_ra_state *, unsigned int order); void force_page_cache_ra(struct readahead_control *, unsigned long nr); static inline void force_page_cache_readahead(struct address_space *mapping, struct file *file, pgoff_t index, unsigned long nr_to_read) { DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index); force_page_cache_ra(&ractl, nr_to_read); } unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); unsigned find_get_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); void filemap_free_folio(struct address_space *mapping, struct folio *folio); int truncate_inode_folio(struct 
address_space *mapping, struct folio *folio); bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end); long mapping_evict_folio(struct address_space *mapping, struct folio *folio); unsigned long mapping_try_invalidate(struct address_space *mapping, pgoff_t start, pgoff_t end, unsigned long *nr_failed); /** * folio_evictable - Test whether a folio is evictable. * @folio: The folio to test. * * Test whether @folio is evictable -- i.e., should be placed on * active/inactive lists vs unevictable list. * * Reasons folio might not be evictable: * 1. folio's mapping marked unevictable * 2. One of the pages in the folio is part of an mlocked VMA */ static inline bool folio_evictable(struct folio *folio) { bool ret; /* Prevent address_space of inode and swap cache from being freed */ rcu_read_lock(); ret = !mapping_unevictable(folio_mapping(folio)) && !folio_test_mlocked(folio); rcu_read_unlock(); return ret; } /* * Turn a non-refcounted page (->_refcount == 0) into refcounted with * a count of one. */ static inline void set_page_refcounted(struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); VM_BUG_ON_PAGE(page_ref_count(page), page); set_page_count(page, 1); } /* * Return true if a folio needs ->release_folio() calling upon it. */ static inline bool folio_needs_release(struct folio *folio) { struct address_space *mapping = folio_mapping(folio); return folio_has_private(folio) || (mapping && mapping_release_always(mapping)); } extern unsigned long highest_memmap_pfn; /* * Maximum number of reclaim retries without progress before the OOM * killer is consider the only way forward. */ #define MAX_RECLAIM_RETRIES 16 /* * in mm/vmscan.c: */ bool folio_isolate_lru(struct folio *folio); void folio_putback_lru(struct folio *folio); extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason); /* * in mm/rmap.c: */ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address); /* * in mm/page_alloc.c */ #define K(x) ((x) << (PAGE_SHIFT-10)) extern char * const zone_names[MAX_NR_ZONES]; /* perform sanity checks on struct pages being allocated or freed */ DECLARE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled); extern int min_free_kbytes; void setup_per_zone_wmarks(void); void calculate_min_free_kbytes(void); int __meminit init_per_zone_wmark_min(void); void page_alloc_sysctl_init(void); /* * Structure for holding the mostly immutable allocation parameters passed * between functions involved in allocations, including the alloc_pages* * family of functions. * * nodemask, migratetype and highest_zoneidx are initialized only once in * __alloc_pages() and then never change. * * zonelist, preferred_zone and highest_zoneidx are set first in * __alloc_pages() for the fast path, and might be later changed * in __alloc_pages_slowpath(). All other functions pass the whole structure * by a const pointer. */ struct alloc_context { struct zonelist *zonelist; nodemask_t *nodemask; struct zoneref *preferred_zoneref; int migratetype; /* * highest_zoneidx represents highest usable zone index of * the allocation request. Due to the nature of the zone, * memory on lower zone than the highest_zoneidx will be * protected by lowmem_reserve[highest_zoneidx]. * * highest_zoneidx is also used by reclaim/compaction to limit * the target zone since higher zone than this index cannot be * usable for this allocation request. */ enum zone_type highest_zoneidx; bool spread_dirty_pages; }; /* * This function returns the order of a free page in the buddy system. 
In * general, page_zone(page)->lock must be held by the caller to prevent the * page from being allocated in parallel and returning garbage as the order. * If a caller does not hold page_zone(page)->lock, it must guarantee that the * page cannot be allocated or merged in parallel. Alternatively, it must * handle invalid values gracefully, and use buddy_order_unsafe() below. */ static inline unsigned int buddy_order(struct page *page) { /* PageBuddy() must be checked by the caller */ return page_private(page); } /* * Like buddy_order(), but for callers who cannot afford to hold the zone lock. * PageBuddy() should be checked first by the caller to minimize race window, * and invalid values must be handled gracefully. * * READ_ONCE is used so that if the caller assigns the result into a local * variable and e.g. tests it for valid range before using, the compiler cannot * decide to remove the variable and inline the page_private(page) multiple * times, potentially observing different values in the tests and the actual * use of the result. */ #define buddy_order_unsafe(page) READ_ONCE(page_private(page)) /* * This function checks whether a page is free && is the buddy * we can coalesce a page and its buddy if * (a) the buddy is not in a hole (check before calling!) && * (b) the buddy is in the buddy system && * (c) a page and its buddy have the same order && * (d) a page and its buddy are in the same zone. * * For recording whether a page is in the buddy system, we set PageBuddy. * Setting, clearing, and testing PageBuddy is serialized by zone->lock. * * For recording page's order, we use page_private(page). */ static inline bool page_is_buddy(struct page *page, struct page *buddy, unsigned int order) { if (!page_is_guard(buddy) && !PageBuddy(buddy)) return false; if (buddy_order(buddy) != order) return false; /* * zone check is done late to avoid uselessly calculating * zone/node ids for pages that could never merge. */ if (page_zone_id(page) != page_zone_id(buddy)) return false; VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); return true; } /* * Locate the struct page for both the matching buddy in our * pair (buddy1) and the combined O(n+1) page they form (page). * * 1) Any buddy B1 will have an order O twin B2 which satisfies * the following equation: * B2 = B1 ^ (1 << O) * For example, if the starting buddy (buddy2) is #8 its order * 1 buddy is #10: * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10 * * 2) Any buddy B will have an order O+1 parent P which * satisfies the following equation: * P = B & ~(1 << O) * * Assumption: *_mem_map is contiguous at least up to MAX_PAGE_ORDER */ static inline unsigned long __find_buddy_pfn(unsigned long page_pfn, unsigned int order) { return page_pfn ^ (1 << order); } /* * Find the buddy of @page and validate it. * @page: The input page * @pfn: The pfn of the page, it saves a call to page_to_pfn() when the * function is used in the performance-critical __free_one_page(). * @order: The order of the page * @buddy_pfn: The output pointer to the buddy pfn, it also saves a call to * page_to_pfn(). * * The found buddy can be a non PageBuddy, out of @page's zone, or its order is * not the same as @page. The validation is necessary before use it. * * Return: the found buddy page or NULL if not found. 
*/ static inline struct page *find_buddy_page_pfn(struct page *page, unsigned long pfn, unsigned int order, unsigned long *buddy_pfn) { unsigned long __buddy_pfn = __find_buddy_pfn(pfn, order); struct page *buddy; buddy = page + (__buddy_pfn - pfn); if (buddy_pfn) *buddy_pfn = __buddy_pfn; if (page_is_buddy(page, buddy, order)) return buddy; return NULL; } extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn, unsigned long end_pfn, struct zone *zone); static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn, unsigned long end_pfn, struct zone *zone) { if (zone->contiguous) return pfn_to_page(start_pfn); return __pageblock_pfn_to_page(start_pfn, end_pfn, zone); } void set_zone_contiguous(struct zone *zone); static inline void clear_zone_contiguous(struct zone *zone) { zone->contiguous = false; } extern int __isolate_free_page(struct page *page, unsigned int order); extern void __putback_isolated_page(struct page *page, unsigned int order, int mt); extern void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order); extern void __free_pages_core(struct page *page, unsigned int order, enum meminit_context context); /* * This will have no effect, other than possibly generating a warning, if the * caller passes in a non-large folio. */ static inline void folio_set_order(struct folio *folio, unsigned int order) { if (WARN_ON_ONCE(!order || !folio_test_large(folio))) return; folio->_flags_1 = (folio->_flags_1 & ~0xffUL) | order; #ifdef CONFIG_64BIT folio->_folio_nr_pages = 1U << order; #endif } void __folio_undo_large_rmappable(struct folio *folio); static inline void folio_undo_large_rmappable(struct folio *folio) { if (folio_order(folio) <= 1 || !folio_test_large_rmappable(folio)) return; /* * At this point, there is no one trying to add the folio to * deferred_list. If folio is not in deferred_list, it's safe * to check without acquiring the split_queue_lock. 
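	 * The data_race() annotation below marks this lockless
	 * list_empty() check as an intentional, benign race.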
*/ if (data_race(list_empty(&folio->_deferred_list))) return; __folio_undo_large_rmappable(folio); } static inline struct folio *page_rmappable_folio(struct page *page) { struct folio *folio = (struct folio *)page; if (folio && folio_test_large(folio)) folio_set_large_rmappable(folio); return folio; } static inline void prep_compound_head(struct page *page, unsigned int order) { struct folio *folio = (struct folio *)page; folio_set_order(folio, order); atomic_set(&folio->_large_mapcount, -1); atomic_set(&folio->_entire_mapcount, -1); atomic_set(&folio->_nr_pages_mapped, 0); atomic_set(&folio->_pincount, 0); if (order > 1) INIT_LIST_HEAD(&folio->_deferred_list); } static inline void prep_compound_tail(struct page *head, int tail_idx) { struct page *p = head + tail_idx; p->mapping = TAIL_MAPPING; set_compound_head(p, head); set_page_private(p, 0); } extern void prep_compound_page(struct page *page, unsigned int order); extern void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags); extern bool free_pages_prepare(struct page *page, unsigned int order); extern int user_min_free_kbytes; void free_unref_page(struct page *page, unsigned int order); void free_unref_folios(struct folio_batch *fbatch); extern void zone_pcp_reset(struct zone *zone); extern void zone_pcp_disable(struct zone *zone); extern void zone_pcp_enable(struct zone *zone); extern void zone_pcp_init(struct zone *zone); extern void *memmap_alloc(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, int nid, bool exact_nid); void memmap_init_range(unsigned long, int, unsigned long, unsigned long, unsigned long, enum meminit_context, struct vmem_altmap *, int); #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* * in mm/compaction.c */ /* * compact_control is used to track pages being migrated and the free pages * they are being migrated to during memory compaction. The free_pfn starts * at the end of a zone and migrate_pfn begins at the start. Movable pages * are moved to the end of a zone during a compaction run and the run * completes when free_pfn <= migrate_pfn */ struct compact_control { struct list_head freepages[NR_PAGE_ORDERS]; /* List of free pages to migrate to */ struct list_head migratepages; /* List of pages being migrated */ unsigned int nr_freepages; /* Number of isolated free pages */ unsigned int nr_migratepages; /* Number of pages to migrate */ unsigned long free_pfn; /* isolate_freepages search base */ /* * Acts as an in/out parameter to page isolation for migration. * isolate_migratepages uses it as a search base. * isolate_migratepages_block will update the value to the next pfn * after the last isolated one. 
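	 * In other words, migrate_pfn is both the resume point supplied
	 * before a scan and the position handed back for the next scan
	 * once isolate_migratepages_block() has run.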
*/ unsigned long migrate_pfn; unsigned long fast_start_pfn; /* a pfn to start linear scan from */ struct zone *zone; unsigned long total_migrate_scanned; unsigned long total_free_scanned; unsigned short fast_search_fail;/* failures to use free list searches */ short search_order; /* order to start a fast search at */ const gfp_t gfp_mask; /* gfp mask of a direct compactor */ int order; /* order a direct compactor needs */ int migratetype; /* migratetype of direct compactor */ const unsigned int alloc_flags; /* alloc flags of a direct compactor */ const int highest_zoneidx; /* zone index of a direct compactor */ enum migrate_mode mode; /* Async or sync migration mode */ bool ignore_skip_hint; /* Scan blocks even if marked skip */ bool no_set_skip_hint; /* Don't mark blocks for skipping */ bool ignore_block_suitable; /* Scan blocks considered unsuitable */ bool direct_compaction; /* False from kcompactd or /proc/... */ bool proactive_compaction; /* kcompactd proactive compaction */ bool whole_zone; /* Whole zone should/has been scanned */ bool contended; /* Signal lock contention */ bool finish_pageblock; /* Scan the remainder of a pageblock. Used * when there are potentially transient * isolation or migration failures to * ensure forward progress. */ bool alloc_contig; /* alloc_contig_range allocation */ }; /* * Used in direct compaction when a page should be taken from the freelists * immediately when one is created during the free path. */ struct capture_control { struct compact_control *cc; struct page *page; }; unsigned long isolate_freepages_range(struct compact_control *cc, unsigned long start_pfn, unsigned long end_pfn); int isolate_migratepages_range(struct compact_control *cc, unsigned long low_pfn, unsigned long end_pfn); int __alloc_contig_migrate_range(struct compact_control *cc, unsigned long start, unsigned long end, int migratetype); /* Free whole pageblock and set its migration type to MIGRATE_CMA. */ void init_cma_reserved_pageblock(struct page *page); #endif /* CONFIG_COMPACTION || CONFIG_CMA */ int find_suitable_fallback(struct free_area *area, unsigned int order, int migratetype, bool only_stealable, bool *can_steal); static inline bool free_area_empty(struct free_area *area, int migratetype) { return list_empty(&area->free_list[migratetype]); } /* mm/util.c */ struct anon_vma *folio_anon_vma(struct folio *folio); #ifdef CONFIG_MMU void unmap_mapping_folio(struct folio *folio); extern long populate_vma_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *locked); extern long faultin_page_range(struct mm_struct *mm, unsigned long start, unsigned long end, bool write, int *locked); extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags, unsigned long bytes); /* * NOTE: This function can't tell whether the folio is "fully mapped" in the * range. * "fully mapped" means all the pages of folio is associated with the page * table of range while this function just check whether the folio range is * within the range [start, end). Function caller needs to do page table * check if it cares about the page table association. * * Typical usage (like mlock or madvise) is: * Caller knows at least 1 page of folio is associated with page table of VMA * and the range [start, end) is intersect with the VMA range. Caller wants * to know whether the folio is fully associated with the range. It calls * this function to check whether the folio is in the range first. Then checks * the page table to know whether the folio is fully mapped to the range. 
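 *
 * A minimal sketch of that call pattern (illustrative only, not taken from
 * the original header):
 *
 *	if (!folio_within_range(folio, vma, start, end))
 *		return;		(folio range not fully inside [start, end))
 *	... then walk the page table entries covering the folio to confirm
 *	    it really is fully mapped before acting on it ...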
*/ static inline bool folio_within_range(struct folio *folio, struct vm_area_struct *vma, unsigned long start, unsigned long end) { pgoff_t pgoff, addr; unsigned long vma_pglen = vma_pages(vma); VM_WARN_ON_FOLIO(folio_test_ksm(folio), folio); if (start > end) return false; if (start < vma->vm_start) start = vma->vm_start; if (end > vma->vm_end) end = vma->vm_end; pgoff = folio_pgoff(folio); /* if folio start address is not in vma range */ if (!in_range(pgoff, vma->vm_pgoff, vma_pglen)) return false; addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); return !(addr < start || end - addr < folio_size(folio)); } static inline bool folio_within_vma(struct folio *folio, struct vm_area_struct *vma) { return folio_within_range(folio, vma, vma->vm_start, vma->vm_end); } /* * mlock_vma_folio() and munlock_vma_folio(): * should be called with vma's mmap_lock held for read or write, * under page table lock for the pte/pmd being added or removed. * * mlock is usually called at the end of folio_add_*_rmap_*(), munlock at * the end of folio_remove_rmap_*(); but new anon folios are managed by * folio_add_lru_vma() calling mlock_new_folio(). */ void mlock_folio(struct folio *folio); static inline void mlock_vma_folio(struct folio *folio, struct vm_area_struct *vma) { /* * The VM_SPECIAL check here serves two purposes. * 1) VM_IO check prevents migration from double-counting during mlock. * 2) Although mmap_region() and mlock_fixup() take care that VM_LOCKED * is never left set on a VM_SPECIAL vma, there is an interval while * file->f_op->mmap() is using vm_insert_page(s), when VM_LOCKED may * still be set while VM_SPECIAL bits are added: so ignore it then. */ if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED)) mlock_folio(folio); } void munlock_folio(struct folio *folio); static inline void munlock_vma_folio(struct folio *folio, struct vm_area_struct *vma) { /* * munlock if the function is called. Ideally, we should only * do munlock if any page of folio is unmapped from VMA and * cause folio not fully mapped to VMA. * * But it's not easy to confirm that's the situation. So we * always munlock the folio and page reclaim will correct it * if it's wrong. */ if (unlikely(vma->vm_flags & VM_LOCKED)) munlock_folio(folio); } void mlock_new_folio(struct folio *folio); bool need_mlock_drain(int cpu); void mlock_drain_local(void); void mlock_drain_remote(int cpu); extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); /** * vma_address - Find the virtual address a page range is mapped at * @vma: The vma which maps this object. * @pgoff: The page offset within its object. * @nr_pages: The number of pages to consider. * * If any page in this range is mapped by this VMA, return the first address * where any of these pages appear. Otherwise, return -EFAULT. */ static inline unsigned long vma_address(struct vm_area_struct *vma, pgoff_t pgoff, unsigned long nr_pages) { unsigned long address; if (pgoff >= vma->vm_pgoff) { address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); /* Check for address beyond vma (or wrapped through 0?) */ if (address < vma->vm_start || address >= vma->vm_end) address = -EFAULT; } else if (pgoff + nr_pages - 1 >= vma->vm_pgoff) { /* Test above avoids possibility of wrap to 0 on 32-bit */ address = vma->vm_start; } else { address = -EFAULT; } return address; } /* * Then at what user virtual address will none of the range be found in vma? * Assumes that vma_address() already returned a good starting address. 
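 * The value returned is an exclusive end address, clamped to vma->vm_end
 * when the mapped range would extend past the vma.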
*/ static inline unsigned long vma_address_end(struct page_vma_mapped_walk *pvmw) { struct vm_area_struct *vma = pvmw->vma; pgoff_t pgoff; unsigned long address; /* Common case, plus ->pgoff is invalid for KSM */ if (pvmw->nr_pages == 1) return pvmw->address + PAGE_SIZE; pgoff = pvmw->pgoff + pvmw->nr_pages; address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); /* Check for address beyond vma (or wrapped through 0?) */ if (address < vma->vm_start || address > vma->vm_end) address = vma->vm_end; return address; } static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, struct file *fpin) { int flags = vmf->flags; if (fpin) return fpin; /* * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or * anything, so we only pin the file and drop the mmap_lock if only * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt. */ if (fault_flag_allow_retry_first(flags) && !(flags & FAULT_FLAG_RETRY_NOWAIT)) { fpin = get_file(vmf->vma->vm_file); release_fault_lock(vmf); } return fpin; } #else /* !CONFIG_MMU */ static inline void unmap_mapping_folio(struct folio *folio) { } static inline void mlock_new_folio(struct folio *folio) { } static inline bool need_mlock_drain(int cpu) { return false; } static inline void mlock_drain_local(void) { } static inline void mlock_drain_remote(int cpu) { } static inline void vunmap_range_noflush(unsigned long start, unsigned long end) { } #endif /* !CONFIG_MMU */ /* Memory initialisation debug and verification */ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT DECLARE_STATIC_KEY_TRUE(deferred_pages); bool __init deferred_grow_zone(struct zone *zone, unsigned int order); #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ enum mminit_level { MMINIT_WARNING, MMINIT_VERIFY, MMINIT_TRACE }; #ifdef CONFIG_DEBUG_MEMORY_INIT extern int mminit_loglevel; #define mminit_dprintk(level, prefix, fmt, arg...) \ do { \ if (level < mminit_loglevel) { \ if (level <= MMINIT_WARNING) \ pr_warn("mminit::" prefix " " fmt, ##arg); \ else \ printk(KERN_DEBUG "mminit::" prefix " " fmt, ##arg); \ } \ } while (0) extern void mminit_verify_pageflags_layout(void); extern void mminit_verify_zonelist(void); #else static inline void mminit_dprintk(enum mminit_level level, const char *prefix, const char *fmt, ...) 
{ } static inline void mminit_verify_pageflags_layout(void) { } static inline void mminit_verify_zonelist(void) { } #endif /* CONFIG_DEBUG_MEMORY_INIT */ #define NODE_RECLAIM_NOSCAN -2 #define NODE_RECLAIM_FULL -1 #define NODE_RECLAIM_SOME 0 #define NODE_RECLAIM_SUCCESS 1 #ifdef CONFIG_NUMA extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); extern int find_next_best_node(int node, nodemask_t *used_node_mask); #else static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, unsigned int order) { return NODE_RECLAIM_NOSCAN; } static inline int find_next_best_node(int node, nodemask_t *used_node_mask) { return NUMA_NO_NODE; } #endif /* * mm/memory-failure.c */ #ifdef CONFIG_MEMORY_FAILURE void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu); void shake_folio(struct folio *folio); extern int hwpoison_filter(struct page *p); extern u32 hwpoison_filter_dev_major; extern u32 hwpoison_filter_dev_minor; extern u64 hwpoison_filter_flags_mask; extern u64 hwpoison_filter_flags_value; extern u64 hwpoison_filter_memcg; extern u32 hwpoison_filter_enable; #define MAGIC_HWPOISON 0x48575053U /* HWPS */ void SetPageHWPoisonTakenOff(struct page *page); void ClearPageHWPoisonTakenOff(struct page *page); bool take_page_off_buddy(struct page *page); bool put_page_back_buddy(struct page *page); struct task_struct *task_early_kill(struct task_struct *tsk, int force_early); void add_to_kill_ksm(struct task_struct *tsk, struct page *p, struct vm_area_struct *vma, struct list_head *to_kill, unsigned long ksm_addr); unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); #else static inline void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) { } #endif extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern void set_pageblock_order(void); struct folio *alloc_migrate_folio(struct folio *src, unsigned long private); unsigned long reclaim_pages(struct list_head *folio_list); unsigned int reclaim_clean_pages_from_list(struct zone *zone, struct list_head *folio_list); /* The ALLOC_WMARK bits are used as an index to zone->watermark */ #define ALLOC_WMARK_MIN WMARK_MIN #define ALLOC_WMARK_LOW WMARK_LOW #define ALLOC_WMARK_HIGH WMARK_HIGH #define ALLOC_NO_WATERMARKS 0x04 /* don't check watermarks at all */ /* Mask to get the watermark bits */ #define ALLOC_WMARK_MASK (ALLOC_NO_WATERMARKS-1) /* * Only MMU archs have async oom victim reclaim - aka oom_reaper so we * cannot assume a reduced access to memory reserves is sufficient for * !MMU */ #ifdef CONFIG_MMU #define ALLOC_OOM 0x08 #else #define ALLOC_OOM ALLOC_NO_WATERMARKS #endif #define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access * to 25% of the min watermark or * 62.5% if __GFP_HIGH is set. */ #define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50% * of the min watermark. */ #define ALLOC_CPUSET 0x40 /* check for correct cpuset */ #define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ #ifdef CONFIG_ZONE_DMA32 #define ALLOC_NOFRAGMENT 0x100 /* avoid mixing pageblock types */ #else #define ALLOC_NOFRAGMENT 0x0 #endif #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ /* Flags that allow allocations below the min watermark. 
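 * For example (illustrative, not from the original header): a GFP_ATOMIC
 * caller typically ends up with ALLOC_MIN_RESERVE and ALLOC_NON_BLOCK set
 * and may dip below the min watermark, while a plain GFP_KERNEL allocation
 * carries none of these bits and must stay above it.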
*/ #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) enum ttu_flags; struct tlbflush_unmap_batch; /* * only for MM internal work items which do not depend on * any allocations or locks which might depend on allocations */ extern struct workqueue_struct *mm_percpu_wq; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH void try_to_unmap_flush(void); void try_to_unmap_flush_dirty(void); void flush_tlb_batched_pending(struct mm_struct *mm); #else static inline void try_to_unmap_flush(void) { } static inline void try_to_unmap_flush_dirty(void) { } static inline void flush_tlb_batched_pending(struct mm_struct *mm) { } #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ extern const struct trace_print_flags pageflag_names[]; extern const struct trace_print_flags vmaflag_names[]; extern const struct trace_print_flags gfpflag_names[]; static inline bool is_migrate_highatomic(enum migratetype migratetype) { return migratetype == MIGRATE_HIGHATOMIC; } void setup_zone_pageset(struct zone *zone); struct migration_target_control { int nid; /* preferred node id */ nodemask_t *nmask; gfp_t gfp_mask; enum migrate_reason reason; }; /* * mm/filemap.c */ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe, struct folio *folio, loff_t fpos, size_t size); /* * mm/vmalloc.c */ #ifdef CONFIG_MMU void __init vmalloc_init(void); int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift); #else static inline void vmalloc_init(void) { } static inline int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift) { return -EINVAL; } #endif int __must_check __vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift); void vunmap_range_noflush(unsigned long start, unsigned long end); void __vunmap_range_noflush(unsigned long start, unsigned long end); int numa_migrate_check(struct folio *folio, struct vm_fault *vmf, unsigned long addr, int *flags, bool writable, int *last_cpupid); void free_zone_device_folio(struct folio *folio); int migrate_device_coherent_folio(struct folio *folio); /* * mm/gup.c */ int __must_check try_grab_folio(struct folio *folio, int refs, unsigned int flags); /* * mm/huge_memory.c */ void touch_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud, bool write); void touch_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, bool write); enum { /* mark page accessed */ FOLL_TOUCH = 1 << 16, /* a retry, previous pass started an IO */ FOLL_TRIED = 1 << 17, /* we are working on non-current tsk/mm */ FOLL_REMOTE = 1 << 18, /* pages must be released via unpin_user_page */ FOLL_PIN = 1 << 19, /* gup_fast: prevent fall-back to slow gup */ FOLL_FAST_ONLY = 1 << 20, /* allow unlocking the mmap lock */ FOLL_UNLOCKABLE = 1 << 21, /* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */ FOLL_MADV_POPULATE = 1 << 22, }; #define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \ FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \ FOLL_MADV_POPULATE) /* * Indicates for which pages that are write-protected in the page table, * whether GUP has to trigger unsharing via FAULT_FLAG_UNSHARE such that the * GUP pin will remain consistent with the pages mapped into the page tables * of the MM. 
* * Temporary unmapping of PageAnonExclusive() pages or clearing of * PageAnonExclusive() has to protect against concurrent GUP: * * Ordinary GUP: Using the PT lock * * GUP-fast and fork(): mm->write_protect_seq * * GUP-fast and KSM or temporary unmapping (swap, migration): see * folio_try_share_anon_rmap_*() * * Must be called with the (sub)page that's actually referenced via the * page table entry, which might not necessarily be the head page for a * PTE-mapped THP. * * If the vma is NULL, we're coming from the GUP-fast path and might have * to fallback to the slow path just to lookup the vma. */ static inline bool gup_must_unshare(struct vm_area_struct *vma, unsigned int flags, struct page *page) { /* * FOLL_WRITE is implicitly handled correctly as the page table entry * has to be writable -- and if it references (part of) an anonymous * folio, that part is required to be marked exclusive. */ if ((flags & (FOLL_WRITE | FOLL_PIN)) != FOLL_PIN) return false; /* * Note: PageAnon(page) is stable until the page is actually getting * freed. */ if (!PageAnon(page)) { /* * We only care about R/O long-term pining: R/O short-term * pinning does not have the semantics to observe successive * changes through the process page tables. */ if (!(flags & FOLL_LONGTERM)) return false; /* We really need the vma ... */ if (!vma) return true; /* * ... because we only care about writable private ("COW") * mappings where we have to break COW early. */ return is_cow_mapping(vma->vm_flags); } /* Paired with a memory barrier in folio_try_share_anon_rmap_*(). */ if (IS_ENABLED(CONFIG_HAVE_GUP_FAST)) smp_rmb(); /* * Note that PageKsm() pages cannot be exclusive, and consequently, * cannot get pinned. */ return !PageAnonExclusive(page); } extern bool mirrored_kernelcore; extern bool memblock_has_mirror(void); static __always_inline void vma_set_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff) { vma->vm_start = start; vma->vm_end = end; vma->vm_pgoff = pgoff; } static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) { /* * NOTE: we must check this before VM_SOFTDIRTY on soft-dirty * enablements, because when without soft-dirty being compiled in, * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY) * will be constantly true. */ if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) return false; /* * Soft-dirty is kind of special: its tracking is enabled when the * vma flags not set. */ return !(vma->vm_flags & VM_SOFTDIRTY); } static inline bool pmd_needs_soft_dirty_wp(struct vm_area_struct *vma, pmd_t pmd) { return vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd); } static inline bool pte_needs_soft_dirty_wp(struct vm_area_struct *vma, pte_t pte) { return vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte); } void __meminit __init_single_page(struct page *page, unsigned long pfn, unsigned long zone, int nid); /* shrinker related functions */ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority); #ifdef CONFIG_64BIT static inline int can_do_mseal(unsigned long flags) { if (flags) return -EINVAL; return 0; } #else static inline int can_do_mseal(unsigned long flags) { return -EPERM; } #endif #ifdef CONFIG_SHRINKER_DEBUG static inline __printf(2, 0) int shrinker_debugfs_name_alloc( struct shrinker *shrinker, const char *fmt, va_list ap) { shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); return shrinker->name ? 
0 : -ENOMEM; } static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) { kfree_const(shrinker->name); shrinker->name = NULL; } extern int shrinker_debugfs_add(struct shrinker *shrinker); extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id); extern void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id); #else /* CONFIG_SHRINKER_DEBUG */ static inline int shrinker_debugfs_add(struct shrinker *shrinker) { return 0; } static inline int shrinker_debugfs_name_alloc(struct shrinker *shrinker, const char *fmt, va_list ap) { return 0; } static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) { } static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id) { *debugfs_id = -1; return NULL; } static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id) { } #endif /* CONFIG_SHRINKER_DEBUG */ /* Only track the nodes of mappings with shadow entries */ void workingset_update_node(struct xa_node *node); extern struct list_lru shadow_nodes; /* mremap.c */ unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, bool need_rmap_locks, bool for_stack); #ifdef CONFIG_UNACCEPTED_MEMORY void accept_page(struct page *page); #else /* CONFIG_UNACCEPTED_MEMORY */ static inline void accept_page(struct page *page) { } #endif /* CONFIG_UNACCEPTED_MEMORY */ #endif /* __MM_INTERNAL_H */
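The buddy relations documented for __find_buddy_pfn() above come down to two bit operations: XOR with (1 << order) to find the buddy of a block, and AND with ~(1 << order) to find the combined parent block. The following standalone sketch (illustrative only, not part of mm/internal.h; the helper names buddy_pfn and parent_pfn are made up here) reproduces that arithmetic with the pfn #8, order 1 example used in the comment:

#include <stdio.h>

/* Same relation as __find_buddy_pfn(): flip the bit selected by order. */
static unsigned long buddy_pfn(unsigned long pfn, unsigned int order)
{
	return pfn ^ (1UL << order);
}

/* Parent of the merged order+1 block: clear the bit selected by order. */
static unsigned long parent_pfn(unsigned long pfn, unsigned int order)
{
	return pfn & ~(1UL << order);
}

int main(void)
{
	printf("buddy(8, 1)   = %lu\n", buddy_pfn(8, 1));    /* 10 */
	printf("parent(8, 1)  = %lu\n", parent_pfn(8, 1));   /* 8  */
	printf("parent(10, 1) = %lu\n", parent_pfn(10, 1));  /* 8  */
	return 0;
}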
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
* * Authors: * Yaniv Kamay <yaniv@qumranet.com> * Avi Kivity <avi@qumranet.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "irq.h" #include "ioapic.h" #include "mmu.h" #include "mmu_internal.h" #include "tdp_mmu.h" #include "x86.h" #include "kvm_cache_regs.h" #include "smm.h" #include "kvm_emulate.h" #include "page_track.h" #include "cpuid.h" #include "spte.h" #include <linux/kvm_host.h> #include <linux/types.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/highmem.h> #include <linux/moduleparam.h> #include <linux/export.h> #include <linux/swap.h> #include <linux/hugetlb.h> #include <linux/compiler.h> #include <linux/srcu.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/uaccess.h> #include <linux/hash.h> #include <linux/kern_levels.h> #include <linux/kstrtox.h> #include <linux/kthread.h> #include <linux/wordpart.h> #include <asm/page.h> #include <asm/memtype.h> #include <asm/cmpxchg.h> #include <asm/io.h> #include <asm/set_memory.h> #include <asm/spec-ctrl.h> #include <asm/vmx.h> #include "trace.h" static bool nx_hugepage_mitigation_hard_disabled; int __read_mostly nx_huge_pages = -1; static uint __read_mostly nx_huge_pages_recovery_period_ms; #ifdef CONFIG_PREEMPT_RT /* Recovery can cause latency spikes, disable it for PREEMPT_RT. */ static uint __read_mostly nx_huge_pages_recovery_ratio = 0; #else static uint __read_mostly nx_huge_pages_recovery_ratio = 60; #endif static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp); static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp); static const struct kernel_param_ops nx_huge_pages_ops = { .set = set_nx_huge_pages, .get = get_nx_huge_pages, }; static const struct kernel_param_ops nx_huge_pages_recovery_param_ops = { .set = set_nx_huge_pages_recovery_param, .get = param_get_uint, }; module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644); __MODULE_PARM_TYPE(nx_huge_pages, "bool"); module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_param_ops, &nx_huge_pages_recovery_ratio, 0644); __MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint"); module_param_cb(nx_huge_pages_recovery_period_ms, &nx_huge_pages_recovery_param_ops, &nx_huge_pages_recovery_period_ms, 0644); __MODULE_PARM_TYPE(nx_huge_pages_recovery_period_ms, "uint"); static bool __read_mostly force_flush_and_sync_on_reuse; module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644); /* * When setting this variable to true it enables Two-Dimensional-Paging * where the hardware walks 2 page tables: * 1. the guest-virtual to guest-physical * 2. while doing 1. it walks guest-physical to host-physical * If the hardware supports that we don't need to do shadow paging. */ bool tdp_enabled = false; static bool __ro_after_init tdp_mmu_allowed; #ifdef CONFIG_X86_64 bool __read_mostly tdp_mmu_enabled = true; module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0444); #endif static int max_huge_page_level __read_mostly; static int tdp_root_level __read_mostly; static int max_tdp_level __read_mostly; #define PTE_PREFETCH_NUM 8 #include <trace/events/kvm.h> /* make pte_list_desc fit well in cache lines */ #define PTE_LIST_EXT 14 /* * struct pte_list_desc is the core data structure used to implement a custom * list for tracking a set of related SPTEs, e.g. all the SPTEs that map a * given GFN when used in the context of rmaps. 
Using a custom list allows KVM * to optimize for the common case where many GFNs will have at most a handful * of SPTEs pointing at them, i.e. allows packing multiple SPTEs into a small * memory footprint, which in turn improves runtime performance by exploiting * cache locality. * * A list is comprised of one or more pte_list_desc objects (descriptors). * Each individual descriptor stores up to PTE_LIST_EXT SPTEs. If a descriptor * is full and a new SPTEs needs to be added, a new descriptor is allocated and * becomes the head of the list. This means that by definitions, all tail * descriptors are full. * * Note, the meta data fields are deliberately placed at the start of the * structure to optimize the cacheline layout; accessing the descriptor will * touch only a single cacheline so long as @spte_count<=6 (or if only the * descriptors metadata is accessed). */ struct pte_list_desc { struct pte_list_desc *more; /* The number of PTEs stored in _this_ descriptor. */ u32 spte_count; /* The number of PTEs stored in all tails of this descriptor. */ u32 tail_count; u64 *sptes[PTE_LIST_EXT]; }; struct kvm_shadow_walk_iterator { u64 addr; hpa_t shadow_addr; u64 *sptep; int level; unsigned index; }; #define for_each_shadow_entry_using_root(_vcpu, _root, _addr, _walker) \ for (shadow_walk_init_using_root(&(_walker), (_vcpu), \ (_root), (_addr)); \ shadow_walk_okay(&(_walker)); \ shadow_walk_next(&(_walker))) #define for_each_shadow_entry(_vcpu, _addr, _walker) \ for (shadow_walk_init(&(_walker), _vcpu, _addr); \ shadow_walk_okay(&(_walker)); \ shadow_walk_next(&(_walker))) #define for_each_shadow_entry_lockless(_vcpu, _addr, _walker, spte) \ for (shadow_walk_init(&(_walker), _vcpu, _addr); \ shadow_walk_okay(&(_walker)) && \ ({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }); \ __shadow_walk_next(&(_walker), spte)) static struct kmem_cache *pte_list_desc_cache; struct kmem_cache *mmu_page_header_cache; static struct percpu_counter kvm_total_used_mmu_pages; static void mmu_spte_set(u64 *sptep, u64 spte); struct kvm_mmu_role_regs { const unsigned long cr0; const unsigned long cr4; const u64 efer; }; #define CREATE_TRACE_POINTS #include "mmutrace.h" /* * Yes, lot's of underscores. They're a hint that you probably shouldn't be * reading from the role_regs. Once the root_role is constructed, it becomes * the single source of truth for the MMU's state. */ #define BUILD_MMU_ROLE_REGS_ACCESSOR(reg, name, flag) \ static inline bool __maybe_unused \ ____is_##reg##_##name(const struct kvm_mmu_role_regs *regs) \ { \ return !!(regs->reg & flag); \ } BUILD_MMU_ROLE_REGS_ACCESSOR(cr0, pg, X86_CR0_PG); BUILD_MMU_ROLE_REGS_ACCESSOR(cr0, wp, X86_CR0_WP); BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, pse, X86_CR4_PSE); BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, pae, X86_CR4_PAE); BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, smep, X86_CR4_SMEP); BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, smap, X86_CR4_SMAP); BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, pke, X86_CR4_PKE); BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, la57, X86_CR4_LA57); BUILD_MMU_ROLE_REGS_ACCESSOR(efer, nx, EFER_NX); BUILD_MMU_ROLE_REGS_ACCESSOR(efer, lma, EFER_LMA); /* * The MMU itself (with a valid role) is the single source of truth for the * MMU. Do not use the regs used to build the MMU/role, nor the vCPU. The * regs don't account for dependencies, e.g. clearing CR4 bits if CR0.PG=1, * and the vCPU may be incorrect/irrelevant. */ #define BUILD_MMU_ROLE_ACCESSOR(base_or_ext, reg, name) \ static inline bool __maybe_unused is_##reg##_##name(struct kvm_mmu *mmu) \ { \ return !!(mmu->cpu_role. base_or_ext . 
reg##_##name); \ } BUILD_MMU_ROLE_ACCESSOR(base, cr0, wp); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, pse); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, smep); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, smap); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, pke); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, la57); BUILD_MMU_ROLE_ACCESSOR(base, efer, nx); BUILD_MMU_ROLE_ACCESSOR(ext, efer, lma); static inline bool is_cr0_pg(struct kvm_mmu *mmu) { return mmu->cpu_role.base.level > 0; } static inline bool is_cr4_pae(struct kvm_mmu *mmu) { return !mmu->cpu_role.base.has_4_byte_gpte; } static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu) { struct kvm_mmu_role_regs regs = { .cr0 = kvm_read_cr0_bits(vcpu, KVM_MMU_CR0_ROLE_BITS), .cr4 = kvm_read_cr4_bits(vcpu, KVM_MMU_CR4_ROLE_BITS), .efer = vcpu->arch.efer, }; return regs; } static unsigned long get_guest_cr3(struct kvm_vcpu *vcpu) { return kvm_read_cr3(vcpu); } static inline unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3) return kvm_read_cr3(vcpu); return mmu->get_guest_pgd(vcpu); } static inline bool kvm_available_flush_remote_tlbs_range(void) { #if IS_ENABLED(CONFIG_HYPERV) return kvm_x86_ops.flush_remote_tlbs_range; #else return false; #endif } static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index); /* Flush the range of guest memory mapped by the given SPTE. */ static void kvm_flush_remote_tlbs_sptep(struct kvm *kvm, u64 *sptep) { struct kvm_mmu_page *sp = sptep_to_sp(sptep); gfn_t gfn = kvm_mmu_page_get_gfn(sp, spte_index(sptep)); kvm_flush_remote_tlbs_gfn(kvm, gfn, sp->role.level); } static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, unsigned int access) { u64 spte = make_mmio_spte(vcpu, gfn, access); trace_mark_mmio_spte(sptep, gfn, spte); mmu_spte_set(sptep, spte); } static gfn_t get_mmio_spte_gfn(u64 spte) { u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; gpa |= (spte >> SHADOW_NONPRESENT_OR_RSVD_MASK_LEN) & shadow_nonpresent_or_rsvd_mask; return gpa >> PAGE_SHIFT; } static unsigned get_mmio_spte_access(u64 spte) { return spte & shadow_mmio_access_mask; } static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) { u64 kvm_gen, spte_gen, gen; gen = kvm_vcpu_memslots(vcpu)->generation; if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS)) return false; kvm_gen = gen & MMIO_SPTE_GEN_MASK; spte_gen = get_mmio_spte_generation(spte); trace_check_mmio_spte(spte, kvm_gen, spte_gen); return likely(kvm_gen == spte_gen); } static int is_cpuid_PSE36(void) { return 1; } #ifdef CONFIG_X86_64 static void __set_spte(u64 *sptep, u64 spte) { KVM_MMU_WARN_ON(is_ept_ve_possible(spte)); WRITE_ONCE(*sptep, spte); } static void __update_clear_spte_fast(u64 *sptep, u64 spte) { KVM_MMU_WARN_ON(is_ept_ve_possible(spte)); WRITE_ONCE(*sptep, spte); } static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) { KVM_MMU_WARN_ON(is_ept_ve_possible(spte)); return xchg(sptep, spte); } static u64 __get_spte_lockless(u64 *sptep) { return READ_ONCE(*sptep); } #else union split_spte { struct { u32 spte_low; u32 spte_high; }; u64 spte; }; static void count_spte_clear(u64 *sptep, u64 spte) { struct kvm_mmu_page *sp = sptep_to_sp(sptep); if (is_shadow_present_pte(spte)) return; /* Ensure the spte is completely set before we increase the count */ smp_wmb(); sp->clear_spte_count++; } static void __set_spte(u64 *sptep, u64 spte) { union split_spte *ssptep, sspte; ssptep = (union split_spte *)sptep; sspte = (union split_spte)spte; ssptep->spte_high 
= sspte.spte_high; /* * If we map the spte from nonpresent to present, We should store * the high bits firstly, then set present bit, so cpu can not * fetch this spte while we are setting the spte. */ smp_wmb(); WRITE_ONCE(ssptep->spte_low, sspte.spte_low); } static void __update_clear_spte_fast(u64 *sptep, u64 spte) { union split_spte *ssptep, sspte; ssptep = (union split_spte *)sptep; sspte = (union split_spte)spte; WRITE_ONCE(ssptep->spte_low, sspte.spte_low); /* * If we map the spte from present to nonpresent, we should clear * present bit firstly to avoid vcpu fetch the old high bits. */ smp_wmb(); ssptep->spte_high = sspte.spte_high; count_spte_clear(sptep, spte); } static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) { union split_spte *ssptep, sspte, orig; ssptep = (union split_spte *)sptep; sspte = (union split_spte)spte; /* xchg acts as a barrier before the setting of the high bits */ orig.spte_low = xchg(&ssptep->spte_low, sspte.spte_low); orig.spte_high = ssptep->spte_high; ssptep->spte_high = sspte.spte_high; count_spte_clear(sptep, spte); return orig.spte; } /* * The idea using the light way get the spte on x86_32 guest is from * gup_get_pte (mm/gup.c). * * An spte tlb flush may be pending, because they are coalesced and * we are running out of the MMU lock. Therefore * we need to protect against in-progress updates of the spte. * * Reading the spte while an update is in progress may get the old value * for the high part of the spte. The race is fine for a present->non-present * change (because the high part of the spte is ignored for non-present spte), * but for a present->present change we must reread the spte. * * All such changes are done in two steps (present->non-present and * non-present->present), hence it is enough to count the number of * present->non-present updates: if it changed while reading the spte, * we might have hit the race. This is done using clear_spte_count. */ static u64 __get_spte_lockless(u64 *sptep) { struct kvm_mmu_page *sp = sptep_to_sp(sptep); union split_spte spte, *orig = (union split_spte *)sptep; int count; retry: count = sp->clear_spte_count; smp_rmb(); spte.spte_low = orig->spte_low; smp_rmb(); spte.spte_high = orig->spte_high; smp_rmb(); if (unlikely(spte.spte_low != orig->spte_low || count != sp->clear_spte_count)) goto retry; return spte.spte; } #endif /* Rules for using mmu_spte_set: * Set the sptep from nonpresent to present. * Note: the sptep being assigned *must* be either not present * or in a state where the hardware will not attempt to update * the spte. */ static void mmu_spte_set(u64 *sptep, u64 new_spte) { WARN_ON_ONCE(is_shadow_present_pte(*sptep)); __set_spte(sptep, new_spte); } /* * Update the SPTE (excluding the PFN), but do not track changes in its * accessed/dirty status. */ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte) { u64 old_spte = *sptep; WARN_ON_ONCE(!is_shadow_present_pte(new_spte)); check_spte_writable_invariants(new_spte); if (!is_shadow_present_pte(old_spte)) { mmu_spte_set(sptep, new_spte); return old_spte; } if (!spte_has_volatile_bits(old_spte)) __update_clear_spte_fast(sptep, new_spte); else old_spte = __update_clear_spte_slow(sptep, new_spte); WARN_ON_ONCE(spte_to_pfn(old_spte) != spte_to_pfn(new_spte)); return old_spte; } /* Rules for using mmu_spte_update: * Update the state bits, it means the mapped pfn is not changed. * * Whenever an MMU-writable SPTE is overwritten with a read-only SPTE, remote * TLBs must be flushed. 
Otherwise rmap_write_protect will find a read-only * spte, even though the writable spte might be cached on a CPU's TLB. * * Returns true if the TLB needs to be flushed */ static bool mmu_spte_update(u64 *sptep, u64 new_spte) { bool flush = false; u64 old_spte = mmu_spte_update_no_track(sptep, new_spte); if (!is_shadow_present_pte(old_spte)) return false; /* * For the spte updated out of mmu-lock is safe, since * we always atomically update it, see the comments in * spte_has_volatile_bits(). */ if (is_mmu_writable_spte(old_spte) && !is_writable_pte(new_spte)) flush = true; /* * Flush TLB when accessed/dirty states are changed in the page tables, * to guarantee consistency between TLB and page tables. */ if (is_accessed_spte(old_spte) && !is_accessed_spte(new_spte)) { flush = true; kvm_set_pfn_accessed(spte_to_pfn(old_spte)); } if (is_dirty_spte(old_spte) && !is_dirty_spte(new_spte)) { flush = true; kvm_set_pfn_dirty(spte_to_pfn(old_spte)); } return flush; } /* * Rules for using mmu_spte_clear_track_bits: * It sets the sptep from present to nonpresent, and track the * state bits, it is used to clear the last level sptep. * Returns the old PTE. */ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep) { kvm_pfn_t pfn; u64 old_spte = *sptep; int level = sptep_to_sp(sptep)->role.level; struct page *page; if (!is_shadow_present_pte(old_spte) || !spte_has_volatile_bits(old_spte)) __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE); else old_spte = __update_clear_spte_slow(sptep, SHADOW_NONPRESENT_VALUE); if (!is_shadow_present_pte(old_spte)) return old_spte; kvm_update_page_stats(kvm, level, -1); pfn = spte_to_pfn(old_spte); /* * KVM doesn't hold a reference to any pages mapped into the guest, and * instead uses the mmu_notifier to ensure that KVM unmaps any pages * before they are reclaimed. Sanity check that, if the pfn is backed * by a refcounted page, the refcount is elevated. */ page = kvm_pfn_to_refcounted_page(pfn); WARN_ON_ONCE(page && !page_count(page)); if (is_accessed_spte(old_spte)) kvm_set_pfn_accessed(pfn); if (is_dirty_spte(old_spte)) kvm_set_pfn_dirty(pfn); return old_spte; } /* * Rules for using mmu_spte_clear_no_track: * Directly clear spte without caring the state bits of sptep, * it is used to set the upper level spte. */ static void mmu_spte_clear_no_track(u64 *sptep) { __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE); } static u64 mmu_spte_get_lockless(u64 *sptep) { return __get_spte_lockless(sptep); } static inline bool is_tdp_mmu_active(struct kvm_vcpu *vcpu) { return tdp_mmu_enabled && vcpu->arch.mmu->root_role.direct; } static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) { if (is_tdp_mmu_active(vcpu)) { kvm_tdp_mmu_walk_lockless_begin(); } else { /* * Prevent page table teardown by making any free-er wait during * kvm_flush_remote_tlbs() IPI to all active vcpus. */ local_irq_disable(); /* * Make sure a following spte read is not reordered ahead of the write * to vcpu->mode. */ smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); } } static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) { if (is_tdp_mmu_active(vcpu)) { kvm_tdp_mmu_walk_lockless_end(); } else { /* * Make sure the write to vcpu->mode is not reordered in front of * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us * OUTSIDE_GUEST_MODE and proceed to free the shadow page table. 
*/ smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); local_irq_enable(); } } static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect) { int r; /* 1 rmap, 1 parent PTE per level, and the prefetched rmaps. */ r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache, 1 + PT64_ROOT_MAX_LEVEL + PTE_PREFETCH_NUM); if (r) return r; r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadow_page_cache, PT64_ROOT_MAX_LEVEL); if (r) return r; if (maybe_indirect) { r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadowed_info_cache, PT64_ROOT_MAX_LEVEL); if (r) return r; } return kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache, PT64_ROOT_MAX_LEVEL); } static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) { kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadowed_info_cache); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache); } static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc) { kmem_cache_free(pte_list_desc_cache, pte_list_desc); } static bool sp_has_gptes(struct kvm_mmu_page *sp); static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) { if (sp->role.passthrough) return sp->gfn; if (sp->shadowed_translation) return sp->shadowed_translation[index] >> PAGE_SHIFT; return sp->gfn + (index << ((sp->role.level - 1) * SPTE_LEVEL_BITS)); } /* * For leaf SPTEs, fetch the *guest* access permissions being shadowed. Note * that the SPTE itself may have a more constrained access permissions that * what the guest enforces. For example, a guest may create an executable * huge PTE but KVM may disallow execution to mitigate iTLB multihit. */ static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index) { if (sp->shadowed_translation) return sp->shadowed_translation[index] & ACC_ALL; /* * For direct MMUs (e.g. TDP or non-paging guests) or passthrough SPs, * KVM is not shadowing any guest page tables, so the "guest access * permissions" are just ACC_ALL. * * For direct SPs in indirect MMUs (shadow paging), i.e. when KVM * is shadowing a guest huge page with small pages, the guest access * permissions being shadowed are the access permissions of the huge * page. * * In both cases, sp->role.access contains the correct access bits. */ return sp->role.access; } static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index, gfn_t gfn, unsigned int access) { if (sp->shadowed_translation) { sp->shadowed_translation[index] = (gfn << PAGE_SHIFT) | access; return; } WARN_ONCE(access != kvm_mmu_page_get_access(sp, index), "access mismatch under %s page %llx (expected %u, got %u)\n", sp->role.passthrough ? "passthrough" : "direct", sp->gfn, kvm_mmu_page_get_access(sp, index), access); WARN_ONCE(gfn != kvm_mmu_page_get_gfn(sp, index), "gfn mismatch under %s page %llx (expected %llx, got %llx)\n", sp->role.passthrough ? "passthrough" : "direct", sp->gfn, kvm_mmu_page_get_gfn(sp, index), gfn); } static void kvm_mmu_page_set_access(struct kvm_mmu_page *sp, int index, unsigned int access) { gfn_t gfn = kvm_mmu_page_get_gfn(sp, index); kvm_mmu_page_set_translation(sp, index, gfn, access); } /* * Return the pointer to the large page information for a given gfn, * handling slots that are not large page aligned. 
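 *
 * As a hypothetical illustration (slot layout invented for this comment):
 * with level == PG_LEVEL_2M (512 4KiB GFNs per 2MiB region), a slot with
 * base_gfn == 0x800 and gfn == 0xa37 gives
 * gfn_to_index() == (0xa37 >> 9) - (0x800 >> 9) == 1, i.e. the second entry
 * of slot->arch.lpage_info[0]. Subtracting the (possibly unaligned)
 * base_gfn is what copes with slots that do not start on a large page
 * boundary.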
*/ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, const struct kvm_memory_slot *slot, int level) { unsigned long idx; idx = gfn_to_index(gfn, slot->base_gfn, level); return &slot->arch.lpage_info[level - 2][idx]; } /* * The most significant bit in disallow_lpage tracks whether or not memory * attributes are mixed, i.e. not identical for all gfns at the current level. * The lower order bits are used to refcount other cases where a hugepage is * disallowed, e.g. if KVM has shadow a page table at the gfn. */ #define KVM_LPAGE_MIXED_FLAG BIT(31) static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot, gfn_t gfn, int count) { struct kvm_lpage_info *linfo; int old, i; for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { linfo = lpage_info_slot(gfn, slot, i); old = linfo->disallow_lpage; linfo->disallow_lpage += count; WARN_ON_ONCE((old ^ linfo->disallow_lpage) & KVM_LPAGE_MIXED_FLAG); } } void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn) { update_gfn_disallow_lpage_count(slot, gfn, 1); } void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn) { update_gfn_disallow_lpage_count(slot, gfn, -1); } static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; gfn_t gfn; kvm->arch.indirect_shadow_pages++; /* * Ensure indirect_shadow_pages is elevated prior to re-reading guest * child PTEs in FNAME(gpte_changed), i.e. guarantee either in-flight * emulated writes are visible before re-reading guest PTEs, or that * an emulated write will see the elevated count and acquire mmu_lock * to update SPTEs. Pairs with the smp_mb() in kvm_mmu_track_write(). */ smp_mb(); gfn = sp->gfn; slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); /* the non-leaf shadow pages are keeping readonly. */ if (sp->role.level > PG_LEVEL_4K) return __kvm_write_track_add_gfn(kvm, slot, gfn); kvm_mmu_gfn_disallow_lpage(slot, gfn); if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) kvm_flush_remote_tlbs_gfn(kvm, gfn, PG_LEVEL_4K); } void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) { /* * If it's possible to replace the shadow page with an NX huge page, * i.e. if the shadow page is the only thing currently preventing KVM * from using a huge page, add the shadow page to the list of "to be * zapped for NX recovery" pages. Note, the shadow page can already be * on the list if KVM is reusing an existing shadow page, i.e. if KVM * links a shadow page at multiple points. 
*/ if (!list_empty(&sp->possible_nx_huge_page_link)) return; ++kvm->stat.nx_lpage_splits; list_add_tail(&sp->possible_nx_huge_page_link, &kvm->arch.possible_nx_huge_pages); } static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, bool nx_huge_page_possible) { sp->nx_huge_page_disallowed = true; if (nx_huge_page_possible) track_possible_nx_huge_page(kvm, sp); } static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; gfn_t gfn; kvm->arch.indirect_shadow_pages--; gfn = sp->gfn; slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); if (sp->role.level > PG_LEVEL_4K) return __kvm_write_track_remove_gfn(kvm, slot, gfn); kvm_mmu_gfn_allow_lpage(slot, gfn); } void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) { if (list_empty(&sp->possible_nx_huge_page_link)) return; --kvm->stat.nx_lpage_splits; list_del_init(&sp->possible_nx_huge_page_link); } static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) { sp->nx_huge_page_disallowed = false; untrack_possible_nx_huge_page(kvm, sp); } static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) { struct kvm_memory_slot *slot; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return NULL; if (no_dirty_log && kvm_slot_dirty_track_enabled(slot)) return NULL; return slot; } /* * About rmap_head encoding: * * If the bit zero of rmap_head->val is clear, then it points to the only spte * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct * pte_list_desc containing more mappings. */ #define KVM_RMAP_MANY BIT(0) /* * Returns the number of pointers in the rmap chain, not counting the new one. */ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; int count = 0; if (!rmap_head->val) { rmap_head->val = (unsigned long)spte; } else if (!(rmap_head->val & KVM_RMAP_MANY)) { desc = kvm_mmu_memory_cache_alloc(cache); desc->sptes[0] = (u64 *)rmap_head->val; desc->sptes[1] = spte; desc->spte_count = 2; desc->tail_count = 0; rmap_head->val = (unsigned long)desc | KVM_RMAP_MANY; ++count; } else { desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); count = desc->tail_count + desc->spte_count; /* * If the previous head is full, allocate a new head descriptor * as tail descriptors are always kept full. */ if (desc->spte_count == PTE_LIST_EXT) { desc = kvm_mmu_memory_cache_alloc(cache); desc->more = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); desc->spte_count = 0; desc->tail_count = count; rmap_head->val = (unsigned long)desc | KVM_RMAP_MANY; } desc->sptes[desc->spte_count++] = spte; } return count; } static void pte_list_desc_remove_entry(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct pte_list_desc *desc, int i) { struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); int j = head_desc->spte_count - 1; /* * The head descriptor should never be empty. A new head is added only * when adding an entry and the previous head is full, and heads are * removed (this flow) when they become empty. */ KVM_BUG_ON_DATA_CORRUPTION(j < 0, kvm); /* * Replace the to-be-freed SPTE with the last valid entry from the head * descriptor to ensure that tail descriptors are full at all times. * Note, this also means that tail_count is stable for each descriptor. 
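 *
 * Worked sketch with invented counts: with PTE_LIST_EXT == 14, a chain of
 * 20 SPTEs is a head descriptor with spte_count == 6 and tail_count == 14
 * whose ->more points at a single full tail descriptor. Removing any SPTE
 * copies head_desc->sptes[5] over the freed slot and drops the head to
 * spte_count == 5; the tail descriptor (and every tail_count) is untouched.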
*/ desc->sptes[i] = head_desc->sptes[j]; head_desc->sptes[j] = NULL; head_desc->spte_count--; if (head_desc->spte_count) return; /* * The head descriptor is empty. If there are no tail descriptors, * nullify the rmap head to mark the list as empty, else point the rmap * head at the next descriptor, i.e. the new head. */ if (!head_desc->more) rmap_head->val = 0; else rmap_head->val = (unsigned long)head_desc->more | KVM_RMAP_MANY; mmu_free_pte_list_desc(head_desc); } static void pte_list_remove(struct kvm *kvm, u64 *spte, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; int i; if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm)) return; if (!(rmap_head->val & KVM_RMAP_MANY)) { if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm)) return; rmap_head->val = 0; } else { desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); while (desc) { for (i = 0; i < desc->spte_count; ++i) { if (desc->sptes[i] == spte) { pte_list_desc_remove_entry(kvm, rmap_head, desc, i); return; } } desc = desc->more; } KVM_BUG_ON_DATA_CORRUPTION(true, kvm); } } static void kvm_zap_one_rmap_spte(struct kvm *kvm, struct kvm_rmap_head *rmap_head, u64 *sptep) { mmu_spte_clear_track_bits(kvm, sptep); pte_list_remove(kvm, sptep, rmap_head); } /* Return true if at least one SPTE was zapped, false otherwise */ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc, *next; int i; if (!rmap_head->val) return false; if (!(rmap_head->val & KVM_RMAP_MANY)) { mmu_spte_clear_track_bits(kvm, (u64 *)rmap_head->val); goto out; } desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); for (; desc; desc = next) { for (i = 0; i < desc->spte_count; i++) mmu_spte_clear_track_bits(kvm, desc->sptes[i]); next = desc->more; mmu_free_pte_list_desc(desc); } out: /* rmap_head is meaningless now, remember to reset it */ rmap_head->val = 0; return true; } unsigned int pte_list_count(struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; if (!rmap_head->val) return 0; else if (!(rmap_head->val & KVM_RMAP_MANY)) return 1; desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); return desc->tail_count + desc->spte_count; } static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level, const struct kvm_memory_slot *slot) { unsigned long idx; idx = gfn_to_index(gfn, slot->base_gfn, level); return &slot->arch.rmap[level - PG_LEVEL_4K][idx]; } static void rmap_remove(struct kvm *kvm, u64 *spte) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; struct kvm_mmu_page *sp; gfn_t gfn; struct kvm_rmap_head *rmap_head; sp = sptep_to_sp(spte); gfn = kvm_mmu_page_get_gfn(sp, spte_index(spte)); /* * Unlike rmap_add, rmap_remove does not run in the context of a vCPU * so we have to determine which memslots to use based on context * information in sp->role. */ slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); rmap_head = gfn_to_rmap(gfn, sp->role.level, slot); pte_list_remove(kvm, spte, rmap_head); } /* * Used by the following functions to iterate through the sptes linked by a * rmap. All fields are private and not assumed to be used outside. */ struct rmap_iterator { /* private fields */ struct pte_list_desc *desc; /* holds the sptep if not NULL */ int pos; /* index of the sptep */ }; /* * Iteration must be started by this function. This should also be used after * removing/dropping sptes from the rmap link because in such cases the * information in the iterator may not be valid. 
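 *
 * A minimal usage sketch (hypothetical caller; "flush" is a local bool and
 * rmap_head is assumed to come from gfn_to_rmap(), mirroring
 * rmap_write_protect() below):
 *
 *	struct rmap_iterator iter;
 *	u64 *sptep;
 *
 *	for_each_rmap_spte(rmap_head, &iter, sptep)
 *		flush |= spte_write_protect(sptep, true);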
* * Returns sptep if found, NULL otherwise. */ static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, struct rmap_iterator *iter) { u64 *sptep; if (!rmap_head->val) return NULL; if (!(rmap_head->val & KVM_RMAP_MANY)) { iter->desc = NULL; sptep = (u64 *)rmap_head->val; goto out; } iter->desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY); iter->pos = 0; sptep = iter->desc->sptes[iter->pos]; out: BUG_ON(!is_shadow_present_pte(*sptep)); return sptep; } /* * Must be used with a valid iterator: e.g. after rmap_get_first(). * * Returns sptep if found, NULL otherwise. */ static u64 *rmap_get_next(struct rmap_iterator *iter) { u64 *sptep; if (iter->desc) { if (iter->pos < PTE_LIST_EXT - 1) { ++iter->pos; sptep = iter->desc->sptes[iter->pos]; if (sptep) goto out; } iter->desc = iter->desc->more; if (iter->desc) { iter->pos = 0; /* desc->sptes[0] cannot be NULL */ sptep = iter->desc->sptes[iter->pos]; goto out; } } return NULL; out: BUG_ON(!is_shadow_present_pte(*sptep)); return sptep; } #define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \ for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \ _spte_; _spte_ = rmap_get_next(_iter_)) static void drop_spte(struct kvm *kvm, u64 *sptep) { u64 old_spte = mmu_spte_clear_track_bits(kvm, sptep); if (is_shadow_present_pte(old_spte)) rmap_remove(kvm, sptep); } static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush) { struct kvm_mmu_page *sp; sp = sptep_to_sp(sptep); WARN_ON_ONCE(sp->role.level == PG_LEVEL_4K); drop_spte(kvm, sptep); if (flush) kvm_flush_remote_tlbs_sptep(kvm, sptep); } /* * Write-protect on the specified @sptep, @pt_protect indicates whether * spte write-protection is caused by protecting shadow page table. * * Note: write protection is difference between dirty logging and spte * protection: * - for dirty logging, the spte can be set to writable at anytime if * its dirty bitmap is properly set. * - for spte protection, the spte can be writable only after unsync-ing * shadow page. * * Return true if tlb need be flushed. */ static bool spte_write_protect(u64 *sptep, bool pt_protect) { u64 spte = *sptep; if (!is_writable_pte(spte) && !(pt_protect && is_mmu_writable_spte(spte))) return false; if (pt_protect) spte &= ~shadow_mmu_writable_mask; spte = spte & ~PT_WRITABLE_MASK; return mmu_spte_update(sptep, spte); } static bool rmap_write_protect(struct kvm_rmap_head *rmap_head, bool pt_protect) { u64 *sptep; struct rmap_iterator iter; bool flush = false; for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_write_protect(sptep, pt_protect); return flush; } static bool spte_clear_dirty(u64 *sptep) { u64 spte = *sptep; KVM_MMU_WARN_ON(!spte_ad_enabled(spte)); spte &= ~shadow_dirty_mask; return mmu_spte_update(sptep, spte); } static bool spte_wrprot_for_clear_dirty(u64 *sptep) { bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT, (unsigned long *)sptep); if (was_writable && !spte_ad_enabled(*sptep)) kvm_set_pfn_dirty(spte_to_pfn(*sptep)); return was_writable; } /* * Gets the GFN ready for another round of dirty logging by clearing the * - D bit on ad-enabled SPTEs, and * - W bit on ad-disabled SPTEs. * Returns true iff any D or W bits were cleared. 
*/ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { u64 *sptep; struct rmap_iterator iter; bool flush = false; for_each_rmap_spte(rmap_head, &iter, sptep) if (spte_ad_need_write_protect(*sptep)) flush |= spte_wrprot_for_clear_dirty(sptep); else flush |= spte_clear_dirty(sptep); return flush; } static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { struct kvm_rmap_head *rmap_head; if (tdp_mmu_enabled) kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot, slot->base_gfn + gfn_offset, mask, true); if (!kvm_memslots_have_rmaps(kvm)) return; while (mask) { rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), PG_LEVEL_4K, slot); rmap_write_protect(rmap_head, false); /* clear the first set bit */ mask &= mask - 1; } } static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { struct kvm_rmap_head *rmap_head; if (tdp_mmu_enabled) kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot, slot->base_gfn + gfn_offset, mask, false); if (!kvm_memslots_have_rmaps(kvm)) return; while (mask) { rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), PG_LEVEL_4K, slot); __rmap_clear_dirty(kvm, rmap_head, slot); /* clear the first set bit */ mask &= mask - 1; } } void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { /* * If the slot was assumed to be "initially all dirty", write-protect * huge pages to ensure they are split to 4KiB on the first write (KVM * dirty logs at 4KiB granularity). If eager page splitting is enabled, * immediately try to split huge pages, e.g. so that vCPUs don't get * saddled with the cost of splitting. * * The gfn_offset is guaranteed to be aligned to 64, but the base_gfn * of memslot has no such restriction, so the range can cross two large * pages. */ if (kvm_dirty_log_manual_protect_and_init_set(kvm)) { gfn_t start = slot->base_gfn + gfn_offset + __ffs(mask); gfn_t end = slot->base_gfn + gfn_offset + __fls(mask); if (READ_ONCE(eager_page_split)) kvm_mmu_try_split_huge_pages(kvm, slot, start, end + 1, PG_LEVEL_4K); kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M); /* Cross two large pages? */ if (ALIGN(start << PAGE_SHIFT, PMD_SIZE) != ALIGN(end << PAGE_SHIFT, PMD_SIZE)) kvm_mmu_slot_gfn_write_protect(kvm, slot, end, PG_LEVEL_2M); } /* * (Re)Enable dirty logging for all 4KiB SPTEs that map the GFNs in * mask. If PML is enabled and the GFN doesn't need to be write- * protected for other reasons, e.g. shadow paging, clear the Dirty bit. * Otherwise clear the Writable bit. * * Note that kvm_mmu_clear_dirty_pt_masked() is called whenever PML is * enabled but it chooses between clearing the Dirty bit and Writeable * bit based on the context. 
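 *
 * For example (values invented for illustration): with gfn_offset == 64 and
 * mask == 0xc01, dirty logging is re-enabled for base_gfn + 64, + 74 and
 * + 75; if kvm_x86_ops.cpu_dirty_log_size is non-zero (PML) their Dirty
 * bits are cleared, otherwise their Writable bits are cleared so the next
 * guest write faults and gets logged.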
*/ if (kvm_x86_ops.cpu_dirty_log_size) kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask); else kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } int kvm_cpu_dirty_log_size(void) { return kvm_x86_ops.cpu_dirty_log_size; } bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn, int min_level) { struct kvm_rmap_head *rmap_head; int i; bool write_protected = false; if (kvm_memslots_have_rmaps(kvm)) { for (i = min_level; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { rmap_head = gfn_to_rmap(gfn, i, slot); write_protected |= rmap_write_protect(rmap_head, true); } } if (tdp_mmu_enabled) write_protected |= kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn, min_level); return write_protected; } static bool kvm_vcpu_write_protect_gfn(struct kvm_vcpu *vcpu, u64 gfn) { struct kvm_memory_slot *slot; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn, PG_LEVEL_4K); } static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { return kvm_zap_all_rmap_sptes(kvm, rmap_head); } struct slot_rmap_walk_iterator { /* input fields. */ const struct kvm_memory_slot *slot; gfn_t start_gfn; gfn_t end_gfn; int start_level; int end_level; /* output fields. */ gfn_t gfn; struct kvm_rmap_head *rmap; int level; /* private field. */ struct kvm_rmap_head *end_rmap; }; static void rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level) { iterator->level = level; iterator->gfn = iterator->start_gfn; iterator->rmap = gfn_to_rmap(iterator->gfn, level, iterator->slot); iterator->end_rmap = gfn_to_rmap(iterator->end_gfn, level, iterator->slot); } static void slot_rmap_walk_init(struct slot_rmap_walk_iterator *iterator, const struct kvm_memory_slot *slot, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn) { iterator->slot = slot; iterator->start_level = start_level; iterator->end_level = end_level; iterator->start_gfn = start_gfn; iterator->end_gfn = end_gfn; rmap_walk_init_level(iterator, iterator->start_level); } static bool slot_rmap_walk_okay(struct slot_rmap_walk_iterator *iterator) { return !!iterator->rmap; } static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) { while (++iterator->rmap <= iterator->end_rmap) { iterator->gfn += KVM_PAGES_PER_HPAGE(iterator->level); if (iterator->rmap->val) return; } if (++iterator->level > iterator->end_level) { iterator->rmap = NULL; return; } rmap_walk_init_level(iterator, iterator->level); } #define for_each_slot_rmap_range(_slot_, _start_level_, _end_level_, \ _start_gfn, _end_gfn, _iter_) \ for (slot_rmap_walk_init(_iter_, _slot_, _start_level_, \ _end_level_, _start_gfn, _end_gfn); \ slot_rmap_walk_okay(_iter_); \ slot_rmap_walk_next(_iter_)) /* The return value indicates if tlb flush on all vcpus is needed. 
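 * kvm_zap_rmap() and __rmap_clear_dirty() above are handlers of this type;
 * they are driven over a memslot's rmaps by __walk_slot_rmaps() below.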
*/ typedef bool (*slot_rmaps_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot); static __always_inline bool __walk_slot_rmaps(struct kvm *kvm, const struct kvm_memory_slot *slot, slot_rmaps_handler fn, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn, bool can_yield, bool flush_on_yield, bool flush) { struct slot_rmap_walk_iterator iterator; lockdep_assert_held_write(&kvm->mmu_lock); for_each_slot_rmap_range(slot, start_level, end_level, start_gfn, end_gfn, &iterator) { if (iterator.rmap) flush |= fn(kvm, iterator.rmap, slot); if (!can_yield) continue; if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { if (flush && flush_on_yield) { kvm_flush_remote_tlbs_range(kvm, start_gfn, iterator.gfn - start_gfn + 1); flush = false; } cond_resched_rwlock_write(&kvm->mmu_lock); } } return flush; } static __always_inline bool walk_slot_rmaps(struct kvm *kvm, const struct kvm_memory_slot *slot, slot_rmaps_handler fn, int start_level, int end_level, bool flush_on_yield) { return __walk_slot_rmaps(kvm, slot, fn, start_level, end_level, slot->base_gfn, slot->base_gfn + slot->npages - 1, true, flush_on_yield, false); } static __always_inline bool walk_slot_rmaps_4k(struct kvm *kvm, const struct kvm_memory_slot *slot, slot_rmaps_handler fn, bool flush_on_yield) { return walk_slot_rmaps(kvm, slot, fn, PG_LEVEL_4K, PG_LEVEL_4K, flush_on_yield); } static bool __kvm_rmap_zap_gfn_range(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t start, gfn_t end, bool can_yield, bool flush) { return __walk_slot_rmaps(kvm, slot, kvm_zap_rmap, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, start, end - 1, can_yield, true, flush); } bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) { bool flush = false; if (kvm_memslots_have_rmaps(kvm)) flush = __kvm_rmap_zap_gfn_range(kvm, range->slot, range->start, range->end, range->may_block, flush); if (tdp_mmu_enabled) flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush); if (kvm_x86_ops.set_apic_access_page_addr && range->slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT) kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); return flush; } #define RMAP_RECYCLE_THRESHOLD 1000 static void __rmap_add(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, const struct kvm_memory_slot *slot, u64 *spte, gfn_t gfn, unsigned int access) { struct kvm_mmu_page *sp; struct kvm_rmap_head *rmap_head; int rmap_count; sp = sptep_to_sp(spte); kvm_mmu_page_set_translation(sp, spte_index(spte), gfn, access); kvm_update_page_stats(kvm, sp->role.level, 1); rmap_head = gfn_to_rmap(gfn, sp->role.level, slot); rmap_count = pte_list_add(cache, spte, rmap_head); if (rmap_count > kvm->stat.max_mmu_rmap_size) kvm->stat.max_mmu_rmap_size = rmap_count; if (rmap_count > RMAP_RECYCLE_THRESHOLD) { kvm_zap_all_rmap_sptes(kvm, rmap_head); kvm_flush_remote_tlbs_gfn(kvm, gfn, sp->role.level); } } static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot, u64 *spte, gfn_t gfn, unsigned int access) { struct kvm_mmu_memory_cache *cache = &vcpu->arch.mmu_pte_list_desc_cache; __rmap_add(vcpu->kvm, cache, slot, spte, gfn, access); } static bool kvm_rmap_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, bool test_only) { struct slot_rmap_walk_iterator iterator; struct rmap_iterator iter; bool young = false; u64 *sptep; for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, range->start, range->end - 1, &iterator) { for_each_rmap_spte(iterator.rmap, &iter, sptep) { u64 spte = *sptep; if 
(!is_accessed_spte(spte)) continue; if (test_only) return true; if (spte_ad_enabled(spte)) { clear_bit((ffs(shadow_accessed_mask) - 1), (unsigned long *)sptep); } else { /* * Capture the dirty status of the page, so that * it doesn't get lost when the SPTE is marked * for access tracking. */ if (is_writable_pte(spte)) kvm_set_pfn_dirty(spte_to_pfn(spte)); spte = mark_spte_for_access_track(spte); mmu_spte_update_no_track(sptep, spte); } young = true; } } return young; } bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { bool young = false; if (kvm_memslots_have_rmaps(kvm)) young = kvm_rmap_age_gfn_range(kvm, range, false); if (tdp_mmu_enabled) young |= kvm_tdp_mmu_age_gfn_range(kvm, range); return young; } bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { bool young = false; if (kvm_memslots_have_rmaps(kvm)) young = kvm_rmap_age_gfn_range(kvm, range, true); if (tdp_mmu_enabled) young |= kvm_tdp_mmu_test_age_gfn(kvm, range); return young; } static void kvm_mmu_check_sptes_at_free(struct kvm_mmu_page *sp) { #ifdef CONFIG_KVM_PROVE_MMU int i; for (i = 0; i < SPTE_ENT_PER_PAGE; i++) { if (KVM_MMU_WARN_ON(is_shadow_present_pte(sp->spt[i]))) pr_err_ratelimited("SPTE %llx (@ %p) for gfn %llx shadow-present at free", sp->spt[i], &sp->spt[i], kvm_mmu_page_get_gfn(sp, i)); } #endif } /* * This value is the sum of all of the kvm instances's * kvm->arch.n_used_mmu_pages values. We need a global, * aggregate version in order to make the slab shrinker * faster */ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr) { kvm->arch.n_used_mmu_pages += nr; percpu_counter_add(&kvm_total_used_mmu_pages, nr); } static void kvm_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm_mod_used_mmu_pages(kvm, +1); kvm_account_pgtable_pages((void *)sp->spt, +1); } static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm_mod_used_mmu_pages(kvm, -1); kvm_account_pgtable_pages((void *)sp->spt, -1); } static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp) { kvm_mmu_check_sptes_at_free(sp); hlist_del(&sp->hash_link); list_del(&sp->link); free_page((unsigned long)sp->spt); free_page((unsigned long)sp->shadowed_translation); kmem_cache_free(mmu_page_header_cache, sp); } static unsigned kvm_page_table_hashfn(gfn_t gfn) { return hash_64(gfn, KVM_MMU_HASH_SHIFT); } static void mmu_page_add_parent_pte(struct kvm_mmu_memory_cache *cache, struct kvm_mmu_page *sp, u64 *parent_pte) { if (!parent_pte) return; pte_list_add(cache, parent_pte, &sp->parent_ptes); } static void mmu_page_remove_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *parent_pte) { pte_list_remove(kvm, parent_pte, &sp->parent_ptes); } static void drop_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *parent_pte) { mmu_page_remove_parent_pte(kvm, sp, parent_pte); mmu_spte_clear_no_track(parent_pte); } static void mark_unsync(u64 *spte); static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) { u64 *sptep; struct rmap_iterator iter; for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { mark_unsync(sptep); } } static void mark_unsync(u64 *spte) { struct kvm_mmu_page *sp; sp = sptep_to_sp(spte); if (__test_and_set_bit(spte_index(spte), sp->unsync_child_bitmap)) return; if (sp->unsync_children++) return; kvm_mmu_mark_parents_unsync(sp); } #define KVM_PAGE_ARRAY_NR 16 struct kvm_mmu_pages { struct mmu_page_and_offset { struct kvm_mmu_page *sp; unsigned int idx; } page[KVM_PAGE_ARRAY_NR]; unsigned int nr; }; static int mmu_pages_add(struct kvm_mmu_pages 
*pvec, struct kvm_mmu_page *sp, int idx) { int i; if (sp->unsync) for (i=0; i < pvec->nr; i++) if (pvec->page[i].sp == sp) return 0; pvec->page[pvec->nr].sp = sp; pvec->page[pvec->nr].idx = idx; pvec->nr++; return (pvec->nr == KVM_PAGE_ARRAY_NR); } static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx) { --sp->unsync_children; WARN_ON_ONCE((int)sp->unsync_children < 0); __clear_bit(idx, sp->unsync_child_bitmap); } static int __mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_pages *pvec) { int i, ret, nr_unsync_leaf = 0; for_each_set_bit(i, sp->unsync_child_bitmap, 512) { struct kvm_mmu_page *child; u64 ent = sp->spt[i]; if (!is_shadow_present_pte(ent) || is_large_pte(ent)) { clear_unsync_child_bit(sp, i); continue; } child = spte_to_child_sp(ent); if (child->unsync_children) { if (mmu_pages_add(pvec, child, i)) return -ENOSPC; ret = __mmu_unsync_walk(child, pvec); if (!ret) { clear_unsync_child_bit(sp, i); continue; } else if (ret > 0) { nr_unsync_leaf += ret; } else return ret; } else if (child->unsync) { nr_unsync_leaf++; if (mmu_pages_add(pvec, child, i)) return -ENOSPC; } else clear_unsync_child_bit(sp, i); } return nr_unsync_leaf; } #define INVALID_INDEX (-1) static int mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_pages *pvec) { pvec->nr = 0; if (!sp->unsync_children) return 0; mmu_pages_add(pvec, sp, INVALID_INDEX); return __mmu_unsync_walk(sp, pvec); } static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { WARN_ON_ONCE(!sp->unsync); trace_kvm_mmu_sync_page(sp); sp->unsync = 0; --kvm->stat.mmu_unsync; } static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, struct list_head *invalid_list); static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list); static bool sp_has_gptes(struct kvm_mmu_page *sp) { if (sp->role.direct) return false; if (sp->role.passthrough) return false; return true; } #define for_each_valid_sp(_kvm, _sp, _list) \ hlist_for_each_entry(_sp, _list, hash_link) \ if (is_obsolete_sp((_kvm), (_sp))) { \ } else #define for_each_gfn_valid_sp(_kvm, _sp, _gfn) \ for_each_valid_sp(_kvm, _sp, \ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \ if ((_sp)->gfn != (_gfn)) {} else #define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn) \ for_each_gfn_valid_sp(_kvm, _sp, _gfn) \ if (!sp_has_gptes(_sp)) {} else static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { union kvm_mmu_page_role root_role = vcpu->arch.mmu->root_role; /* * Ignore various flags when verifying that it's safe to sync a shadow * page using the current MMU context. * * - level: not part of the overall MMU role and will never match as the MMU's * level tracks the root level * - access: updated based on the new guest PTE * - quadrant: not part of the overall MMU role (similar to level) */ const union kvm_mmu_page_role sync_role_ign = { .level = 0xf, .access = 0x7, .quadrant = 0x3, .passthrough = 0x1, }; /* * Direct pages can never be unsync, and KVM should never attempt to * sync a shadow page for a different MMU context, e.g. if the role * differs then the memslot lookup (SMM vs. non-SMM) will be bogus, the * reserved bits checks will be wrong, etc... 
*/ if (WARN_ON_ONCE(sp->role.direct || !vcpu->arch.mmu->sync_spte || (sp->role.word ^ root_role.word) & ~sync_role_ign.word)) return false; return true; } static int kvm_sync_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i) { /* sp->spt[i] has initial value of shadow page table allocation */ if (sp->spt[i] == SHADOW_NONPRESENT_VALUE) return 0; return vcpu->arch.mmu->sync_spte(vcpu, sp, i); } static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { int flush = 0; int i; if (!kvm_sync_page_check(vcpu, sp)) return -1; for (i = 0; i < SPTE_ENT_PER_PAGE; i++) { int ret = kvm_sync_spte(vcpu, sp, i); if (ret < -1) return -1; flush |= ret; } /* * Note, any flush is purely for KVM's correctness, e.g. when dropping * an existing SPTE or clearing W/A/D bits to ensure an mmu_notifier * unmap or dirty logging event doesn't fail to flush. The guest is * responsible for flushing the TLB to ensure any changes in protection * bits are recognized, i.e. until the guest flushes or page faults on * a relevant address, KVM is architecturally allowed to let vCPUs use * cached translations with the old protection bits. */ return flush; } static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { int ret = __kvm_sync_page(vcpu, sp); if (ret < 0) kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); return ret; } static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, struct list_head *invalid_list, bool remote_flush) { if (!remote_flush && list_empty(invalid_list)) return false; if (!list_empty(invalid_list)) kvm_mmu_commit_zap_page(kvm, invalid_list); else kvm_flush_remote_tlbs(kvm); return true; } static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) { if (sp->role.invalid) return true; /* TDP MMU pages do not use the MMU generation. */ return !is_tdp_mmu_page(sp) && unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); } struct mmu_page_path { struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL]; unsigned int idx[PT64_ROOT_MAX_LEVEL]; }; #define for_each_sp(pvec, sp, parents, i) \ for (i = mmu_pages_first(&pvec, &parents); \ i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ i = mmu_pages_next(&pvec, &parents, i)) static int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, int i) { int n; for (n = i+1; n < pvec->nr; n++) { struct kvm_mmu_page *sp = pvec->page[n].sp; unsigned idx = pvec->page[n].idx; int level = sp->role.level; parents->idx[level-1] = idx; if (level == PG_LEVEL_4K) break; parents->parent[level-2] = sp; } return n; } static int mmu_pages_first(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents) { struct kvm_mmu_page *sp; int level; if (pvec->nr == 0) return 0; WARN_ON_ONCE(pvec->page[0].idx != INVALID_INDEX); sp = pvec->page[0].sp; level = sp->role.level; WARN_ON_ONCE(level == PG_LEVEL_4K); parents->parent[level-2] = sp; /* Also set up a sentinel. Further entries in pvec are all * children of sp, so this element is never overwritten. 
*/ parents->parent[level-1] = NULL; return mmu_pages_next(pvec, parents, 0); } static void mmu_pages_clear_parents(struct mmu_page_path *parents) { struct kvm_mmu_page *sp; unsigned int level = 0; do { unsigned int idx = parents->idx[level]; sp = parents->parent[level]; if (!sp) return; WARN_ON_ONCE(idx == INVALID_INDEX); clear_unsync_child_bit(sp, idx); level++; } while (!sp->unsync_children); } static int mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *parent, bool can_yield) { int i; struct kvm_mmu_page *sp; struct mmu_page_path parents; struct kvm_mmu_pages pages; LIST_HEAD(invalid_list); bool flush = false; while (mmu_unsync_walk(parent, &pages)) { bool protected = false; for_each_sp(pages, sp, parents, i) protected |= kvm_vcpu_write_protect_gfn(vcpu, sp->gfn); if (protected) { kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, true); flush = false; } for_each_sp(pages, sp, parents, i) { kvm_unlink_unsync_page(vcpu->kvm, sp); flush |= kvm_sync_page(vcpu, sp, &invalid_list) > 0; mmu_pages_clear_parents(&parents); } if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) { kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, flush); if (!can_yield) { kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); return -EINTR; } cond_resched_rwlock_write(&vcpu->kvm->mmu_lock); flush = false; } } kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, flush); return 0; } static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) { atomic_set(&sp->write_flooding_count, 0); } static void clear_sp_write_flooding_count(u64 *spte) { __clear_sp_write_flooding_count(sptep_to_sp(spte)); } /* * The vCPU is required when finding indirect shadow pages; the shadow * page may already exist and syncing it needs the vCPU pointer in * order to read guest page tables. Direct shadow pages are never * unsync, thus @vcpu can be NULL if @role.direct is true. */ static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm, struct kvm_vcpu *vcpu, gfn_t gfn, struct hlist_head *sp_list, union kvm_mmu_page_role role) { struct kvm_mmu_page *sp; int ret; int collisions = 0; LIST_HEAD(invalid_list); for_each_valid_sp(kvm, sp, sp_list) { if (sp->gfn != gfn) { collisions++; continue; } if (sp->role.word != role.word) { /* * If the guest is creating an upper-level page, zap * unsync pages for the same gfn. While it's possible * the guest is using recursive page tables, in all * likelihood the guest has stopped using the unsync * page and is installing a completely unrelated page. * Unsync pages must not be left as is, because the new * upper-level page will be write-protected. */ if (role.level > PG_LEVEL_4K && sp->unsync) kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); continue; } /* unsync and write-flooding only apply to indirect SPs. */ if (sp->role.direct) goto out; if (sp->unsync) { if (KVM_BUG_ON(!vcpu, kvm)) break; /* * The page is good, but is stale. kvm_sync_page does * get the latest guest state, but (unlike mmu_unsync_children) * it doesn't write-protect the page or mark it synchronized! * This way the validity of the mapping is ensured, but the * overhead of write protection is not incurred until the * guest invalidates the TLB mapping. This allows multiple * SPs for a single gfn to be unsync. * * If the sync fails, the page is zapped. If so, break * in order to rebuild it. 
*/ ret = kvm_sync_page(vcpu, sp, &invalid_list); if (ret < 0) break; WARN_ON_ONCE(!list_empty(&invalid_list)); if (ret > 0) kvm_flush_remote_tlbs(kvm); } __clear_sp_write_flooding_count(sp); goto out; } sp = NULL; ++kvm->stat.mmu_cache_miss; out: kvm_mmu_commit_zap_page(kvm, &invalid_list); if (collisions > kvm->stat.max_mmu_page_hash_collisions) kvm->stat.max_mmu_page_hash_collisions = collisions; return sp; } /* Caches used when allocating a new shadow page. */ struct shadow_page_caches { struct kvm_mmu_memory_cache *page_header_cache; struct kvm_mmu_memory_cache *shadow_page_cache; struct kvm_mmu_memory_cache *shadowed_info_cache; }; static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm, struct shadow_page_caches *caches, gfn_t gfn, struct hlist_head *sp_list, union kvm_mmu_page_role role) { struct kvm_mmu_page *sp; sp = kvm_mmu_memory_cache_alloc(caches->page_header_cache); sp->spt = kvm_mmu_memory_cache_alloc(caches->shadow_page_cache); if (!role.direct && role.level <= KVM_MAX_HUGEPAGE_LEVEL) sp->shadowed_translation = kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); INIT_LIST_HEAD(&sp->possible_nx_huge_page_link); /* * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages() * depends on valid pages being added to the head of the list. See * comments in kvm_zap_obsolete_pages(). */ sp->mmu_valid_gen = kvm->arch.mmu_valid_gen; list_add(&sp->link, &kvm->arch.active_mmu_pages); kvm_account_mmu_page(kvm, sp); sp->gfn = gfn; sp->role = role; hlist_add_head(&sp->hash_link, sp_list); if (sp_has_gptes(sp)) account_shadowed(kvm, sp); return sp; } /* Note, @vcpu may be NULL if @role.direct is true; see kvm_mmu_find_shadow_page. */ static struct kvm_mmu_page *__kvm_mmu_get_shadow_page(struct kvm *kvm, struct kvm_vcpu *vcpu, struct shadow_page_caches *caches, gfn_t gfn, union kvm_mmu_page_role role) { struct hlist_head *sp_list; struct kvm_mmu_page *sp; bool created = false; sp_list = &kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]; sp = kvm_mmu_find_shadow_page(kvm, vcpu, gfn, sp_list, role); if (!sp) { created = true; sp = kvm_mmu_alloc_shadow_page(kvm, caches, gfn, sp_list, role); } trace_kvm_mmu_get_page(sp, created); return sp; } static struct kvm_mmu_page *kvm_mmu_get_shadow_page(struct kvm_vcpu *vcpu, gfn_t gfn, union kvm_mmu_page_role role) { struct shadow_page_caches caches = { .page_header_cache = &vcpu->arch.mmu_page_header_cache, .shadow_page_cache = &vcpu->arch.mmu_shadow_page_cache, .shadowed_info_cache = &vcpu->arch.mmu_shadowed_info_cache, }; return __kvm_mmu_get_shadow_page(vcpu->kvm, vcpu, &caches, gfn, role); } static union kvm_mmu_page_role kvm_mmu_child_role(u64 *sptep, bool direct, unsigned int access) { struct kvm_mmu_page *parent_sp = sptep_to_sp(sptep); union kvm_mmu_page_role role; role = parent_sp->role; role.level--; role.access = access; role.direct = direct; role.passthrough = 0; /* * If the guest has 4-byte PTEs then that means it's using 32-bit, * 2-level, non-PAE paging. KVM shadows such guests with PAE paging * (i.e. 8-byte PTEs). The difference in PTE size means that KVM must * shadow each guest page table with multiple shadow page tables, which * requires extra bookkeeping in the role. * * Specifically, to shadow the guest's page directory (which covers a * 4GiB address space), KVM uses 4 PAE page directories, each mapping * 1GiB of the address space. @role.quadrant encodes which quarter of * the address space each maps. 
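 *
 * As a small aside, the page-directory quadrant arithmetic can be
 * sketched as follows (illustrative only; the real assignment happens
 * in mmu_alloc_root() and the walk in shadow_walk_init_using_root()):
 *
 *	// Which of the 4 pre-allocated PAE page directories shadows a
 *	// given 32-bit guest virtual address?
 *	static unsigned int pd_quadrant(u32 gva)
 *	{
 *		return (gva >> 30) & 3;	// e.g. 0xC0001000 -> quadrant 3
 *	}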
* * To shadow the guest's page tables (which each map a 4MiB region), KVM * uses 2 PAE page tables, each mapping a 2MiB region. For these, * @role.quadrant encodes which half of the region they map. * * Concretely, a 4-byte PDE consumes bits 31:22, while an 8-byte PDE * consumes bits 29:21. To consume bits 31:30, KVM's uses 4 shadow * PDPTEs; those 4 PAE page directories are pre-allocated and their * quadrant is assigned in mmu_alloc_root(). A 4-byte PTE consumes * bits 21:12, while an 8-byte PTE consumes bits 20:12. To consume * bit 21 in the PTE (the child here), KVM propagates that bit to the * quadrant, i.e. sets quadrant to '0' or '1'. The parent 8-byte PDE * covers bit 21 (see above), thus the quadrant is calculated from the * _least_ significant bit of the PDE index. */ if (role.has_4_byte_gpte) { WARN_ON_ONCE(role.level != PG_LEVEL_4K); role.quadrant = spte_index(sptep) & 1; } return role; } static struct kvm_mmu_page *kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, bool direct, unsigned int access) { union kvm_mmu_page_role role; if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) return ERR_PTR(-EEXIST); role = kvm_mmu_child_role(sptep, direct, access); return kvm_mmu_get_shadow_page(vcpu, gfn, role); } static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterator, struct kvm_vcpu *vcpu, hpa_t root, u64 addr) { iterator->addr = addr; iterator->shadow_addr = root; iterator->level = vcpu->arch.mmu->root_role.level; if (iterator->level >= PT64_ROOT_4LEVEL && vcpu->arch.mmu->cpu_role.base.level < PT64_ROOT_4LEVEL && !vcpu->arch.mmu->root_role.direct) iterator->level = PT32E_ROOT_LEVEL; if (iterator->level == PT32E_ROOT_LEVEL) { /* * prev_root is currently only used for 64-bit hosts. So only * the active root_hpa is valid here. */ BUG_ON(root != vcpu->arch.mmu->root.hpa); iterator->shadow_addr = vcpu->arch.mmu->pae_root[(addr >> 30) & 3]; iterator->shadow_addr &= SPTE_BASE_ADDR_MASK; --iterator->level; if (!iterator->shadow_addr) iterator->level = 0; } } static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, struct kvm_vcpu *vcpu, u64 addr) { shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu->root.hpa, addr); } static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator) { if (iterator->level < PG_LEVEL_4K) return false; iterator->index = SPTE_INDEX(iterator->addr, iterator->level); iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index; return true; } static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator, u64 spte) { if (!is_shadow_present_pte(spte) || is_last_spte(spte, iterator->level)) { iterator->level = 0; return; } iterator->shadow_addr = spte & SPTE_BASE_ADDR_MASK; --iterator->level; } static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) { __shadow_walk_next(iterator, *iterator->sptep); } static void __link_shadow_page(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, u64 *sptep, struct kvm_mmu_page *sp, bool flush) { u64 spte; BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); /* * If an SPTE is present already, it must be a leaf and therefore * a large one. Drop it, and flush the TLB if needed, before * installing sp. */ if (is_shadow_present_pte(*sptep)) drop_large_spte(kvm, sptep, flush); spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp)); mmu_spte_set(sptep, spte); mmu_page_add_parent_pte(cache, sp, sptep); /* * The non-direct sub-pagetable must be updated before linking. 
For * L1 sp, the pagetable is updated via kvm_sync_page() in * kvm_mmu_find_shadow_page() without write-protecting the gfn, * so sp->unsync can be true or false. For higher level non-direct * sp, the pagetable is updated/synced via mmu_sync_children() in * FNAME(fetch)(), so sp->unsync_children can only be false. * WARN_ON_ONCE() if anything happens unexpectedly. */ if (WARN_ON_ONCE(sp->unsync_children) || sp->unsync) mark_unsync(sptep); } static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, struct kvm_mmu_page *sp) { __link_shadow_page(vcpu->kvm, &vcpu->arch.mmu_pte_list_desc_cache, sptep, sp, true); } static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned direct_access) { if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) { struct kvm_mmu_page *child; /* * For the direct sp, if the guest pte's dirty bit * changed form clean to dirty, it will corrupt the * sp's access: allow writable in the read-only sp, * so we should update the spte at this point to get * a new sp with the correct access. */ child = spte_to_child_sp(*sptep); if (child->role.access == direct_access) return; drop_parent_pte(vcpu->kvm, child, sptep); kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep); } } /* Returns the number of zapped non-leaf child shadow pages. */ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *spte, struct list_head *invalid_list) { u64 pte; struct kvm_mmu_page *child; pte = *spte; if (is_shadow_present_pte(pte)) { if (is_last_spte(pte, sp->role.level)) { drop_spte(kvm, spte); } else { child = spte_to_child_sp(pte); drop_parent_pte(kvm, child, spte); /* * Recursively zap nested TDP SPs, parentless SPs are * unlikely to be used again in the near future. This * avoids retaining a large number of stale nested SPs. */ if (tdp_enabled && invalid_list && child->role.guest_mode && !child->parent_ptes.val) return kvm_mmu_prepare_zap_page(kvm, child, invalid_list); } } else if (is_mmio_spte(kvm, pte)) { mmu_spte_clear_no_track(spte); } return 0; } static int kvm_mmu_page_unlink_children(struct kvm *kvm, struct kvm_mmu_page *sp, struct list_head *invalid_list) { int zapped = 0; unsigned i; for (i = 0; i < SPTE_ENT_PER_PAGE; ++i) zapped += mmu_page_zap_pte(kvm, sp, sp->spt + i, invalid_list); return zapped; } static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) { u64 *sptep; struct rmap_iterator iter; while ((sptep = rmap_get_first(&sp->parent_ptes, &iter))) drop_parent_pte(kvm, sp, sptep); } static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *parent, struct list_head *invalid_list) { int i, zapped = 0; struct mmu_page_path parents; struct kvm_mmu_pages pages; if (parent->role.level == PG_LEVEL_4K) return 0; while (mmu_unsync_walk(parent, &pages)) { struct kvm_mmu_page *sp; for_each_sp(pages, sp, parents, i) { kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); mmu_pages_clear_parents(&parents); zapped++; } } return zapped; } static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, struct list_head *invalid_list, int *nr_zapped) { bool list_unstable, zapped_root = false; lockdep_assert_held_write(&kvm->mmu_lock); trace_kvm_mmu_prepare_zap_page(sp); ++kvm->stat.mmu_shadow_zapped; *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); *nr_zapped += kvm_mmu_page_unlink_children(kvm, sp, invalid_list); kvm_mmu_unlink_parents(kvm, sp); /* Zapping children means active_mmu_pages has become unstable. 
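 *
 * Callers iterating active_mmu_pages react to this by restarting the
 * walk; a stripped-down sketch of that pattern (the real version is in
 * kvm_mmu_zap_oldest_mmu_pages(); 'enough_zapped' is a hypothetical
 * stand-in for its nr_to_zap cut-off):
 *
 *	restart:
 *	list_for_each_entry_safe_reverse(sp, tmp, &kvm->arch.active_mmu_pages, link) {
 *		unstable = __kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &nr_zapped);
 *		if (enough_zapped(nr_zapped))
 *			break;
 *		if (unstable)
 *			goto restart;	// zapping children moved entries
 *	}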
*/ list_unstable = *nr_zapped; if (!sp->role.invalid && sp_has_gptes(sp)) unaccount_shadowed(kvm, sp); if (sp->unsync) kvm_unlink_unsync_page(kvm, sp); if (!sp->root_count) { /* Count self */ (*nr_zapped)++; /* * Already invalid pages (previously active roots) are not on * the active page list. See list_del() in the "else" case of * !sp->root_count. */ if (sp->role.invalid) list_add(&sp->link, invalid_list); else list_move(&sp->link, invalid_list); kvm_unaccount_mmu_page(kvm, sp); } else { /* * Remove the active root from the active page list, the root * will be explicitly freed when the root_count hits zero. */ list_del(&sp->link); /* * Obsolete pages cannot be used on any vCPUs, see the comment * in kvm_mmu_zap_all_fast(). Note, is_obsolete_sp() also * treats invalid shadow pages as being obsolete. */ zapped_root = !is_obsolete_sp(kvm, sp); } if (sp->nx_huge_page_disallowed) unaccount_nx_huge_page(kvm, sp); sp->role.invalid = 1; /* * Make the request to free obsolete roots after marking the root * invalid, otherwise other vCPUs may not see it as invalid. */ if (zapped_root) kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS); return list_unstable; } static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, struct list_head *invalid_list) { int nr_zapped; __kvm_mmu_prepare_zap_page(kvm, sp, invalid_list, &nr_zapped); return nr_zapped; } static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list) { struct kvm_mmu_page *sp, *nsp; if (list_empty(invalid_list)) return; /* * We need to make sure everyone sees our modifications to * the page tables and see changes to vcpu->mode here. The barrier * in the kvm_flush_remote_tlbs() achieves this. This pairs * with vcpu_enter_guest and walk_shadow_page_lockless_begin/end. * * In addition, kvm_flush_remote_tlbs waits for all vcpus to exit * guest mode and/or lockless shadow page table walks. */ kvm_flush_remote_tlbs(kvm); list_for_each_entry_safe(sp, nsp, invalid_list, link) { WARN_ON_ONCE(!sp->role.invalid || sp->root_count); kvm_mmu_free_shadow_page(sp); } } static unsigned long kvm_mmu_zap_oldest_mmu_pages(struct kvm *kvm, unsigned long nr_to_zap) { unsigned long total_zapped = 0; struct kvm_mmu_page *sp, *tmp; LIST_HEAD(invalid_list); bool unstable; int nr_zapped; if (list_empty(&kvm->arch.active_mmu_pages)) return 0; restart: list_for_each_entry_safe_reverse(sp, tmp, &kvm->arch.active_mmu_pages, link) { /* * Don't zap active root pages, the page itself can't be freed * and zapping it will just force vCPUs to realloc and reload. */ if (sp->root_count) continue; unstable = __kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &nr_zapped); total_zapped += nr_zapped; if (total_zapped >= nr_to_zap) break; if (unstable) goto restart; } kvm_mmu_commit_zap_page(kvm, &invalid_list); kvm->stat.mmu_recycled += total_zapped; return total_zapped; } static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm) { if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) return kvm->arch.n_max_mmu_pages - kvm->arch.n_used_mmu_pages; return 0; } static int make_mmu_pages_available(struct kvm_vcpu *vcpu) { unsigned long avail = kvm_mmu_available_pages(vcpu->kvm); if (likely(avail >= KVM_MIN_FREE_MMU_PAGES)) return 0; kvm_mmu_zap_oldest_mmu_pages(vcpu->kvm, KVM_REFILL_PAGES - avail); /* * Note, this check is intentionally soft, it only guarantees that one * page is available, while the caller may end up allocating as many as * four pages, e.g. for PAE roots or for 5-level paging. 
Temporarily * exceeding the (arbitrary by default) limit will not harm the host, * being too aggressive may unnecessarily kill the guest, and getting an * exact count is far more trouble than it's worth, especially in the * page fault paths. */ if (!kvm_mmu_available_pages(vcpu->kvm)) return -ENOSPC; return 0; } /* * Changing the number of mmu pages allocated to the vm * Note: if goal_nr_mmu_pages is too small, you will get dead lock */ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages) { write_lock(&kvm->mmu_lock); if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->arch.n_used_mmu_pages - goal_nr_mmu_pages); goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; } kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; write_unlock(&kvm->mmu_lock); } bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool always_retry) { struct kvm *kvm = vcpu->kvm; LIST_HEAD(invalid_list); struct kvm_mmu_page *sp; gpa_t gpa = cr2_or_gpa; bool r = false; /* * Bail early if there aren't any write-protected shadow pages to avoid * unnecessarily taking mmu_lock lock, e.g. if the gfn is write-tracked * by a third party. Reading indirect_shadow_pages without holding * mmu_lock is safe, as this is purely an optimization, i.e. a false * positive is benign, and a false negative will simply result in KVM * skipping the unprotect+retry path, which is also an optimization. */ if (!READ_ONCE(kvm->arch.indirect_shadow_pages)) goto out; if (!vcpu->arch.mmu->root_role.direct) { gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); if (gpa == INVALID_GPA) goto out; } write_lock(&kvm->mmu_lock); for_each_gfn_valid_sp_with_gptes(kvm, sp, gpa_to_gfn(gpa)) kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); /* * Snapshot the result before zapping, as zapping will remove all list * entries, i.e. checking the list later would yield a false negative. */ r = !list_empty(&invalid_list); kvm_mmu_commit_zap_page(kvm, &invalid_list); write_unlock(&kvm->mmu_lock); out: if (r || always_retry) { vcpu->arch.last_retry_eip = kvm_rip_read(vcpu); vcpu->arch.last_retry_addr = cr2_or_gpa; } return r; } static void kvm_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { trace_kvm_mmu_unsync_page(sp); ++kvm->stat.mmu_unsync; sp->unsync = 1; kvm_mmu_mark_parents_unsync(sp); } /* * Attempt to unsync any shadow pages that can be reached by the specified gfn, * KVM is creating a writable mapping for said gfn. Returns 0 if all pages * were marked unsync (or if there is no shadow page), -EPERM if the SPTE must * be write-protected. */ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn, bool can_unsync, bool prefetch) { struct kvm_mmu_page *sp; bool locked = false; /* * Force write-protection if the page is being tracked. Note, the page * track machinery is used to write-protect upper-level shadow pages, * i.e. this guards the role.level == 4K assertion below! */ if (kvm_gfn_is_write_tracked(kvm, slot, gfn)) return -EPERM; /* * The page is not write-tracked, mark existing shadow pages unsync * unless KVM is synchronizing an unsync SP (can_unsync = false). In * that case, KVM must complete emulation of the guest TLB flush before * allowing shadow pages to become unsync (writable by the guest). 
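 *
 * A hedged sketch of how a caller consumes the result when it wants to
 * install a writable SPTE (the real decision is made in make_spte();
 * 'writable' below is illustrative):
 *
 *	if (mmu_try_to_unsync_pages(kvm, slot, gfn, can_unsync, prefetch))
 *		writable = false;	// -EPERM/-EEXIST: keep the gfn write-protected
 *	else
 *		writable = true;	// all shadow pages for the gfn are now unsync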
*/ for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) { if (!can_unsync) return -EPERM; if (sp->unsync) continue; if (prefetch) return -EEXIST; /* * TDP MMU page faults require an additional spinlock as they * run with mmu_lock held for read, not write, and the unsync * logic is not thread safe. Take the spinklock regardless of * the MMU type to avoid extra conditionals/parameters, there's * no meaningful penalty if mmu_lock is held for write. */ if (!locked) { locked = true; spin_lock(&kvm->arch.mmu_unsync_pages_lock); /* * Recheck after taking the spinlock, a different vCPU * may have since marked the page unsync. A false * negative on the unprotected check above is not * possible as clearing sp->unsync _must_ hold mmu_lock * for write, i.e. unsync cannot transition from 1->0 * while this CPU holds mmu_lock for read (or write). */ if (READ_ONCE(sp->unsync)) continue; } WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K); kvm_unsync_page(kvm, sp); } if (locked) spin_unlock(&kvm->arch.mmu_unsync_pages_lock); /* * We need to ensure that the marking of unsync pages is visible * before the SPTE is updated to allow writes because * kvm_mmu_sync_roots() checks the unsync flags without holding * the MMU lock and so can race with this. If the SPTE was updated * before the page had been marked as unsync-ed, something like the * following could happen: * * CPU 1 CPU 2 * --------------------------------------------------------------------- * 1.2 Host updates SPTE * to be writable * 2.1 Guest writes a GPTE for GVA X. * (GPTE being in the guest page table shadowed * by the SP from CPU 1.) * This reads SPTE during the page table walk. * Since SPTE.W is read as 1, there is no * fault. * * 2.2 Guest issues TLB flush. * That causes a VM Exit. * * 2.3 Walking of unsync pages sees sp->unsync is * false and skips the page. * * 2.4 Guest accesses GVA X. * Since the mapping in the SP was not updated, * so the old mapping for GVA X incorrectly * gets used. * 1.1 Host marks SP * as unsync * (sp->unsync = true) * * The write barrier below ensures that 1.1 happens before 1.2 and thus * the situation in 2.4 does not arise. It pairs with the read barrier * in is_unsync_root(), placed between 2.1's load of SPTE.W and 2.3. */ smp_wmb(); return 0; } static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, u64 *sptep, unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn, struct kvm_page_fault *fault) { struct kvm_mmu_page *sp = sptep_to_sp(sptep); int level = sp->role.level; int was_rmapped = 0; int ret = RET_PF_FIXED; bool flush = false; bool wrprot; u64 spte; /* Prefetching always gets a writable pfn. */ bool host_writable = !fault || fault->map_writable; bool prefetch = !fault || fault->prefetch; bool write_fault = fault && fault->write; if (unlikely(is_noslot_pfn(pfn))) { vcpu->stat.pf_mmio_spte_created++; mark_mmio_spte(vcpu, sptep, gfn, pte_access); return RET_PF_EMULATE; } if (is_shadow_present_pte(*sptep)) { /* * If we overwrite a PTE page pointer with a 2MB PMD, unlink * the parent of the now unreachable PTE. 
*/ if (level > PG_LEVEL_4K && !is_large_pte(*sptep)) { struct kvm_mmu_page *child; u64 pte = *sptep; child = spte_to_child_sp(pte); drop_parent_pte(vcpu->kvm, child, sptep); flush = true; } else if (pfn != spte_to_pfn(*sptep)) { drop_spte(vcpu->kvm, sptep); flush = true; } else was_rmapped = 1; } wrprot = make_spte(vcpu, sp, slot, pte_access, gfn, pfn, *sptep, prefetch, true, host_writable, &spte); if (*sptep == spte) { ret = RET_PF_SPURIOUS; } else { flush |= mmu_spte_update(sptep, spte); trace_kvm_mmu_set_spte(level, gfn, sptep); } if (wrprot && write_fault) ret = RET_PF_WRITE_PROTECTED; if (flush) kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level); if (!was_rmapped) { WARN_ON_ONCE(ret == RET_PF_SPURIOUS); rmap_add(vcpu, slot, sptep, gfn, pte_access); } else { /* Already rmapped but the pte_access bits may have changed. */ kvm_mmu_page_set_access(sp, spte_index(sptep), pte_access); } return ret; } static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *start, u64 *end) { struct page *pages[PTE_PREFETCH_NUM]; struct kvm_memory_slot *slot; unsigned int access = sp->role.access; int i, ret; gfn_t gfn; gfn = kvm_mmu_page_get_gfn(sp, spte_index(start)); slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK); if (!slot) return -1; ret = gfn_to_page_many_atomic(slot, gfn, pages, end - start); if (ret <= 0) return -1; for (i = 0; i < ret; i++, gfn++, start++) { mmu_set_spte(vcpu, slot, start, access, gfn, page_to_pfn(pages[i]), NULL); put_page(pages[i]); } return 0; } static void __direct_pte_prefetch(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *sptep) { u64 *spte, *start = NULL; int i; WARN_ON_ONCE(!sp->role.direct); i = spte_index(sptep) & ~(PTE_PREFETCH_NUM - 1); spte = sp->spt + i; for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) { if (is_shadow_present_pte(*spte) || spte == sptep) { if (!start) continue; if (direct_pte_prefetch_many(vcpu, sp, start, spte) < 0) return; start = NULL; } else if (!start) start = spte; } if (start) direct_pte_prefetch_many(vcpu, sp, start, spte); } static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) { struct kvm_mmu_page *sp; sp = sptep_to_sp(sptep); /* * Without accessed bits, there's no way to distinguish between * actually accessed translations and prefetched, so disable pte * prefetch if accessed bits aren't available. */ if (sp_ad_disabled(sp)) return; if (sp->role.level > PG_LEVEL_4K) return; /* * If addresses are being invalidated, skip prefetching to avoid * accidentally prefetching those addresses. */ if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) return; __direct_pte_prefetch(vcpu, sp, sptep); } /* * Lookup the mapping level for @gfn in the current mm. * * WARNING! Use of host_pfn_mapping_level() requires the caller and the end * consumer to be tied into KVM's handlers for MMU notifier events! * * There are several ways to safely use this helper: * * - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, before * consuming it. In this case, mmu_lock doesn't need to be held during the * lookup, but it does need to be held while checking the MMU notifier. * * - Hold mmu_lock AND ensure there is no in-progress MMU notifier invalidation * event for the hva. This can be done by explicit checking the MMU notifier * or by ensuring that KVM already has a valid mapping that covers the hva. * * - Do not use the result to install new mappings, e.g. use the host mapping * level only to decide whether or not to zap an entry. 
In this case, it's * not required to hold mmu_lock (though it's highly likely the caller will * want to hold mmu_lock anyways, e.g. to modify SPTEs). * * Note! The lookup can still race with modifications to host page tables, but * the above "rules" ensure KVM will not _consume_ the result of the walk if a * race with the primary MMU occurs. */ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, const struct kvm_memory_slot *slot) { int level = PG_LEVEL_4K; unsigned long hva; unsigned long flags; pgd_t pgd; p4d_t p4d; pud_t pud; pmd_t pmd; /* * Note, using the already-retrieved memslot and __gfn_to_hva_memslot() * is not solely for performance, it's also necessary to avoid the * "writable" check in __gfn_to_hva_many(), which will always fail on * read-only memslots due to gfn_to_hva() assuming writes. Earlier * page fault steps have already verified the guest isn't writing a * read-only memslot. */ hva = __gfn_to_hva_memslot(slot, gfn); /* * Disable IRQs to prevent concurrent tear down of host page tables, * e.g. if the primary MMU promotes a P*D to a huge page and then frees * the original page table. */ local_irq_save(flags); /* * Read each entry once. As above, a non-leaf entry can be promoted to * a huge page _during_ this walk. Re-reading the entry could send the * walk into the weeks, e.g. p*d_leaf() returns false (sees the old * value) and then p*d_offset() walks into the target huge page instead * of the old page table (sees the new value). */ pgd = READ_ONCE(*pgd_offset(kvm->mm, hva)); if (pgd_none(pgd)) goto out; p4d = READ_ONCE(*p4d_offset(&pgd, hva)); if (p4d_none(p4d) || !p4d_present(p4d)) goto out; pud = READ_ONCE(*pud_offset(&p4d, hva)); if (pud_none(pud) || !pud_present(pud)) goto out; if (pud_leaf(pud)) { level = PG_LEVEL_1G; goto out; } pmd = READ_ONCE(*pmd_offset(&pud, hva)); if (pmd_none(pmd) || !pmd_present(pmd)) goto out; if (pmd_leaf(pmd)) level = PG_LEVEL_2M; out: local_irq_restore(flags); return level; } static int __kvm_mmu_max_mapping_level(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn, int max_level, bool is_private) { struct kvm_lpage_info *linfo; int host_level; max_level = min(max_level, max_huge_page_level); for ( ; max_level > PG_LEVEL_4K; max_level--) { linfo = lpage_info_slot(gfn, slot, max_level); if (!linfo->disallow_lpage) break; } if (is_private) return max_level; if (max_level == PG_LEVEL_4K) return PG_LEVEL_4K; host_level = host_pfn_mapping_level(kvm, gfn, slot); return min(host_level, max_level); } int kvm_mmu_max_mapping_level(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn, int max_level) { bool is_private = kvm_slot_can_be_private(slot) && kvm_mem_is_private(kvm, gfn); return __kvm_mmu_max_mapping_level(kvm, slot, gfn, max_level, is_private); } void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_memory_slot *slot = fault->slot; kvm_pfn_t mask; fault->huge_page_disallowed = fault->exec && fault->nx_huge_page_workaround_enabled; if (unlikely(fault->max_level == PG_LEVEL_4K)) return; if (is_error_noslot_pfn(fault->pfn)) return; if (kvm_slot_dirty_track_enabled(slot)) return; /* * Enforce the iTLB multihit workaround after capturing the requested * level, which will be used to do precise, accurate accounting. 
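 *
 * The pfn/gfn alignment below can be illustrated with a 2MiB goal level
 * (numbers made up for the example): KVM_PAGES_PER_HPAGE(PG_LEVEL_2M)
 * is 512, so mask == 0x1ff, and a pfn of 0x12345 becomes
 * 0x12345 & ~0x1ff == 0x12200, i.e. the first pfn of the 2MiB region
 * that also contains the similarly rounded base gfn.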
*/ fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot, fault->gfn, fault->max_level, fault->is_private); if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed) return; /* * mmu_invalidate_retry() was successful and mmu_lock is held, so * the pmd can't be split from under us. */ fault->goal_level = fault->req_level; mask = KVM_PAGES_PER_HPAGE(fault->goal_level) - 1; VM_BUG_ON((fault->gfn & mask) != (fault->pfn & mask)); fault->pfn &= ~mask; } void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level) { if (cur_level > PG_LEVEL_4K && cur_level == fault->goal_level && is_shadow_present_pte(spte) && !is_large_pte(spte) && spte_to_child_sp(spte)->nx_huge_page_disallowed) { /* * A small SPTE exists for this pfn, but FNAME(fetch), * direct_map(), or kvm_tdp_mmu_map() would like to create a * large PTE instead: just force them to go down another level, * patching back for them into pfn the next 9 bits of the * address. */ u64 page_mask = KVM_PAGES_PER_HPAGE(cur_level) - KVM_PAGES_PER_HPAGE(cur_level - 1); fault->pfn |= fault->gfn & page_mask; fault->goal_level--; } } static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; int ret; gfn_t base_gfn = fault->gfn; kvm_mmu_hugepage_adjust(vcpu, fault); trace_kvm_mmu_spte_requested(fault); for_each_shadow_entry(vcpu, fault->addr, it) { /* * We cannot overwrite existing page tables with an NX * large page, as the leaf could be executable. */ if (fault->nx_huge_page_workaround_enabled) disallowed_hugepage_adjust(fault, *it.sptep, it.level); base_gfn = gfn_round_for_level(fault->gfn, it.level); if (it.level == fault->goal_level) break; sp = kvm_mmu_get_child_sp(vcpu, it.sptep, base_gfn, true, ACC_ALL); if (sp == ERR_PTR(-EEXIST)) continue; link_shadow_page(vcpu, it.sptep, sp); if (fault->huge_page_disallowed) account_nx_huge_page(vcpu->kvm, sp, fault->req_level >= it.level); } if (WARN_ON_ONCE(it.level != fault->goal_level)) return -EFAULT; ret = mmu_set_spte(vcpu, fault->slot, it.sptep, ACC_ALL, base_gfn, fault->pfn, fault); if (ret == RET_PF_SPURIOUS) return ret; direct_pte_prefetch(vcpu, it.sptep); return ret; } static void kvm_send_hwpoison_signal(struct kvm_memory_slot *slot, gfn_t gfn) { unsigned long hva = gfn_to_hva_memslot(slot, gfn); send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, PAGE_SHIFT, current); } static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { if (is_sigpending_pfn(fault->pfn)) { kvm_handle_signal_exit(vcpu); return -EINTR; } /* * Do not cache the mmio info caused by writing the readonly gfn * into the spte otherwise read access on readonly gfn also can * caused mmio page fault and treat it as mmio access. */ if (fault->pfn == KVM_PFN_ERR_RO_FAULT) return RET_PF_EMULATE; if (fault->pfn == KVM_PFN_ERR_HWPOISON) { kvm_send_hwpoison_signal(fault->slot, fault->gfn); return RET_PF_RETRY; } return -EFAULT; } static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, unsigned int access) { gva_t gva = fault->is_tdp ? 
0 : fault->addr; if (fault->is_private) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return -EFAULT; } vcpu_cache_mmio_info(vcpu, gva, fault->gfn, access & shadow_mmio_access_mask); fault->slot = NULL; fault->pfn = KVM_PFN_NOSLOT; fault->map_writable = false; fault->hva = KVM_HVA_ERR_BAD; /* * If MMIO caching is disabled, emulate immediately without * touching the shadow page tables as attempting to install an * MMIO SPTE will just be an expensive nop. */ if (unlikely(!enable_mmio_caching)) return RET_PF_EMULATE; /* * Do not create an MMIO SPTE for a gfn greater than host.MAXPHYADDR, * any guest that generates such gfns is running nested and is being * tricked by L0 userspace (you can observe gfn > L1.MAXPHYADDR if and * only if L1's MAXPHYADDR is inaccurate with respect to the * hardware's). */ if (unlikely(fault->gfn > kvm_mmu_max_gfn())) return RET_PF_EMULATE; return RET_PF_CONTINUE; } static bool page_fault_can_be_fast(struct kvm *kvm, struct kvm_page_fault *fault) { /* * Page faults with reserved bits set, i.e. faults on MMIO SPTEs, only * reach the common page fault handler if the SPTE has an invalid MMIO * generation number. Refreshing the MMIO generation needs to go down * the slow path. Note, EPT Misconfigs do NOT set the PRESENT flag! */ if (fault->rsvd) return false; /* * For hardware-protected VMs, certain conditions like attempting to * perform a write to a page which is not in the state that the guest * expects it to be in can result in a nested/extended #PF. In this * case, the below code might misconstrue this situation as being the * result of a write-protected access, and treat it as a spurious case * rather than taking any action to satisfy the real source of the #PF * such as generating a KVM_EXIT_MEMORY_FAULT. This can lead to the * guest spinning on a #PF indefinitely, so don't attempt the fast path * in this case. * * Note that the kvm_mem_is_private() check might race with an * attribute update, but this will either result in the guest spinning * on RET_PF_SPURIOUS until the update completes, or an actual spurious * case might go down the slow path. Either case will resolve itself. */ if (kvm->arch.has_private_mem && fault->is_private != kvm_mem_is_private(kvm, fault->gfn)) return false; /* * #PF can be fast if: * * 1. The shadow page table entry is not present and A/D bits are * disabled _by KVM_, which could mean that the fault is potentially * caused by access tracking (if enabled). If A/D bits are enabled * by KVM, but disabled by L1 for L2, KVM is forced to disable A/D * bits for L2 and employ access tracking, but the fast page fault * mechanism only supports direct MMUs. * 2. The shadow page table entry is present, the access is a write, * and no reserved bits are set (MMIO SPTEs cannot be "fixed"), i.e. * the fault was caused by a write-protection violation. If the * SPTE is MMU-writable (determined later), the fault can be fixed * by setting the Writable bit, which can be done out of mmu_lock. */ if (!fault->present) return !kvm_ad_enabled(); /* * Note, instruction fetches and writes are mutually exclusive, ignore * the "exec" flag. */ return fault->write; } /* * Returns true if the SPTE was fixed successfully. Otherwise, * someone else modified the SPTE from its original value. */ static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, u64 *sptep, u64 old_spte, u64 new_spte) { /* * Theoretically we could also set dirty bit (and flush TLB) here in * order to eliminate unnecessary PML logging. See comments in * set_spte. 
But fast_page_fault is very unlikely to happen with PML * enabled, so we do not do this. This might result in the same GPA * to be logged in PML buffer again when the write really happens, and * eventually to be called by mark_page_dirty twice. But it's also no * harm. This also avoids the TLB flush needed after setting dirty bit * so non-PML cases won't be impacted. * * Compare with set_spte where instead shadow_dirty_mask is set. */ if (!try_cmpxchg64(sptep, &old_spte, new_spte)) return false; if (is_writable_pte(new_spte) && !is_writable_pte(old_spte)) mark_page_dirty_in_slot(vcpu->kvm, fault->slot, fault->gfn); return true; } static bool is_access_allowed(struct kvm_page_fault *fault, u64 spte) { if (fault->exec) return is_executable_pte(spte); if (fault->write) return is_writable_pte(spte); /* Fault was on Read access */ return spte & PT_PRESENT_MASK; } /* * Returns the last level spte pointer of the shadow page walk for the given * gpa, and sets *spte to the spte value. This spte may be non-preset. If no * walk could be performed, returns NULL and *spte does not contain valid data. * * Contract: * - Must be called between walk_shadow_page_lockless_{begin,end}. * - The returned sptep must not be used after walk_shadow_page_lockless_end. */ static u64 *fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte) { struct kvm_shadow_walk_iterator iterator; u64 old_spte; u64 *sptep = NULL; for_each_shadow_entry_lockless(vcpu, gpa, iterator, old_spte) { sptep = iterator.sptep; *spte = old_spte; } return sptep; } /* * Returns one of RET_PF_INVALID, RET_PF_FIXED or RET_PF_SPURIOUS. */ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_mmu_page *sp; int ret = RET_PF_INVALID; u64 spte; u64 *sptep; uint retry_count = 0; if (!page_fault_can_be_fast(vcpu->kvm, fault)) return ret; walk_shadow_page_lockless_begin(vcpu); do { u64 new_spte; if (tdp_mmu_enabled) sptep = kvm_tdp_mmu_fast_pf_get_last_sptep(vcpu, fault->gfn, &spte); else sptep = fast_pf_get_last_sptep(vcpu, fault->addr, &spte); /* * It's entirely possible for the mapping to have been zapped * by a different task, but the root page should always be * available as the vCPU holds a reference to its root(s). */ if (WARN_ON_ONCE(!sptep)) spte = FROZEN_SPTE; if (!is_shadow_present_pte(spte)) break; sp = sptep_to_sp(sptep); if (!is_last_spte(spte, sp->role.level)) break; /* * Check whether the memory access that caused the fault would * still cause it if it were to be performed right now. If not, * then this is a spurious fault caused by TLB lazily flushed, * or some other CPU has already fixed the PTE after the * current CPU took the fault. * * Need not check the access of upper level table entries since * they are always ACC_ALL. */ if (is_access_allowed(fault, spte)) { ret = RET_PF_SPURIOUS; break; } new_spte = spte; /* * KVM only supports fixing page faults outside of MMU lock for * direct MMUs, nested MMUs are always indirect, and KVM always * uses A/D bits for non-nested MMUs. Thus, if A/D bits are * enabled, the SPTE can't be an access-tracked SPTE. */ if (unlikely(!kvm_ad_enabled()) && is_access_track_spte(spte)) new_spte = restore_acc_track_spte(new_spte); /* * To keep things simple, only SPTEs that are MMU-writable can * be made fully writable outside of mmu_lock, e.g. only SPTEs * that were write-protected for dirty-logging or access * tracking are handled here. Don't bother checking if the * SPTE is writable to prioritize running with A/D bits enabled. 
* The is_access_allowed() check above handles the common case * of the fault being spurious, and the SPTE is known to be * shadow-present, i.e. except for access tracking restoration * making the new SPTE writable, the check is wasteful. */ if (fault->write && is_mmu_writable_spte(spte)) { new_spte |= PT_WRITABLE_MASK; /* * Do not fix write-permission on the large spte when * dirty logging is enabled. Since we only dirty the * first page into the dirty-bitmap in * fast_pf_fix_direct_spte(), other pages are missed * if its slot has dirty logging enabled. * * Instead, we let the slow page fault path create a * normal spte to fix the access. */ if (sp->role.level > PG_LEVEL_4K && kvm_slot_dirty_track_enabled(fault->slot)) break; } /* Verify that the fault can be handled in the fast path */ if (new_spte == spte || !is_access_allowed(fault, new_spte)) break; /* * Currently, fast page fault only works for direct mapping * since the gfn is not stable for indirect shadow page. See * Documentation/virt/kvm/locking.rst to get more detail. */ if (fast_pf_fix_direct_spte(vcpu, fault, sptep, spte, new_spte)) { ret = RET_PF_FIXED; break; } if (++retry_count > 4) { pr_warn_once("Fast #PF retrying more than 4 times.\n"); break; } } while (true); trace_fast_page_fault(vcpu, fault, sptep, spte, ret); walk_shadow_page_lockless_end(vcpu); if (ret != RET_PF_INVALID) vcpu->stat.pf_fast++; return ret; } static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, struct list_head *invalid_list) { struct kvm_mmu_page *sp; if (!VALID_PAGE(*root_hpa)) return; sp = root_to_sp(*root_hpa); if (WARN_ON_ONCE(!sp)) return; if (is_tdp_mmu_page(sp)) { lockdep_assert_held_read(&kvm->mmu_lock); kvm_tdp_mmu_put_root(kvm, sp); } else { lockdep_assert_held_write(&kvm->mmu_lock); if (!--sp->root_count && sp->role.invalid) kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); } *root_hpa = INVALID_PAGE; } /* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, ulong roots_to_free) { bool is_tdp_mmu = tdp_mmu_enabled && mmu->root_role.direct; int i; LIST_HEAD(invalid_list); bool free_active_root; WARN_ON_ONCE(roots_to_free & ~KVM_MMU_ROOTS_ALL); BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG); /* Before acquiring the MMU lock, see if we need to do any real work. */ free_active_root = (roots_to_free & KVM_MMU_ROOT_CURRENT) && VALID_PAGE(mmu->root.hpa); if (!free_active_root) { for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) && VALID_PAGE(mmu->prev_roots[i].hpa)) break; if (i == KVM_MMU_NUM_PREV_ROOTS) return; } if (is_tdp_mmu) read_lock(&kvm->mmu_lock); else write_lock(&kvm->mmu_lock); for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) mmu_free_root_page(kvm, &mmu->prev_roots[i].hpa, &invalid_list); if (free_active_root) { if (kvm_mmu_is_dummy_root(mmu->root.hpa)) { /* Nothing to cleanup for dummy roots. 
*/ } else if (root_to_sp(mmu->root.hpa)) { mmu_free_root_page(kvm, &mmu->root.hpa, &invalid_list); } else if (mmu->pae_root) { for (i = 0; i < 4; ++i) { if (!IS_VALID_PAE_ROOT(mmu->pae_root[i])) continue; mmu_free_root_page(kvm, &mmu->pae_root[i], &invalid_list); mmu->pae_root[i] = INVALID_PAE_ROOT; } } mmu->root.hpa = INVALID_PAGE; mmu->root.pgd = 0; } if (is_tdp_mmu) { read_unlock(&kvm->mmu_lock); WARN_ON_ONCE(!list_empty(&invalid_list)); } else { kvm_mmu_commit_zap_page(kvm, &invalid_list); write_unlock(&kvm->mmu_lock); } } EXPORT_SYMBOL_GPL(kvm_mmu_free_roots); void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu) { unsigned long roots_to_free = 0; struct kvm_mmu_page *sp; hpa_t root_hpa; int i; /* * This should not be called while L2 is active, L2 can't invalidate * _only_ its own roots, e.g. INVVPID unconditionally exits. */ WARN_ON_ONCE(mmu->root_role.guest_mode); for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { root_hpa = mmu->prev_roots[i].hpa; if (!VALID_PAGE(root_hpa)) continue; sp = root_to_sp(root_hpa); if (!sp || sp->role.guest_mode) roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); } kvm_mmu_free_roots(kvm, mmu, roots_to_free); } EXPORT_SYMBOL_GPL(kvm_mmu_free_guest_mode_roots); static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant, u8 level) { union kvm_mmu_page_role role = vcpu->arch.mmu->root_role; struct kvm_mmu_page *sp; role.level = level; role.quadrant = quadrant; WARN_ON_ONCE(quadrant && !role.has_4_byte_gpte); WARN_ON_ONCE(role.direct && role.has_4_byte_gpte); sp = kvm_mmu_get_shadow_page(vcpu, gfn, role); ++sp->root_count; return __pa(sp->spt); } static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; u8 shadow_root_level = mmu->root_role.level; hpa_t root; unsigned i; int r; if (tdp_mmu_enabled) return kvm_tdp_mmu_alloc_root(vcpu); write_lock(&vcpu->kvm->mmu_lock); r = make_mmu_pages_available(vcpu); if (r < 0) goto out_unlock; if (shadow_root_level >= PT64_ROOT_4LEVEL) { root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level); mmu->root.hpa = root; } else if (shadow_root_level == PT32E_ROOT_LEVEL) { if (WARN_ON_ONCE(!mmu->pae_root)) { r = -EIO; goto out_unlock; } for (i = 0; i < 4; ++i) { WARN_ON_ONCE(IS_VALID_PAE_ROOT(mmu->pae_root[i])); root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT), 0, PT32_ROOT_LEVEL); mmu->pae_root[i] = root | PT_PRESENT_MASK | shadow_me_value; } mmu->root.hpa = __pa(mmu->pae_root); } else { WARN_ONCE(1, "Bad TDP root level = %d\n", shadow_root_level); r = -EIO; goto out_unlock; } /* root.pgd is ignored for direct MMUs. */ mmu->root.pgd = 0; out_unlock: write_unlock(&vcpu->kvm->mmu_lock); return r; } static int mmu_first_shadow_root_alloc(struct kvm *kvm) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; int r = 0, i, bkt; /* * Check if this is the first shadow root being allocated before * taking the lock. */ if (kvm_shadow_root_allocated(kvm)) return 0; mutex_lock(&kvm->slots_arch_lock); /* Recheck, under the lock, whether this is the first shadow root. */ if (kvm_shadow_root_allocated(kvm)) goto out_unlock; /* * Check if anything actually needs to be allocated, e.g. all metadata * will be allocated upfront if TDP is disabled. 
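 *
 * The function as a whole follows the usual check/lock/recheck one-shot
 * initialisation shape; a condensed sketch (error handling and the
 * per-memslot loop elided):
 *
 *	if (kvm_shadow_root_allocated(kvm))		// fast path, no lock
 *		return 0;
 *	mutex_lock(&kvm->slots_arch_lock);
 *	if (!kvm_shadow_root_allocated(kvm)) {		// recheck under the lock
 *		// allocate rmaps and write-tracking metadata here
 *		smp_store_release(&kvm->arch.shadow_root_allocated, true);
 *	}
 *	mutex_unlock(&kvm->slots_arch_lock);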
*/ if (kvm_memslots_have_rmaps(kvm) && kvm_page_track_write_tracking_enabled(kvm)) goto out_success; for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { slots = __kvm_memslots(kvm, i); kvm_for_each_memslot(slot, bkt, slots) { /* * Both of these functions are no-ops if the target is * already allocated, so unconditionally calling both * is safe. Intentionally do NOT free allocations on * failure to avoid having to track which allocations * were made now versus when the memslot was created. * The metadata is guaranteed to be freed when the slot * is freed, and will be kept/used if userspace retries * KVM_RUN instead of killing the VM. */ r = memslot_rmap_alloc(slot, slot->npages); if (r) goto out_unlock; r = kvm_page_track_write_tracking_alloc(slot); if (r) goto out_unlock; } } /* * Ensure that shadow_root_allocated becomes true strictly after * all the related pointers are set. */ out_success: smp_store_release(&kvm->arch.shadow_root_allocated, true); out_unlock: mutex_unlock(&kvm->slots_arch_lock); return r; } static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; u64 pdptrs[4], pm_mask; gfn_t root_gfn, root_pgd; int quadrant, i, r; hpa_t root; root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu); root_gfn = (root_pgd & __PT_BASE_ADDR_MASK) >> PAGE_SHIFT; if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) { mmu->root.hpa = kvm_mmu_get_dummy_root(); return 0; } /* * On SVM, reading PDPTRs might access guest memory, which might fault * and thus might sleep. Grab the PDPTRs before acquiring mmu_lock. */ if (mmu->cpu_role.base.level == PT32E_ROOT_LEVEL) { for (i = 0; i < 4; ++i) { pdptrs[i] = mmu->get_pdptr(vcpu, i); if (!(pdptrs[i] & PT_PRESENT_MASK)) continue; if (!kvm_vcpu_is_visible_gfn(vcpu, pdptrs[i] >> PAGE_SHIFT)) pdptrs[i] = 0; } } r = mmu_first_shadow_root_alloc(vcpu->kvm); if (r) return r; write_lock(&vcpu->kvm->mmu_lock); r = make_mmu_pages_available(vcpu); if (r < 0) goto out_unlock; /* * Do we shadow a long mode page table? If so we need to * write-protect the guests page table root. */ if (mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) { root = mmu_alloc_root(vcpu, root_gfn, 0, mmu->root_role.level); mmu->root.hpa = root; goto set_root_pgd; } if (WARN_ON_ONCE(!mmu->pae_root)) { r = -EIO; goto out_unlock; } /* * We shadow a 32 bit page table. This may be a legacy 2-level * or a PAE 3-level page table. In either case we need to be aware that * the shadow page table may be a PAE or a long mode page table. */ pm_mask = PT_PRESENT_MASK | shadow_me_value; if (mmu->root_role.level >= PT64_ROOT_4LEVEL) { pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; if (WARN_ON_ONCE(!mmu->pml4_root)) { r = -EIO; goto out_unlock; } mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask; if (mmu->root_role.level == PT64_ROOT_5LEVEL) { if (WARN_ON_ONCE(!mmu->pml5_root)) { r = -EIO; goto out_unlock; } mmu->pml5_root[0] = __pa(mmu->pml4_root) | pm_mask; } } for (i = 0; i < 4; ++i) { WARN_ON_ONCE(IS_VALID_PAE_ROOT(mmu->pae_root[i])); if (mmu->cpu_role.base.level == PT32E_ROOT_LEVEL) { if (!(pdptrs[i] & PT_PRESENT_MASK)) { mmu->pae_root[i] = INVALID_PAE_ROOT; continue; } root_gfn = pdptrs[i] >> PAGE_SHIFT; } /* * If shadowing 32-bit non-PAE page tables, each PAE page * directory maps one quarter of the guest's non-PAE page * directory. Othwerise each PAE page direct shadows one guest * PAE page directory so that quadrant should be 0. */ quadrant = (mmu->cpu_role.base.level == PT32_ROOT_LEVEL) ? 
i : 0; root = mmu_alloc_root(vcpu, root_gfn, quadrant, PT32_ROOT_LEVEL); mmu->pae_root[i] = root | pm_mask; } if (mmu->root_role.level == PT64_ROOT_5LEVEL) mmu->root.hpa = __pa(mmu->pml5_root); else if (mmu->root_role.level == PT64_ROOT_4LEVEL) mmu->root.hpa = __pa(mmu->pml4_root); else mmu->root.hpa = __pa(mmu->pae_root); set_root_pgd: mmu->root.pgd = root_pgd; out_unlock: write_unlock(&vcpu->kvm->mmu_lock); return r; } static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; bool need_pml5 = mmu->root_role.level > PT64_ROOT_4LEVEL; u64 *pml5_root = NULL; u64 *pml4_root = NULL; u64 *pae_root; /* * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP * tables are allocated and initialized at root creation as there is no * equivalent level in the guest's NPT to shadow. Allocate the tables * on demand, as running a 32-bit L1 VMM on 64-bit KVM is very rare. */ if (mmu->root_role.direct || mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL || mmu->root_role.level < PT64_ROOT_4LEVEL) return 0; /* * NPT, the only paging mode that uses this horror, uses a fixed number * of levels for the shadow page tables, e.g. all MMUs are 4-level or * all MMus are 5-level. Thus, this can safely require that pml5_root * is allocated if the other roots are valid and pml5 is needed, as any * prior MMU would also have required pml5. */ if (mmu->pae_root && mmu->pml4_root && (!need_pml5 || mmu->pml5_root)) return 0; /* * The special roots should always be allocated in concert. Yell and * bail if KVM ends up in a state where only one of the roots is valid. */ if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root || (need_pml5 && mmu->pml5_root))) return -EIO; /* * Unlike 32-bit NPT, the PDP table doesn't need to be in low mem, and * doesn't need to be decrypted. */ pae_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!pae_root) return -ENOMEM; #ifdef CONFIG_X86_64 pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!pml4_root) goto err_pml4; if (need_pml5) { pml5_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!pml5_root) goto err_pml5; } #endif mmu->pae_root = pae_root; mmu->pml4_root = pml4_root; mmu->pml5_root = pml5_root; return 0; #ifdef CONFIG_X86_64 err_pml5: free_page((unsigned long)pml4_root); err_pml4: free_page((unsigned long)pae_root); return -ENOMEM; #endif } static bool is_unsync_root(hpa_t root) { struct kvm_mmu_page *sp; if (!VALID_PAGE(root) || kvm_mmu_is_dummy_root(root)) return false; /* * The read barrier orders the CPU's read of SPTE.W during the page table * walk before the reads of sp->unsync/sp->unsync_children here. * * Even if another CPU was marking the SP as unsync-ed simultaneously, * any guest page table changes are not guaranteed to be visible anyway * until this VCPU issues a TLB flush strictly after those changes are * made. We only need to ensure that the other CPU sets these flags * before any actual changes to the page tables are made. The comments * in mmu_try_to_unsync_pages() describe what could go wrong if this * requirement isn't satisfied. */ smp_rmb(); sp = root_to_sp(root); /* * PAE roots (somewhat arbitrarily) aren't backed by shadow pages, the * PDPTEs for a given PAE root need to be synchronized individually. 
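 *
 * The pairing with mmu_try_to_unsync_pages() described above can be
 * condensed into a small message-passing sketch (illustrative, not the
 * real structures):
 *
 *	writer (mmu_try_to_unsync_pages)     reader (page walk + root sync)
 *	sp->unsync = 1;                      reads SPTE, sees SPTE.W == 1
 *	smp_wmb();                           smp_rmb();
 *	SPTE is made writable                reads sp->unsync
 *
 * If the reader observed the writable SPTE, the barrier pair guarantees
 * it also observes sp->unsync == 1 and therefore syncs the root before
 * reusing it.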
*/ if (WARN_ON_ONCE(!sp)) return false; if (sp->unsync || sp->unsync_children) return true; return false; } void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) { int i; struct kvm_mmu_page *sp; if (vcpu->arch.mmu->root_role.direct) return; if (!VALID_PAGE(vcpu->arch.mmu->root.hpa)) return; vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); if (vcpu->arch.mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) { hpa_t root = vcpu->arch.mmu->root.hpa; if (!is_unsync_root(root)) return; sp = root_to_sp(root); write_lock(&vcpu->kvm->mmu_lock); mmu_sync_children(vcpu, sp, true); write_unlock(&vcpu->kvm->mmu_lock); return; } write_lock(&vcpu->kvm->mmu_lock); for (i = 0; i < 4; ++i) { hpa_t root = vcpu->arch.mmu->pae_root[i]; if (IS_VALID_PAE_ROOT(root)) { sp = spte_to_child_sp(root); mmu_sync_children(vcpu, sp, true); } } write_unlock(&vcpu->kvm->mmu_lock); } void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu) { unsigned long roots_to_free = 0; int i; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) if (is_unsync_root(vcpu->arch.mmu->prev_roots[i].hpa)) roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); /* sync prev_roots by simply freeing them */ kvm_mmu_free_roots(vcpu->kvm, vcpu->arch.mmu, roots_to_free); } static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gpa_t vaddr, u64 access, struct x86_exception *exception) { if (exception) exception->error_code = 0; return kvm_translate_gpa(vcpu, mmu, vaddr, access, exception); } static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) { /* * A nested guest cannot use the MMIO cache if it is using nested * page tables, because cr2 is a nGPA while the cache stores GPAs. */ if (mmu_is_nested(vcpu)) return false; if (direct) return vcpu_match_mmio_gpa(vcpu, addr); return vcpu_match_mmio_gva(vcpu, addr); } /* * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. * * Must be called between walk_shadow_page_lockless_{begin,end}. */ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) { struct kvm_shadow_walk_iterator iterator; int leaf = -1; u64 spte; for (shadow_walk_init(&iterator, vcpu, addr), *root_level = iterator.level; shadow_walk_okay(&iterator); __shadow_walk_next(&iterator, spte)) { leaf = iterator.level; spte = mmu_spte_get_lockless(iterator.sptep); sptes[leaf] = spte; } return leaf; } static int get_sptes_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) { int leaf; walk_shadow_page_lockless_begin(vcpu); if (is_tdp_mmu_active(vcpu)) leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, root_level); else leaf = get_walk(vcpu, addr, sptes, root_level); walk_shadow_page_lockless_end(vcpu); return leaf; } /* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL + 1]; struct rsvd_bits_validate *rsvd_check; int root, leaf, level; bool reserved = false; leaf = get_sptes_lockless(vcpu, addr, sptes, &root); if (unlikely(leaf < 0)) { *sptep = 0ull; return reserved; } *sptep = sptes[leaf]; /* * Skip reserved bits checks on the terminal leaf if it's not a valid * SPTE. Note, this also (intentionally) skips MMIO SPTEs, which, by * design, always have reserved bits set. The purpose of the checks is * to detect reserved bits on non-MMIO SPTEs. i.e. buggy SPTEs. 
*/ if (!is_shadow_present_pte(sptes[leaf])) leaf++; rsvd_check = &vcpu->arch.mmu->shadow_zero_check; for (level = root; level >= leaf; level--) reserved |= is_rsvd_spte(rsvd_check, sptes[level], level); if (reserved) { pr_err("%s: reserved bits set on MMU-present spte, addr 0x%llx, hierarchy:\n", __func__, addr); for (level = root; level >= leaf; level--) pr_err("------ spte = 0x%llx level = %d, rsvd bits = 0x%llx", sptes[level], level, get_rsvd_bits(rsvd_check, sptes[level], level)); } return reserved; } static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) { u64 spte; bool reserved; if (mmio_info_in_cache(vcpu, addr, direct)) return RET_PF_EMULATE; reserved = get_mmio_spte(vcpu, addr, &spte); if (WARN_ON_ONCE(reserved)) return -EINVAL; if (is_mmio_spte(vcpu->kvm, spte)) { gfn_t gfn = get_mmio_spte_gfn(spte); unsigned int access = get_mmio_spte_access(spte); if (!check_mmio_spte(vcpu, spte)) return RET_PF_INVALID; if (direct) addr = 0; trace_handle_mmio_page_fault(addr, gfn, access); vcpu_cache_mmio_info(vcpu, addr, gfn, access); return RET_PF_EMULATE; } /* * If the page table is zapped by other CPUs, let the CPU fault again on * the address. */ return RET_PF_RETRY; } static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { if (unlikely(fault->rsvd)) return false; if (!fault->present || !fault->write) return false; /* * The guest is writing a page that is write-tracked, which cannot * be fixed by the page fault handler. */ if (kvm_gfn_is_write_tracked(vcpu->kvm, fault->slot, fault->gfn)) return true; return false; } static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) { struct kvm_shadow_walk_iterator iterator; u64 spte; walk_shadow_page_lockless_begin(vcpu); for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) clear_sp_write_flooding_count(iterator.sptep); walk_shadow_page_lockless_end(vcpu); } static u32 alloc_apf_token(struct kvm_vcpu *vcpu) { /* make sure the token value is not 0 */ u32 id = vcpu->arch.apf.id; if (id << 12 == 0) vcpu->arch.apf.id = 1; return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; } static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_arch_async_pf arch; arch.token = alloc_apf_token(vcpu); arch.gfn = fault->gfn; arch.error_code = fault->error_code; arch.direct_map = vcpu->arch.mmu->root_role.direct; arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu); return kvm_setup_async_pf(vcpu, fault->addr, kvm_vcpu_gfn_to_hva(vcpu, fault->gfn), &arch); } void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) { int r; if (WARN_ON_ONCE(work->arch.error_code & PFERR_PRIVATE_ACCESS)) return; if ((vcpu->arch.mmu->root_role.direct != work->arch.direct_map) || work->wakeup_all) return; r = kvm_mmu_reload(vcpu); if (unlikely(r)) return; if (!vcpu->arch.mmu->root_role.direct && work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu)) return; r = kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code, true, NULL, NULL); /* * Account fixed page faults, otherwise they'll never be counted, but * ignore stats for all other return types.
Page-ready "faults" aren't * truly spurious and never trigger emulation */ if (r == RET_PF_FIXED) vcpu->stat.pf_fixed++; } static inline u8 kvm_max_level_for_order(int order) { BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) return PG_LEVEL_1G; if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) return PG_LEVEL_2M; return PG_LEVEL_4K; } static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, u8 max_level, int gmem_order) { u8 req_max_level; if (max_level == PG_LEVEL_4K) return PG_LEVEL_4K; max_level = min(kvm_max_level_for_order(gmem_order), max_level); if (max_level == PG_LEVEL_4K) return PG_LEVEL_4K; req_max_level = kvm_x86_call(private_max_mapping_level)(kvm, pfn); if (req_max_level) max_level = min(max_level, req_max_level); return max_level; } static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { int max_order, r; if (!kvm_slot_can_be_private(fault->slot)) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return -EFAULT; } r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn, &max_order); if (r) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return r; } fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY); fault->max_level = kvm_max_private_mapping_level(vcpu->kvm, fault->pfn, fault->max_level, max_order); return RET_PF_CONTINUE; } static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { bool async; if (fault->is_private) return kvm_faultin_pfn_private(vcpu, fault); async = false; fault->pfn = __gfn_to_pfn_memslot(fault->slot, fault->gfn, false, false, &async, fault->write, &fault->map_writable, &fault->hva); if (!async) return RET_PF_CONTINUE; /* *pfn has correct page already */ if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) { trace_kvm_try_async_get_page(fault->addr, fault->gfn); if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) { trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn); kvm_make_request(KVM_REQ_APF_HALT, vcpu); return RET_PF_RETRY; } else if (kvm_arch_setup_async_pf(vcpu, fault)) { return RET_PF_RETRY; } } /* * Allow gup to bail on pending non-fatal signals when it's also allowed * to wait for IO. Note, gup always bails if it is unable to quickly * get a page and a fatal signal, i.e. SIGKILL, is pending. */ fault->pfn = __gfn_to_pfn_memslot(fault->slot, fault->gfn, false, true, NULL, fault->write, &fault->map_writable, &fault->hva); return RET_PF_CONTINUE; } static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, unsigned int access) { struct kvm_memory_slot *slot = fault->slot; int ret; /* * Note that the mmu_invalidate_seq also serves to detect a concurrent * change in attributes. is_page_fault_stale() will detect an * invalidation related to fault->gfn and resume the guest without * installing a mapping in the page tables. */ fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq; smp_rmb(); /* * Now that we have a snapshot of mmu_invalidate_seq we can check for a * private vs. shared mismatch. */ if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return -EFAULT; } if (unlikely(!slot)) return kvm_handle_noslot_fault(vcpu, fault, access); /* * Retry the page fault if the gfn hit a memslot that is being deleted * or moved.
This ensures any existing SPTEs for the old memslot will * be zapped before KVM inserts a new MMIO SPTE for the gfn. */ if (slot->flags & KVM_MEMSLOT_INVALID) return RET_PF_RETRY; if (slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT) { /* * Don't map L1's APIC access page into L2, KVM doesn't support * using APICv/AVIC to accelerate L2 accesses to L1's APIC, * i.e. the access needs to be emulated. Emulating access to * L1's APIC is also correct if L1 is accelerating L2's own * virtual APIC, but for some reason L1 also maps _L1's_ APIC * into L2. Note, vcpu_is_mmio_gpa() always treats access to * the APIC as MMIO. Allow an MMIO SPTE to be created, as KVM * uses different roots for L1 vs. L2, i.e. there is no danger * of breaking APICv/AVIC for L1. */ if (is_guest_mode(vcpu)) return kvm_handle_noslot_fault(vcpu, fault, access); /* * If the APIC access page exists but is disabled, go directly * to emulation without caching the MMIO access or creating a * MMIO SPTE. That way the cache doesn't need to be purged * when the AVIC is re-enabled. */ if (!kvm_apicv_activated(vcpu->kvm)) return RET_PF_EMULATE; } /* * Check for a relevant mmu_notifier invalidation event before getting * the pfn from the primary MMU, and before acquiring mmu_lock. * * For mmu_lock, if there is an in-progress invalidation and the kernel * allows preemption, the invalidation task may drop mmu_lock and yield * in response to mmu_lock being contended, which is *very* counter- * productive as this vCPU can't actually make forward progress until * the invalidation completes. * * Retrying now can also avoid unnessary lock contention in the primary * MMU, as the primary MMU doesn't necessarily hold a single lock for * the duration of the invalidation, i.e. faulting in a conflicting pfn * can cause the invalidation to take longer by holding locks that are * needed to complete the invalidation. * * Do the pre-check even for non-preemtible kernels, i.e. even if KVM * will never yield mmu_lock in response to contention, as this vCPU is * *guaranteed* to need to retry, i.e. waiting until mmu_lock is held * to detect retry guarantees the worst case latency for the vCPU. */ if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn)) return RET_PF_RETRY; ret = __kvm_faultin_pfn(vcpu, fault); if (ret != RET_PF_CONTINUE) return ret; if (unlikely(is_error_pfn(fault->pfn))) return kvm_handle_error_pfn(vcpu, fault); if (WARN_ON_ONCE(!fault->slot || is_noslot_pfn(fault->pfn))) return kvm_handle_noslot_fault(vcpu, fault, access); /* * Check again for a relevant mmu_notifier invalidation event purely to * avoid contending mmu_lock. Most invalidations will be detected by * the previous check, but checking is extremely cheap relative to the * overall cost of failing to detect the invalidation until after * mmu_lock is acquired. */ if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn)) { kvm_release_pfn_clean(fault->pfn); return RET_PF_RETRY; } return RET_PF_CONTINUE; } /* * Returns true if the page fault is stale and needs to be retried, i.e. if the * root was invalidated by a memslot update or a relevant mmu_notifier fired. */ static bool is_page_fault_stale(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa); /* Special roots, e.g. pae_root, are not backed by shadow pages. 
*/ if (sp && is_obsolete_sp(vcpu->kvm, sp)) return true; /* * Roots without an associated shadow page are considered invalid if * there is a pending request to free obsolete roots. The request is * only a hint that the current root _may_ be obsolete and needs to be * reloaded, e.g. if the guest frees a PGD that KVM is tracking as a * previous root, then __kvm_mmu_prepare_zap_page() signals all vCPUs * to reload even if no vCPU is actively using the root. */ if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu)) return true; /* * Check for a relevant mmu_notifier invalidation event one last time * now that mmu_lock is held, as the "unsafe" checks performed without * holding mmu_lock can get false negatives. */ return fault->slot && mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn); } static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { int r; /* Dummy roots are used only for shadowing bad guest roots. */ if (WARN_ON_ONCE(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) return RET_PF_RETRY; if (page_fault_handle_page_track(vcpu, fault)) return RET_PF_WRITE_PROTECTED; r = fast_page_fault(vcpu, fault); if (r != RET_PF_INVALID) return r; r = mmu_topup_memory_caches(vcpu, false); if (r) return r; r = kvm_faultin_pfn(vcpu, fault, ACC_ALL); if (r != RET_PF_CONTINUE) return r; r = RET_PF_RETRY; write_lock(&vcpu->kvm->mmu_lock); if (is_page_fault_stale(vcpu, fault)) goto out_unlock; r = make_mmu_pages_available(vcpu); if (r) goto out_unlock; r = direct_map(vcpu, fault); out_unlock: write_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(fault->pfn); return r; } static int nonpaging_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { /* This path builds a PAE pagetable, we can map 2mb pages at maximum. */ fault->max_level = PG_LEVEL_2M; return direct_page_fault(vcpu, fault); } int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, u64 fault_address, char *insn, int insn_len) { int r = 1; u32 flags = vcpu->arch.apf.host_apf_flags; #ifndef CONFIG_X86_64 /* A 64-bit CR2 should be impossible on 32-bit KVM. */ if (WARN_ON_ONCE(fault_address >> 32)) return -EFAULT; #endif /* * Legacy #PF exception only have a 32-bit error code. Simply drop the * upper bits as KVM doesn't use them for #PF (because they are never * set), and to ensure there are no collisions with KVM-defined bits. */ if (WARN_ON_ONCE(error_code >> 32)) error_code = lower_32_bits(error_code); /* * Restrict KVM-defined flags to bits 63:32 so that it's impossible for * them to conflict with #PF error codes, which are limited to 32 bits. 
*/ BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK)); vcpu->arch.l1tf_flush_l1d = true; if (!flags) { trace_kvm_page_fault(vcpu, fault_address, error_code); r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn, insn_len); } else if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) { vcpu->arch.apf.host_apf_flags = 0; local_irq_disable(); kvm_async_pf_task_wait_schedule(fault_address); local_irq_enable(); } else { WARN_ONCE(1, "Unexpected host async PF flags: %x\n", flags); } return r; } EXPORT_SYMBOL_GPL(kvm_handle_page_fault); #ifdef CONFIG_X86_64 static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { int r; if (page_fault_handle_page_track(vcpu, fault)) return RET_PF_WRITE_PROTECTED; r = fast_page_fault(vcpu, fault); if (r != RET_PF_INVALID) return r; r = mmu_topup_memory_caches(vcpu, false); if (r) return r; r = kvm_faultin_pfn(vcpu, fault, ACC_ALL); if (r != RET_PF_CONTINUE) return r; r = RET_PF_RETRY; read_lock(&vcpu->kvm->mmu_lock); if (is_page_fault_stale(vcpu, fault)) goto out_unlock; r = kvm_tdp_mmu_map(vcpu, fault); out_unlock: read_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(fault->pfn); return r; } #endif bool kvm_mmu_may_ignore_guest_pat(void) { /* * When EPT is enabled (shadow_memtype_mask is non-zero), and the VM * has non-coherent DMA (DMA doesn't snoop CPU caches), KVM's ABI is to * honor the memtype from the guest's PAT so that guest accesses to * memory that is DMA'd aren't cached against the guest's wishes. As a * result, KVM _may_ ignore guest PAT, whereas without non-coherent DMA, * KVM _always_ ignores guest PAT (when EPT is enabled). */ return shadow_memtype_mask; } int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { #ifdef CONFIG_X86_64 if (tdp_mmu_enabled) return kvm_tdp_mmu_page_fault(vcpu, fault); #endif return direct_page_fault(vcpu, fault); } static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code, u8 *level) { int r; /* * Restrict to TDP page fault, since that's the only case where the MMU * is indexed by GPA. */ if (vcpu->arch.mmu->page_fault != kvm_tdp_page_fault) return -EOPNOTSUPP; do { if (signal_pending(current)) return -EINTR; cond_resched(); r = kvm_mmu_do_page_fault(vcpu, gpa, error_code, true, NULL, level); } while (r == RET_PF_RETRY); if (r < 0) return r; switch (r) { case RET_PF_FIXED: case RET_PF_SPURIOUS: case RET_PF_WRITE_PROTECTED: return 0; case RET_PF_EMULATE: return -ENOENT; case RET_PF_RETRY: case RET_PF_CONTINUE: case RET_PF_INVALID: default: WARN_ONCE(1, "could not fix page fault during prefault"); return -EIO; } } long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, struct kvm_pre_fault_memory *range) { u64 error_code = PFERR_GUEST_FINAL_MASK; u8 level = PG_LEVEL_4K; u64 end; int r; if (!vcpu->kvm->arch.pre_fault_allowed) return -EOPNOTSUPP; /* * reload is efficient when called repeatedly, so we can do it on * every iteration. */ r = kvm_mmu_reload(vcpu); if (r) return r; if (kvm_arch_has_private_mem(vcpu->kvm) && kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa))) error_code |= PFERR_PRIVATE_ACCESS; /* * Shadow paging uses GVA for kvm page fault, so restrict to * two-dimensional paging. */ r = kvm_tdp_map_page(vcpu, range->gpa, error_code, &level); if (r < 0) return r; /* * If the mapping that covers range->gpa can use a huge page, it * may start below it or end after range->gpa + range->size. 
*/ end = (range->gpa & KVM_HPAGE_MASK(level)) + KVM_HPAGE_SIZE(level); return min(range->size, end - range->gpa); } static void nonpaging_init_context(struct kvm_mmu *context) { context->page_fault = nonpaging_page_fault; context->gva_to_gpa = nonpaging_gva_to_gpa; context->sync_spte = NULL; } static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd, union kvm_mmu_page_role role) { struct kvm_mmu_page *sp; if (!VALID_PAGE(root->hpa)) return false; if (!role.direct && pgd != root->pgd) return false; sp = root_to_sp(root->hpa); if (WARN_ON_ONCE(!sp)) return false; return role.word == sp->role.word; } /* * Find out if a previously cached root matching the new pgd/role is available, * and insert the current root as the MRU in the cache. * If a matching root is found, it is assigned to kvm_mmu->root and * true is returned. * If no match is found, kvm_mmu->root is left invalid, the LRU root is * evicted to make room for the current root, and false is returned. */ static bool cached_root_find_and_keep_current(struct kvm *kvm, struct kvm_mmu *mmu, gpa_t new_pgd, union kvm_mmu_page_role new_role) { uint i; if (is_root_usable(&mmu->root, new_pgd, new_role)) return true; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { /* * The swaps end up rotating the cache like this: * C 0 1 2 3 (on entry to the function) * 0 C 1 2 3 * 1 C 0 2 3 * 2 C 0 1 3 * 3 C 0 1 2 (on exit from the loop) */ swap(mmu->root, mmu->prev_roots[i]); if (is_root_usable(&mmu->root, new_pgd, new_role)) return true; } kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT); return false; } /* * Find out if a previously cached root matching the new pgd/role is available. * On entry, mmu->root is invalid. * If a matching root is found, it is assigned to kvm_mmu->root, the LRU entry * of the cache becomes invalid, and true is returned. * If no match is found, kvm_mmu->root is left invalid and false is returned. */ static bool cached_root_find_without_current(struct kvm *kvm, struct kvm_mmu *mmu, gpa_t new_pgd, union kvm_mmu_page_role new_role) { uint i; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) if (is_root_usable(&mmu->prev_roots[i], new_pgd, new_role)) goto hit; return false; hit: swap(mmu->root, mmu->prev_roots[i]); /* Bubble up the remaining roots. */ for (; i < KVM_MMU_NUM_PREV_ROOTS - 1; i++) mmu->prev_roots[i] = mmu->prev_roots[i + 1]; mmu->prev_roots[i].hpa = INVALID_PAGE; return true; } static bool fast_pgd_switch(struct kvm *kvm, struct kvm_mmu *mmu, gpa_t new_pgd, union kvm_mmu_page_role new_role) { /* * Limit reuse to 64-bit hosts+VMs without "special" roots in order to * avoid having to deal with PDPTEs and other complexities. */ if (VALID_PAGE(mmu->root.hpa) && !root_to_sp(mmu->root.hpa)) kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT); if (VALID_PAGE(mmu->root.hpa)) return cached_root_find_and_keep_current(kvm, mmu, new_pgd, new_role); else return cached_root_find_without_current(kvm, mmu, new_pgd, new_role); } void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd) { struct kvm_mmu *mmu = vcpu->arch.mmu; union kvm_mmu_page_role new_role = mmu->root_role; /* * Return immediately if no usable root was found, kvm_mmu_reload() * will establish a valid root prior to the next VM-Enter. */ if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role)) return; /* * It's possible that the cached previous root page is obsolete because * of a change in the MMU generation number. 
However, changing the * generation number is accompanied by KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, * which will free the root set here and allocate a new one. */ kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu); if (force_flush_and_sync_on_reuse) { kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } /* * The last MMIO access's GVA and GPA are cached in the VCPU. When * switching to a new CR3, that GVA->GPA mapping may no longer be * valid. So clear any cached MMIO info even when we don't need to sync * the shadow page tables. */ vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); /* * If this is a direct root page, it doesn't have a write flooding * count. Otherwise, clear the write flooding count. */ if (!new_role.direct) { struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa); if (!WARN_ON_ONCE(!sp)) __clear_sp_write_flooding_count(sp); } } EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd); static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, unsigned int access) { if (unlikely(is_mmio_spte(vcpu->kvm, *sptep))) { if (gfn != get_mmio_spte_gfn(*sptep)) { mmu_spte_clear_no_track(sptep); return true; } mark_mmio_spte(vcpu, sptep, gfn, access); return true; } return false; } #define PTTYPE_EPT 18 /* arbitrary */ #define PTTYPE PTTYPE_EPT #include "paging_tmpl.h" #undef PTTYPE #define PTTYPE 64 #include "paging_tmpl.h" #undef PTTYPE #define PTTYPE 32 #include "paging_tmpl.h" #undef PTTYPE static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check, u64 pa_bits_rsvd, int level, bool nx, bool gbpages, bool pse, bool amd) { u64 gbpages_bit_rsvd = 0; u64 nonleaf_bit8_rsvd = 0; u64 high_bits_rsvd; rsvd_check->bad_mt_xwr = 0; if (!gbpages) gbpages_bit_rsvd = rsvd_bits(7, 7); if (level == PT32E_ROOT_LEVEL) high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 62); else high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 51); /* Note, NX doesn't exist in PDPTEs, this is handled below. */ if (!nx) high_bits_rsvd |= rsvd_bits(63, 63); /* * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for * leaf entries) on AMD CPUs only. 
*/ if (amd) nonleaf_bit8_rsvd = rsvd_bits(8, 8); switch (level) { case PT32_ROOT_LEVEL: /* no rsvd bits for 2 level 4K page table entries */ rsvd_check->rsvd_bits_mask[0][1] = 0; rsvd_check->rsvd_bits_mask[0][0] = 0; rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; if (!pse) { rsvd_check->rsvd_bits_mask[1][1] = 0; break; } if (is_cpuid_PSE36()) /* 36bits PSE 4MB page */ rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); else /* 32 bits PSE 4MB page */ rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); break; case PT32E_ROOT_LEVEL: rsvd_check->rsvd_bits_mask[0][2] = rsvd_bits(63, 63) | high_bits_rsvd | rsvd_bits(5, 8) | rsvd_bits(1, 2); /* PDPTE */ rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd; /* PDE */ rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd; /* PTE */ rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(13, 20); /* large page */ rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; break; case PT64_ROOT_5LEVEL: rsvd_check->rsvd_bits_mask[0][4] = high_bits_rsvd | nonleaf_bit8_rsvd | rsvd_bits(7, 7); rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; fallthrough; case PT64_ROOT_4LEVEL: rsvd_check->rsvd_bits_mask[0][3] = high_bits_rsvd | nonleaf_bit8_rsvd | rsvd_bits(7, 7); rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd | gbpages_bit_rsvd; rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd; rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd; rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd | gbpages_bit_rsvd | rsvd_bits(13, 29); rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(13, 20); /* large page */ rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; break; } } static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { __reset_rsvds_bits_mask(&context->guest_rsvd_check, vcpu->arch.reserved_gpa_bits, context->cpu_role.base.level, is_efer_nx(context), guest_can_use(vcpu, X86_FEATURE_GBPAGES), is_cr4_pse(context), guest_cpuid_is_amd_compatible(vcpu)); } static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, u64 pa_bits_rsvd, bool execonly, int huge_page_level) { u64 high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 51); u64 large_1g_rsvd = 0, large_2m_rsvd = 0; u64 bad_mt_xwr; if (huge_page_level < PG_LEVEL_1G) large_1g_rsvd = rsvd_bits(7, 7); if (huge_page_level < PG_LEVEL_2M) large_2m_rsvd = rsvd_bits(7, 7); rsvd_check->rsvd_bits_mask[0][4] = high_bits_rsvd | rsvd_bits(3, 7); rsvd_check->rsvd_bits_mask[0][3] = high_bits_rsvd | rsvd_bits(3, 7); rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd | rsvd_bits(3, 6) | large_1g_rsvd; rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd | rsvd_bits(3, 6) | large_2m_rsvd; rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd; /* large page */ rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd | rsvd_bits(12, 29) | large_1g_rsvd; rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(12, 20) | large_2m_rsvd; rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; bad_mt_xwr = 0xFFull << (2 * 8); /* bits 3..5 must not be 2 */ bad_mt_xwr |= 0xFFull << (3 * 8); /* bits 3..5 must not be 3 */ bad_mt_xwr |= 0xFFull << (7 * 8); /* bits 3..5 must not be 7 */ bad_mt_xwr |= REPEAT_BYTE(1ull << 2); /* bits 0..2 must not be 010 */ bad_mt_xwr |= REPEAT_BYTE(1ull << 6); /* bits 0..2 
must not be 110 */ if (!execonly) { /* bits 0..2 must not be 100 unless VMX capabilities allow it */ bad_mt_xwr |= REPEAT_BYTE(1ull << 4); } rsvd_check->bad_mt_xwr = bad_mt_xwr; } static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, struct kvm_mmu *context, bool execonly, int huge_page_level) { __reset_rsvds_bits_mask_ept(&context->guest_rsvd_check, vcpu->arch.reserved_gpa_bits, execonly, huge_page_level); } static inline u64 reserved_hpa_bits(void) { return rsvd_bits(kvm_host.maxphyaddr, 63); } /* * the page table on host is the shadow page table for the page * table in guest or amd nested guest, its mmu features completely * follow the features in guest. */ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { /* @amd adds a check on bit of SPTEs, which KVM shouldn't use anyways. */ bool is_amd = true; /* KVM doesn't use 2-level page tables for the shadow MMU. */ bool is_pse = false; struct rsvd_bits_validate *shadow_zero_check; int i; WARN_ON_ONCE(context->root_role.level < PT32E_ROOT_LEVEL); shadow_zero_check = &context->shadow_zero_check; __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(), context->root_role.level, context->root_role.efer_nx, guest_can_use(vcpu, X86_FEATURE_GBPAGES), is_pse, is_amd); if (!shadow_me_mask) return; for (i = context->root_role.level; --i >= 0;) { /* * So far shadow_me_value is a constant during KVM's life * time. Bits in shadow_me_value are allowed to be set. * Bits in shadow_me_mask but not in shadow_me_value are * not allowed to be set. */ shadow_zero_check->rsvd_bits_mask[0][i] |= shadow_me_mask; shadow_zero_check->rsvd_bits_mask[1][i] |= shadow_me_mask; shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_value; shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_value; } } static inline bool boot_cpu_is_amd(void) { WARN_ON_ONCE(!tdp_enabled); return shadow_x_mask == 0; } /* * the direct page table on host, use as much mmu features as * possible, however, kvm currently does not do execution-protection. */ static void reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context) { struct rsvd_bits_validate *shadow_zero_check; int i; shadow_zero_check = &context->shadow_zero_check; if (boot_cpu_is_amd()) __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(), context->root_role.level, true, boot_cpu_has(X86_FEATURE_GBPAGES), false, true); else __reset_rsvds_bits_mask_ept(shadow_zero_check, reserved_hpa_bits(), false, max_huge_page_level); if (!shadow_me_mask) return; for (i = context->root_role.level; --i >= 0;) { shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; } } /* * as the comments in reset_shadow_zero_bits_mask() except it * is the shadow page table for intel nested guest. */ static void reset_ept_shadow_zero_bits_mask(struct kvm_mmu *context, bool execonly) { __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, reserved_hpa_bits(), execonly, max_huge_page_level); } #define BYTE_MASK(access) \ ((1 & (access) ? 2 : 0) | \ (2 & (access) ? 4 : 0) | \ (3 & (access) ? 8 : 0) | \ (4 & (access) ? 16 : 0) | \ (5 & (access) ? 32 : 0) | \ (6 & (access) ? 64 : 0) | \ (7 & (access) ? 
128 : 0)) static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept) { unsigned byte; const u8 x = BYTE_MASK(ACC_EXEC_MASK); const u8 w = BYTE_MASK(ACC_WRITE_MASK); const u8 u = BYTE_MASK(ACC_USER_MASK); bool cr4_smep = is_cr4_smep(mmu); bool cr4_smap = is_cr4_smap(mmu); bool cr0_wp = is_cr0_wp(mmu); bool efer_nx = is_efer_nx(mmu); for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { unsigned pfec = byte << 1; /* * Each "*f" variable has a 1 bit for each UWX value * that causes a fault with the given PFEC. */ /* Faults from writes to non-writable pages */ u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0; /* Faults from user mode accesses to supervisor pages */ u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0; /* Faults from fetches of non-executable pages*/ u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0; /* Faults from kernel mode fetches of user pages */ u8 smepf = 0; /* Faults from kernel mode accesses of user pages */ u8 smapf = 0; if (!ept) { /* Faults from kernel mode accesses to user pages */ u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u; /* Not really needed: !nx will cause pte.nx to fault */ if (!efer_nx) ff = 0; /* Allow supervisor writes if !cr0.wp */ if (!cr0_wp) wf = (pfec & PFERR_USER_MASK) ? wf : 0; /* Disallow supervisor fetches of user code if cr4.smep */ if (cr4_smep) smepf = (pfec & PFERR_FETCH_MASK) ? kf : 0; /* * SMAP:kernel-mode data accesses from user-mode * mappings should fault. A fault is considered * as a SMAP violation if all of the following * conditions are true: * - X86_CR4_SMAP is set in CR4 * - A user page is accessed * - The access is not a fetch * - The access is supervisor mode * - If implicit supervisor access or X86_EFLAGS_AC is clear * * Here, we cover the first four conditions. * The fifth is computed dynamically in permission_fault(); * PFERR_RSVD_MASK bit will be set in PFEC if the access is * *not* subject to SMAP restrictions. */ if (cr4_smap) smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf; } mmu->permissions[byte] = ff | uf | wf | smepf | smapf; } } /* * PKU is an additional mechanism by which the paging controls access to * user-mode addresses based on the value in the PKRU register. Protection * key violations are reported through a bit in the page fault error code. * Unlike other bits of the error code, the PK bit is not known at the * call site of e.g. gva_to_gpa; it must be computed directly in * permission_fault based on two bits of PKRU, on some machine state (CR4, * CR0, EFER, CPL), and on other bits of the error code and the page tables. * * In particular the following conditions come from the error code, the * page tables and the machine state: * - PK is always zero unless CR4.PKE=1 and EFER.LMA=1 * - PK is always zero if RSVD=1 (reserved bit set) or F=1 (instruction fetch) * - PK is always zero if U=0 in the page tables * - PKRU.WD is ignored if CR0.WP=0 and the access is a supervisor access. * * The PKRU bitmask caches the result of these four conditions. The error * code (minus the P bit) and the page table's U bit form an index into the * PKRU bitmask. Two bits of the PKRU bitmask are then extracted and ANDed * with the two bits of the PKRU register corresponding to the protection key. * For the first three conditions above the bits will be 00, thus masking * away both AD and WD. For all reads or if the last condition holds, WD * only will be masked away. 
*/ static void update_pkru_bitmask(struct kvm_mmu *mmu) { unsigned bit; bool wp; mmu->pkru_mask = 0; if (!is_cr4_pke(mmu)) return; wp = is_cr0_wp(mmu); for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) { unsigned pfec, pkey_bits; bool check_pkey, check_write, ff, uf, wf, pte_user; pfec = bit << 1; ff = pfec & PFERR_FETCH_MASK; uf = pfec & PFERR_USER_MASK; wf = pfec & PFERR_WRITE_MASK; /* PFEC.RSVD is replaced by ACC_USER_MASK. */ pte_user = pfec & PFERR_RSVD_MASK; /* * Only need to check the access which is not an * instruction fetch and is to a user page. */ check_pkey = (!ff && pte_user); /* * write access is controlled by PKRU if it is a * user access or CR0.WP = 1. */ check_write = check_pkey && wf && (uf || wp); /* PKRU.AD stops both read and write access. */ pkey_bits = !!check_pkey; /* PKRU.WD stops write access. */ pkey_bits |= (!!check_write) << 1; mmu->pkru_mask |= (pkey_bits & 3) << pfec; } } static void reset_guest_paging_metadata(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { if (!is_cr0_pg(mmu)) return; reset_guest_rsvds_bits_mask(vcpu, mmu); update_permission_bitmask(mmu, false); update_pkru_bitmask(mmu); } static void paging64_init_context(struct kvm_mmu *context) { context->page_fault = paging64_page_fault; context->gva_to_gpa = paging64_gva_to_gpa; context->sync_spte = paging64_sync_spte; } static void paging32_init_context(struct kvm_mmu *context) { context->page_fault = paging32_page_fault; context->gva_to_gpa = paging32_gva_to_gpa; context->sync_spte = paging32_sync_spte; } static union kvm_cpu_role kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs) { union kvm_cpu_role role = {0}; role.base.access = ACC_ALL; role.base.smm = is_smm(vcpu); role.base.guest_mode = is_guest_mode(vcpu); role.ext.valid = 1; if (!____is_cr0_pg(regs)) { role.base.direct = 1; return role; } role.base.efer_nx = ____is_efer_nx(regs); role.base.cr0_wp = ____is_cr0_wp(regs); role.base.smep_andnot_wp = ____is_cr4_smep(regs) && !____is_cr0_wp(regs); role.base.smap_andnot_wp = ____is_cr4_smap(regs) && !____is_cr0_wp(regs); role.base.has_4_byte_gpte = !____is_cr4_pae(regs); if (____is_efer_lma(regs)) role.base.level = ____is_cr4_la57(regs) ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; else if (____is_cr4_pae(regs)) role.base.level = PT32E_ROOT_LEVEL; else role.base.level = PT32_ROOT_LEVEL; role.ext.cr4_smep = ____is_cr4_smep(regs); role.ext.cr4_smap = ____is_cr4_smap(regs); role.ext.cr4_pse = ____is_cr4_pse(regs); /* PKEY and LA57 are active iff long mode is active. */ role.ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs); role.ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs); role.ext.efer_lma = ____is_efer_lma(regs); return role; } void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { const bool cr0_wp = kvm_is_cr0_bit_set(vcpu, X86_CR0_WP); BUILD_BUG_ON((KVM_MMU_CR0_ROLE_BITS & KVM_POSSIBLE_CR0_GUEST_BITS) != X86_CR0_WP); BUILD_BUG_ON((KVM_MMU_CR4_ROLE_BITS & KVM_POSSIBLE_CR4_GUEST_BITS)); if (is_cr0_wp(mmu) == cr0_wp) return; mmu->cpu_role.base.cr0_wp = cr0_wp; reset_guest_paging_metadata(vcpu, mmu); } static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu) { /* tdp_root_level is architecture forced level, use it if nonzero */ if (tdp_root_level) return tdp_root_level; /* Use 5-level TDP if and only if it's useful/necessary. */ if (max_tdp_level == 5 && cpuid_maxphyaddr(vcpu) <= 48) return 4; return max_tdp_level; } u8 kvm_mmu_get_max_tdp_level(void) { return tdp_root_level ? 
tdp_root_level : max_tdp_level; } static union kvm_mmu_page_role kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role) { union kvm_mmu_page_role role = {0}; role.access = ACC_ALL; role.cr0_wp = true; role.efer_nx = true; role.smm = cpu_role.base.smm; role.guest_mode = cpu_role.base.guest_mode; role.ad_disabled = !kvm_ad_enabled(); role.level = kvm_mmu_get_tdp_level(vcpu); role.direct = true; role.has_4_byte_gpte = false; return role; } static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role) { struct kvm_mmu *context = &vcpu->arch.root_mmu; union kvm_mmu_page_role root_role = kvm_calc_tdp_mmu_root_page_role(vcpu, cpu_role); if (cpu_role.as_u64 == context->cpu_role.as_u64 && root_role.word == context->root_role.word) return; context->cpu_role.as_u64 = cpu_role.as_u64; context->root_role.word = root_role.word; context->page_fault = kvm_tdp_page_fault; context->sync_spte = NULL; context->get_guest_pgd = get_guest_cr3; context->get_pdptr = kvm_pdptr_read; context->inject_page_fault = kvm_inject_page_fault; if (!is_cr0_pg(context)) context->gva_to_gpa = nonpaging_gva_to_gpa; else if (is_cr4_pae(context)) context->gva_to_gpa = paging64_gva_to_gpa; else context->gva_to_gpa = paging32_gva_to_gpa; reset_guest_paging_metadata(vcpu, context); reset_tdp_shadow_zero_bits_mask(context); } static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *context, union kvm_cpu_role cpu_role, union kvm_mmu_page_role root_role) { if (cpu_role.as_u64 == context->cpu_role.as_u64 && root_role.word == context->root_role.word) return; context->cpu_role.as_u64 = cpu_role.as_u64; context->root_role.word = root_role.word; if (!is_cr0_pg(context)) nonpaging_init_context(context); else if (is_cr4_pae(context)) paging64_init_context(context); else paging32_init_context(context); reset_guest_paging_metadata(vcpu, context); reset_shadow_zero_bits_mask(vcpu, context); } static void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role) { struct kvm_mmu *context = &vcpu->arch.root_mmu; union kvm_mmu_page_role root_role; root_role = cpu_role.base; /* KVM uses PAE paging whenever the guest isn't using 64-bit paging. */ root_role.level = max_t(u32, root_role.level, PT32E_ROOT_LEVEL); /* * KVM forces EFER.NX=1 when TDP is disabled, reflect it in the MMU role. * KVM uses NX when TDP is disabled to handle a variety of scenarios, * notably for huge SPTEs if iTLB multi-hit mitigation is enabled and * to generate correct permissions for CR0.WP=0/CR4.SMEP=1/EFER.NX=0. * The iTLB multi-hit workaround can be toggled at any time, so assume * NX can be used by any non-nested shadow MMU to avoid having to reset * MMU contexts. */ root_role.efer_nx = true; shadow_mmu_init_context(vcpu, context, cpu_role, root_role); } void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0, unsigned long cr4, u64 efer, gpa_t nested_cr3) { struct kvm_mmu *context = &vcpu->arch.guest_mmu; struct kvm_mmu_role_regs regs = { .cr0 = cr0, .cr4 = cr4 & ~X86_CR4_PKE, .efer = efer, }; union kvm_cpu_role cpu_role = kvm_calc_cpu_role(vcpu, &regs); union kvm_mmu_page_role root_role; /* NPT requires CR0.PG=1. 
*/ WARN_ON_ONCE(cpu_role.base.direct); root_role = cpu_role.base; root_role.level = kvm_mmu_get_tdp_level(vcpu); if (root_role.level == PT64_ROOT_5LEVEL && cpu_role.base.level == PT64_ROOT_4LEVEL) root_role.passthrough = 1; shadow_mmu_init_context(vcpu, context, cpu_role, root_role); kvm_mmu_new_pgd(vcpu, nested_cr3); } EXPORT_SYMBOL_GPL(kvm_init_shadow_npt_mmu); static union kvm_cpu_role kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, bool execonly, u8 level) { union kvm_cpu_role role = {0}; /* * KVM does not support SMM transfer monitors, and consequently does not * support the "entry to SMM" control either. role.base.smm is always 0. */ WARN_ON_ONCE(is_smm(vcpu)); role.base.level = level; role.base.has_4_byte_gpte = false; role.base.direct = false; role.base.ad_disabled = !accessed_dirty; role.base.guest_mode = true; role.base.access = ACC_ALL; role.ext.word = 0; role.ext.execonly = execonly; role.ext.valid = 1; return role; } void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, int huge_page_level, bool accessed_dirty, gpa_t new_eptp) { struct kvm_mmu *context = &vcpu->arch.guest_mmu; u8 level = vmx_eptp_page_walk_level(new_eptp); union kvm_cpu_role new_mode = kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty, execonly, level); if (new_mode.as_u64 != context->cpu_role.as_u64) { /* EPT, and thus nested EPT, does not consume CR0, CR4, nor EFER. */ context->cpu_role.as_u64 = new_mode.as_u64; context->root_role.word = new_mode.base.word; context->page_fault = ept_page_fault; context->gva_to_gpa = ept_gva_to_gpa; context->sync_spte = ept_sync_spte; update_permission_bitmask(context, true); context->pkru_mask = 0; reset_rsvds_bits_mask_ept(vcpu, context, execonly, huge_page_level); reset_ept_shadow_zero_bits_mask(context, execonly); } kvm_mmu_new_pgd(vcpu, new_eptp); } EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); static void init_kvm_softmmu(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role) { struct kvm_mmu *context = &vcpu->arch.root_mmu; kvm_init_shadow_mmu(vcpu, cpu_role); context->get_guest_pgd = get_guest_cr3; context->get_pdptr = kvm_pdptr_read; context->inject_page_fault = kvm_inject_page_fault; } static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu, union kvm_cpu_role new_mode) { struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; if (new_mode.as_u64 == g_context->cpu_role.as_u64) return; g_context->cpu_role.as_u64 = new_mode.as_u64; g_context->get_guest_pgd = get_guest_cr3; g_context->get_pdptr = kvm_pdptr_read; g_context->inject_page_fault = kvm_inject_page_fault; /* * L2 page tables are never shadowed, so there is no need to sync * SPTEs. */ g_context->sync_spte = NULL; /* * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using * L1's nested page tables (e.g. EPT12). The nested translation * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using * L2's page tables as the first level of translation and L1's * nested page tables as the second level of translation. Basically * the gva_to_gpa functions between mmu and nested_mmu are swapped. 
*/ if (!is_paging(vcpu)) g_context->gva_to_gpa = nonpaging_gva_to_gpa; else if (is_long_mode(vcpu)) g_context->gva_to_gpa = paging64_gva_to_gpa; else if (is_pae(vcpu)) g_context->gva_to_gpa = paging64_gva_to_gpa; else g_context->gva_to_gpa = paging32_gva_to_gpa; reset_guest_paging_metadata(vcpu, g_context); } void kvm_init_mmu(struct kvm_vcpu *vcpu) { struct kvm_mmu_role_regs regs = vcpu_to_role_regs(vcpu); union kvm_cpu_role cpu_role = kvm_calc_cpu_role(vcpu, &regs); if (mmu_is_nested(vcpu)) init_kvm_nested_mmu(vcpu, cpu_role); else if (tdp_enabled) init_kvm_tdp_mmu(vcpu, cpu_role); else init_kvm_softmmu(vcpu, cpu_role); } EXPORT_SYMBOL_GPL(kvm_init_mmu); void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu) { /* * Invalidate all MMU roles to force them to reinitialize as CPUID * information is factored into reserved bit calculations. * * Correctly handling multiple vCPU models with respect to paging and * physical address properties) in a single VM would require tracking * all relevant CPUID information in kvm_mmu_page_role. That is very * undesirable as it would increase the memory requirements for * gfn_write_track (see struct kvm_mmu_page_role comments). For now * that problem is swept under the rug; KVM's CPUID API is horrific and * it's all but impossible to solve it without introducing a new API. */ vcpu->arch.root_mmu.root_role.invalid = 1; vcpu->arch.guest_mmu.root_role.invalid = 1; vcpu->arch.nested_mmu.root_role.invalid = 1; vcpu->arch.root_mmu.cpu_role.ext.valid = 0; vcpu->arch.guest_mmu.cpu_role.ext.valid = 0; vcpu->arch.nested_mmu.cpu_role.ext.valid = 0; kvm_mmu_reset_context(vcpu); /* * Changing guest CPUID after KVM_RUN is forbidden, see the comment in * kvm_arch_vcpu_ioctl(). */ KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm); } void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) { kvm_mmu_unload(vcpu); kvm_init_mmu(vcpu); } EXPORT_SYMBOL_GPL(kvm_mmu_reset_context); int kvm_mmu_load(struct kvm_vcpu *vcpu) { int r; r = mmu_topup_memory_caches(vcpu, !vcpu->arch.mmu->root_role.direct); if (r) goto out; r = mmu_alloc_special_roots(vcpu); if (r) goto out; if (vcpu->arch.mmu->root_role.direct) r = mmu_alloc_direct_roots(vcpu); else r = mmu_alloc_shadow_roots(vcpu); if (r) goto out; kvm_mmu_sync_roots(vcpu); kvm_mmu_load_pgd(vcpu); /* * Flush any TLB entries for the new root, the provenance of the root * is unknown. Even if KVM ensures there are no stale TLB entries * for a freed root, in theory another hypervisor could have left * stale entries. Flushing on alloc also allows KVM to skip the TLB * flush when freeing a root (see kvm_tdp_mmu_put_root()). */ kvm_x86_call(flush_tlb_current)(vcpu); out: return r; } void kvm_mmu_unload(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; kvm_mmu_free_roots(kvm, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL); WARN_ON_ONCE(VALID_PAGE(vcpu->arch.root_mmu.root.hpa)); kvm_mmu_free_roots(kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); WARN_ON_ONCE(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa)); vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); } static bool is_obsolete_root(struct kvm *kvm, hpa_t root_hpa) { struct kvm_mmu_page *sp; if (!VALID_PAGE(root_hpa)) return false; /* * When freeing obsolete roots, treat roots as obsolete if they don't * have an associated shadow page, as it's impossible to determine if * such roots are fresh or stale. 
This does mean KVM will get false * positives and free roots that don't strictly need to be freed, but * such false positives are relatively rare: * * (a) only PAE paging and nested NPT have roots without shadow pages * (or any shadow paging flavor with a dummy root, see note below) * (b) remote reloads due to a memslot update obsolete _all_ roots * (c) KVM doesn't track previous roots for PAE paging, and the guest * is unlikely to zap an in-use PGD. * * Note! Dummy roots are unique in that they are obsoleted by memslot * _creation_! See also FNAME(fetch). */ sp = root_to_sp(root_hpa); return !sp || is_obsolete_sp(kvm, sp); } static void __kvm_mmu_free_obsolete_roots(struct kvm *kvm, struct kvm_mmu *mmu) { unsigned long roots_to_free = 0; int i; if (is_obsolete_root(kvm, mmu->root.hpa)) roots_to_free |= KVM_MMU_ROOT_CURRENT; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { if (is_obsolete_root(kvm, mmu->prev_roots[i].hpa)) roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); } if (roots_to_free) kvm_mmu_free_roots(kvm, mmu, roots_to_free); } void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu) { __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.root_mmu); __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu); } static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, int *bytes) { u64 gentry = 0; int r; /* * Assume that the pte write is on a page table of the same type * as the current vcpu paging mode, since we update the sptes only * when they have the same mode. */ if (is_pae(vcpu) && *bytes == 4) { /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ *gpa &= ~(gpa_t)7; *bytes = 8; } if (*bytes == 4 || *bytes == 8) { r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes); if (r) gentry = 0; } return gentry; } /* * If we're seeing too many writes to a page, it may no longer be a page table, * or we may be forking, in which case it is better to unmap the page. */ static bool detect_write_flooding(struct kvm_mmu_page *sp) { /* * Skip write-flooding detection for the sp whose level is 1, because * it can become unsync, and then the guest page is not write-protected. */ if (sp->role.level == PG_LEVEL_4K) return false; atomic_inc(&sp->write_flooding_count); return atomic_read(&sp->write_flooding_count) >= 3; } /* * Misaligned accesses are too much trouble to fix up; also, they usually * indicate a page is not used as a page table. */ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa, int bytes) { unsigned offset, pte_size, misaligned; offset = offset_in_page(gpa); pte_size = sp->role.has_4_byte_gpte ? 4 : 8; /* * Sometimes, the OS only writes the last byte to update status * bits; for example, in Linux, the andb instruction is used in clear_bit(). */ if (!(offset & (pte_size - 1)) && bytes == 1) return false; misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); misaligned |= bytes < 4; return misaligned; } static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) { unsigned page_offset, quadrant; u64 *spte; int level; page_offset = offset_in_page(gpa); level = sp->role.level; *nspte = 1; if (sp->role.has_4_byte_gpte) { page_offset <<= 1; /* 32->64 */ /* * A 32-bit pde maps 4MB while the shadow pdes map * only 2MB. So we need to double the offset again * and zap two pdes instead of one.
*/ if (level == PT32_ROOT_LEVEL) { page_offset &= ~7; /* kill rounding error */ page_offset <<= 1; *nspte = 2; } quadrant = page_offset >> PAGE_SHIFT; page_offset &= ~PAGE_MASK; if (quadrant != sp->role.quadrant) return NULL; } spte = &sp->spt[page_offset / sizeof(*spte)]; return spte; } void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *sp; LIST_HEAD(invalid_list); u64 entry, gentry, *spte; int npte; bool flush = false; /* * When emulating guest writes, ensure the written value is visible to * any task that is handling page faults before checking whether or not * KVM is shadowing a guest PTE. This ensures either KVM will create * the correct SPTE in the page fault handler, or this task will see * a non-zero indirect_shadow_pages. Pairs with the smp_mb() in * account_shadowed(). */ smp_mb(); if (!vcpu->kvm->arch.indirect_shadow_pages) return; write_lock(&vcpu->kvm->mmu_lock); gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes); ++vcpu->kvm->stat.mmu_pte_write; for_each_gfn_valid_sp_with_gptes(vcpu->kvm, sp, gfn) { if (detect_write_misaligned(sp, gpa, bytes) || detect_write_flooding(sp)) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); ++vcpu->kvm->stat.mmu_flooded; continue; } spte = get_written_sptes(sp, gpa, &npte); if (!spte) continue; while (npte--) { entry = *spte; mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL); if (gentry && sp->role.level != PG_LEVEL_4K) ++vcpu->kvm->stat.mmu_pde_zapped; if (is_shadow_present_pte(entry)) flush = true; ++spte; } } kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, flush); write_unlock(&vcpu->kvm->mmu_lock); } static bool is_write_to_guest_page_table(u64 error_code) { const u64 mask = PFERR_GUEST_PAGE_MASK | PFERR_WRITE_MASK | PFERR_PRESENT_MASK; return (error_code & mask) == mask; } static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, int *emulation_type) { bool direct = vcpu->arch.mmu->root_role.direct; /* * Do not try to unprotect and retry if the vCPU re-faulted on the same * RIP with the same address that was previously unprotected, as doing * so will likely put the vCPU into an infinite loop. E.g. if the vCPU uses * a non-page-table modifying instruction on the PDE that points to the * instruction, then unprotecting the gfn will unmap the instruction's * code, i.e. make it impossible for the instruction to ever complete. */ if (vcpu->arch.last_retry_eip == kvm_rip_read(vcpu) && vcpu->arch.last_retry_addr == cr2_or_gpa) return RET_PF_EMULATE; /* * Reset the unprotect+retry values that guard against infinite loops. * The values will be refreshed if KVM explicitly unprotects a gfn and * retries, in all other cases it's safe to retry in the future even if * the next page fault happens on the same RIP+address. */ vcpu->arch.last_retry_eip = 0; vcpu->arch.last_retry_addr = 0; /* * It should be impossible to reach this point with an MMIO cache hit, * as RET_PF_WRITE_PROTECTED is returned if and only if there's a valid, * writable memslot, and creating a memslot should invalidate the MMIO * cache by way of changing the memslot generation. WARN and disallow * retry if MMIO is detected, as retrying MMIO emulation is pointless * and could put the vCPU into an infinite loop because the processor * will keep faulting on the non-existent MMIO address.
*/ if (WARN_ON_ONCE(mmio_info_in_cache(vcpu, cr2_or_gpa, direct))) return RET_PF_EMULATE; /* * Before emulating the instruction, check to see if the access was due * to a read-only violation while the CPU was walking non-nested NPT * page tables, i.e. for a direct MMU, for _guest_ page tables in L1. * If L1 is sharing (a subset of) its page tables with L2, e.g. by * having nCR3 share lower level page tables with hCR3, then when KVM * (L0) write-protects the nested NPTs, i.e. npt12 entries, KVM is also * unknowingly write-protecting L1's guest page tables, which KVM isn't * shadowing. * * Because the CPU (by default) walks NPT page tables using a write * access (to ensure the CPU can do A/D updates), page walks in L1 can * trigger write faults for the above case even when L1 isn't modifying * PTEs. As a result, KVM will unnecessarily emulate (or at least, try * to emulate) an excessive number of L1 instructions; because L1's MMU * isn't shadowed by KVM, there is no need to write-protect L1's gPTEs * and thus no need to emulate in order to guarantee forward progress. * * Try to unprotect the gfn, i.e. zap any shadow pages, so that L1 can * proceed without triggering emulation. If one or more shadow pages * was zapped, skip emulation and resume L1 to let it natively execute * the instruction. If no shadow pages were zapped, then the write- * fault is due to something else entirely, i.e. KVM needs to emulate, * as resuming the guest will put it into an infinite loop. * * Note, this code also applies to Intel CPUs, even though it is *very* * unlikely that an L1 will share its page tables (IA32/PAE/paging64 * format) with L2's page tables (EPT format). * * For indirect MMUs, i.e. if KVM is shadowing the current MMU, try to * unprotect the gfn and retry if an event is awaiting reinjection. If * KVM emulates multiple instructions before completing event injection, * the event could be delayed beyond what is architecturally allowed, * e.g. KVM could inject an IRQ after the TPR has been raised. */ if (((direct && is_write_to_guest_page_table(error_code)) || (!direct && kvm_event_needs_reinjection(vcpu))) && kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa)) return RET_PF_RETRY; /* * The gfn is write-protected, but if KVM detects it's emulating an * instruction that is unlikely to be used to modify page tables, or if * emulation fails, KVM can try to unprotect the gfn and let the CPU * re-execute the instruction that caused the page fault. Do not allow * retrying an instruction from a nested guest as KVM is only explicitly * shadowing L1's page tables, i.e. unprotecting something for L1 isn't * going to magically fix whatever issue caused L2 to fail. */ if (!is_guest_mode(vcpu)) *emulation_type |= EMULTYPE_ALLOW_RETRY_PF; return RET_PF_EMULATE; } int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len) { int r, emulation_type = EMULTYPE_PF; bool direct = vcpu->arch.mmu->root_role.direct; if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) return RET_PF_RETRY; /* * Except for reserved faults (emulated MMIO is shared-only), set the * PFERR_PRIVATE_ACCESS flag for software-protected VMs based on the gfn's * current attributes, which are the source of truth for such VMs. Note, * this is wrong for nested MMUs as the GPA is an L2 GPA, but KVM doesn't * currently support nested virtualization (among many other things) * for software-protected VMs.
*/ if (IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && !(error_code & PFERR_RSVD_MASK) && vcpu->kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM && kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(cr2_or_gpa))) error_code |= PFERR_PRIVATE_ACCESS; r = RET_PF_INVALID; if (unlikely(error_code & PFERR_RSVD_MASK)) { if (WARN_ON_ONCE(error_code & PFERR_PRIVATE_ACCESS)) return -EFAULT; r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct); if (r == RET_PF_EMULATE) goto emulate; } if (r == RET_PF_INVALID) { vcpu->stat.pf_taken++; r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, error_code, false, &emulation_type, NULL); if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm)) return -EIO; } if (r < 0) return r; if (r == RET_PF_WRITE_PROTECTED) r = kvm_mmu_write_protect_fault(vcpu, cr2_or_gpa, error_code, &emulation_type); if (r == RET_PF_FIXED) vcpu->stat.pf_fixed++; else if (r == RET_PF_EMULATE) vcpu->stat.pf_emulate++; else if (r == RET_PF_SPURIOUS) vcpu->stat.pf_spurious++; if (r != RET_PF_EMULATE) return 1; emulate: return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, insn_len); } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg) { u64 sptes[PT64_ROOT_MAX_LEVEL + 1]; int root_level, leaf, level; leaf = get_sptes_lockless(vcpu, gpa, sptes, &root_level); if (unlikely(leaf < 0)) return; pr_err("%s %llx", msg, gpa); for (level = root_level; level >= leaf; level--) pr_cont(", spte[%d] = 0x%llx", level, sptes[level]); pr_cont("\n"); } EXPORT_SYMBOL_GPL(kvm_mmu_print_sptes); static void __kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, u64 addr, hpa_t root_hpa) { struct kvm_shadow_walk_iterator iterator; vcpu_clear_mmio_info(vcpu, addr); /* * Walking and synchronizing SPTEs both assume they are operating in * the context of the current MMU, and would need to be reworked if * this is ever used to sync the guest_mmu, e.g. to emulate INVEPT. */ if (WARN_ON_ONCE(mmu != vcpu->arch.mmu)) return; if (!VALID_PAGE(root_hpa)) return; write_lock(&vcpu->kvm->mmu_lock); for_each_shadow_entry_using_root(vcpu, root_hpa, addr, iterator) { struct kvm_mmu_page *sp = sptep_to_sp(iterator.sptep); if (sp->unsync) { int ret = kvm_sync_spte(vcpu, sp, iterator.index); if (ret < 0) mmu_page_zap_pte(vcpu->kvm, sp, iterator.sptep, NULL); if (ret) kvm_flush_remote_tlbs_sptep(vcpu->kvm, iterator.sptep); } if (!sp->unsync_children) break; } write_unlock(&vcpu->kvm->mmu_lock); } void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, u64 addr, unsigned long roots) { int i; WARN_ON_ONCE(roots & ~KVM_MMU_ROOTS_ALL); /* It's actually a GPA for vcpu->arch.guest_mmu. */ if (mmu != &vcpu->arch.guest_mmu) { /* INVLPG on a non-canonical address is a NOP according to the SDM. */ if (is_noncanonical_address(addr, vcpu)) return; kvm_x86_call(flush_tlb_gva)(vcpu, addr); } if (!mmu->sync_spte) return; if (roots & KVM_MMU_ROOT_CURRENT) __kvm_mmu_invalidate_addr(vcpu, mmu, addr, mmu->root.hpa); for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { if (roots & KVM_MMU_ROOT_PREVIOUS(i)) __kvm_mmu_invalidate_addr(vcpu, mmu, addr, mmu->prev_roots[i].hpa); } } EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_addr); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { /* * INVLPG is required to invalidate any global mappings for the VA, * irrespective of PCID. Blindly sync all roots as it would take * roughly the same amount of work/time to determine whether any of the * previous roots have a global mapping. 
* * Mappings not reachable via the current or previous cached roots will * be synced when switching to that new cr3, so nothing needs to be * done here for them. */ kvm_mmu_invalidate_addr(vcpu, vcpu->arch.walk_mmu, gva, KVM_MMU_ROOTS_ALL); ++vcpu->stat.invlpg; } EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) { struct kvm_mmu *mmu = vcpu->arch.mmu; unsigned long roots = 0; uint i; if (pcid == kvm_get_active_pcid(vcpu)) roots |= KVM_MMU_ROOT_CURRENT; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { if (VALID_PAGE(mmu->prev_roots[i].hpa) && pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) roots |= KVM_MMU_ROOT_PREVIOUS(i); } if (roots) kvm_mmu_invalidate_addr(vcpu, mmu, gva, roots); ++vcpu->stat.invlpg; /* * Mappings not reachable via the current cr3 or the prev_roots will be * synced when switching to that cr3, so nothing needs to be done here * for them. */ } void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, int tdp_max_root_level, int tdp_huge_page_level) { tdp_enabled = enable_tdp; tdp_root_level = tdp_forced_root_level; max_tdp_level = tdp_max_root_level; #ifdef CONFIG_X86_64 tdp_mmu_enabled = tdp_mmu_allowed && tdp_enabled; #endif /* * max_huge_page_level reflects KVM's MMU capabilities irrespective * of kernel support, e.g. KVM may be capable of using 1GB pages when * the kernel is not. But, KVM never creates a page size greater than * what is used by the kernel for any given HVA, i.e. the kernel's * capabilities are ultimately consulted by kvm_mmu_hugepage_adjust(). */ if (tdp_enabled) max_huge_page_level = tdp_huge_page_level; else if (boot_cpu_has(X86_FEATURE_GBPAGES)) max_huge_page_level = PG_LEVEL_1G; else max_huge_page_level = PG_LEVEL_2M; } EXPORT_SYMBOL_GPL(kvm_configure_mmu); static void free_mmu_pages(struct kvm_mmu *mmu) { if (!tdp_enabled && mmu->pae_root) set_memory_encrypted((unsigned long)mmu->pae_root, 1); free_page((unsigned long)mmu->pae_root); free_page((unsigned long)mmu->pml4_root); free_page((unsigned long)mmu->pml5_root); } static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { struct page *page; int i; mmu->root.hpa = INVALID_PAGE; mmu->root.pgd = 0; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; /* vcpu->arch.guest_mmu isn't used when !tdp_enabled. */ if (!tdp_enabled && mmu == &vcpu->arch.guest_mmu) return 0; /* * When using PAE paging, the four PDPTEs are treated as 'root' pages, * while the PDP table is a per-vCPU construct that's allocated at MMU * creation. When emulating 32-bit mode, cr3 is only 32 bits even on * x86_64. Therefore we need to allocate the PDP table in the first * 4GB of memory, which happens to fit the DMA32 zone. TDP paging * generally doesn't use PAE paging and can skip allocating the PDP * table. The main exception, handled here, is SVM's 32-bit NPT. The * other exception is for shadowing L1's 32-bit or PAE NPT on 64-bit * KVM; that horror is handled on-demand by mmu_alloc_special_roots(). */ if (tdp_enabled && kvm_mmu_get_tdp_level(vcpu) > PT32E_ROOT_LEVEL) return 0; page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32); if (!page) return -ENOMEM; mmu->pae_root = page_address(page); /* * CR3 is only 32 bits when PAE paging is used, thus it's impossible to * get the CPU to treat the PDPTEs as encrypted. Decrypt the page so * that KVM's writes and the CPU's reads get along. 
Note, this is * only necessary when using shadow paging, as 64-bit NPT can get at * the C-bit even when shadowing 32-bit NPT, and SME isn't supported * by 32-bit kernels (when KVM itself uses 32-bit NPT). */ if (!tdp_enabled) set_memory_decrypted((unsigned long)mmu->pae_root, 1); else WARN_ON_ONCE(shadow_me_value); for (i = 0; i < 4; ++i) mmu->pae_root[i] = INVALID_PAE_ROOT; return 0; } int kvm_mmu_create(struct kvm_vcpu *vcpu) { int ret; vcpu->arch.mmu_pte_list_desc_cache.kmem_cache = pte_list_desc_cache; vcpu->arch.mmu_pte_list_desc_cache.gfp_zero = __GFP_ZERO; vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache; vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO; vcpu->arch.mmu_shadow_page_cache.init_value = SHADOW_NONPRESENT_VALUE; if (!vcpu->arch.mmu_shadow_page_cache.init_value) vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO; vcpu->arch.mmu = &vcpu->arch.root_mmu; vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; ret = __kvm_mmu_create(vcpu, &vcpu->arch.guest_mmu); if (ret) return ret; ret = __kvm_mmu_create(vcpu, &vcpu->arch.root_mmu); if (ret) goto fail_allocate_root; return ret; fail_allocate_root: free_mmu_pages(&vcpu->arch.guest_mmu); return ret; } #define BATCH_ZAP_PAGES 10 static void kvm_zap_obsolete_pages(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; int nr_zapped, batch = 0; bool unstable; restart: list_for_each_entry_safe_reverse(sp, node, &kvm->arch.active_mmu_pages, link) { /* * No obsolete valid page exists before a newly created page * since active_mmu_pages is a FIFO list. */ if (!is_obsolete_sp(kvm, sp)) break; /* * Invalid pages should never land back on the list of active * pages. Skip the bogus page, otherwise we'll get stuck in an * infinite loop if the page gets put back on the list (again). */ if (WARN_ON_ONCE(sp->role.invalid)) continue; /* * No need to flush the TLB since we're only zapping shadow * pages with an obsolete generation number and all vCPUS have * loaded a new root, i.e. the shadow pages being zapped cannot * be in active use by the guest. */ if (batch >= BATCH_ZAP_PAGES && cond_resched_rwlock_write(&kvm->mmu_lock)) { batch = 0; goto restart; } unstable = __kvm_mmu_prepare_zap_page(kvm, sp, &kvm->arch.zapped_obsolete_pages, &nr_zapped); batch += nr_zapped; if (unstable) goto restart; } /* * Kick all vCPUs (via remote TLB flush) before freeing the page tables * to ensure KVM is not in the middle of a lockless shadow page table * walk, which may reference the pages. The remote TLB flush itself is * not required and is simply a convenient way to kick vCPUs as needed. * KVM performs a local TLB flush when allocating a new root (see * kvm_mmu_load()), and the reload in the caller ensure no vCPUs are * running with an obsolete MMU. */ kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages); } /* * Fast invalidate all shadow pages and use lock-break technique * to zap obsolete pages. * * It's required when memslot is being deleted or VM is being * destroyed, in these cases, we should ensure that KVM MMU does * not use any resource of the being-deleted slot or all slots * after calling the function. */ static void kvm_mmu_zap_all_fast(struct kvm *kvm) { lockdep_assert_held(&kvm->slots_lock); write_lock(&kvm->mmu_lock); trace_kvm_mmu_zap_all_fast(kvm); /* * Toggle mmu_valid_gen between '0' and '1'. Because slots_lock is * held for the entire duration of zapping obsolete pages, it's * impossible for there to be multiple invalid generations associated * with *valid* shadow pages at any given time, i.e. 
there is exactly * one valid generation and (at most) one invalid generation. */ kvm->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen ? 0 : 1; /* * In order to ensure all vCPUs drop their soon-to-be invalid roots, * invalidating TDP MMU roots must be done while holding mmu_lock for * write and in the same critical section as making the reload request, * e.g. before kvm_zap_obsolete_pages() could drop mmu_lock and yield. */ if (tdp_mmu_enabled) kvm_tdp_mmu_invalidate_all_roots(kvm); /* * Notify all vcpus to reload its shadow page table and flush TLB. * Then all vcpus will switch to new shadow page table with the new * mmu_valid_gen. * * Note: we need to do this under the protection of mmu_lock, * otherwise, vcpu would purge shadow page but miss tlb flush. */ kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS); kvm_zap_obsolete_pages(kvm); write_unlock(&kvm->mmu_lock); /* * Zap the invalidated TDP MMU roots, all SPTEs must be dropped before * returning to the caller, e.g. if the zap is in response to a memslot * deletion, mmu_notifier callbacks will be unable to reach the SPTEs * associated with the deleted memslot once the update completes, and * Deferring the zap until the final reference to the root is put would * lead to use-after-free. */ if (tdp_mmu_enabled) kvm_tdp_mmu_zap_invalidated_roots(kvm); } static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) { return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); } void kvm_mmu_init_vm(struct kvm *kvm) { kvm->arch.shadow_mmio_value = shadow_mmio_value; INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages); spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); if (tdp_mmu_enabled) kvm_mmu_init_tdp_mmu(kvm); kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache; kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO; kvm->arch.split_shadow_page_cache.gfp_zero = __GFP_ZERO; kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache; kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO; } static void mmu_free_vm_memory_caches(struct kvm *kvm) { kvm_mmu_free_memory_cache(&kvm->arch.split_desc_cache); kvm_mmu_free_memory_cache(&kvm->arch.split_page_header_cache); kvm_mmu_free_memory_cache(&kvm->arch.split_shadow_page_cache); } void kvm_mmu_uninit_vm(struct kvm *kvm) { if (tdp_mmu_enabled) kvm_mmu_uninit_tdp_mmu(kvm); mmu_free_vm_memory_caches(kvm); } static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) { const struct kvm_memory_slot *memslot; struct kvm_memslots *slots; struct kvm_memslot_iter iter; bool flush = false; gfn_t start, end; int i; if (!kvm_memslots_have_rmaps(kvm)) return flush; for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { slots = __kvm_memslots(kvm, i); kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) { memslot = iter.slot; start = max(gfn_start, memslot->base_gfn); end = min(gfn_end, memslot->base_gfn + memslot->npages); if (WARN_ON_ONCE(start >= end)) continue; flush = __kvm_rmap_zap_gfn_range(kvm, memslot, start, end, true, flush); } } return flush; } /* * Invalidate (zap) SPTEs that cover GFNs from gfn_start and up to gfn_end * (not including it) */ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) { bool flush; if (WARN_ON_ONCE(gfn_end <= gfn_start)) return; write_lock(&kvm->mmu_lock); kvm_mmu_invalidate_begin(kvm); kvm_mmu_invalidate_range_add(kvm, gfn_start, gfn_end); flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, 
gfn_end); if (tdp_mmu_enabled) flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end, flush); if (flush) kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start); kvm_mmu_invalidate_end(kvm); write_unlock(&kvm->mmu_lock); } static bool slot_rmap_write_protect(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { return rmap_write_protect(rmap_head, false); } void kvm_mmu_slot_remove_write_access(struct kvm *kvm, const struct kvm_memory_slot *memslot, int start_level) { if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); walk_slot_rmaps(kvm, memslot, slot_rmap_write_protect, start_level, KVM_MAX_HUGEPAGE_LEVEL, false); write_unlock(&kvm->mmu_lock); } if (tdp_mmu_enabled) { read_lock(&kvm->mmu_lock); kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); read_unlock(&kvm->mmu_lock); } } static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min) { return kvm_mmu_memory_cache_nr_free_objects(cache) < min; } static bool need_topup_split_caches_or_resched(struct kvm *kvm) { if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) return true; /* * In the worst case, SPLIT_DESC_CACHE_MIN_NR_OBJECTS descriptors are needed * to split a single huge page. Calculating how many are actually needed * is possible but not worth the complexity. */ return need_topup(&kvm->arch.split_desc_cache, SPLIT_DESC_CACHE_MIN_NR_OBJECTS) || need_topup(&kvm->arch.split_page_header_cache, 1) || need_topup(&kvm->arch.split_shadow_page_cache, 1); } static int topup_split_caches(struct kvm *kvm) { /* * Allocating rmap list entries when splitting huge pages for nested * MMUs is uncommon as KVM needs to use a list if and only if there is * more than one rmap entry for a gfn, i.e. requires an L1 gfn to be * aliased by multiple L2 gfns and/or from multiple nested roots with * different roles. Aliasing gfns when using TDP is atypical for VMMs; * a few gfns are often aliased during boot, e.g. when remapping BIOS, * but aliasing rarely occurs post-boot or for many gfns. If there is * only one rmap entry, rmap->val points directly at that one entry and * doesn't need to allocate a list. Buffer the cache by the default * capacity so that KVM doesn't have to drop mmu_lock to topup if KVM * encounters an aliased gfn or two. */ const int capacity = SPLIT_DESC_CACHE_MIN_NR_OBJECTS + KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE; int r; lockdep_assert_held(&kvm->slots_lock); r = __kvm_mmu_topup_memory_cache(&kvm->arch.split_desc_cache, capacity, SPLIT_DESC_CACHE_MIN_NR_OBJECTS); if (r) return r; r = kvm_mmu_topup_memory_cache(&kvm->arch.split_page_header_cache, 1); if (r) return r; return kvm_mmu_topup_memory_cache(&kvm->arch.split_shadow_page_cache, 1); } static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep) { struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep); struct shadow_page_caches caches = {}; union kvm_mmu_page_role role; unsigned int access; gfn_t gfn; gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep)); access = kvm_mmu_page_get_access(huge_sp, spte_index(huge_sptep)); /* * Note, huge page splitting always uses direct shadow pages, regardless * of whether the huge page itself is mapped by a direct or indirect * shadow page, since the huge page region itself is being directly * mapped with smaller pages. */ role = kvm_mmu_child_role(huge_sptep, /*direct=*/true, access); /* Direct SPs do not require a shadowed_info_cache. 
*/ caches.page_header_cache = &kvm->arch.split_page_header_cache; caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache; /* Safe to pass NULL for vCPU since requesting a direct SP. */ return __kvm_mmu_get_shadow_page(kvm, NULL, &caches, gfn, role); } static void shadow_mmu_split_huge_page(struct kvm *kvm, const struct kvm_memory_slot *slot, u64 *huge_sptep) { struct kvm_mmu_memory_cache *cache = &kvm->arch.split_desc_cache; u64 huge_spte = READ_ONCE(*huge_sptep); struct kvm_mmu_page *sp; bool flush = false; u64 *sptep, spte; gfn_t gfn; int index; sp = shadow_mmu_get_sp_for_split(kvm, huge_sptep); for (index = 0; index < SPTE_ENT_PER_PAGE; index++) { sptep = &sp->spt[index]; gfn = kvm_mmu_page_get_gfn(sp, index); /* * The SP may already have populated SPTEs, e.g. if this huge * page is aliased by multiple sptes with the same access * permissions. These entries are guaranteed to map the same * gfn-to-pfn translation since the SP is direct, so no need to * modify them. * * However, if a given SPTE points to a lower level page table, * that lower level page table may only be partially populated. * Installing such SPTEs would effectively unmap a potion of the * huge page. Unmapping guest memory always requires a TLB flush * since a subsequent operation on the unmapped regions would * fail to detect the need to flush. */ if (is_shadow_present_pte(*sptep)) { flush |= !is_last_spte(*sptep, sp->role.level); continue; } spte = make_huge_page_split_spte(kvm, huge_spte, sp->role, index); mmu_spte_set(sptep, spte); __rmap_add(kvm, cache, slot, sptep, gfn, sp->role.access); } __link_shadow_page(kvm, cache, huge_sptep, sp, flush); } static int shadow_mmu_try_split_huge_page(struct kvm *kvm, const struct kvm_memory_slot *slot, u64 *huge_sptep) { struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep); int level, r = 0; gfn_t gfn; u64 spte; /* Grab information for the tracepoint before dropping the MMU lock. */ gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep)); level = huge_sp->role.level; spte = *huge_sptep; if (kvm_mmu_available_pages(kvm) <= KVM_MIN_FREE_MMU_PAGES) { r = -ENOSPC; goto out; } if (need_topup_split_caches_or_resched(kvm)) { write_unlock(&kvm->mmu_lock); cond_resched(); /* * If the topup succeeds, return -EAGAIN to indicate that the * rmap iterator should be restarted because the MMU lock was * dropped. */ r = topup_split_caches(kvm) ?: -EAGAIN; write_lock(&kvm->mmu_lock); goto out; } shadow_mmu_split_huge_page(kvm, slot, huge_sptep); out: trace_kvm_mmu_split_huge_page(gfn, spte, level, r); return r; } static bool shadow_mmu_try_split_huge_pages(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { struct rmap_iterator iter; struct kvm_mmu_page *sp; u64 *huge_sptep; int r; restart: for_each_rmap_spte(rmap_head, &iter, huge_sptep) { sp = sptep_to_sp(huge_sptep); /* TDP MMU is enabled, so rmap only contains nested MMU SPs. */ if (WARN_ON_ONCE(!sp->role.guest_mode)) continue; /* The rmaps should never contain non-leaf SPTEs. */ if (WARN_ON_ONCE(!is_large_pte(*huge_sptep))) continue; /* SPs with level >PG_LEVEL_4K should never by unsync. */ if (WARN_ON_ONCE(sp->unsync)) continue; /* Don't bother splitting huge pages on invalid SPs. */ if (sp->role.invalid) continue; r = shadow_mmu_try_split_huge_page(kvm, slot, huge_sptep); /* * The split succeeded or needs to be retried because the MMU * lock was dropped. Either way, restart the iterator to get it * back into a consistent state. 
*/ if (!r || r == -EAGAIN) goto restart; /* The split failed and shouldn't be retried (e.g. -ENOMEM). */ break; } return false; } static void kvm_shadow_mmu_try_split_huge_pages(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t start, gfn_t end, int target_level) { int level; /* * Split huge pages starting with KVM_MAX_HUGEPAGE_LEVEL and working * down to the target level. This ensures pages are recursively split * all the way to the target level. There's no need to split pages * already at the target level. */ for (level = KVM_MAX_HUGEPAGE_LEVEL; level > target_level; level--) __walk_slot_rmaps(kvm, slot, shadow_mmu_try_split_huge_pages, level, level, start, end - 1, true, true, false); } /* Must be called with the mmu_lock held in write-mode. */ void kvm_mmu_try_split_huge_pages(struct kvm *kvm, const struct kvm_memory_slot *memslot, u64 start, u64 end, int target_level) { if (!tdp_mmu_enabled) return; if (kvm_memslots_have_rmaps(kvm)) kvm_shadow_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level); kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level, false); /* * A TLB flush is unnecessary at this point for the same reasons as in * kvm_mmu_slot_try_split_huge_pages(). */ } void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm, const struct kvm_memory_slot *memslot, int target_level) { u64 start = memslot->base_gfn; u64 end = start + memslot->npages; if (!tdp_mmu_enabled) return; if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); kvm_shadow_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level); write_unlock(&kvm->mmu_lock); } read_lock(&kvm->mmu_lock); kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level, true); read_unlock(&kvm->mmu_lock); /* * No TLB flush is necessary here. KVM will flush TLBs after * write-protecting and/or clearing dirty on the newly split SPTEs to * ensure that guest writes are reflected in the dirty log before the * ioctl to enable dirty logging on this memslot completes. Since the * split SPTEs retain the write and dirty bits of the huge SPTE, it is * safe for KVM to decide if a TLB flush is necessary based on the split * SPTEs. */ } static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { u64 *sptep; struct rmap_iterator iter; int need_tlb_flush = 0; struct kvm_mmu_page *sp; restart: for_each_rmap_spte(rmap_head, &iter, sptep) { sp = sptep_to_sp(sptep); /* * We cannot do huge page mapping for indirect shadow pages, * which are found on the last rmap (level = 1) when not using * tdp; such shadow pages are synced with the page table in * the guest, and the guest page table is using 4K page size * mapping if the indirect sp has level = 1. */ if (sp->role.direct && sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn, PG_LEVEL_NUM)) { kvm_zap_one_rmap_spte(kvm, rmap_head, sptep); if (kvm_available_flush_remote_tlbs_range()) kvm_flush_remote_tlbs_sptep(kvm, sptep); else need_tlb_flush = 1; goto restart; } } return need_tlb_flush; } EXPORT_SYMBOL_GPL(kvm_zap_gfn_range); static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm, const struct kvm_memory_slot *slot) { /* * Note, use KVM_MAX_HUGEPAGE_LEVEL - 1 since there's no need to zap * pages that are already mapped at the maximum hugepage level. 
*/ if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true)) kvm_flush_remote_tlbs_memslot(kvm, slot); } void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, const struct kvm_memory_slot *slot) { if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); kvm_rmap_zap_collapsible_sptes(kvm, slot); write_unlock(&kvm->mmu_lock); } if (tdp_mmu_enabled) { read_lock(&kvm->mmu_lock); kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot); read_unlock(&kvm->mmu_lock); } } void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, const struct kvm_memory_slot *memslot) { if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); /* * Clear dirty bits only on 4k SPTEs since the legacy MMU only * support dirty logging at a 4k granularity. */ walk_slot_rmaps_4k(kvm, memslot, __rmap_clear_dirty, false); write_unlock(&kvm->mmu_lock); } if (tdp_mmu_enabled) { read_lock(&kvm->mmu_lock); kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); read_unlock(&kvm->mmu_lock); } /* * The caller will flush the TLBs after this function returns. * * It's also safe to flush TLBs out of mmu lock here as currently this * function is only used for dirty logging, in which case flushing TLB * out of mmu lock also guarantees no dirty pages will be lost in * dirty_bitmap. */ } static void kvm_mmu_zap_all(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; LIST_HEAD(invalid_list); int ign; write_lock(&kvm->mmu_lock); restart: list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { if (WARN_ON_ONCE(sp->role.invalid)) continue; if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) goto restart; if (cond_resched_rwlock_write(&kvm->mmu_lock)) goto restart; } kvm_mmu_commit_zap_page(kvm, &invalid_list); if (tdp_mmu_enabled) kvm_tdp_mmu_zap_all(kvm); write_unlock(&kvm->mmu_lock); } void kvm_arch_flush_shadow_all(struct kvm *kvm) { kvm_mmu_zap_all(kvm); } static void kvm_mmu_zap_memslot_pages_and_flush(struct kvm *kvm, struct kvm_memory_slot *slot, bool flush) { LIST_HEAD(invalid_list); unsigned long i; if (list_empty(&kvm->arch.active_mmu_pages)) goto out_flush; /* * Since accounting information is stored in struct kvm_arch_memory_slot, * shadow pages deletion (e.g. unaccount_shadowed()) requires that all * gfns with a shadow page have a corresponding memslot. Do so before * the memslot goes away. 
*/ for (i = 0; i < slot->npages; i++) { struct kvm_mmu_page *sp; gfn_t gfn = slot->base_gfn + i; for_each_gfn_valid_sp(kvm, sp, gfn) kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); flush = false; cond_resched_rwlock_write(&kvm->mmu_lock); } } out_flush: kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); } static void kvm_mmu_zap_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { struct kvm_gfn_range range = { .slot = slot, .start = slot->base_gfn, .end = slot->base_gfn + slot->npages, .may_block = true, }; bool flush; write_lock(&kvm->mmu_lock); flush = kvm_unmap_gfn_range(kvm, &range); kvm_mmu_zap_memslot_pages_and_flush(kvm, slot, flush); write_unlock(&kvm->mmu_lock); } static inline bool kvm_memslot_flush_zap_all(struct kvm *kvm) { return kvm->arch.vm_type == KVM_X86_DEFAULT_VM && kvm_check_has_quirk(kvm, KVM_X86_QUIRK_SLOT_ZAP_ALL); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { if (kvm_memslot_flush_zap_all(kvm)) kvm_mmu_zap_all_fast(kvm); else kvm_mmu_zap_memslot(kvm, slot); } void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) { WARN_ON_ONCE(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); gen &= MMIO_SPTE_GEN_MASK; /* * Generation numbers are incremented in multiples of the number of * address spaces in order to provide unique generations across all * address spaces. Strip what is effectively the address space * modifier prior to checking for a wrap of the MMIO generation so * that a wrap in any address space is detected. */ gen &= ~((u64)kvm_arch_nr_memslot_as_ids(kvm) - 1); /* * The very rare case: if the MMIO generation number has wrapped, * zap all shadow pages. */ if (unlikely(gen == 0)) { kvm_debug_ratelimited("zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_zap_all_fast(kvm); } } static unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct kvm *kvm; int nr_to_scan = sc->nr_to_scan; unsigned long freed = 0; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { int idx; /* * Never scan more than sc->nr_to_scan VM instances. * Will not hit this condition practically since we do not try * to shrink more than one VM and it is very unlikely to see * !n_used_mmu_pages so many times. */ if (!nr_to_scan--) break; /* * n_used_mmu_pages is accessed without holding kvm->mmu_lock * here. We may skip a VM instance errorneosly, but we do not * want to shrink a VM that only started to populate its MMU * anyway. 
*/ if (!kvm->arch.n_used_mmu_pages && !kvm_has_zapped_obsolete_pages(kvm)) continue; idx = srcu_read_lock(&kvm->srcu); write_lock(&kvm->mmu_lock); if (kvm_has_zapped_obsolete_pages(kvm)) { kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages); goto unlock; } freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan); unlock: write_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); /* * unfair on small ones * per-vm shrinkers cry out * sadness comes quickly */ list_move_tail(&kvm->vm_list, &vm_list); break; } mutex_unlock(&kvm_lock); return freed; } static unsigned long mmu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { return percpu_counter_read_positive(&kvm_total_used_mmu_pages); } static struct shrinker *mmu_shrinker; static void mmu_destroy_caches(void) { kmem_cache_destroy(pte_list_desc_cache); kmem_cache_destroy(mmu_page_header_cache); } static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp) { if (nx_hugepage_mitigation_hard_disabled) return sysfs_emit(buffer, "never\n"); return param_get_bool(buffer, kp); } static bool get_nx_auto_mode(void) { /* Return true when CPU has the bug, and mitigations are ON */ return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off(); } static void __set_nx_huge_pages(bool val) { nx_huge_pages = itlb_multihit_kvm_mitigation = val; } static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) { bool old_val = nx_huge_pages; bool new_val; if (nx_hugepage_mitigation_hard_disabled) return -EPERM; /* In "auto" mode deploy workaround only if CPU has the bug. */ if (sysfs_streq(val, "off")) { new_val = 0; } else if (sysfs_streq(val, "force")) { new_val = 1; } else if (sysfs_streq(val, "auto")) { new_val = get_nx_auto_mode(); } else if (sysfs_streq(val, "never")) { new_val = 0; mutex_lock(&kvm_lock); if (!list_empty(&vm_list)) { mutex_unlock(&kvm_lock); return -EBUSY; } nx_hugepage_mitigation_hard_disabled = true; mutex_unlock(&kvm_lock); } else if (kstrtobool(val, &new_val) < 0) { return -EINVAL; } __set_nx_huge_pages(new_val); if (new_val != old_val) { struct kvm *kvm; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { mutex_lock(&kvm->slots_lock); kvm_mmu_zap_all_fast(kvm); mutex_unlock(&kvm->slots_lock); wake_up_process(kvm->arch.nx_huge_page_recovery_thread); } mutex_unlock(&kvm_lock); } return 0; } /* * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as * its default value of -1 is technically undefined behavior for a boolean. * Forward the module init call to SPTE code so that it too can handle module * params that need to be resolved/snapshot. */ void __init kvm_mmu_x86_module_init(void) { if (nx_huge_pages == -1) __set_nx_huge_pages(get_nx_auto_mode()); /* * Snapshot userspace's desire to enable the TDP MMU. Whether or not the * TDP MMU is actually enabled is determined in kvm_configure_mmu() * when the vendor module is loaded. */ tdp_mmu_allowed = tdp_mmu_enabled; kvm_mmu_spte_module_init(); } /* * The bulk of the MMU initialization is deferred until the vendor module is * loaded as many of the masks/values may be modified by VMX or SVM, i.e. need * to be reset when a potentially different vendor module is loaded. */ int kvm_mmu_vendor_module_init(void) { int ret = -ENOMEM; /* * MMU roles use union aliasing which is, generally speaking, an * undefined behavior. However, we supposedly know how compilers behave * and the current status quo is unlikely to change. 
Guardians below are * supposed to let us know if the assumption becomes false. */ BUILD_BUG_ON(sizeof(union kvm_mmu_page_role) != sizeof(u32)); BUILD_BUG_ON(sizeof(union kvm_mmu_extended_role) != sizeof(u32)); BUILD_BUG_ON(sizeof(union kvm_cpu_role) != sizeof(u64)); kvm_mmu_reset_all_pte_masks(); pte_list_desc_cache = KMEM_CACHE(pte_list_desc, SLAB_ACCOUNT); if (!pte_list_desc_cache) goto out; mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", sizeof(struct kvm_mmu_page), 0, SLAB_ACCOUNT, NULL); if (!mmu_page_header_cache) goto out; if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL)) goto out; mmu_shrinker = shrinker_alloc(0, "x86-mmu"); if (!mmu_shrinker) goto out_shrinker; mmu_shrinker->count_objects = mmu_shrink_count; mmu_shrinker->scan_objects = mmu_shrink_scan; mmu_shrinker->seeks = DEFAULT_SEEKS * 10; shrinker_register(mmu_shrinker); return 0; out_shrinker: percpu_counter_destroy(&kvm_total_used_mmu_pages); out: mmu_destroy_caches(); return ret; } void kvm_mmu_destroy(struct kvm_vcpu *vcpu) { kvm_mmu_unload(vcpu); free_mmu_pages(&vcpu->arch.root_mmu); free_mmu_pages(&vcpu->arch.guest_mmu); mmu_free_memory_caches(vcpu); } void kvm_mmu_vendor_module_exit(void) { mmu_destroy_caches(); percpu_counter_destroy(&kvm_total_used_mmu_pages); shrinker_free(mmu_shrinker); } /* * Calculate the effective recovery period, accounting for '0' meaning "let KVM * select a halving time of 1 hour". Returns true if recovery is enabled. */ static bool calc_nx_huge_pages_recovery_period(uint *period) { /* * Use READ_ONCE to get the params, this may be called outside of the * param setters, e.g. by the kthread to compute its next timeout. */ bool enabled = READ_ONCE(nx_huge_pages); uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio); if (!enabled || !ratio) return false; *period = READ_ONCE(nx_huge_pages_recovery_period_ms); if (!*period) { /* Make sure the period is not less than one second. */ ratio = min(ratio, 3600u); *period = 60 * 60 * 1000 / ratio; } return true; } static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp) { bool was_recovery_enabled, is_recovery_enabled; uint old_period, new_period; int err; if (nx_hugepage_mitigation_hard_disabled) return -EPERM; was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period); err = param_set_uint(val, kp); if (err) return err; is_recovery_enabled = calc_nx_huge_pages_recovery_period(&new_period); if (is_recovery_enabled && (!was_recovery_enabled || old_period > new_period)) { struct kvm *kvm; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) wake_up_process(kvm->arch.nx_huge_page_recovery_thread); mutex_unlock(&kvm_lock); } return err; } static void kvm_recover_nx_huge_pages(struct kvm *kvm) { unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits; struct kvm_memory_slot *slot; int rcu_idx; struct kvm_mmu_page *sp; unsigned int ratio; LIST_HEAD(invalid_list); bool flush = false; ulong to_zap; rcu_idx = srcu_read_lock(&kvm->srcu); write_lock(&kvm->mmu_lock); /* * Zapping TDP MMU shadow pages, including the remote TLB flush, must * be done under RCU protection, because the pages are freed via RCU * callback. */ rcu_read_lock(); ratio = READ_ONCE(nx_huge_pages_recovery_ratio); to_zap = ratio ? 
DIV_ROUND_UP(nx_lpage_splits, ratio) : 0; for ( ; to_zap; --to_zap) { if (list_empty(&kvm->arch.possible_nx_huge_pages)) break; /* * We use a separate list instead of just using active_mmu_pages * because the number of shadow pages that be replaced with an * NX huge page is expected to be relatively small compared to * the total number of shadow pages. And because the TDP MMU * doesn't use active_mmu_pages. */ sp = list_first_entry(&kvm->arch.possible_nx_huge_pages, struct kvm_mmu_page, possible_nx_huge_page_link); WARN_ON_ONCE(!sp->nx_huge_page_disallowed); WARN_ON_ONCE(!sp->role.direct); /* * Unaccount and do not attempt to recover any NX Huge Pages * that are being dirty tracked, as they would just be faulted * back in as 4KiB pages. The NX Huge Pages in this slot will be * recovered, along with all the other huge pages in the slot, * when dirty logging is disabled. * * Since gfn_to_memslot() is relatively expensive, it helps to * skip it if it the test cannot possibly return true. On the * other hand, if any memslot has logging enabled, chances are * good that all of them do, in which case unaccount_nx_huge_page() * is much cheaper than zapping the page. * * If a memslot update is in progress, reading an incorrect value * of kvm->nr_memslots_dirty_logging is not a problem: if it is * becoming zero, gfn_to_memslot() will be done unnecessarily; if * it is becoming nonzero, the page will be zapped unnecessarily. * Either way, this only affects efficiency in racy situations, * and not correctness. */ slot = NULL; if (atomic_read(&kvm->nr_memslots_dirty_logging)) { struct kvm_memslots *slots; slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, sp->gfn); WARN_ON_ONCE(!slot); } if (slot && kvm_slot_dirty_track_enabled(slot)) unaccount_nx_huge_page(kvm, sp); else if (is_tdp_mmu_page(sp)) flush |= kvm_tdp_mmu_zap_sp(kvm, sp); else kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); WARN_ON_ONCE(sp->nx_huge_page_disallowed); if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); rcu_read_unlock(); cond_resched_rwlock_write(&kvm->mmu_lock); flush = false; rcu_read_lock(); } } kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); rcu_read_unlock(); write_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, rcu_idx); } static long get_nx_huge_page_recovery_timeout(u64 start_time) { bool enabled; uint period; enabled = calc_nx_huge_pages_recovery_period(&period); return enabled ? 
start_time + msecs_to_jiffies(period) - get_jiffies_64() : MAX_SCHEDULE_TIMEOUT; } static int kvm_nx_huge_page_recovery_worker(struct kvm *kvm, uintptr_t data) { u64 start_time; long remaining_time; while (true) { start_time = get_jiffies_64(); remaining_time = get_nx_huge_page_recovery_timeout(start_time); set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop() && remaining_time > 0) { schedule_timeout(remaining_time); remaining_time = get_nx_huge_page_recovery_timeout(start_time); set_current_state(TASK_INTERRUPTIBLE); } set_current_state(TASK_RUNNING); if (kthread_should_stop()) return 0; kvm_recover_nx_huge_pages(kvm); } } int kvm_mmu_post_init_vm(struct kvm *kvm) { int err; if (nx_hugepage_mitigation_hard_disabled) return 0; err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0, "kvm-nx-lpage-recovery", &kvm->arch.nx_huge_page_recovery_thread); if (!err) kthread_unpark(kvm->arch.nx_huge_page_recovery_thread); return err; } void kvm_mmu_pre_destroy_vm(struct kvm *kvm) { if (kvm->arch.nx_huge_page_recovery_thread) kthread_stop(kvm->arch.nx_huge_page_recovery_thread); } #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range) { /* * Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only * supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM * can simply ignore such slots. But if userspace is making memory * PRIVATE, then KVM must prevent the guest from accessing the memory * as shared. And if userspace is making memory SHARED and this point * is reached, then at least one page within the range was previously * PRIVATE, i.e. the slot's possible hugepage ranges are changing. * Zapping SPTEs in this case ensures KVM will reassess whether or not * a hugepage can be used for affected ranges. */ if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) return false; return kvm_unmap_gfn_range(kvm, range); } static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn, int level) { return lpage_info_slot(gfn, slot, level)->disallow_lpage & KVM_LPAGE_MIXED_FLAG; } static void hugepage_clear_mixed(struct kvm_memory_slot *slot, gfn_t gfn, int level) { lpage_info_slot(gfn, slot, level)->disallow_lpage &= ~KVM_LPAGE_MIXED_FLAG; } static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn, int level) { lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG; } static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long attrs) { const unsigned long start = gfn; const unsigned long end = start + KVM_PAGES_PER_HPAGE(level); if (level == PG_LEVEL_2M) return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs); for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) { if (hugepage_test_mixed(slot, gfn, level - 1) || attrs != kvm_get_memory_attributes(kvm, gfn)) return false; } return true; } bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range) { unsigned long attrs = range->arg.attributes; struct kvm_memory_slot *slot = range->slot; int level; lockdep_assert_held_write(&kvm->mmu_lock); lockdep_assert_held(&kvm->slots_lock); /* * Calculate which ranges can be mapped with hugepages even if the slot * can't map memory PRIVATE. KVM mustn't create a SHARED hugepage over * a range that has PRIVATE GFNs, and conversely converting a range to * SHARED may now allow hugepages. 
*/ if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) return false; /* * The sequence matters here: upper levels consume the result of lower * level's scanning. */ for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) { gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level); gfn_t gfn = gfn_round_for_level(range->start, level); /* Process the head page if it straddles the range. */ if (gfn != range->start || gfn + nr_pages > range->end) { /* * Skip mixed tracking if the aligned gfn isn't covered * by the memslot, KVM can't use a hugepage due to the * misaligned address regardless of memory attributes. */ if (gfn >= slot->base_gfn && gfn + nr_pages <= slot->base_gfn + slot->npages) { if (hugepage_has_attrs(kvm, slot, gfn, level, attrs)) hugepage_clear_mixed(slot, gfn, level); else hugepage_set_mixed(slot, gfn, level); } gfn += nr_pages; } /* * Pages entirely covered by the range are guaranteed to have * only the attributes which were just set. */ for ( ; gfn + nr_pages <= range->end; gfn += nr_pages) hugepage_clear_mixed(slot, gfn, level); /* * Process the last tail page if it straddles the range and is * contained by the memslot. Like the head page, KVM can't * create a hugepage if the slot size is misaligned. */ if (gfn < range->end && (gfn + nr_pages) <= (slot->base_gfn + slot->npages)) { if (hugepage_has_attrs(kvm, slot, gfn, level, attrs)) hugepage_clear_mixed(slot, gfn, level); else hugepage_set_mixed(slot, gfn, level); } } return false; } void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm, struct kvm_memory_slot *slot) { int level; if (!kvm_arch_has_private_mem(kvm)) return; for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) { /* * Don't bother tracking mixed attributes for pages that can't * be huge due to alignment, i.e. process only pages that are * entirely contained by the memslot. */ gfn_t end = gfn_round_for_level(slot->base_gfn + slot->npages, level); gfn_t start = gfn_round_for_level(slot->base_gfn, level); gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level); gfn_t gfn; if (start < slot->base_gfn) start += nr_pages; /* * Unlike setting attributes, every potential hugepage needs to * be manually checked as the attributes may already be mixed. */ for (gfn = start; gfn < end; gfn += nr_pages) { unsigned long attrs = kvm_get_memory_attributes(kvm, gfn); if (hugepage_has_attrs(kvm, slot, gfn, level, attrs)) hugepage_clear_mixed(slot, gfn, level); else hugepage_set_mixed(slot, gfn, level); } } } #endif
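The head/middle/tail split used above for mixed-attribute tracking is easy to get wrong, so the following is a minimal user-space C sketch of the same range walk for a single hugepage level. It is illustrative only: PAGES_PER_HPAGE, range_uniform(), mark_clear() and mark_mixed() are made-up stand-ins for KVM_PAGES_PER_HPAGE(), hugepage_has_attrs(), hugepage_clear_mixed() and hugepage_set_mixed(); none of this is kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGES_PER_HPAGE 512ULL	/* stand-in for KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) */

/* Stand-in: report whether every page in the hugepage has the new attributes. */
static bool range_uniform(uint64_t gfn)
{
	(void)gfn;
	return true;
}

static void mark_clear(uint64_t gfn)
{
	printf("clear mixed flag for hugepage at gfn %llu\n", (unsigned long long)gfn);
}

static void mark_mixed(uint64_t gfn)
{
	printf("set mixed flag for hugepage at gfn %llu\n", (unsigned long long)gfn);
}

/* Walk one hugepage level over the attribute-change range [start, end). */
static void walk_level(uint64_t start, uint64_t end,
		       uint64_t slot_base, uint64_t slot_npages)
{
	uint64_t gfn = start & ~(PAGES_PER_HPAGE - 1);	/* gfn_round_for_level() */

	/* Head hugepage: only partially covered by the range. */
	if (gfn != start || gfn + PAGES_PER_HPAGE > end) {
		/* Skip it if the aligned hugepage isn't fully inside the slot. */
		if (gfn >= slot_base &&
		    gfn + PAGES_PER_HPAGE <= slot_base + slot_npages) {
			if (range_uniform(gfn))
				mark_clear(gfn);
			else
				mark_mixed(gfn);
		}
		gfn += PAGES_PER_HPAGE;
	}

	/* Hugepages entirely inside the range can no longer be mixed. */
	for (; gfn + PAGES_PER_HPAGE <= end; gfn += PAGES_PER_HPAGE)
		mark_clear(gfn);

	/* Tail hugepage, if it straddles the end and fits in the slot. */
	if (gfn < end && gfn + PAGES_PER_HPAGE <= slot_base + slot_npages) {
		if (range_uniform(gfn))
			mark_clear(gfn);
		else
			mark_mixed(gfn);
	}
}

int main(void)
{
	/* e.g. an attribute change over gfns [100, 2000) in a slot [0, 4096). */
	walk_level(100, 2000, 0, 4096);
	return 0;
}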
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system. INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Ethernet-type device handling.
 *
 * Version:	@(#)eth.c	1.0.7	05/25/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Florian La Roche, <rzsfl@rz.uni-sb.de>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Mr Linux	: Arp problems
 *		Alan Cox	: Generic queue tidyup (very tiny here)
 *		Alan Cox	: eth_header ntohs should be htons
 *		Alan Cox	: eth_rebuild_header missing an htons and
 *				  minor other things.
 *		Tegge		: Arp bug fixes.
 *		Florian		: Removed many unnecessary functions, code cleanup
 *				  and changes for new arp and skbuff.
 *		Alan Cox	: Redid header building to reflect new format.
 *		Alan Cox	: ARP only when compiled with CONFIG_INET
 *		Greg Page	: 802.2 and SNAP stuff.
* Alan Cox : MAC layer pointers/new format. * Paul Gortmaker : eth_copy_and_sum shouldn't csum padding. * Alan Cox : Protect against forwarding explosions with * older network drivers and IFF_ALLMULTI. * Christer Weinigel : Better rebuild header message. * Andrew Morton : 26Feb01: kill ether_setup() - use netdev_boot_setup(). */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/ip.h> #include <linux/netdevice.h> #include <linux/nvmem-consumer.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/if_ether.h> #include <linux/of_net.h> #include <linux/pci.h> #include <linux/property.h> #include <net/dst.h> #include <net/arp.h> #include <net/sock.h> #include <net/ipv6.h> #include <net/ip.h> #include <net/dsa.h> #include <net/flow_dissector.h> #include <net/gro.h> #include <linux/uaccess.h> #include <net/pkt_sched.h> /** * eth_header - create the Ethernet header * @skb: buffer to alter * @dev: source device * @type: Ethernet type field * @daddr: destination address (NULL leave destination address) * @saddr: source address (NULL use device source address) * @len: packet length (<= skb->len) * * * Set the protocol type. For a packet of type ETH_P_802_3/2 we put the length * in here instead. */ int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { struct ethhdr *eth = skb_push(skb, ETH_HLEN); if (type != ETH_P_802_3 && type != ETH_P_802_2) eth->h_proto = htons(type); else eth->h_proto = htons(len); /* * Set the source hardware address. */ if (!saddr) saddr = dev->dev_addr; memcpy(eth->h_source, saddr, ETH_ALEN); if (daddr) { memcpy(eth->h_dest, daddr, ETH_ALEN); return ETH_HLEN; } /* * Anyway, the loopback-device should never use this function... */ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { eth_zero_addr(eth->h_dest); return ETH_HLEN; } return -ETH_HLEN; } EXPORT_SYMBOL(eth_header); /** * eth_get_headlen - determine the length of header for an ethernet frame * @dev: pointer to network device * @data: pointer to start of frame * @len: total length of frame * * Make a best effort attempt to pull the length for all of the headers for * a given frame in a linear buffer. */ u32 eth_get_headlen(const struct net_device *dev, const void *data, u32 len) { const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG; const struct ethhdr *eth = (const struct ethhdr *)data; struct flow_keys_basic keys; /* this should never happen, but better safe than sorry */ if (unlikely(len < sizeof(*eth))) return len; /* parse any remaining L2/L3 headers, check for L4 */ if (!skb_flow_dissect_flow_keys_basic(dev_net(dev), NULL, &keys, data, eth->h_proto, sizeof(*eth), len, flags)) return max_t(u32, keys.control.thoff, sizeof(*eth)); /* parse for any L4 headers */ return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len); } EXPORT_SYMBOL(eth_get_headlen); /** * eth_type_trans - determine the packet's protocol ID. * @skb: received socket data * @dev: receiving network device * * The rule here is that we * assume 802.3 if the type field is short enough to be a length. * This is normal practice and works for any 'now in use' protocol. 
*/ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) { unsigned short _service_access_point; const unsigned short *sap; const struct ethhdr *eth; skb->dev = dev; skb_reset_mac_header(skb); eth = eth_skb_pull_mac(skb); eth_skb_pkt_type(skb, dev); /* * Some variants of DSA tagging don't have an ethertype field * at all, so we check here whether one of those tagging * variants has been configured on the receiving interface, * and if so, set skb->protocol without looking at the packet. */ if (unlikely(netdev_uses_dsa(dev))) return htons(ETH_P_XDSA); if (likely(eth_proto_is_802_3(eth->h_proto))) return eth->h_proto; /* * This is a magic hack to spot IPX packets. Older Novell breaks * the protocol design and runs IPX over 802.3 without an 802.2 LLC * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This * won't work for fault tolerant netware but does for the rest. */ sap = skb_header_pointer(skb, 0, sizeof(*sap), &_service_access_point); if (sap && *sap == 0xFFFF) return htons(ETH_P_802_3); /* * Real 802.2 LLC */ return htons(ETH_P_802_2); } EXPORT_SYMBOL(eth_type_trans); /** * eth_header_parse - extract hardware address from packet * @skb: packet to extract header from * @haddr: destination buffer */ int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr) { const struct ethhdr *eth = eth_hdr(skb); memcpy(haddr, eth->h_source, ETH_ALEN); return ETH_ALEN; } EXPORT_SYMBOL(eth_header_parse); /** * eth_header_cache - fill cache entry from neighbour * @neigh: source neighbour * @hh: destination cache entry * @type: Ethernet type field * * Create an Ethernet header template from the neighbour. */ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type) { struct ethhdr *eth; const struct net_device *dev = neigh->dev; eth = (struct ethhdr *) (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); if (type == htons(ETH_P_802_3)) return -1; eth->h_proto = type; memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); memcpy(eth->h_dest, neigh->ha, ETH_ALEN); /* Pairs with READ_ONCE() in neigh_resolve_output(), * neigh_hh_output() and neigh_update_hhs(). */ smp_store_release(&hh->hh_len, ETH_HLEN); return 0; } EXPORT_SYMBOL(eth_header_cache); /** * eth_header_cache_update - update cache entry * @hh: destination cache entry * @dev: network device * @haddr: new hardware address * * Called by Address Resolution module to notify changes in address. 
*/ void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr) { memcpy(((u8 *) hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), haddr, ETH_ALEN); } EXPORT_SYMBOL(eth_header_cache_update); /** * eth_header_parse_protocol - extract protocol from L2 header * @skb: packet to extract protocol from */ __be16 eth_header_parse_protocol(const struct sk_buff *skb) { const struct ethhdr *eth = eth_hdr(skb); return eth->h_proto; } EXPORT_SYMBOL(eth_header_parse_protocol); /** * eth_prepare_mac_addr_change - prepare for mac change * @dev: network device * @p: socket address */ int eth_prepare_mac_addr_change(struct net_device *dev, void *p) { struct sockaddr *addr = p; if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev)) return -EBUSY; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; return 0; } EXPORT_SYMBOL(eth_prepare_mac_addr_change); /** * eth_commit_mac_addr_change - commit mac change * @dev: network device * @p: socket address */ void eth_commit_mac_addr_change(struct net_device *dev, void *p) { struct sockaddr *addr = p; eth_hw_addr_set(dev, addr->sa_data); } EXPORT_SYMBOL(eth_commit_mac_addr_change); /** * eth_mac_addr - set new Ethernet hardware address * @dev: network device * @p: socket address * * Change hardware address of device. * * This doesn't change hardware matching, so needs to be overridden * for most real devices. */ int eth_mac_addr(struct net_device *dev, void *p) { int ret; ret = eth_prepare_mac_addr_change(dev, p); if (ret < 0) return ret; eth_commit_mac_addr_change(dev, p); return 0; } EXPORT_SYMBOL(eth_mac_addr); int eth_validate_addr(struct net_device *dev) { if (!is_valid_ether_addr(dev->dev_addr)) return -EADDRNOTAVAIL; return 0; } EXPORT_SYMBOL(eth_validate_addr); const struct header_ops eth_header_ops ____cacheline_aligned = { .create = eth_header, .parse = eth_header_parse, .cache = eth_header_cache, .cache_update = eth_header_cache_update, .parse_protocol = eth_header_parse_protocol, }; /** * ether_setup - setup Ethernet network device * @dev: network device * * Fill in the fields of the device structure with Ethernet-generic values. */ void ether_setup(struct net_device *dev) { dev->header_ops = &eth_header_ops; dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; dev->min_header_len = ETH_HLEN; dev->mtu = ETH_DATA_LEN; dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = ETH_DATA_LEN; dev->addr_len = ETH_ALEN; dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; dev->flags = IFF_BROADCAST|IFF_MULTICAST; dev->priv_flags |= IFF_TX_SKB_SHARING; eth_broadcast_addr(dev->broadcast); } EXPORT_SYMBOL(ether_setup); /** * alloc_etherdev_mqs - Allocates and sets up an Ethernet device * @sizeof_priv: Size of additional driver-private structure to be allocated * for this Ethernet device * @txqs: The number of TX queues this device has. * @rxqs: The number of RX queues this device has. * * Fill in the fields of the device structure with Ethernet-generic * values. Basically does everything except registering the device. * * Constructs a new net device, complete with a private data area of * size (sizeof_priv). A 32-byte (not bit) alignment is enforced for * this private data area. 
*/ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, unsigned int rxqs) { return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_ENUM, ether_setup, txqs, rxqs); } EXPORT_SYMBOL(alloc_etherdev_mqs); ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) { return sysfs_emit(buf, "%*phC\n", len, addr); } EXPORT_SYMBOL(sysfs_format_mac); struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb) { const struct packet_offload *ptype; unsigned int hlen, off_eth; struct sk_buff *pp = NULL; struct ethhdr *eh, *eh2; struct sk_buff *p; __be16 type; int flush = 1; off_eth = skb_gro_offset(skb); hlen = off_eth + sizeof(*eh); eh = skb_gro_header(skb, hlen, off_eth); if (unlikely(!eh)) goto out; flush = 0; list_for_each_entry(p, head, list) { if (!NAPI_GRO_CB(p)->same_flow) continue; eh2 = (struct ethhdr *)(p->data + off_eth); if (compare_ether_header(eh, eh2)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } } type = eh->h_proto; ptype = gro_find_receive_by_type(type); if (ptype == NULL) { flush = 1; goto out; } skb_gro_pull(skb, sizeof(*eh)); skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); pp = indirect_call_gro_receive_inet(ptype->callbacks.gro_receive, ipv6_gro_receive, inet_gro_receive, head, skb); out: skb_gro_flush_final(skb, pp, flush); return pp; } EXPORT_SYMBOL(eth_gro_receive); int eth_gro_complete(struct sk_buff *skb, int nhoff) { struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff); __be16 type = eh->h_proto; struct packet_offload *ptype; int err = -ENOSYS; if (skb->encapsulation) skb_set_inner_mac_header(skb, nhoff); ptype = gro_find_complete_by_type(type); if (ptype != NULL) err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete, ipv6_gro_complete, inet_gro_complete, skb, nhoff + sizeof(*eh)); return err; } EXPORT_SYMBOL(eth_gro_complete); static struct packet_offload eth_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_TEB), .priority = 10, .callbacks = { .gro_receive = eth_gro_receive, .gro_complete = eth_gro_complete, }, }; static int __init eth_offload_init(void) { dev_add_offload(&eth_packet_offload); return 0; } fs_initcall(eth_offload_init); unsigned char * __weak arch_get_platform_mac_address(void) { return NULL; } int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr) { unsigned char *addr; int ret; ret = of_get_mac_address(dev->of_node, mac_addr); if (!ret) return 0; addr = arch_get_platform_mac_address(); if (!addr) return -ENODEV; ether_addr_copy(mac_addr, addr); return 0; } EXPORT_SYMBOL(eth_platform_get_mac_address); /** * platform_get_ethdev_address - Set netdev's MAC address from a given device * @dev: Pointer to the device * @netdev: Pointer to netdev to write the address to * * Wrapper around eth_platform_get_mac_address() which writes the address * directly to netdev->dev_addr. */ int platform_get_ethdev_address(struct device *dev, struct net_device *netdev) { u8 addr[ETH_ALEN] __aligned(2); int ret; ret = eth_platform_get_mac_address(dev, addr); if (!ret) eth_hw_addr_set(netdev, addr); return ret; } EXPORT_SYMBOL(platform_get_ethdev_address); /** * nvmem_get_mac_address - Obtain the MAC address from an nvmem cell named * 'mac-address' associated with given device. * * @dev: Device with which the mac-address cell is associated. * @addrbuf: Buffer to which the MAC address will be copied on success. * * Returns 0 on success or a negative error number on failure. 
*/ int nvmem_get_mac_address(struct device *dev, void *addrbuf) { struct nvmem_cell *cell; const void *mac; size_t len; cell = nvmem_cell_get(dev, "mac-address"); if (IS_ERR(cell)) return PTR_ERR(cell); mac = nvmem_cell_read(cell, &len); nvmem_cell_put(cell); if (IS_ERR(mac)) return PTR_ERR(mac); if (len != ETH_ALEN || !is_valid_ether_addr(mac)) { kfree(mac); return -EINVAL; } ether_addr_copy(addrbuf, mac); kfree(mac); return 0; } static int fwnode_get_mac_addr(struct fwnode_handle *fwnode, const char *name, char *addr) { int ret; ret = fwnode_property_read_u8_array(fwnode, name, addr, ETH_ALEN); if (ret) return ret; if (!is_valid_ether_addr(addr)) return -EINVAL; return 0; } /** * fwnode_get_mac_address - Get the MAC from the firmware node * @fwnode: Pointer to the firmware node * @addr: Address of buffer to store the MAC in * * Search the firmware node for the best MAC address to use. 'mac-address' is * checked first, because that is supposed to contain the "most recent" MAC * address. If that isn't set, then 'local-mac-address' is checked next, * because that is the default address. If that isn't set, then the obsolete * 'address' is checked, just in case we're using an old device tree. * * Note that the 'address' property is supposed to contain a virtual address of * the register set, but some DTS files have redefined that property to be the * MAC address. * * All-zero MAC addresses are rejected, because those could be properties that * exist in the firmware tables, but were not updated by the firmware. For * example, the DTS could define 'mac-address' and 'local-mac-address', with * zero MAC addresses. Some older U-Boots only initialized 'local-mac-address'. * In this case, the real MAC is in 'local-mac-address', and 'mac-address' * exists but is all zeros. */ int fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr) { if (!fwnode_get_mac_addr(fwnode, "mac-address", addr) || !fwnode_get_mac_addr(fwnode, "local-mac-address", addr) || !fwnode_get_mac_addr(fwnode, "address", addr)) return 0; return -ENOENT; } EXPORT_SYMBOL(fwnode_get_mac_address); /** * device_get_mac_address - Get the MAC for a given device * @dev: Pointer to the device * @addr: Address of buffer to store the MAC in */ int device_get_mac_address(struct device *dev, char *addr) { return fwnode_get_mac_address(dev_fwnode(dev), addr); } EXPORT_SYMBOL(device_get_mac_address); /** * device_get_ethdev_address - Set netdev's MAC address from a given device * @dev: Pointer to the device * @netdev: Pointer to netdev to write the address to * * Wrapper around device_get_mac_address() which writes the address * directly to netdev->dev_addr. */ int device_get_ethdev_address(struct device *dev, struct net_device *netdev) { u8 addr[ETH_ALEN]; int ret; ret = device_get_mac_address(dev, addr); if (!ret) eth_hw_addr_set(netdev, addr); return ret; } EXPORT_SYMBOL(device_get_ethdev_address);
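The helpers above are intended to be combined by a driver at probe time. What follows is a minimal, hedged usage sketch, not part of eth.c: the "demo" names and the probe signature are hypothetical, while the eth_* helpers and device_get_ethdev_address() are the exports defined in this file.

#include <linux/device.h>
#include <linux/etherdevice.h>
#include <linux/netdevice.h>

struct demo_priv {
	struct device *parent;		/* hypothetical driver-private state */
};

static const struct net_device_ops demo_netdev_ops = {
	.ndo_validate_addr	= eth_validate_addr,	/* generic helper above */
	.ndo_set_mac_address	= eth_mac_addr,		/* prepare + commit in one call */
};

static int demo_probe(struct device *dev)
{
	struct net_device *netdev;
	int err;

	/* alloc_etherdev() expands to alloc_etherdev_mqs(priv, 1, 1) and runs ether_setup() */
	netdev = alloc_etherdev(sizeof(struct demo_priv));
	if (!netdev)
		return -ENOMEM;

	netdev->netdev_ops = &demo_netdev_ops;

	/* try the firmware-provided MAC (mac-address / local-mac-address / address),
	 * fall back to a random locally administered address
	 */
	if (device_get_ethdev_address(dev, netdev))
		eth_hw_addr_random(netdev);

	err = register_netdev(netdev);
	if (err)
		free_netdev(netdev);
	return err;
}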
// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N.
contributors: * * Marek Lindner, Simon Wunderlich */ #include "originator.h" #include "main.h" #include <linux/atomic.h> #include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/if_vlan.h> #include <linux/jiffies.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/workqueue.h> #include <net/sock.h> #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "distributed-arp-table.h" #include "fragmentation.h" #include "gateway_client.h" #include "hard-interface.h" #include "hash.h" #include "log.h" #include "multicast.h" #include "netlink.h" #include "network-coding.h" #include "routing.h" #include "soft-interface.h" #include "translation-table.h" /* hash class keys */ static struct lock_class_key batadv_orig_hash_lock_class_key; /** * batadv_orig_hash_find() - Find and return originator from orig_hash * @bat_priv: the bat priv with all the soft interface information * @data: mac address of the originator * * Return: orig_node (with increased refcnt), NULL on errors */ struct batadv_orig_node * batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data) { struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; struct batadv_orig_node *orig_node, *orig_node_tmp = NULL; int index; if (!hash) return NULL; index = batadv_choose_orig(data, hash->size); head = &hash->table[index]; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { if (!batadv_compare_eth(orig_node, data)) continue; if (!kref_get_unless_zero(&orig_node->refcount)) continue; orig_node_tmp = orig_node; break; } rcu_read_unlock(); return orig_node_tmp; } static void batadv_purge_orig(struct work_struct *work); /** * batadv_compare_orig() - comparing function used in the originator hash table * @node: node in the local table * @data2: second object to compare the node to * * Return: true if they are the same originator */ bool batadv_compare_orig(const struct hlist_node *node, const void *data2) { const void *data1 = container_of(node, struct batadv_orig_node, hash_entry); return batadv_compare_eth(data1, data2); } /** * batadv_orig_node_vlan_get() - get an orig_node_vlan object * @orig_node: the originator serving the VLAN * @vid: the VLAN identifier * * Return: the vlan object identified by vid and belonging to orig_node or NULL * if it does not exist. 
*/ struct batadv_orig_node_vlan * batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, unsigned short vid) { struct batadv_orig_node_vlan *vlan = NULL, *tmp; rcu_read_lock(); hlist_for_each_entry_rcu(tmp, &orig_node->vlan_list, list) { if (tmp->vid != vid) continue; if (!kref_get_unless_zero(&tmp->refcount)) continue; vlan = tmp; break; } rcu_read_unlock(); return vlan; } /** * batadv_vlan_id_valid() - check if vlan id is in valid batman-adv encoding * @vid: the VLAN identifier * * Return: true when either no vlan is set or if VLAN is in correct range, * false otherwise */ static bool batadv_vlan_id_valid(unsigned short vid) { unsigned short non_vlan = vid & ~(BATADV_VLAN_HAS_TAG | VLAN_VID_MASK); if (vid == 0) return true; if (!(vid & BATADV_VLAN_HAS_TAG)) return false; if (non_vlan) return false; return true; } /** * batadv_orig_node_vlan_new() - search and possibly create an orig_node_vlan * object * @orig_node: the originator serving the VLAN * @vid: the VLAN identifier * * Return: NULL in case of failure or the vlan object identified by vid and * belonging to orig_node otherwise. The object is created and added to the list * if it does not exist. * * The object is returned with refcounter increased by 1. */ struct batadv_orig_node_vlan * batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, unsigned short vid) { struct batadv_orig_node_vlan *vlan; if (!batadv_vlan_id_valid(vid)) return NULL; spin_lock_bh(&orig_node->vlan_list_lock); /* first look if an object for this vid already exists */ vlan = batadv_orig_node_vlan_get(orig_node, vid); if (vlan) goto out; vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC); if (!vlan) goto out; kref_init(&vlan->refcount); vlan->vid = vid; kref_get(&vlan->refcount); hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list); out: spin_unlock_bh(&orig_node->vlan_list_lock); return vlan; } /** * batadv_orig_node_vlan_release() - release originator-vlan object from lists * and queue for free after rcu grace period * @ref: kref pointer of the originator-vlan object */ void batadv_orig_node_vlan_release(struct kref *ref) { struct batadv_orig_node_vlan *orig_vlan; orig_vlan = container_of(ref, struct batadv_orig_node_vlan, refcount); kfree_rcu(orig_vlan, rcu); } /** * batadv_originator_init() - Initialize all originator structures * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success or negative error number in case of failure */ int batadv_originator_init(struct batadv_priv *bat_priv) { if (bat_priv->orig_hash) return 0; bat_priv->orig_hash = batadv_hash_new(1024); if (!bat_priv->orig_hash) goto err; batadv_hash_set_lock_class(bat_priv->orig_hash, &batadv_orig_hash_lock_class_key); INIT_DELAYED_WORK(&bat_priv->orig_work, batadv_purge_orig); queue_delayed_work(batadv_event_workqueue, &bat_priv->orig_work, msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD)); return 0; err: return -ENOMEM; } /** * batadv_neigh_ifinfo_release() - release neigh_ifinfo from lists and queue for * free after rcu grace period * @ref: kref pointer of the neigh_ifinfo */ void batadv_neigh_ifinfo_release(struct kref *ref) { struct batadv_neigh_ifinfo *neigh_ifinfo; neigh_ifinfo = container_of(ref, struct batadv_neigh_ifinfo, refcount); if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) batadv_hardif_put(neigh_ifinfo->if_outgoing); kfree_rcu(neigh_ifinfo, rcu); } /** * batadv_hardif_neigh_release() - release hardif neigh node from lists and * queue for free after rcu grace period * @ref: kref pointer of the neigh_node */ void 
batadv_hardif_neigh_release(struct kref *ref) { struct batadv_hardif_neigh_node *hardif_neigh; hardif_neigh = container_of(ref, struct batadv_hardif_neigh_node, refcount); spin_lock_bh(&hardif_neigh->if_incoming->neigh_list_lock); hlist_del_init_rcu(&hardif_neigh->list); spin_unlock_bh(&hardif_neigh->if_incoming->neigh_list_lock); batadv_hardif_put(hardif_neigh->if_incoming); kfree_rcu(hardif_neigh, rcu); } /** * batadv_neigh_node_release() - release neigh_node from lists and queue for * free after rcu grace period * @ref: kref pointer of the neigh_node */ void batadv_neigh_node_release(struct kref *ref) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; struct batadv_neigh_ifinfo *neigh_ifinfo; neigh_node = container_of(ref, struct batadv_neigh_node, refcount); hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, &neigh_node->ifinfo_list, list) { batadv_neigh_ifinfo_put(neigh_ifinfo); } batadv_hardif_neigh_put(neigh_node->hardif_neigh); batadv_hardif_put(neigh_node->if_incoming); kfree_rcu(neigh_node, rcu); } /** * batadv_orig_router_get() - router to the originator depending on iface * @orig_node: the orig node for the router * @if_outgoing: the interface where the payload packet has been received or * the OGM should be sent to * * Return: the neighbor which should be the router for this orig_node/iface. * * The object is returned with refcounter increased by 1. */ struct batadv_neigh_node * batadv_orig_router_get(struct batadv_orig_node *orig_node, const struct batadv_hard_iface *if_outgoing) { struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_node *router = NULL; rcu_read_lock(); hlist_for_each_entry_rcu(orig_ifinfo, &orig_node->ifinfo_list, list) { if (orig_ifinfo->if_outgoing != if_outgoing) continue; router = rcu_dereference(orig_ifinfo->router); break; } if (router && !kref_get_unless_zero(&router->refcount)) router = NULL; rcu_read_unlock(); return router; } /** * batadv_orig_to_router() - get next hop neighbor to an orig address * @bat_priv: the bat priv with all the soft interface information * @orig_addr: the originator MAC address to search the best next hop router for * @if_outgoing: the interface where the payload packet has been received or * the OGM should be sent to * * Return: A neighbor node which is the best router towards the given originator * address. */ struct batadv_neigh_node * batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr, struct batadv_hard_iface *if_outgoing) { struct batadv_neigh_node *neigh_node; struct batadv_orig_node *orig_node; orig_node = batadv_orig_hash_find(bat_priv, orig_addr); if (!orig_node) return NULL; neigh_node = batadv_find_router(bat_priv, orig_node, if_outgoing); batadv_orig_node_put(orig_node); return neigh_node; } /** * batadv_orig_ifinfo_get() - find the ifinfo from an orig_node * @orig_node: the orig node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired * * Return: the requested orig_ifinfo or NULL if not found. * * The object is returned with refcounter increased by 1. 
*/ struct batadv_orig_ifinfo * batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing) { struct batadv_orig_ifinfo *tmp, *orig_ifinfo = NULL; rcu_read_lock(); hlist_for_each_entry_rcu(tmp, &orig_node->ifinfo_list, list) { if (tmp->if_outgoing != if_outgoing) continue; if (!kref_get_unless_zero(&tmp->refcount)) continue; orig_ifinfo = tmp; break; } rcu_read_unlock(); return orig_ifinfo; } /** * batadv_orig_ifinfo_new() - search and possibly create an orig_ifinfo object * @orig_node: the orig node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired * * Return: NULL in case of failure or the orig_ifinfo object for the if_outgoing * interface otherwise. The object is created and added to the list * if it does not exist. * * The object is returned with refcounter increased by 1. */ struct batadv_orig_ifinfo * batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing) { struct batadv_orig_ifinfo *orig_ifinfo; unsigned long reset_time; spin_lock_bh(&orig_node->neigh_list_lock); orig_ifinfo = batadv_orig_ifinfo_get(orig_node, if_outgoing); if (orig_ifinfo) goto out; orig_ifinfo = kzalloc(sizeof(*orig_ifinfo), GFP_ATOMIC); if (!orig_ifinfo) goto out; if (if_outgoing != BATADV_IF_DEFAULT) kref_get(&if_outgoing->refcount); reset_time = jiffies - 1; reset_time -= msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); orig_ifinfo->batman_seqno_reset = reset_time; orig_ifinfo->if_outgoing = if_outgoing; INIT_HLIST_NODE(&orig_ifinfo->list); kref_init(&orig_ifinfo->refcount); kref_get(&orig_ifinfo->refcount); hlist_add_head_rcu(&orig_ifinfo->list, &orig_node->ifinfo_list); out: spin_unlock_bh(&orig_node->neigh_list_lock); return orig_ifinfo; } /** * batadv_neigh_ifinfo_get() - find the ifinfo from a neigh_node * @neigh: the neigh node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired * * The object is returned with refcounter increased by 1. * * Return: the requested neigh_ifinfo or NULL if not found */ struct batadv_neigh_ifinfo * batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing) { struct batadv_neigh_ifinfo *neigh_ifinfo = NULL, *tmp_neigh_ifinfo; rcu_read_lock(); hlist_for_each_entry_rcu(tmp_neigh_ifinfo, &neigh->ifinfo_list, list) { if (tmp_neigh_ifinfo->if_outgoing != if_outgoing) continue; if (!kref_get_unless_zero(&tmp_neigh_ifinfo->refcount)) continue; neigh_ifinfo = tmp_neigh_ifinfo; break; } rcu_read_unlock(); return neigh_ifinfo; } /** * batadv_neigh_ifinfo_new() - search and possibly create a neigh_ifinfo object * @neigh: the neigh node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired * * Return: NULL in case of failure or the neigh_ifinfo object for the * if_outgoing interface otherwise. The object is created and added to the list * if it does not exist. * * The object is returned with refcounter increased by 1.
*/ struct batadv_neigh_ifinfo * batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing) { struct batadv_neigh_ifinfo *neigh_ifinfo; spin_lock_bh(&neigh->ifinfo_lock); neigh_ifinfo = batadv_neigh_ifinfo_get(neigh, if_outgoing); if (neigh_ifinfo) goto out; neigh_ifinfo = kzalloc(sizeof(*neigh_ifinfo), GFP_ATOMIC); if (!neigh_ifinfo) goto out; if (if_outgoing) kref_get(&if_outgoing->refcount); INIT_HLIST_NODE(&neigh_ifinfo->list); kref_init(&neigh_ifinfo->refcount); neigh_ifinfo->if_outgoing = if_outgoing; kref_get(&neigh_ifinfo->refcount); hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list); out: spin_unlock_bh(&neigh->ifinfo_lock); return neigh_ifinfo; } /** * batadv_neigh_node_get() - retrieve a neighbour from the list * @orig_node: originator which the neighbour belongs to * @hard_iface: the interface where this neighbour is connected to * @addr: the address of the neighbour * * Looks for and possibly returns a neighbour belonging to this originator list * which is connected through the provided hard interface. * * Return: neighbor when found. Otherwise NULL */ static struct batadv_neigh_node * batadv_neigh_node_get(const struct batadv_orig_node *orig_node, const struct batadv_hard_iface *hard_iface, const u8 *addr) { struct batadv_neigh_node *tmp_neigh_node, *res = NULL; rcu_read_lock(); hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { if (!batadv_compare_eth(tmp_neigh_node->addr, addr)) continue; if (tmp_neigh_node->if_incoming != hard_iface) continue; if (!kref_get_unless_zero(&tmp_neigh_node->refcount)) continue; res = tmp_neigh_node; break; } rcu_read_unlock(); return res; } /** * batadv_hardif_neigh_create() - create a hardif neighbour node * @hard_iface: the interface this neighbour is connected to * @neigh_addr: the interface address of the neighbour to retrieve * @orig_node: originator object representing the neighbour * * Return: the hardif neighbour node if found or created or NULL otherwise. */ static struct batadv_hardif_neigh_node * batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, const u8 *neigh_addr, struct batadv_orig_node *orig_node) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hardif_neigh_node *hardif_neigh; spin_lock_bh(&hard_iface->neigh_list_lock); /* check if neighbor hasn't been added in the meantime */ hardif_neigh = batadv_hardif_neigh_get(hard_iface, neigh_addr); if (hardif_neigh) goto out; hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC); if (!hardif_neigh) goto out; kref_get(&hard_iface->refcount); INIT_HLIST_NODE(&hardif_neigh->list); ether_addr_copy(hardif_neigh->addr, neigh_addr); ether_addr_copy(hardif_neigh->orig, orig_node->orig); hardif_neigh->if_incoming = hard_iface; hardif_neigh->last_seen = jiffies; kref_init(&hardif_neigh->refcount); if (bat_priv->algo_ops->neigh.hardif_init) bat_priv->algo_ops->neigh.hardif_init(hardif_neigh); hlist_add_head_rcu(&hardif_neigh->list, &hard_iface->neigh_list); out: spin_unlock_bh(&hard_iface->neigh_list_lock); return hardif_neigh; } /** * batadv_hardif_neigh_get_or_create() - retrieve or create a hardif neighbour * node * @hard_iface: the interface this neighbour is connected to * @neigh_addr: the interface address of the neighbour to retrieve * @orig_node: originator object representing the neighbour * * Return: the hardif neighbour node if found or created or NULL otherwise. 
*/ static struct batadv_hardif_neigh_node * batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface, const u8 *neigh_addr, struct batadv_orig_node *orig_node) { struct batadv_hardif_neigh_node *hardif_neigh; /* first check without locking to avoid the overhead */ hardif_neigh = batadv_hardif_neigh_get(hard_iface, neigh_addr); if (hardif_neigh) return hardif_neigh; return batadv_hardif_neigh_create(hard_iface, neigh_addr, orig_node); } /** * batadv_hardif_neigh_get() - retrieve a hardif neighbour from the list * @hard_iface: the interface where this neighbour is connected to * @neigh_addr: the address of the neighbour * * Looks for and possibly returns a neighbour belonging to this hard interface. * * Return: neighbor when found. Otherwise NULL */ struct batadv_hardif_neigh_node * batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, const u8 *neigh_addr) { struct batadv_hardif_neigh_node *tmp_hardif_neigh, *hardif_neigh = NULL; rcu_read_lock(); hlist_for_each_entry_rcu(tmp_hardif_neigh, &hard_iface->neigh_list, list) { if (!batadv_compare_eth(tmp_hardif_neigh->addr, neigh_addr)) continue; if (!kref_get_unless_zero(&tmp_hardif_neigh->refcount)) continue; hardif_neigh = tmp_hardif_neigh; break; } rcu_read_unlock(); return hardif_neigh; } /** * batadv_neigh_node_create() - create a neigh node object * @orig_node: originator object representing the neighbour * @hard_iface: the interface where the neighbour is connected to * @neigh_addr: the mac address of the neighbour interface * * Allocates a new neigh_node object and initialises all the generic fields. * * Return: the neighbour node if found or created or NULL otherwise. */ static struct batadv_neigh_node * batadv_neigh_node_create(struct batadv_orig_node *orig_node, struct batadv_hard_iface *hard_iface, const u8 *neigh_addr) { struct batadv_neigh_node *neigh_node; struct batadv_hardif_neigh_node *hardif_neigh = NULL; spin_lock_bh(&orig_node->neigh_list_lock); neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr); if (neigh_node) goto out; hardif_neigh = batadv_hardif_neigh_get_or_create(hard_iface, neigh_addr, orig_node); if (!hardif_neigh) goto out; neigh_node = kzalloc(sizeof(*neigh_node), GFP_ATOMIC); if (!neigh_node) goto out; INIT_HLIST_NODE(&neigh_node->list); INIT_HLIST_HEAD(&neigh_node->ifinfo_list); spin_lock_init(&neigh_node->ifinfo_lock); kref_get(&hard_iface->refcount); ether_addr_copy(neigh_node->addr, neigh_addr); neigh_node->if_incoming = hard_iface; neigh_node->orig_node = orig_node; neigh_node->last_seen = jiffies; /* increment unique neighbor refcount */ kref_get(&hardif_neigh->refcount); neigh_node->hardif_neigh = hardif_neigh; /* extra reference for return */ kref_init(&neigh_node->refcount); kref_get(&neigh_node->refcount); hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, "Creating new neighbor %pM for orig_node %pM on interface %s\n", neigh_addr, orig_node->orig, hard_iface->net_dev->name); out: spin_unlock_bh(&orig_node->neigh_list_lock); batadv_hardif_neigh_put(hardif_neigh); return neigh_node; } /** * batadv_neigh_node_get_or_create() - retrieve or create a neigh node object * @orig_node: originator object representing the neighbour * @hard_iface: the interface where the neighbour is connected to * @neigh_addr: the mac address of the neighbour interface * * Return: the neighbour node if found or created or NULL otherwise. 
*/ struct batadv_neigh_node * batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, struct batadv_hard_iface *hard_iface, const u8 *neigh_addr) { struct batadv_neigh_node *neigh_node; /* first check without locking to avoid the overhead */ neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr); if (neigh_node) return neigh_node; return batadv_neigh_node_create(orig_node, hard_iface, neigh_addr); } /** * batadv_hardif_neigh_dump() - Dump to netlink the neighbor infos for a * specific outgoing interface * @msg: message to dump into * @cb: parameters for the dump * * Return: 0 or error value */ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct net_device *soft_iface; struct net_device *hard_iface = NULL; struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; int ret; int ifindex, hard_ifindex; ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); if (!ifindex) return -EINVAL; soft_iface = dev_get_by_index(net, ifindex); if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { ret = -ENODEV; goto out; } bat_priv = netdev_priv(soft_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { ret = -ENOENT; goto out; } hard_ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_HARD_IFINDEX); if (hard_ifindex) { hard_iface = dev_get_by_index(net, hard_ifindex); if (hard_iface) hardif = batadv_hardif_get_by_netdev(hard_iface); if (!hardif) { ret = -ENODEV; goto out; } if (hardif->soft_iface != soft_iface) { ret = -ENOENT; goto out; } } if (!bat_priv->algo_ops->neigh.dump) { ret = -EOPNOTSUPP; goto out; } bat_priv->algo_ops->neigh.dump(msg, cb, bat_priv, hardif); ret = msg->len; out: batadv_hardif_put(hardif); dev_put(hard_iface); batadv_hardif_put(primary_if); dev_put(soft_iface); return ret; } /** * batadv_orig_ifinfo_release() - release orig_ifinfo from lists and queue for * free after rcu grace period * @ref: kref pointer of the orig_ifinfo */ void batadv_orig_ifinfo_release(struct kref *ref) { struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_node *router; orig_ifinfo = container_of(ref, struct batadv_orig_ifinfo, refcount); if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) batadv_hardif_put(orig_ifinfo->if_outgoing); /* this is the last reference to this object */ router = rcu_dereference_protected(orig_ifinfo->router, true); batadv_neigh_node_put(router); kfree_rcu(orig_ifinfo, rcu); } /** * batadv_orig_node_free_rcu() - free the orig_node * @rcu: rcu pointer of the orig_node */ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) { struct batadv_orig_node *orig_node; orig_node = container_of(rcu, struct batadv_orig_node, rcu); batadv_mcast_purge_orig(orig_node); batadv_frag_purge_orig(orig_node, NULL); kfree(orig_node->tt_buff); kfree(orig_node); } /** * batadv_orig_node_release() - release orig_node from lists and queue for * free after rcu grace period * @ref: kref pointer of the orig_node */ void batadv_orig_node_release(struct kref *ref) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; struct batadv_orig_node *orig_node; struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_orig_node_vlan *vlan; struct batadv_orig_ifinfo *last_candidate; orig_node = container_of(ref, struct batadv_orig_node, refcount); spin_lock_bh(&orig_node->neigh_list_lock); /* for all neighbors towards this 
originator ... */ hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { hlist_del_rcu(&neigh_node->list); batadv_neigh_node_put(neigh_node); } hlist_for_each_entry_safe(orig_ifinfo, node_tmp, &orig_node->ifinfo_list, list) { hlist_del_rcu(&orig_ifinfo->list); batadv_orig_ifinfo_put(orig_ifinfo); } last_candidate = orig_node->last_bonding_candidate; orig_node->last_bonding_candidate = NULL; spin_unlock_bh(&orig_node->neigh_list_lock); batadv_orig_ifinfo_put(last_candidate); spin_lock_bh(&orig_node->vlan_list_lock); hlist_for_each_entry_safe(vlan, node_tmp, &orig_node->vlan_list, list) { hlist_del_rcu(&vlan->list); batadv_orig_node_vlan_put(vlan); } spin_unlock_bh(&orig_node->vlan_list_lock); /* Free nc_nodes */ batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL); call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu); } /** * batadv_originator_free() - Free all originator structures * @bat_priv: the bat priv with all the soft interface information */ void batadv_originator_free(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct batadv_orig_node *orig_node; u32 i; if (!hash) return; cancel_delayed_work_sync(&bat_priv->orig_work); bat_priv->orig_hash = NULL; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); hlist_for_each_entry_safe(orig_node, node_tmp, head, hash_entry) { hlist_del_rcu(&orig_node->hash_entry); batadv_orig_node_put(orig_node); } spin_unlock_bh(list_lock); } batadv_hash_destroy(hash); } /** * batadv_orig_node_new() - creates a new orig_node * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the originator * * Creates a new originator object and initialises all the generic fields. * The new object is not added to the originator list. * * Return: the newly created object or NULL on failure. 
*/ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const u8 *addr) { struct batadv_orig_node *orig_node; struct batadv_orig_node_vlan *vlan; unsigned long reset_time; int i; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Creating new originator: %pM\n", addr); orig_node = kzalloc(sizeof(*orig_node), GFP_ATOMIC); if (!orig_node) return NULL; INIT_HLIST_HEAD(&orig_node->neigh_list); INIT_HLIST_HEAD(&orig_node->vlan_list); INIT_HLIST_HEAD(&orig_node->ifinfo_list); spin_lock_init(&orig_node->bcast_seqno_lock); spin_lock_init(&orig_node->neigh_list_lock); spin_lock_init(&orig_node->tt_buff_lock); spin_lock_init(&orig_node->tt_lock); spin_lock_init(&orig_node->vlan_list_lock); batadv_nc_init_orig(orig_node); /* extra reference for return */ kref_init(&orig_node->refcount); orig_node->bat_priv = bat_priv; ether_addr_copy(orig_node->orig, addr); batadv_dat_init_orig_node_addr(orig_node); atomic_set(&orig_node->last_ttvn, 0); orig_node->tt_buff = NULL; orig_node->tt_buff_len = 0; orig_node->last_seen = jiffies; reset_time = jiffies - 1 - msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); orig_node->bcast_seqno_reset = reset_time; #ifdef CONFIG_BATMAN_ADV_MCAST orig_node->mcast_flags = BATADV_MCAST_WANT_NO_RTR4; orig_node->mcast_flags |= BATADV_MCAST_WANT_NO_RTR6; orig_node->mcast_flags |= BATADV_MCAST_HAVE_MC_PTYPE_CAPA; INIT_HLIST_NODE(&orig_node->mcast_want_all_unsnoopables_node); INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv4_node); INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv6_node); spin_lock_init(&orig_node->mcast_handler_lock); #endif /* create a vlan object for the "untagged" LAN */ vlan = batadv_orig_node_vlan_new(orig_node, BATADV_NO_FLAGS); if (!vlan) goto free_orig_node; /* batadv_orig_node_vlan_new() increases the refcounter. 
* Immediately release vlan since it is not needed anymore in this * context */ batadv_orig_node_vlan_put(vlan); for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) { INIT_HLIST_HEAD(&orig_node->fragments[i].fragment_list); spin_lock_init(&orig_node->fragments[i].lock); orig_node->fragments[i].size = 0; } return orig_node; free_orig_node: kfree(orig_node); return NULL; } /** * batadv_purge_neigh_ifinfo() - purge obsolete ifinfo entries from neighbor * @bat_priv: the bat priv with all the soft interface information * @neigh: orig node which is to be checked */ static void batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv, struct batadv_neigh_node *neigh) { struct batadv_neigh_ifinfo *neigh_ifinfo; struct batadv_hard_iface *if_outgoing; struct hlist_node *node_tmp; spin_lock_bh(&neigh->ifinfo_lock); /* for all ifinfo objects for this neighinator */ hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, &neigh->ifinfo_list, list) { if_outgoing = neigh_ifinfo->if_outgoing; /* always keep the default interface */ if (if_outgoing == BATADV_IF_DEFAULT) continue; /* don't purge if the interface is not (going) down */ if (if_outgoing->if_status != BATADV_IF_INACTIVE && if_outgoing->if_status != BATADV_IF_NOT_IN_USE && if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED) continue; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "neighbor/ifinfo purge: neighbor %pM, iface: %s\n", neigh->addr, if_outgoing->net_dev->name); hlist_del_rcu(&neigh_ifinfo->list); batadv_neigh_ifinfo_put(neigh_ifinfo); } spin_unlock_bh(&neigh->ifinfo_lock); } /** * batadv_purge_orig_ifinfo() - purge obsolete ifinfo entries from originator * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be checked * * Return: true if any ifinfo entry was purged, false otherwise. 
*/ static bool batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_hard_iface *if_outgoing; struct hlist_node *node_tmp; bool ifinfo_purged = false; spin_lock_bh(&orig_node->neigh_list_lock); /* for all ifinfo objects for this originator */ hlist_for_each_entry_safe(orig_ifinfo, node_tmp, &orig_node->ifinfo_list, list) { if_outgoing = orig_ifinfo->if_outgoing; /* always keep the default interface */ if (if_outgoing == BATADV_IF_DEFAULT) continue; /* don't purge if the interface is not (going) down */ if (if_outgoing->if_status != BATADV_IF_INACTIVE && if_outgoing->if_status != BATADV_IF_NOT_IN_USE && if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED) continue; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "router/ifinfo purge: originator %pM, iface: %s\n", orig_node->orig, if_outgoing->net_dev->name); ifinfo_purged = true; hlist_del_rcu(&orig_ifinfo->list); batadv_orig_ifinfo_put(orig_ifinfo); if (orig_node->last_bonding_candidate == orig_ifinfo) { orig_node->last_bonding_candidate = NULL; batadv_orig_ifinfo_put(orig_ifinfo); } } spin_unlock_bh(&orig_node->neigh_list_lock); return ifinfo_purged; } /** * batadv_purge_orig_neighbors() - purges neighbors from originator * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be checked * * Return: true if any neighbor was purged, false otherwise */ static bool batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; bool neigh_purged = false; unsigned long last_seen; struct batadv_hard_iface *if_incoming; spin_lock_bh(&orig_node->neigh_list_lock); /* for all neighbors towards this originator ... */ hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { last_seen = neigh_node->last_seen; if_incoming = neigh_node->if_incoming; if (batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT) || if_incoming->if_status == BATADV_IF_INACTIVE || if_incoming->if_status == BATADV_IF_NOT_IN_USE || if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) { if (if_incoming->if_status == BATADV_IF_INACTIVE || if_incoming->if_status == BATADV_IF_NOT_IN_USE || if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "neighbor purge: originator %pM, neighbor: %pM, iface: %s\n", orig_node->orig, neigh_node->addr, if_incoming->net_dev->name); else batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "neighbor timeout: originator %pM, neighbor: %pM, last_seen: %u\n", orig_node->orig, neigh_node->addr, jiffies_to_msecs(last_seen)); neigh_purged = true; hlist_del_rcu(&neigh_node->list); batadv_neigh_node_put(neigh_node); } else { /* only necessary if not the whole neighbor is to be * deleted, but some interface has been removed. */ batadv_purge_neigh_ifinfo(bat_priv, neigh_node); } } spin_unlock_bh(&orig_node->neigh_list_lock); return neigh_purged; } /** * batadv_find_best_neighbor() - finds the best neighbor after purging * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be checked * @if_outgoing: the interface for which the metric should be compared * * Return: the current best neighbor, with refcount increased. 
*/ static struct batadv_neigh_node * batadv_find_best_neighbor(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing) { struct batadv_neigh_node *best = NULL, *neigh; struct batadv_algo_ops *bao = bat_priv->algo_ops; rcu_read_lock(); hlist_for_each_entry_rcu(neigh, &orig_node->neigh_list, list) { if (best && (bao->neigh.cmp(neigh, if_outgoing, best, if_outgoing) <= 0)) continue; if (!kref_get_unless_zero(&neigh->refcount)) continue; batadv_neigh_node_put(best); best = neigh; } rcu_read_unlock(); return best; } /** * batadv_purge_orig_node() - purges obsolete information from an orig_node * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be checked * * This function checks if the orig_node or substructures of it have become * obsolete, and purges this information if that's the case. * * Return: true if the orig_node is to be removed, false otherwise. */ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { struct batadv_neigh_node *best_neigh_node; struct batadv_hard_iface *hard_iface; bool changed_ifinfo, changed_neigh; if (batadv_has_timed_out(orig_node->last_seen, 2 * BATADV_PURGE_TIMEOUT)) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Originator timeout: originator %pM, last_seen %u\n", orig_node->orig, jiffies_to_msecs(orig_node->last_seen)); return true; } changed_ifinfo = batadv_purge_orig_ifinfo(bat_priv, orig_node); changed_neigh = batadv_purge_orig_neighbors(bat_priv, orig_node); if (!changed_ifinfo && !changed_neigh) return false; /* first for NULL ... */ best_neigh_node = batadv_find_best_neighbor(bat_priv, orig_node, BATADV_IF_DEFAULT); batadv_update_route(bat_priv, orig_node, BATADV_IF_DEFAULT, best_neigh_node); batadv_neigh_node_put(best_neigh_node); /* ... then for all other interfaces. */ rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; if (hard_iface->soft_iface != bat_priv->soft_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) continue; best_neigh_node = batadv_find_best_neighbor(bat_priv, orig_node, hard_iface); batadv_update_route(bat_priv, orig_node, hard_iface, best_neigh_node); batadv_neigh_node_put(best_neigh_node); batadv_hardif_put(hard_iface); } rcu_read_unlock(); return false; } /** * batadv_purge_orig_ref() - Purge all outdated originators * @bat_priv: the bat priv with all the soft interface information */ void batadv_purge_orig_ref(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct batadv_orig_node *orig_node; u32 i; if (!hash) return; /* for all origins... 
*/ for (i = 0; i < hash->size; i++) { head = &hash->table[i]; if (hlist_empty(head)) continue; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); hlist_for_each_entry_safe(orig_node, node_tmp, head, hash_entry) { if (batadv_purge_orig_node(bat_priv, orig_node)) { batadv_gw_node_delete(bat_priv, orig_node); hlist_del_rcu(&orig_node->hash_entry); batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, -1, "originator timed out"); batadv_orig_node_put(orig_node); continue; } batadv_frag_purge_orig(orig_node, batadv_frag_check_entry); } spin_unlock_bh(list_lock); } batadv_gw_election(bat_priv); } static void batadv_purge_orig(struct work_struct *work) { struct delayed_work *delayed_work; struct batadv_priv *bat_priv; delayed_work = to_delayed_work(work); bat_priv = container_of(delayed_work, struct batadv_priv, orig_work); batadv_purge_orig_ref(bat_priv); queue_delayed_work(batadv_event_workqueue, &bat_priv->orig_work, msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD)); } /** * batadv_orig_dump() - Dump to netlink the originator infos for a specific * outgoing interface * @msg: message to dump into * @cb: parameters for the dump * * Return: 0 or error value */ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct net_device *soft_iface; struct net_device *hard_iface = NULL; struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; int ret; int ifindex, hard_ifindex; ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); if (!ifindex) return -EINVAL; soft_iface = dev_get_by_index(net, ifindex); if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { ret = -ENODEV; goto out; } bat_priv = netdev_priv(soft_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { ret = -ENOENT; goto out; } hard_ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_HARD_IFINDEX); if (hard_ifindex) { hard_iface = dev_get_by_index(net, hard_ifindex); if (hard_iface) hardif = batadv_hardif_get_by_netdev(hard_iface); if (!hardif) { ret = -ENODEV; goto out; } if (hardif->soft_iface != soft_iface) { ret = -ENOENT; goto out; } } if (!bat_priv->algo_ops->orig.dump) { ret = -EOPNOTSUPP; goto out; } bat_priv->algo_ops->orig.dump(msg, cb, bat_priv, hardif); ret = msg->len; out: batadv_hardif_put(hardif); dev_put(hard_iface); batadv_hardif_put(primary_if); dev_put(soft_iface); return ret; }
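Nearly every lookup in originator.c follows the same RCU plus kref pattern: walk an RCU-protected hlist, skip entries whose refcount has already dropped to zero, and hand the caller a referenced object. Below is a condensed, hedged sketch of that pattern with generic, hypothetical names (struct demo_node, demo_find); it is not batman-adv API.

#include <linux/kref.h>
#include <linux/rculist.h>
#include <linux/string.h>
#include <linux/types.h>

struct demo_node {
	struct hlist_node hash_entry;
	struct kref refcount;
	u8 addr[6];
};

static struct demo_node *demo_find(struct hlist_head *head, const u8 *addr)
{
	struct demo_node *node, *found = NULL;

	rcu_read_lock();
	hlist_for_each_entry_rcu(node, head, hash_entry) {
		if (memcmp(node->addr, addr, 6) != 0)
			continue;
		/* the entry may be concurrently released; only use it if a
		 * reference can still be taken
		 */
		if (!kref_get_unless_zero(&node->refcount))
			continue;
		found = node;
		break;
	}
	rcu_read_unlock();

	return found;	/* caller must kref_put() when done */
}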
// SPDX-License-Identifier: GPL-2.0 /* * fs/proc_namespace.c - handling of /proc/<pid>/{mounts,mountinfo,mountstats} * * In fact, that's a piece of procfs; it's *almost* isolated from * the rest of fs/proc, but has rather close relationships with * fs/namespace.c, thus here instead of fs/proc * */ #include <linux/mnt_namespace.h> #include <linux/nsproxy.h> #include <linux/security.h> #include <linux/fs_struct.h> #include <linux/sched/task.h> #include "proc/internal.h" /* only for get_proc_task() in ->open() */ #include "pnode.h" #include "internal.h" static __poll_t mounts_poll(struct file *file, poll_table *wait) { struct seq_file *m = file->private_data; struct proc_mounts *p = m->private; struct mnt_namespace *ns = p->ns; __poll_t res = EPOLLIN | EPOLLRDNORM; int event; poll_wait(file, &p->ns->poll, wait); event = READ_ONCE(ns->event); if (m->poll_event != event) { m->poll_event = event; res |= EPOLLERR | EPOLLPRI; } return res; } struct proc_fs_opts { int flag; const char *str; }; static int show_sb_opts(struct seq_file *m, struct super_block *sb) { static const struct proc_fs_opts fs_opts[] = { { SB_SYNCHRONOUS, ",sync" }, { SB_DIRSYNC, ",dirsync" }, { SB_MANDLOCK, ",mand" }, { SB_LAZYTIME, ",lazytime" }, { 0, NULL } }; const struct proc_fs_opts *fs_infop; for (fs_infop = fs_opts; fs_infop->flag; fs_infop++) { if (sb->s_flags & fs_infop->flag) seq_puts(m, fs_infop->str); } return security_sb_show_options(m, sb); } static void show_vfsmnt_opts(struct seq_file *m, struct vfsmount *mnt) { static const struct proc_fs_opts mnt_opts[] = { { MNT_NOSUID, ",nosuid" }, { MNT_NODEV, ",nodev" }, { MNT_NOEXEC, ",noexec" }, { MNT_NOATIME, ",noatime" }, { MNT_NODIRATIME, ",nodiratime" }, { MNT_RELATIME, ",relatime" }, { MNT_NOSYMFOLLOW, ",nosymfollow" }, { 0, NULL } }; const struct proc_fs_opts *fs_infop; for (fs_infop = mnt_opts; fs_infop->flag; fs_infop++) { if (mnt->mnt_flags & fs_infop->flag) seq_puts(m, fs_infop->str); } if (is_idmapped_mnt(mnt)) seq_puts(m, ",idmapped"); } static inline void mangle(struct seq_file *m, const char *s) { seq_escape(m, s, " \t\n\\#"); } static void show_type(struct seq_file *m, struct super_block *sb) { mangle(m,
sb->s_type->name); if (sb->s_subtype) { seq_putc(m, '.'); mangle(m, sb->s_subtype); } } static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt) { struct proc_mounts *p = m->private; struct mount *r = real_mount(mnt); struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; struct super_block *sb = mnt_path.dentry->d_sb; int err; if (sb->s_op->show_devname) { err = sb->s_op->show_devname(m, mnt_path.dentry); if (err) goto out; } else { mangle(m, r->mnt_devname ? r->mnt_devname : "none"); } seq_putc(m, ' '); /* mountpoints outside of chroot jail will give SEQ_SKIP on this */ err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\"); if (err) goto out; seq_putc(m, ' '); show_type(m, sb); seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); err = show_sb_opts(m, sb); if (err) goto out; show_vfsmnt_opts(m, mnt); if (sb->s_op->show_options) err = sb->s_op->show_options(m, mnt_path.dentry); seq_puts(m, " 0 0\n"); out: return err; } static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) { struct proc_mounts *p = m->private; struct mount *r = real_mount(mnt); struct super_block *sb = mnt->mnt_sb; struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; int err; seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id, MAJOR(sb->s_dev), MINOR(sb->s_dev)); err = show_path(m, mnt->mnt_root); if (err) goto out; seq_putc(m, ' '); /* mountpoints outside of chroot jail will give SEQ_SKIP on this */ err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\"); if (err) goto out; seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw"); show_vfsmnt_opts(m, mnt); /* Tagged fields ("foo:X" or "bar") */ if (IS_MNT_SHARED(r)) seq_printf(m, " shared:%i", r->mnt_group_id); if (IS_MNT_SLAVE(r)) { int master = r->mnt_master->mnt_group_id; int dom = get_dominating_id(r, &p->root); seq_printf(m, " master:%i", master); if (dom && dom != master) seq_printf(m, " propagate_from:%i", dom); } if (IS_MNT_UNBINDABLE(r)) seq_puts(m, " unbindable"); /* Filesystem specific data */ seq_puts(m, " - "); show_type(m, sb); seq_putc(m, ' '); if (sb->s_op->show_devname) { err = sb->s_op->show_devname(m, mnt->mnt_root); if (err) goto out; } else { mangle(m, r->mnt_devname ? r->mnt_devname : "none"); } seq_puts(m, sb_rdonly(sb) ? 
" ro" : " rw"); err = show_sb_opts(m, sb); if (err) goto out; if (sb->s_op->show_options) err = sb->s_op->show_options(m, mnt->mnt_root); seq_putc(m, '\n'); out: return err; } static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt) { struct proc_mounts *p = m->private; struct mount *r = real_mount(mnt); struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; struct super_block *sb = mnt_path.dentry->d_sb; int err; /* device */ if (sb->s_op->show_devname) { seq_puts(m, "device "); err = sb->s_op->show_devname(m, mnt_path.dentry); if (err) goto out; } else { if (r->mnt_devname) { seq_puts(m, "device "); mangle(m, r->mnt_devname); } else seq_puts(m, "no device"); } /* mount point */ seq_puts(m, " mounted on "); /* mountpoints outside of chroot jail will give SEQ_SKIP on this */ err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\"); if (err) goto out; seq_putc(m, ' '); /* file system type */ seq_puts(m, "with fstype "); show_type(m, sb); /* optional statistics */ if (sb->s_op->show_stats) { seq_putc(m, ' '); err = sb->s_op->show_stats(m, mnt_path.dentry); } seq_putc(m, '\n'); out: return err; } static int mounts_open_common(struct inode *inode, struct file *file, int (*show)(struct seq_file *, struct vfsmount *)) { struct task_struct *task = get_proc_task(inode); struct nsproxy *nsp; struct mnt_namespace *ns = NULL; struct path root; struct proc_mounts *p; struct seq_file *m; int ret = -EINVAL; if (!task) goto err; task_lock(task); nsp = task->nsproxy; if (!nsp || !nsp->mnt_ns) { task_unlock(task); put_task_struct(task); goto err; } ns = nsp->mnt_ns; get_mnt_ns(ns); if (!task->fs) { task_unlock(task); put_task_struct(task); ret = -ENOENT; goto err_put_ns; } get_fs_root(task->fs, &root); task_unlock(task); put_task_struct(task); ret = seq_open_private(file, &mounts_op, sizeof(struct proc_mounts)); if (ret) goto err_put_path; m = file->private_data; m->poll_event = ns->event; p = m->private; p->ns = ns; p->root = root; p->show = show; return 0; err_put_path: path_put(&root); err_put_ns: put_mnt_ns(ns); err: return ret; } static int mounts_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; struct proc_mounts *p = m->private; path_put(&p->root); put_mnt_ns(p->ns); return seq_release_private(inode, file); } static int mounts_open(struct inode *inode, struct file *file) { return mounts_open_common(inode, file, show_vfsmnt); } static int mountinfo_open(struct inode *inode, struct file *file) { return mounts_open_common(inode, file, show_mountinfo); } static int mountstats_open(struct inode *inode, struct file *file) { return mounts_open_common(inode, file, show_vfsstat); } const struct file_operations proc_mounts_operations = { .open = mounts_open, .read_iter = seq_read_iter, .splice_read = copy_splice_read, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, }; const struct file_operations proc_mountinfo_operations = { .open = mountinfo_open, .read_iter = seq_read_iter, .splice_read = copy_splice_read, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, }; const struct file_operations proc_mountstats_operations = { .open = mountstats_open, .read_iter = seq_read_iter, .splice_read = copy_splice_read, .llseek = seq_lseek, .release = mounts_release, };
// SPDX-License-Identifier: GPL-2.0-or-later /* * fs/inotify_user.c - inotify support for userspace * * Authors: * John McCutchan <ttb@tentacle.dhs.org> * Robert Love <rml@novell.com> * * Copyright (C) 2005 John McCutchan * Copyright 2006 Hewlett-Packard Development Company, L.P. * * Copyright (C) 2009 Eric Paris <Red Hat Inc> * inotify was largely rewritten to make use of the fsnotify infrastructure */ #include <linux/dcache.h> /* d_unlinked */ #include <linux/fs.h> /* struct inode */ #include <linux/fsnotify_backend.h> #include <linux/inotify.h> #include <linux/path.h> /* struct path */ #include <linux/slab.h> /* kmem_* */ #include <linux/types.h> #include <linux/sched.h> #include <linux/sched/user.h> #include <linux/sched/mm.h> #include "inotify.h" /* * Check if 2 events contain the same information. */ static bool event_compare(struct fsnotify_event *old_fsn, struct fsnotify_event *new_fsn) { struct inotify_event_info *old, *new; old = INOTIFY_E(old_fsn); new = INOTIFY_E(new_fsn); if (old->mask & FS_IN_IGNORED) return false; if ((old->mask == new->mask) && (old->wd == new->wd) && (old->name_len == new->name_len) && (!old->name_len || !strcmp(old->name, new->name))) return true; return false; } static int inotify_merge(struct fsnotify_group *group, struct fsnotify_event *event) { struct list_head *list = &group->notification_list; struct fsnotify_event *last_event; last_event = list_entry(list->prev, struct fsnotify_event, list); return event_compare(last_event, event); } int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, struct inode *inode, struct inode *dir, const struct qstr *name, u32 cookie) { struct inotify_inode_mark *i_mark; struct inotify_event_info *event; struct fsnotify_event *fsn_event; struct fsnotify_group *group = inode_mark->group; int ret; int len = 0, wd; int alloc_len = sizeof(struct inotify_event_info); struct mem_cgroup *old_memcg; if (name) { len = name->len; alloc_len += len + 1; } pr_debug("%s: group=%p mark=%p mask=%x\n", __func__, group, inode_mark, mask); i_mark = container_of(inode_mark, struct inotify_inode_mark, fsn_mark); /* * We can be racing with mark being detached. Don't report event with * invalid wd. */ wd = READ_ONCE(i_mark->wd); if (wd == -1) return 0; /* * Whoever is interested in the event, pays for the allocation. Do not * trigger OOM killer in the target monitoring memcg as it may have * security repercussion. */ old_memcg = set_active_memcg(group->memcg); event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); set_active_memcg(old_memcg); if (unlikely(!event)) { /* * Treat lost event due to ENOMEM the same way as queue * overflow to let userspace know event was lost.
*/ fsnotify_queue_overflow(group); return -ENOMEM; } /* * We now report FS_ISDIR flag with MOVE_SELF and DELETE_SELF events * for fanotify. inotify never reported IN_ISDIR with those events. * It looks like an oversight, but to avoid the risk of breaking * existing inotify programs, mask the flag out from those events. */ if (mask & (IN_MOVE_SELF | IN_DELETE_SELF)) mask &= ~IN_ISDIR; fsn_event = &event->fse; fsnotify_init_event(fsn_event); event->mask = mask; event->wd = wd; event->sync_cookie = cookie; event->name_len = len; if (len) strcpy(event->name, name->name); ret = fsnotify_add_event(group, fsn_event, inotify_merge); if (ret) { /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); } if (inode_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT) fsnotify_destroy_mark(inode_mark, group); return 0; } static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { inotify_ignored_and_remove_idr(fsn_mark, group); } /* * This is NEVER supposed to be called. Inotify marks should either have been * removed from the idr when the watch was removed or in the * fsnotify_destroy_mark_by_group() call when the inotify instance was being * torn down. This is only called if the idr is about to be freed but there * are still marks in it. */ static int idr_callback(int id, void *p, void *data) { struct fsnotify_mark *fsn_mark; struct inotify_inode_mark *i_mark; static bool warned = false; if (warned) return 0; warned = true; fsn_mark = p; i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); WARN(1, "inotify closing but id=%d for fsn_mark=%p in group=%p still in " "idr. Probably leaking memory\n", id, p, data); /* * I'm taking the liberty of assuming that the mark in question is a * valid address and I'm dereferencing it. This might help to figure * out why we got here and the panic is no worse than the original * BUG() that was here. */ if (fsn_mark) printk(KERN_WARNING "fsn_mark->group=%p wd=%d\n", fsn_mark->group, i_mark->wd); return 0; } static void inotify_free_group_priv(struct fsnotify_group *group) { /* ideally the idr is empty and we won't hit the BUG in the callback */ idr_for_each(&group->inotify_data.idr, idr_callback, group); idr_destroy(&group->inotify_data.idr); if (group->inotify_data.ucounts) dec_inotify_instances(group->inotify_data.ucounts); } static void inotify_free_event(struct fsnotify_group *group, struct fsnotify_event *fsn_event) { kfree(INOTIFY_E(fsn_event)); } /* ding dong the mark is dead */ static void inotify_free_mark(struct fsnotify_mark *fsn_mark) { struct inotify_inode_mark *i_mark; i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); kmem_cache_free(inotify_inode_mark_cachep, i_mark); } const struct fsnotify_ops inotify_fsnotify_ops = { .handle_inode_event = inotify_handle_inode_event, .free_group_priv = inotify_free_group_priv, .free_event = inotify_free_event, .freeing_mark = inotify_freeing_mark, .free_mark = inotify_free_mark, };
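For reference, a hedged userspace sketch (not taken from the file above) of the consumer side: the records queued by inotify_handle_inode_event() reach applications as struct inotify_event entries read from an inotify descriptor, with wd, mask, cookie and the optional name corresponding to the fields filled in by the kernel code. The watched path "/tmp" and the event mask are illustrative only.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/inotify.h>

int main(void)
{
	char buf[4096] __attribute__((aligned(__alignof__(struct inotify_event))));
	int fd = inotify_init1(IN_CLOEXEC);

	if (fd < 0 || inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE) < 0)
		return 1;

	for (;;) {
		ssize_t len = read(fd, buf, sizeof(buf));
		char *p;

		if (len <= 0)
			break;
		/* A single read() may return several variable-length records. */
		for (p = buf; p < buf + len; ) {
			const struct inotify_event *ev =
				(const struct inotify_event *)p;

			printf("wd=%d mask=0x%x name=%s\n",
			       ev->wd, ev->mask, ev->len ? ev->name : "");
			p += sizeof(*ev) + ev->len;
		}
	}
	close(fd);
	return 0;
}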
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PTRACE_H #define _LINUX_PTRACE_H #include <linux/compiler.h> /* For unlikely. */ #include <linux/sched.h> /* For struct task_struct. */ #include <linux/sched/signal.h> /* For send_sig(), same_thread_group(), etc. */ #include <linux/err.h> /* for IS_ERR_VALUE */ #include <linux/bug.h> /* For BUG_ON. */ #include <linux/pid_namespace.h> /* For task_active_pid_ns. */ #include <uapi/linux/ptrace.h> #include <linux/seccomp.h> /* Add sp to seccomp_data, as seccomp is user API, we don't want to modify it */ struct syscall_info { __u64 sp; struct seccomp_data data; }; extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags); /* * Ptrace flags * * The ownership rules for task->ptrace, which holds the ptrace * flags, are simple. When a task is running it owns its task->ptrace * flags. When a task is stopped the ptracer owns task->ptrace.
*/ #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ #define PT_PTRACED 0x00000001 #define PT_OPT_FLAG_SHIFT 3 /* PT_TRACE_* event enable flags */ #define PT_EVENT_FLAG(event) (1 << (PT_OPT_FLAG_SHIFT + (event))) #define PT_TRACESYSGOOD PT_EVENT_FLAG(0) #define PT_TRACE_FORK PT_EVENT_FLAG(PTRACE_EVENT_FORK) #define PT_TRACE_VFORK PT_EVENT_FLAG(PTRACE_EVENT_VFORK) #define PT_TRACE_CLONE PT_EVENT_FLAG(PTRACE_EVENT_CLONE) #define PT_TRACE_EXEC PT_EVENT_FLAG(PTRACE_EVENT_EXEC) #define PT_TRACE_VFORK_DONE PT_EVENT_FLAG(PTRACE_EVENT_VFORK_DONE) #define PT_TRACE_EXIT PT_EVENT_FLAG(PTRACE_EVENT_EXIT) #define PT_TRACE_SECCOMP PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP) #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) extern long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data); extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); extern void ptrace_disable(struct task_struct *); extern int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data); extern int ptrace_notify(int exit_code, unsigned long message); extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, const struct cred *ptracer_cred); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_READ 0x01 #define PTRACE_MODE_ATTACH 0x02 #define PTRACE_MODE_NOAUDIT 0x04 #define PTRACE_MODE_FSCREDS 0x08 #define PTRACE_MODE_REALCREDS 0x10 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */ #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS) #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS) /** * ptrace_may_access - check whether the caller is permitted to access * a target task. * @task: target task * @mode: selects type of access and caller credentials * * Returns true on success, false on denial. * * One of the flags PTRACE_MODE_FSCREDS and PTRACE_MODE_REALCREDS must * be set in @mode to specify whether the access was requested through * a filesystem syscall (should use effective capabilities and fsuid * of the caller) or through an explicit syscall such as * process_vm_writev or ptrace (and should use the real credentials). */ extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); static inline int ptrace_reparented(struct task_struct *child) { return !same_thread_group(child->real_parent, child->parent); } static inline void ptrace_unlink(struct task_struct *child) { if (unlikely(child->ptrace)) __ptrace_unlink(child); } int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, unsigned long data); int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, unsigned long data); /** * ptrace_parent - return the task that is tracing the given task * @task: task to consider * * Returns %NULL if no one is tracing @task, or the &struct task_struct * pointer to its tracer. * * Must called under rcu_read_lock(). The pointer returned might be kept * live only by RCU. 
During exec, this may be called with task_lock() held * on @task, still held from when check_unsafe_exec() was called. */ static inline struct task_struct *ptrace_parent(struct task_struct *task) { if (unlikely(task->ptrace)) return rcu_dereference(task->parent); return NULL; } /** * ptrace_event_enabled - test whether a ptrace event is enabled * @task: ptracee of interest * @event: %PTRACE_EVENT_* to test * * Test whether @event is enabled for ptracee @task. * * Returns %true if @event is enabled, %false otherwise. */ static inline bool ptrace_event_enabled(struct task_struct *task, int event) { return task->ptrace & PT_EVENT_FLAG(event); } /** * ptrace_event - possibly stop for a ptrace event notification * @event: %PTRACE_EVENT_* value to report * @message: value for %PTRACE_GETEVENTMSG to return * * Check whether @event is enabled and, if so, report @event and @message * to the ptrace parent. * * Called without locks. */ static inline void ptrace_event(int event, unsigned long message) { if (unlikely(ptrace_event_enabled(current, event))) { ptrace_notify((event << 8) | SIGTRAP, message); } else if (event == PTRACE_EVENT_EXEC) { /* legacy EXEC report via SIGTRAP */ if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED) send_sig(SIGTRAP, current, 0); } } /** * ptrace_event_pid - possibly stop for a ptrace event notification * @event: %PTRACE_EVENT_* value to report * @pid: process identifier for %PTRACE_GETEVENTMSG to return * * Check whether @event is enabled and, if so, report @event and @pid * to the ptrace parent. @pid is reported as the pid_t seen from the * ptrace parent's pid namespace. * * Called without locks. */ static inline void ptrace_event_pid(int event, struct pid *pid) { /* * FIXME: There's a potential race if a ptracer in a different pid * namespace than parent attaches between computing message below and * when we acquire tasklist_lock in ptrace_stop(). If this happens, * the ptracer will get a bogus pid from PTRACE_GETEVENTMSG. */ unsigned long message = 0; struct pid_namespace *ns; rcu_read_lock(); ns = task_active_pid_ns(rcu_dereference(current->parent)); if (ns) message = pid_nr_ns(pid, ns); rcu_read_unlock(); ptrace_event(event, message); } /** * ptrace_init_task - initialize ptrace state for a new child * @child: new child task * @ptrace: true if child should be ptrace'd by parent's tracer * * This is called immediately after adding @child to its parent's children * list. @ptrace is false in the normal case, and true to ptrace @child. * * Called with current's siglock and write_lock_irq(&tasklist_lock) held. */ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) { INIT_LIST_HEAD(&child->ptrace_entry); INIT_LIST_HEAD(&child->ptraced); child->jobctl = 0; child->ptrace = 0; child->parent = child->real_parent; if (unlikely(ptrace) && current->ptrace) { child->ptrace = current->ptrace; __ptrace_link(child, current->parent, current->ptracer_cred); if (child->ptrace & PT_SEIZED) task_set_jobctl_pending(child, JOBCTL_TRAP_STOP); else sigaddset(&child->pending.signal, SIGSTOP); } else child->ptracer_cred = NULL; } /** * ptrace_release_task - final ptrace-related cleanup of a zombie being reaped * @task: task in %EXIT_DEAD state * * Called with write_lock(&tasklist_lock) held. 
*/ static inline void ptrace_release_task(struct task_struct *task) { BUG_ON(!list_empty(&task->ptraced)); ptrace_unlink(task); BUG_ON(!list_empty(&task->ptrace_entry)); } #ifndef force_successful_syscall_return /* * System call handlers that, upon successful completion, need to return a * negative value should call force_successful_syscall_return() right before * returning. On architectures where the syscall convention provides for a * separate error flag (e.g., alpha, ia64, ppc{,64}, sparc{,64}, possibly * others), this macro can be used to ensure that the error flag will not get * set. On architectures which do not support a separate error flag, the macro * is a no-op and the spurious error condition needs to be filtered out by some * other means (e.g., in user-level, by passing an extra argument to the * syscall handler, or something along those lines). */ #define force_successful_syscall_return() do { } while (0) #endif #ifndef is_syscall_success /* * On most systems we can tell if a syscall is a success based on if the retval * is an error value. On some systems like ia64 and powerpc they have different * indicators of success/failure and must define their own. */ #define is_syscall_success(regs) (!IS_ERR_VALUE((unsigned long)(regs_return_value(regs)))) #endif /* * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__. * * These do-nothing inlines are used when the arch does not * implement single-step. The kerneldoc comments are here * to document the interface for all arch definitions. */ #ifndef arch_has_single_step /** * arch_has_single_step - does this CPU support user-mode single-step? * * If this is defined, then there must be function declarations or * inlines for user_enable_single_step() and user_disable_single_step(). * arch_has_single_step() should evaluate to nonzero iff the machine * supports instruction single-step for user mode. * It can be a constant or it can test a CPU feature bit. */ #define arch_has_single_step() (0) /** * user_enable_single_step - single-step in user-mode task * @task: either current or a task stopped in %TASK_TRACED * * This can only be called when arch_has_single_step() has returned nonzero. * Set @task so that when it returns to user mode, it will trap after the * next single instruction executes. If arch_has_block_step() is defined, * this must clear the effects of user_enable_block_step() too. */ static inline void user_enable_single_step(struct task_struct *task) { BUG(); /* This can never be called. */ } /** * user_disable_single_step - cancel user-mode single-step * @task: either current or a task stopped in %TASK_TRACED * * Clear @task of the effects of user_enable_single_step() and * user_enable_block_step(). This can be called whether or not either * of those was ever called on @task, and even if arch_has_single_step() * returned zero. */ static inline void user_disable_single_step(struct task_struct *task) { } #else extern void user_enable_single_step(struct task_struct *); extern void user_disable_single_step(struct task_struct *); #endif /* arch_has_single_step */ #ifndef arch_has_block_step /** * arch_has_block_step - does this CPU support user-mode block-step? * * If this is defined, then there must be a function declaration or inline * for user_enable_block_step(), and arch_has_single_step() must be defined * too. arch_has_block_step() should evaluate to nonzero iff the machine * supports step-until-branch for user mode. It can be a constant or it * can test a CPU feature bit. 
*/ #define arch_has_block_step() (0) /** * user_enable_block_step - step until branch in user-mode task * @task: either current or a task stopped in %TASK_TRACED * * This can only be called when arch_has_block_step() has returned nonzero, * and will never be called when single-instruction stepping is being used. * Set @task so that when it returns to user mode, it will trap after the * next branch or trap taken. */ static inline void user_enable_block_step(struct task_struct *task) { BUG(); /* This can never be called. */ } #else extern void user_enable_block_step(struct task_struct *); #endif /* arch_has_block_step */ #ifdef ARCH_HAS_USER_SINGLE_STEP_REPORT extern void user_single_step_report(struct pt_regs *regs); #else static inline void user_single_step_report(struct pt_regs *regs) { kernel_siginfo_t info; clear_siginfo(&info); info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = SI_USER; info.si_pid = 0; info.si_uid = 0; force_sig_info(&info); } #endif #ifndef arch_ptrace_stop_needed /** * arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called * * This is called with the siglock held, to decide whether or not it's * necessary to release the siglock and call arch_ptrace_stop(). It can be * defined to a constant if arch_ptrace_stop() is never required, or always * is. On machines where this makes sense, it should be defined to a quick * test to optimize out calling arch_ptrace_stop() when it would be * superfluous. For example, if the thread has not been back to user mode * since the last stop, the thread state might indicate that nothing needs * to be done. * * This is guaranteed to be invoked once before a task stops for ptrace and * may include arch-specific operations necessary prior to a ptrace stop. */ #define arch_ptrace_stop_needed() (0) #endif #ifndef arch_ptrace_stop /** * arch_ptrace_stop - Do machine-specific work before stopping for ptrace * * This is called with no locks held when arch_ptrace_stop_needed() has * just returned nonzero. It is allowed to block, e.g. for user memory * access. The arch can have machine-specific work to be done before * ptrace stops. On ia64, register backing store gets written back to user * memory here. Since this can be costly (requires dropping the siglock), * we only do it when the arch requires it for this particular stop, as * indicated by arch_ptrace_stop_needed(). */ #define arch_ptrace_stop() do { } while (0) #endif #ifndef current_pt_regs #define current_pt_regs() task_pt_regs(current) #endif #ifndef current_user_stack_pointer #define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) #endif #ifndef exception_ip #define exception_ip(x) instruction_pointer(x) #endif extern int task_current_syscall(struct task_struct *target, struct syscall_info *info); extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact); /* * ptrace report for syscall entry and exit looks identical. */ static inline int ptrace_report_syscall(unsigned long message) { int ptrace = current->ptrace; int signr; if (!(ptrace & PT_PTRACED)) return 0; signr = ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0), message); /* * this isn't the same as continuing with a signal, but it will do * for normal use. strace only continues with a signal if the * stopping signal is not SIGTRAP. 
-brl */ if (signr) send_sig(signr, current, 1); return fatal_signal_pending(current); } /** * ptrace_report_syscall_entry - task is about to attempt a system call * @regs: user register state of current task * * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just * entered the kernel for a system call. Full user register state is * available here. Changing the values in @regs can affect the system * call number and arguments to be tried. It is safe to block here, * preventing the system call from beginning. * * Returns zero normally, or nonzero if the calling arch code should abort * the system call. That must prevent normal entry so no system call is * made. If @task ever returns to user mode after this, its register state * is unspecified, but should be something harmless like an %ENOSYS error * return. It should preserve enough information so that syscall_rollback() * can work (see asm-generic/syscall.h). * * Called without locks, just after entering kernel mode. */ static inline __must_check int ptrace_report_syscall_entry( struct pt_regs *regs) { return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY); } /** * ptrace_report_syscall_exit - task has just finished a system call * @regs: user register state of current task * @step: nonzero if simulating single-step or block-step * * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when * the current task has just finished an attempted system call. Full * user register state is available here. It is safe to block here, * preventing signals from being processed. * * If @step is nonzero, this report is also in lieu of the normal * trap that would follow the system call instruction because * user_enable_block_step() or user_enable_single_step() was used. * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set. * * Called without locks, just before checking for pending signals. */ static inline void ptrace_report_syscall_exit(struct pt_regs *regs, int step) { if (step) user_single_step_report(regs); else ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT); } #endif
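A hedged userspace sketch (not part of this header) of the tracer side these helpers serve: with PTRACE_O_TRACESYSGOOD set, the stops produced by ptrace_report_syscall() arrive as SIGTRAP|0x80, which lets a tracer such as strace tell syscall stops apart from ordinary SIGTRAPs. The traced program ("true") is illustrative.

#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int status;
	pid_t child = fork();

	if (child == 0) {
		/* Child: request tracing, then exec; the exec raises the
		 * initial SIGTRAP stop that the parent waits for below. */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		execlp("true", "true", (char *)NULL);
		_exit(127);
	}

	waitpid(child, &status, 0);
	ptrace(PTRACE_SETOPTIONS, child, NULL, (void *)PTRACE_O_TRACESYSGOOD);

	/* Resume until the next syscall entry/exit and report each stop. */
	while (ptrace(PTRACE_SYSCALL, child, NULL, NULL) == 0) {
		if (waitpid(child, &status, 0) < 0 || WIFEXITED(status))
			break;
		if (WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
			printf("syscall stop in %d\n", child);
	}
	return 0;
}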
// SPDX-License-Identifier: GPL-2.0-only /* * ppp_deflate.c - interface the zlib procedures for Deflate compression * and decompression (as used by gzip) to the PPP code. * * Copyright 1994-1998 Paul Mackerras. */ #include <linux/module.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/string.h> #include <linux/ppp_defs.h> #include <linux/ppp-comp.h> #include <linux/zlib.h> #include <linux/unaligned.h> /* * State for a Deflate (de)compressor.
*/ struct ppp_deflate_state { int seqno; int w_size; int unit; int mru; int debug; z_stream strm; struct compstat stats; }; #define DEFLATE_OVHD 2 /* Deflate overhead/packet */ static void *z_comp_alloc(unsigned char *options, int opt_len); static void *z_decomp_alloc(unsigned char *options, int opt_len); static void z_comp_free(void *state); static void z_decomp_free(void *state); static int z_comp_init(void *state, unsigned char *options, int opt_len, int unit, int hdrlen, int debug); static int z_decomp_init(void *state, unsigned char *options, int opt_len, int unit, int hdrlen, int mru, int debug); static int z_compress(void *state, unsigned char *rptr, unsigned char *obuf, int isize, int osize); static void z_incomp(void *state, unsigned char *ibuf, int icnt); static int z_decompress(void *state, unsigned char *ibuf, int isize, unsigned char *obuf, int osize); static void z_comp_reset(void *state); static void z_decomp_reset(void *state); static void z_comp_stats(void *state, struct compstat *stats); /** * z_comp_free - free the memory used by a compressor * @arg: pointer to the private state for the compressor. */ static void z_comp_free(void *arg) { struct ppp_deflate_state *state = (struct ppp_deflate_state *) arg; if (state) { zlib_deflateEnd(&state->strm); vfree(state->strm.workspace); kfree(state); } } /** * z_comp_alloc - allocate space for a compressor. * @options: pointer to CCP option data * @opt_len: length of the CCP option at @options. * * The @options pointer points to the a buffer containing the * CCP option data for the compression being negotiated. It is * formatted according to RFC1979, and describes the window * size that the peer is requesting that we use in compressing * data to be sent to it. * * Returns the pointer to the private state for the compressor, * or NULL if we could not allocate enough memory. */ static void *z_comp_alloc(unsigned char *options, int opt_len) { struct ppp_deflate_state *state; int w_size; if (opt_len != CILEN_DEFLATE || (options[0] != CI_DEFLATE && options[0] != CI_DEFLATE_DRAFT) || options[1] != CILEN_DEFLATE || DEFLATE_METHOD(options[2]) != DEFLATE_METHOD_VAL || options[3] != DEFLATE_CHK_SEQUENCE) return NULL; w_size = DEFLATE_SIZE(options[2]); if (w_size < DEFLATE_MIN_SIZE || w_size > DEFLATE_MAX_SIZE) return NULL; state = kzalloc(sizeof(*state), GFP_KERNEL); if (state == NULL) return NULL; state->strm.next_in = NULL; state->w_size = w_size; state->strm.workspace = vmalloc(zlib_deflate_workspacesize(-w_size, 8)); if (state->strm.workspace == NULL) goto out_free; if (zlib_deflateInit2(&state->strm, Z_DEFAULT_COMPRESSION, DEFLATE_METHOD_VAL, -w_size, 8, Z_DEFAULT_STRATEGY) != Z_OK) goto out_free; return (void *) state; out_free: z_comp_free(state); return NULL; } /** * z_comp_init - initialize a previously-allocated compressor. * @arg: pointer to the private state for the compressor * @options: pointer to the CCP option data describing the * compression that was negotiated with the peer * @opt_len: length of the CCP option data at @options * @unit: PPP unit number for diagnostic messages * @hdrlen: ignored (present for backwards compatibility) * @debug: debug flag; if non-zero, debug messages are printed. * * The CCP options described by @options must match the options * specified when the compressor was allocated. The compressor * history is reset. Returns 0 for failure (CCP options don't * match) or 1 for success. 
*/ static int z_comp_init(void *arg, unsigned char *options, int opt_len, int unit, int hdrlen, int debug) { struct ppp_deflate_state *state = (struct ppp_deflate_state *) arg; if (opt_len < CILEN_DEFLATE || (options[0] != CI_DEFLATE && options[0] != CI_DEFLATE_DRAFT) || options[1] != CILEN_DEFLATE || DEFLATE_METHOD(options[2]) != DEFLATE_METHOD_VAL || DEFLATE_SIZE(options[2]) != state->w_size || options[3] != DEFLATE_CHK_SEQUENCE) return 0; state->seqno = 0; state->unit = unit; state->debug = debug; zlib_deflateReset(&state->strm); return 1; } /** * z_comp_reset - reset a previously-allocated compressor. * @arg: pointer to private state for the compressor. * * This clears the history for the compressor and makes it * ready to start emitting a new compressed stream. */ static void z_comp_reset(void *arg) { struct ppp_deflate_state *state = (struct ppp_deflate_state *) arg; state->seqno = 0; zlib_deflateReset(&state->strm); } /** * z_compress - compress a PPP packet with Deflate compression. * @arg: pointer to private state for the compressor * @rptr: uncompressed packet (input) * @obuf: compressed packet (output) * @isize: size of uncompressed packet * @osize: space available at @obuf * * Returns the length of the compressed packet, or 0 if the * packet is incompressible. */ static int z_compress(void *arg, unsigned char *rptr, unsigned char *obuf, int isize, int osize) { struct ppp_deflate_state *state = (struct ppp_deflate_state *) arg; int r, proto, off, olen, oavail; unsigned char *wptr; /* * Check that the protocol is in the range we handle. */ proto = PPP_PROTOCOL(rptr); if (proto > 0x3fff || proto == 0xfd || proto == 0xfb) return 0; /* Don't generate compressed packets which are larger than the uncompressed packet. */ if (osize > isize) osize = isize; wptr = obuf; /* * Copy over the PPP header and store the 2-byte sequence number. */ wptr[0] = PPP_ADDRESS(rptr); wptr[1] = PPP_CONTROL(rptr); put_unaligned_be16(PPP_COMP, wptr + 2); wptr += PPP_HDRLEN; put_unaligned_be16(state->seqno, wptr); wptr += DEFLATE_OVHD; olen = PPP_HDRLEN + DEFLATE_OVHD; state->strm.next_out = wptr; state->strm.avail_out = oavail = osize - olen; ++state->seqno; off = (proto > 0xff) ? 2 : 3; /* skip 1st proto byte if 0 */ rptr += off; state->strm.next_in = rptr; state->strm.avail_in = (isize - off); for (;;) { r = zlib_deflate(&state->strm, Z_PACKET_FLUSH); if (r != Z_OK) { if (state->debug) printk(KERN_ERR "z_compress: deflate returned %d\n", r); break; } if (state->strm.avail_out == 0) { olen += oavail; state->strm.next_out = NULL; state->strm.avail_out = oavail = 1000000; } else { break; /* all done */ } } olen += oavail - state->strm.avail_out; /* * See if we managed to reduce the size of the packet. */ if (olen < isize && olen <= osize) { state->stats.comp_bytes += olen; state->stats.comp_packets++; } else { state->stats.inc_bytes += isize; state->stats.inc_packets++; olen = 0; } state->stats.unc_bytes += isize; state->stats.unc_packets++; return olen; } /** * z_comp_stats - return compression statistics for a compressor * or decompressor. * @arg: pointer to private space for the (de)compressor * @stats: pointer to a struct compstat to receive the result. */ static void z_comp_stats(void *arg, struct compstat *stats) { struct ppp_deflate_state *state = (struct ppp_deflate_state *) arg; *stats = state->stats; } /** * z_decomp_free - Free the memory used by a decompressor. * @arg: pointer to private space for the decompressor. 
*/ static void z_decomp_free(void *arg) { struct ppp_deflate_state *state = (struct ppp_deflate_state *) arg; if (state) { vfree(state->strm.workspace); kfree(state); } } /** * z_decomp_alloc - allocate space for a decompressor. * @options: pointer to CCP option data * @opt_len: length of the CCP option at @options. * * The @options pointer points to the a buffer containing the * CCP option data for the compression being negotiated. It is * formatted according to RFC1979, and describes the window * size that we are requesting the peer to use in compressing * data to be sent to us. * * Returns the pointer to the private state for the decompressor, * or NULL if we could not allocate enough memory. */ static void *z_decomp_alloc(unsigned char *options, int opt_len) { struct ppp_deflate_state *state; int w_size; if (opt_len != CILEN_DEFLATE || (options[0] != CI_DEFLATE && options[0] != CI_DEFLATE_DRAFT) || options[1] != CILEN_DEFLATE || DEFLATE_METHOD(options[2]) != DEFLATE_METHOD_VAL || options[3] != DEFLATE_CHK_SEQUENCE) return NULL; w_size = DEFLATE_SIZE(options[2]); if (w_size < DEFLATE_MIN_SIZE || w_size > DEFLATE_MAX_SIZE) return NULL; state = kzalloc(sizeof(*state), GFP_KERNEL); if (state == NULL) return NULL; state->w_size = w_size; state->strm.next_out = NULL; state->strm.workspace = vmalloc(zlib_inflate_workspacesize()); if (state->strm.workspace == NULL) goto out_free; if (zlib_inflateInit2(&state->strm, -w_size) != Z_OK) goto out_free; return (void *) state; out_free: z_decomp_free(state); return NULL; } /** * z_decomp_init - initialize a previously-allocated decompressor. * @arg: pointer to the private state for the decompressor * @options: pointer to the CCP option data describing the * compression that was negotiated with the peer * @opt_len: length of the CCP option data at @options * @unit: PPP unit number for diagnostic messages * @hdrlen: ignored (present for backwards compatibility) * @mru: maximum length of decompressed packets * @debug: debug flag; if non-zero, debug messages are printed. * * The CCP options described by @options must match the options * specified when the decompressor was allocated. The decompressor * history is reset. Returns 0 for failure (CCP options don't * match) or 1 for success. */ static int z_decomp_init(void *arg, unsigned char *options, int opt_len, int unit, int hdrlen, int mru, int debug) { struct ppp_deflate_state *state = (struct ppp_deflate_state *) arg; if (opt_len < CILEN_DEFLATE || (options[0] != CI_DEFLATE && options[0] != CI_DEFLATE_DRAFT) || options[1] != CILEN_DEFLATE || DEFLATE_METHOD(options[2]) != DEFLATE_METHOD_VAL || DEFLATE_SIZE(options[2]) != state->w_size || options[3] != DEFLATE_CHK_SEQUENCE) return 0; state->seqno = 0; state->unit = unit; state->debug = debug; state->mru = mru; zlib_inflateReset(&state->strm); return 1; } /** * z_decomp_reset - reset a previously-allocated decompressor. * @arg: pointer to private state for the decompressor. * * This clears the history for the decompressor and makes it * ready to receive a new compressed stream. */ static void z_decomp_reset(void *arg) { struct ppp_deflate_state *state = (struct ppp_deflate_state *) arg; state->seqno = 0; zlib_inflateReset(&state->strm); } /** * z_decompress - decompress a Deflate-compressed packet. 
* @arg: pointer to private state for the decompressor * @ibuf: pointer to input (compressed) packet data * @isize: length of input packet * @obuf: pointer to space for output (decompressed) packet * @osize: amount of space available at @obuf * * Because of patent problems, we return DECOMP_ERROR for errors * found by inspecting the input data and for system problems, but * DECOMP_FATALERROR for any errors which could possibly be said to * be being detected "after" decompression. For DECOMP_ERROR, * we can issue a CCP reset-request; for DECOMP_FATALERROR, we may be * infringing a patent of Motorola's if we do, so we take CCP down * instead. * * Given that the frame has the correct sequence number and a good FCS, * errors such as invalid codes in the input most likely indicate a * bug, so we return DECOMP_FATALERROR for them in order to turn off * compression, even though they are detected by inspecting the input. */ static int z_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf, int osize) { struct ppp_deflate_state *state = (struct ppp_deflate_state *) arg; int olen, seq, r; int decode_proto, overflow; unsigned char overflow_buf[1]; if (isize <= PPP_HDRLEN + DEFLATE_OVHD) { if (state->debug) printk(KERN_DEBUG "z_decompress%d: short pkt (%d)\n", state->unit, isize); return DECOMP_ERROR; } /* Check the sequence number. */ seq = get_unaligned_be16(ibuf + PPP_HDRLEN); if (seq != (state->seqno & 0xffff)) { if (state->debug) printk(KERN_DEBUG "z_decompress%d: bad seq # %d, expected %d\n", state->unit, seq, state->seqno & 0xffff); return DECOMP_ERROR; } ++state->seqno; /* * Fill in the first part of the PPP header. The protocol field * comes from the decompressed data. */ obuf[0] = PPP_ADDRESS(ibuf); obuf[1] = PPP_CONTROL(ibuf); obuf[2] = 0; /* * Set up to call inflate. We set avail_out to 1 initially so we can * look at the first byte of the output and decide whether we have * a 1-byte or 2-byte protocol field. */ state->strm.next_in = ibuf + PPP_HDRLEN + DEFLATE_OVHD; state->strm.avail_in = isize - (PPP_HDRLEN + DEFLATE_OVHD); state->strm.next_out = obuf + 3; state->strm.avail_out = 1; decode_proto = 1; overflow = 0; /* * Call inflate, supplying more input or output as needed. */ for (;;) { r = zlib_inflate(&state->strm, Z_PACKET_FLUSH); if (r != Z_OK) { if (state->debug) printk(KERN_DEBUG "z_decompress%d: inflate returned %d (%s)\n", state->unit, r, (state->strm.msg? state->strm.msg: "")); return DECOMP_FATALERROR; } if (state->strm.avail_out != 0) break; /* all done */ if (decode_proto) { state->strm.avail_out = osize - PPP_HDRLEN; if ((obuf[3] & 1) == 0) { /* 2-byte protocol field */ obuf[2] = obuf[3]; --state->strm.next_out; ++state->strm.avail_out; } decode_proto = 0; } else if (!overflow) { /* * We've filled up the output buffer; the only way to * find out whether inflate has any more characters * left is to give it another byte of output space. */ state->strm.next_out = overflow_buf; state->strm.avail_out = 1; overflow = 1; } else { if (state->debug) printk(KERN_DEBUG "z_decompress%d: ran out of mru\n", state->unit); return DECOMP_FATALERROR; } } if (decode_proto) { if (state->debug) printk(KERN_DEBUG "z_decompress%d: didn't get proto\n", state->unit); return DECOMP_ERROR; } olen = osize + overflow - state->strm.avail_out; state->stats.unc_bytes += olen; state->stats.unc_packets++; state->stats.comp_bytes += isize; state->stats.comp_packets++; return olen; } /** * z_incomp - add incompressible input data to the history. 
* @arg: pointer to private state for the decompressor * @ibuf: pointer to input packet data * @icnt: length of input data. */ static void z_incomp(void *arg, unsigned char *ibuf, int icnt) { struct ppp_deflate_state *state = (struct ppp_deflate_state *) arg; int proto, r; /* * Check that the protocol is one we handle. */ proto = PPP_PROTOCOL(ibuf); if (proto > 0x3fff || proto == 0xfd || proto == 0xfb) return; ++state->seqno; /* * We start at the either the 1st or 2nd byte of the protocol field, * depending on whether the protocol value is compressible. */ state->strm.next_in = ibuf + 3; state->strm.avail_in = icnt - 3; if (proto > 0xff) { --state->strm.next_in; ++state->strm.avail_in; } r = zlib_inflateIncomp(&state->strm); if (r != Z_OK) { /* gak! */ if (state->debug) { printk(KERN_DEBUG "z_incomp%d: inflateIncomp returned %d (%s)\n", state->unit, r, (state->strm.msg? state->strm.msg: "")); } return; } /* * Update stats. */ state->stats.inc_bytes += icnt; state->stats.inc_packets++; state->stats.unc_bytes += icnt; state->stats.unc_packets++; } /************************************************************* * Module interface table *************************************************************/ /* These are in ppp_generic.c */ extern int ppp_register_compressor (struct compressor *cp); extern void ppp_unregister_compressor (struct compressor *cp); /* * Procedures exported to if_ppp.c. */ static struct compressor ppp_deflate = { .compress_proto = CI_DEFLATE, .comp_alloc = z_comp_alloc, .comp_free = z_comp_free, .comp_init = z_comp_init, .comp_reset = z_comp_reset, .compress = z_compress, .comp_stat = z_comp_stats, .decomp_alloc = z_decomp_alloc, .decomp_free = z_decomp_free, .decomp_init = z_decomp_init, .decomp_reset = z_decomp_reset, .decompress = z_decompress, .incomp = z_incomp, .decomp_stat = z_comp_stats, .owner = THIS_MODULE }; static struct compressor ppp_deflate_draft = { .compress_proto = CI_DEFLATE_DRAFT, .comp_alloc = z_comp_alloc, .comp_free = z_comp_free, .comp_init = z_comp_init, .comp_reset = z_comp_reset, .compress = z_compress, .comp_stat = z_comp_stats, .decomp_alloc = z_decomp_alloc, .decomp_free = z_decomp_free, .decomp_init = z_decomp_init, .decomp_reset = z_decomp_reset, .decompress = z_decompress, .incomp = z_incomp, .decomp_stat = z_comp_stats, .owner = THIS_MODULE }; static int __init deflate_init(void) { int rc; rc = ppp_register_compressor(&ppp_deflate); if (rc) return rc; rc = ppp_register_compressor(&ppp_deflate_draft); if (rc) { ppp_unregister_compressor(&ppp_deflate); return rc; } pr_info("PPP Deflate Compression module registered\n"); return 0; } static void __exit deflate_cleanup(void) { ppp_unregister_compressor(&ppp_deflate); ppp_unregister_compressor(&ppp_deflate_draft); } module_init(deflate_init); module_exit(deflate_cleanup); MODULE_DESCRIPTION("PPP Deflate compression module"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS("ppp-compress-" __stringify(CI_DEFLATE)); MODULE_ALIAS("ppp-compress-" __stringify(CI_DEFLATE_DRAFT));
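A standalone sketch restating the framing that z_compress() emits and z_decompress() expects: a 4-byte PPP header, a 2-byte big-endian sequence number (DEFLATE_OVHD), then the raw deflate stream, with short or out-of-order packets answered as DECOMP_ERROR. The SKETCH_-prefixed constants and the helper name are local to this example, not part of the driver.

#include <stddef.h>
#include <stdint.h>

#define SKETCH_PPP_HDRLEN	4	/* address, control, 2-byte protocol */
#define SKETCH_DEFLATE_OVHD	2	/* 16-bit per-packet sequence number */

/* Return a pointer to the deflate payload if the packet is long enough and
 * its sequence number matches what the receiver expects, otherwise NULL
 * (the driver returns DECOMP_ERROR in that case). */
static const uint8_t *deflate_payload(const uint8_t *pkt, size_t len,
				      uint16_t *expected_seq)
{
	uint16_t seq;

	if (len <= SKETCH_PPP_HDRLEN + SKETCH_DEFLATE_OVHD)
		return NULL;
	seq = (uint16_t)((pkt[SKETCH_PPP_HDRLEN] << 8) |
			 pkt[SKETCH_PPP_HDRLEN + 1]);
	if (seq != *expected_seq)
		return NULL;
	++*expected_seq;
	return pkt + SKETCH_PPP_HDRLEN + SKETCH_DEFLATE_OVHD;
}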
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_FUTEX_H #define _ASM_X86_FUTEX_H #ifdef __KERNEL__ #include <linux/futex.h> #include <linux/uaccess.h> #include <asm/asm.h> #include <asm/errno.h> #include <asm/processor.h> #include <asm/smap.h> #define unsafe_atomic_op1(insn, oval, uaddr, oparg, label) \ do { \ int oldval = 0, ret; \ asm volatile("1:\t" insn "\n" \ "2:\n" \ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %1) \ : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ : "0" (oparg), "1" (0)); \ if (ret) \ goto label; \ *oval = oldval; \ } while(0) #define unsafe_atomic_op2(insn, oval, uaddr, oparg, label) \ do { \ int oldval = 0, ret, tem; \ asm volatile("1:\tmovl %2, %0\n" \ "2:\tmovl\t%0, %3\n" \ "\t" insn "\n" \ "3:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ "\tjnz\t2b\n" \ "4:\n" \ _ASM_EXTABLE_TYPE_REG(1b, 4b, EX_TYPE_EFAULT_REG, %1) \ _ASM_EXTABLE_TYPE_REG(3b, 4b, EX_TYPE_EFAULT_REG, %1) \ : "=&a" (oldval), "=&r" (ret), \ "+m" (*uaddr), "=&r" (tem) \ : "r" (oparg), "1" (0)); \ if (ret) \ goto label; \ *oval = oldval; \ } while(0) static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; switch (op) { case FUTEX_OP_SET: unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault); break; case FUTEX_OP_ADD: unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval, uaddr, oparg, Efault); break; case FUTEX_OP_OR: unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault); break; case FUTEX_OP_ANDN: unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault); break; case FUTEX_OP_XOR: unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault); break; default: user_access_end(); return -ENOSYS; } user_access_end(); return 0; Efault: user_access_end(); return -EFAULT; } static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { int ret = 0; if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; asm volatile("\n" "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" "2:\n" _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0) \ : "+r" (ret), "=a" (oldval), "+m" (*uaddr) : "r" (newval), "1" (oldval) : "memory" ); user_access_end(); *uval = oldval; return ret; } #endif #endif /* _ASM_X86_FUTEX_H */
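The helpers above supply the x86 side of futex(2): the atomic-op macros back FUTEX_WAKE_OP arithmetic and the cmpxchg primitive serves the PI variants. A hedged userspace sketch of the basic FUTEX_WAIT/FUTEX_WAKE pairing on a shared 32-bit word; the thread split and the local wrapper name futex() are illustrative.

#include <linux/futex.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static long futex(uint32_t *uaddr, int op, uint32_t val)
{
	return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

static uint32_t word;	/* shared between the two threads below */

static void waiter(uint32_t *w)
{
	/* Sleep only while *w still holds the expected value 0; the kernel
	 * re-checks the word atomically, so a racing wake is not lost. */
	while (__atomic_load_n(w, __ATOMIC_ACQUIRE) == 0)
		futex(w, FUTEX_WAIT, 0);
}

static void *waker(void *arg)
{
	uint32_t *w = arg;

	__atomic_store_n(w, 1, __ATOMIC_RELEASE);
	futex(w, FUTEX_WAKE, 1);	/* wake at most one waiter */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waker, &word);
	waiter(&word);
	pthread_join(t, NULL);
	puts("woken");
	return 0;
}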
871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 // SPDX-License-Identifier: GPL-2.0-only /* * IEEE 802.1Q Multiple Registration Protocol (MRP) * * Copyright (c) 2012 Massachusetts Institute of Technology * * Adapted from code in net/802/garp.c * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> */ #include <linux/kernel.h> #include <linux/timer.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/module.h> #include <net/mrp.h> #include <linux/unaligned.h> static unsigned int mrp_join_time __read_mostly = 200; module_param(mrp_join_time, uint, 0644); MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)"); static unsigned int mrp_periodic_time __read_mostly = 1000; module_param(mrp_periodic_time, uint, 0644); MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)"); MODULE_DESCRIPTION("IEEE 802.1Q Multiple Registration Protocol (MRP)"); MODULE_LICENSE("GPL"); static const u8 mrp_applicant_state_table[MRP_APPLICANT_MAX + 1][MRP_EVENT_MAX + 1] = { [MRP_APPLICANT_VO] = { [MRP_EVENT_NEW] = MRP_APPLICANT_VN, [MRP_EVENT_JOIN] = MRP_APPLICANT_VP, [MRP_EVENT_LV] = MRP_APPLICANT_VO, [MRP_EVENT_TX] = MRP_APPLICANT_VO, [MRP_EVENT_R_NEW] = MRP_APPLICANT_VO, [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AO, [MRP_EVENT_R_IN] = MRP_APPLICANT_VO, [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VO, [MRP_EVENT_R_MT] = MRP_APPLICANT_VO, [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VO, }, [MRP_APPLICANT_VP] = { [MRP_EVENT_NEW] = MRP_APPLICANT_VN, [MRP_EVENT_JOIN] = MRP_APPLICANT_VP, [MRP_EVENT_LV] = MRP_APPLICANT_VO, [MRP_EVENT_TX] = MRP_APPLICANT_AA, [MRP_EVENT_R_NEW] = MRP_APPLICANT_VP, [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AP, [MRP_EVENT_R_IN] = MRP_APPLICANT_VP, [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VP, [MRP_EVENT_R_MT] = MRP_APPLICANT_VP, [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VP, }, [MRP_APPLICANT_VN] = { [MRP_EVENT_NEW] = MRP_APPLICANT_VN, [MRP_EVENT_JOIN] = MRP_APPLICANT_VN, [MRP_EVENT_LV] = MRP_APPLICANT_LA, [MRP_EVENT_TX] = MRP_APPLICANT_AN, [MRP_EVENT_R_NEW] = MRP_APPLICANT_VN, [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_VN, [MRP_EVENT_R_IN] = MRP_APPLICANT_VN, [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VN, [MRP_EVENT_R_MT] = MRP_APPLICANT_VN, [MRP_EVENT_R_LV] = MRP_APPLICANT_VN, [MRP_EVENT_R_LA] = MRP_APPLICANT_VN, [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VN, [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VN, }, [MRP_APPLICANT_AN] = { [MRP_EVENT_NEW] = MRP_APPLICANT_AN, [MRP_EVENT_JOIN] = MRP_APPLICANT_AN, [MRP_EVENT_LV] = MRP_APPLICANT_LA, [MRP_EVENT_TX] = MRP_APPLICANT_QA, [MRP_EVENT_R_NEW] = MRP_APPLICANT_AN, [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AN, [MRP_EVENT_R_IN] = MRP_APPLICANT_AN, [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AN, [MRP_EVENT_R_MT] = MRP_APPLICANT_AN, [MRP_EVENT_R_LV] = MRP_APPLICANT_VN, [MRP_EVENT_R_LA] = MRP_APPLICANT_VN, [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VN, [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AN, }, [MRP_APPLICANT_AA] = { [MRP_EVENT_NEW] = MRP_APPLICANT_VN, [MRP_EVENT_JOIN] = MRP_APPLICANT_AA, [MRP_EVENT_LV] = 
MRP_APPLICANT_LA, [MRP_EVENT_TX] = MRP_APPLICANT_QA, [MRP_EVENT_R_NEW] = MRP_APPLICANT_AA, [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QA, [MRP_EVENT_R_IN] = MRP_APPLICANT_AA, [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AA, [MRP_EVENT_R_MT] = MRP_APPLICANT_AA, [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AA, }, [MRP_APPLICANT_QA] = { [MRP_EVENT_NEW] = MRP_APPLICANT_VN, [MRP_EVENT_JOIN] = MRP_APPLICANT_QA, [MRP_EVENT_LV] = MRP_APPLICANT_LA, [MRP_EVENT_TX] = MRP_APPLICANT_QA, [MRP_EVENT_R_NEW] = MRP_APPLICANT_QA, [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QA, [MRP_EVENT_R_IN] = MRP_APPLICANT_QA, [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AA, [MRP_EVENT_R_MT] = MRP_APPLICANT_AA, [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AA, }, [MRP_APPLICANT_LA] = { [MRP_EVENT_NEW] = MRP_APPLICANT_VN, [MRP_EVENT_JOIN] = MRP_APPLICANT_AA, [MRP_EVENT_LV] = MRP_APPLICANT_LA, [MRP_EVENT_TX] = MRP_APPLICANT_VO, [MRP_EVENT_R_NEW] = MRP_APPLICANT_LA, [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_LA, [MRP_EVENT_R_IN] = MRP_APPLICANT_LA, [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_LA, [MRP_EVENT_R_MT] = MRP_APPLICANT_LA, [MRP_EVENT_R_LV] = MRP_APPLICANT_LA, [MRP_EVENT_R_LA] = MRP_APPLICANT_LA, [MRP_EVENT_REDECLARE] = MRP_APPLICANT_LA, [MRP_EVENT_PERIODIC] = MRP_APPLICANT_LA, }, [MRP_APPLICANT_AO] = { [MRP_EVENT_NEW] = MRP_APPLICANT_VN, [MRP_EVENT_JOIN] = MRP_APPLICANT_AP, [MRP_EVENT_LV] = MRP_APPLICANT_AO, [MRP_EVENT_TX] = MRP_APPLICANT_AO, [MRP_EVENT_R_NEW] = MRP_APPLICANT_AO, [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QO, [MRP_EVENT_R_IN] = MRP_APPLICANT_AO, [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AO, [MRP_EVENT_R_MT] = MRP_APPLICANT_AO, [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AO, }, [MRP_APPLICANT_QO] = { [MRP_EVENT_NEW] = MRP_APPLICANT_VN, [MRP_EVENT_JOIN] = MRP_APPLICANT_QP, [MRP_EVENT_LV] = MRP_APPLICANT_QO, [MRP_EVENT_TX] = MRP_APPLICANT_QO, [MRP_EVENT_R_NEW] = MRP_APPLICANT_QO, [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QO, [MRP_EVENT_R_IN] = MRP_APPLICANT_QO, [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AO, [MRP_EVENT_R_MT] = MRP_APPLICANT_AO, [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, [MRP_EVENT_PERIODIC] = MRP_APPLICANT_QO, }, [MRP_APPLICANT_AP] = { [MRP_EVENT_NEW] = MRP_APPLICANT_VN, [MRP_EVENT_JOIN] = MRP_APPLICANT_AP, [MRP_EVENT_LV] = MRP_APPLICANT_AO, [MRP_EVENT_TX] = MRP_APPLICANT_QA, [MRP_EVENT_R_NEW] = MRP_APPLICANT_AP, [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QP, [MRP_EVENT_R_IN] = MRP_APPLICANT_AP, [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AP, [MRP_EVENT_R_MT] = MRP_APPLICANT_AP, [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AP, }, [MRP_APPLICANT_QP] = { [MRP_EVENT_NEW] = MRP_APPLICANT_VN, [MRP_EVENT_JOIN] = MRP_APPLICANT_QP, [MRP_EVENT_LV] = MRP_APPLICANT_QO, [MRP_EVENT_TX] = MRP_APPLICANT_QP, [MRP_EVENT_R_NEW] = MRP_APPLICANT_QP, [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QP, [MRP_EVENT_R_IN] = MRP_APPLICANT_QP, [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AP, [MRP_EVENT_R_MT] = MRP_APPLICANT_AP, [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AP, }, }; static 
const u8 mrp_tx_action_table[MRP_APPLICANT_MAX + 1] = { [MRP_APPLICANT_VO] = MRP_TX_ACTION_S_IN_OPTIONAL, [MRP_APPLICANT_VP] = MRP_TX_ACTION_S_JOIN_IN, [MRP_APPLICANT_VN] = MRP_TX_ACTION_S_NEW, [MRP_APPLICANT_AN] = MRP_TX_ACTION_S_NEW, [MRP_APPLICANT_AA] = MRP_TX_ACTION_S_JOIN_IN, [MRP_APPLICANT_QA] = MRP_TX_ACTION_S_JOIN_IN_OPTIONAL, [MRP_APPLICANT_LA] = MRP_TX_ACTION_S_LV, [MRP_APPLICANT_AO] = MRP_TX_ACTION_S_IN_OPTIONAL, [MRP_APPLICANT_QO] = MRP_TX_ACTION_S_IN_OPTIONAL, [MRP_APPLICANT_AP] = MRP_TX_ACTION_S_JOIN_IN, [MRP_APPLICANT_QP] = MRP_TX_ACTION_S_IN_OPTIONAL, }; static void mrp_attrvalue_inc(void *value, u8 len) { u8 *v = (u8 *)value; /* Add 1 to the last byte. If it becomes zero, * go to the previous byte and repeat. */ while (len > 0 && !++v[--len]) ; } static int mrp_attr_cmp(const struct mrp_attr *attr, const void *value, u8 len, u8 type) { if (attr->type != type) return attr->type - type; if (attr->len != len) return attr->len - len; return memcmp(attr->value, value, len); } static struct mrp_attr *mrp_attr_lookup(const struct mrp_applicant *app, const void *value, u8 len, u8 type) { struct rb_node *parent = app->mad.rb_node; struct mrp_attr *attr; int d; while (parent) { attr = rb_entry(parent, struct mrp_attr, node); d = mrp_attr_cmp(attr, value, len, type); if (d > 0) parent = parent->rb_left; else if (d < 0) parent = parent->rb_right; else return attr; } return NULL; } static struct mrp_attr *mrp_attr_create(struct mrp_applicant *app, const void *value, u8 len, u8 type) { struct rb_node *parent = NULL, **p = &app->mad.rb_node; struct mrp_attr *attr; int d; while (*p) { parent = *p; attr = rb_entry(parent, struct mrp_attr, node); d = mrp_attr_cmp(attr, value, len, type); if (d > 0) p = &parent->rb_left; else if (d < 0) p = &parent->rb_right; else { /* The attribute already exists; re-use it. */ return attr; } } attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC); if (!attr) return attr; attr->state = MRP_APPLICANT_VO; attr->type = type; attr->len = len; memcpy(attr->value, value, len); rb_link_node(&attr->node, parent, p); rb_insert_color(&attr->node, &app->mad); return attr; } static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr) { rb_erase(&attr->node, &app->mad); kfree(attr); } static void mrp_attr_destroy_all(struct mrp_applicant *app) { struct rb_node *node, *next; struct mrp_attr *attr; for (node = rb_first(&app->mad); next = node ? 
rb_next(node) : NULL, node != NULL; node = next) { attr = rb_entry(node, struct mrp_attr, node); mrp_attr_destroy(app, attr); } } static int mrp_pdu_init(struct mrp_applicant *app) { struct sk_buff *skb; struct mrp_pdu_hdr *ph; skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev), GFP_ATOMIC); if (!skb) return -ENOMEM; skb->dev = app->dev; skb->protocol = app->app->pkttype.type; skb_reserve(skb, LL_RESERVED_SPACE(app->dev)); skb_reset_network_header(skb); skb_reset_transport_header(skb); ph = __skb_put(skb, sizeof(*ph)); ph->version = app->app->version; app->pdu = skb; return 0; } static int mrp_pdu_append_end_mark(struct mrp_applicant *app) { __be16 *endmark; if (skb_tailroom(app->pdu) < sizeof(*endmark)) return -1; endmark = __skb_put(app->pdu, sizeof(*endmark)); put_unaligned(MRP_END_MARK, endmark); return 0; } static void mrp_pdu_queue(struct mrp_applicant *app) { if (!app->pdu) return; if (mrp_cb(app->pdu)->mh) mrp_pdu_append_end_mark(app); mrp_pdu_append_end_mark(app); dev_hard_header(app->pdu, app->dev, ntohs(app->app->pkttype.type), app->app->group_address, app->dev->dev_addr, app->pdu->len); skb_queue_tail(&app->queue, app->pdu); app->pdu = NULL; } static void mrp_queue_xmit(struct mrp_applicant *app) { struct sk_buff *skb; while ((skb = skb_dequeue(&app->queue))) dev_queue_xmit(skb); } static int mrp_pdu_append_msg_hdr(struct mrp_applicant *app, u8 attrtype, u8 attrlen) { struct mrp_msg_hdr *mh; if (mrp_cb(app->pdu)->mh) { if (mrp_pdu_append_end_mark(app) < 0) return -1; mrp_cb(app->pdu)->mh = NULL; mrp_cb(app->pdu)->vah = NULL; } if (skb_tailroom(app->pdu) < sizeof(*mh)) return -1; mh = __skb_put(app->pdu, sizeof(*mh)); mh->attrtype = attrtype; mh->attrlen = attrlen; mrp_cb(app->pdu)->mh = mh; return 0; } static int mrp_pdu_append_vecattr_hdr(struct mrp_applicant *app, const void *firstattrvalue, u8 attrlen) { struct mrp_vecattr_hdr *vah; if (skb_tailroom(app->pdu) < sizeof(*vah) + attrlen) return -1; vah = __skb_put(app->pdu, sizeof(*vah) + attrlen); put_unaligned(0, &vah->lenflags); memcpy(vah->firstattrvalue, firstattrvalue, attrlen); mrp_cb(app->pdu)->vah = vah; memcpy(mrp_cb(app->pdu)->attrvalue, firstattrvalue, attrlen); return 0; } static int mrp_pdu_append_vecattr_event(struct mrp_applicant *app, const struct mrp_attr *attr, enum mrp_vecattr_event vaevent) { u16 len, pos; u8 *vaevents; int err; again: if (!app->pdu) { err = mrp_pdu_init(app); if (err < 0) return err; } /* If there is no Message header in the PDU, or the Message header is * for a different attribute type, add an EndMark (if necessary) and a * new Message header to the PDU. */ if (!mrp_cb(app->pdu)->mh || mrp_cb(app->pdu)->mh->attrtype != attr->type || mrp_cb(app->pdu)->mh->attrlen != attr->len) { if (mrp_pdu_append_msg_hdr(app, attr->type, attr->len) < 0) goto queue; } /* If there is no VectorAttribute header for this Message in the PDU, * or this attribute's value does not sequentially follow the previous * attribute's value, add a new VectorAttribute header to the PDU. */ if (!mrp_cb(app->pdu)->vah || memcmp(mrp_cb(app->pdu)->attrvalue, attr->value, attr->len)) { if (mrp_pdu_append_vecattr_hdr(app, attr->value, attr->len) < 0) goto queue; } len = be16_to_cpu(get_unaligned(&mrp_cb(app->pdu)->vah->lenflags)); pos = len % 3; /* Events are packed into Vectors in the PDU, three to a byte. Add a * byte to the end of the Vector if necessary. 
*/ if (!pos) { if (skb_tailroom(app->pdu) < sizeof(u8)) goto queue; vaevents = __skb_put(app->pdu, sizeof(u8)); } else { vaevents = (u8 *)(skb_tail_pointer(app->pdu) - sizeof(u8)); } switch (pos) { case 0: *vaevents = vaevent * (__MRP_VECATTR_EVENT_MAX * __MRP_VECATTR_EVENT_MAX); break; case 1: *vaevents += vaevent * __MRP_VECATTR_EVENT_MAX; break; case 2: *vaevents += vaevent; break; default: WARN_ON(1); } /* Increment the length of the VectorAttribute in the PDU, as well as * the value of the next attribute that would continue its Vector. */ put_unaligned(cpu_to_be16(++len), &mrp_cb(app->pdu)->vah->lenflags); mrp_attrvalue_inc(mrp_cb(app->pdu)->attrvalue, attr->len); return 0; queue: mrp_pdu_queue(app); goto again; } static void mrp_attr_event(struct mrp_applicant *app, struct mrp_attr *attr, enum mrp_event event) { enum mrp_applicant_state state; state = mrp_applicant_state_table[attr->state][event]; if (state == MRP_APPLICANT_INVALID) { WARN_ON(1); return; } if (event == MRP_EVENT_TX) { /* When appending the attribute fails, don't update its state * in order to retry at the next TX event. */ switch (mrp_tx_action_table[attr->state]) { case MRP_TX_ACTION_NONE: case MRP_TX_ACTION_S_JOIN_IN_OPTIONAL: case MRP_TX_ACTION_S_IN_OPTIONAL: break; case MRP_TX_ACTION_S_NEW: if (mrp_pdu_append_vecattr_event( app, attr, MRP_VECATTR_EVENT_NEW) < 0) return; break; case MRP_TX_ACTION_S_JOIN_IN: if (mrp_pdu_append_vecattr_event( app, attr, MRP_VECATTR_EVENT_JOIN_IN) < 0) return; break; case MRP_TX_ACTION_S_LV: if (mrp_pdu_append_vecattr_event( app, attr, MRP_VECATTR_EVENT_LV) < 0) return; /* As a pure applicant, sending a leave message * implies that the attribute was unregistered and * can be destroyed. */ mrp_attr_destroy(app, attr); return; default: WARN_ON(1); } } attr->state = state; } int mrp_request_join(const struct net_device *dev, const struct mrp_application *appl, const void *value, u8 len, u8 type) { struct mrp_port *port = rtnl_dereference(dev->mrp_port); struct mrp_applicant *app = rtnl_dereference( port->applicants[appl->type]); struct mrp_attr *attr; if (sizeof(struct mrp_skb_cb) + len > sizeof_field(struct sk_buff, cb)) return -ENOMEM; spin_lock_bh(&app->lock); attr = mrp_attr_create(app, value, len, type); if (!attr) { spin_unlock_bh(&app->lock); return -ENOMEM; } mrp_attr_event(app, attr, MRP_EVENT_JOIN); spin_unlock_bh(&app->lock); return 0; } EXPORT_SYMBOL_GPL(mrp_request_join); void mrp_request_leave(const struct net_device *dev, const struct mrp_application *appl, const void *value, u8 len, u8 type) { struct mrp_port *port = rtnl_dereference(dev->mrp_port); struct mrp_applicant *app = rtnl_dereference( port->applicants[appl->type]); struct mrp_attr *attr; if (sizeof(struct mrp_skb_cb) + len > sizeof_field(struct sk_buff, cb)) return; spin_lock_bh(&app->lock); attr = mrp_attr_lookup(app, value, len, type); if (!attr) { spin_unlock_bh(&app->lock); return; } mrp_attr_event(app, attr, MRP_EVENT_LV); spin_unlock_bh(&app->lock); } EXPORT_SYMBOL_GPL(mrp_request_leave); static void mrp_mad_event(struct mrp_applicant *app, enum mrp_event event) { struct rb_node *node, *next; struct mrp_attr *attr; for (node = rb_first(&app->mad); next = node ? 
rb_next(node) : NULL, node != NULL; node = next) { attr = rb_entry(node, struct mrp_attr, node); mrp_attr_event(app, attr, event); } } static void mrp_join_timer_arm(struct mrp_applicant *app) { unsigned long delay; delay = get_random_u32_below(msecs_to_jiffies(mrp_join_time)); mod_timer(&app->join_timer, jiffies + delay); } static void mrp_join_timer(struct timer_list *t) { struct mrp_applicant *app = from_timer(app, t, join_timer); spin_lock(&app->lock); mrp_mad_event(app, MRP_EVENT_TX); mrp_pdu_queue(app); spin_unlock(&app->lock); mrp_queue_xmit(app); spin_lock(&app->lock); if (likely(app->active)) mrp_join_timer_arm(app); spin_unlock(&app->lock); } static void mrp_periodic_timer_arm(struct mrp_applicant *app) { mod_timer(&app->periodic_timer, jiffies + msecs_to_jiffies(mrp_periodic_time)); } static void mrp_periodic_timer(struct timer_list *t) { struct mrp_applicant *app = from_timer(app, t, periodic_timer); spin_lock(&app->lock); if (likely(app->active)) { mrp_mad_event(app, MRP_EVENT_PERIODIC); mrp_pdu_queue(app); mrp_periodic_timer_arm(app); } spin_unlock(&app->lock); } static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) { __be16 endmark; if (skb_copy_bits(skb, *offset, &endmark, sizeof(endmark)) < 0) return -1; if (endmark == MRP_END_MARK) { *offset += sizeof(endmark); return -1; } return 0; } static void mrp_pdu_parse_vecattr_event(struct mrp_applicant *app, struct sk_buff *skb, enum mrp_vecattr_event vaevent) { struct mrp_attr *attr; enum mrp_event event; attr = mrp_attr_lookup(app, mrp_cb(skb)->attrvalue, mrp_cb(skb)->mh->attrlen, mrp_cb(skb)->mh->attrtype); if (attr == NULL) return; switch (vaevent) { case MRP_VECATTR_EVENT_NEW: event = MRP_EVENT_R_NEW; break; case MRP_VECATTR_EVENT_JOIN_IN: event = MRP_EVENT_R_JOIN_IN; break; case MRP_VECATTR_EVENT_IN: event = MRP_EVENT_R_IN; break; case MRP_VECATTR_EVENT_JOIN_MT: event = MRP_EVENT_R_JOIN_MT; break; case MRP_VECATTR_EVENT_MT: event = MRP_EVENT_R_MT; break; case MRP_VECATTR_EVENT_LV: event = MRP_EVENT_R_LV; break; default: return; } mrp_attr_event(app, attr, event); } static int mrp_pdu_parse_vecattr(struct mrp_applicant *app, struct sk_buff *skb, int *offset) { struct mrp_vecattr_hdr _vah; u16 valen; u8 vaevents, vaevent; mrp_cb(skb)->vah = skb_header_pointer(skb, *offset, sizeof(_vah), &_vah); if (!mrp_cb(skb)->vah) return -1; *offset += sizeof(_vah); if (get_unaligned(&mrp_cb(skb)->vah->lenflags) & MRP_VECATTR_HDR_FLAG_LA) mrp_mad_event(app, MRP_EVENT_R_LA); valen = be16_to_cpu(get_unaligned(&mrp_cb(skb)->vah->lenflags) & MRP_VECATTR_HDR_LEN_MASK); /* The VectorAttribute structure in a PDU carries event information * about one or more attributes having consecutive values. Only the * value for the first attribute is contained in the structure. So * we make a copy of that value, and then increment it each time we * advance to the next event in its Vector. */ if (sizeof(struct mrp_skb_cb) + mrp_cb(skb)->mh->attrlen > sizeof_field(struct sk_buff, cb)) return -1; if (skb_copy_bits(skb, *offset, mrp_cb(skb)->attrvalue, mrp_cb(skb)->mh->attrlen) < 0) return -1; *offset += mrp_cb(skb)->mh->attrlen; /* In a VectorAttribute, the Vector contains events which are packed * three to a byte. We process one byte of the Vector at a time. */ while (valen > 0) { if (skb_copy_bits(skb, *offset, &vaevents, sizeof(vaevents)) < 0) return -1; *offset += sizeof(vaevents); /* Extract and process the first event. 
*/ vaevent = vaevents / (__MRP_VECATTR_EVENT_MAX * __MRP_VECATTR_EVENT_MAX); if (vaevent >= __MRP_VECATTR_EVENT_MAX) { /* The byte is malformed; stop processing. */ return -1; } mrp_pdu_parse_vecattr_event(app, skb, vaevent); /* If present, extract and process the second event. */ if (!--valen) break; mrp_attrvalue_inc(mrp_cb(skb)->attrvalue, mrp_cb(skb)->mh->attrlen); vaevents %= (__MRP_VECATTR_EVENT_MAX * __MRP_VECATTR_EVENT_MAX); vaevent = vaevents / __MRP_VECATTR_EVENT_MAX; mrp_pdu_parse_vecattr_event(app, skb, vaevent); /* If present, extract and process the third event. */ if (!--valen) break; mrp_attrvalue_inc(mrp_cb(skb)->attrvalue, mrp_cb(skb)->mh->attrlen); vaevents %= __MRP_VECATTR_EVENT_MAX; vaevent = vaevents; mrp_pdu_parse_vecattr_event(app, skb, vaevent); } return 0; } static int mrp_pdu_parse_msg(struct mrp_applicant *app, struct sk_buff *skb, int *offset) { struct mrp_msg_hdr _mh; mrp_cb(skb)->mh = skb_header_pointer(skb, *offset, sizeof(_mh), &_mh); if (!mrp_cb(skb)->mh) return -1; *offset += sizeof(_mh); if (mrp_cb(skb)->mh->attrtype == 0 || mrp_cb(skb)->mh->attrtype > app->app->maxattr || mrp_cb(skb)->mh->attrlen == 0) return -1; while (skb->len > *offset) { if (mrp_pdu_parse_end_mark(skb, offset) < 0) break; if (mrp_pdu_parse_vecattr(app, skb, offset) < 0) return -1; } return 0; } static int mrp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct mrp_application *appl = container_of(pt, struct mrp_application, pkttype); struct mrp_port *port; struct mrp_applicant *app; struct mrp_pdu_hdr _ph; const struct mrp_pdu_hdr *ph; int offset = skb_network_offset(skb); /* If the interface is in promiscuous mode, drop the packet if * it was unicast to another host. */ if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) goto out; skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) goto out; port = rcu_dereference(dev->mrp_port); if (unlikely(!port)) goto out; app = rcu_dereference(port->applicants[appl->type]); if (unlikely(!app)) goto out; ph = skb_header_pointer(skb, offset, sizeof(_ph), &_ph); if (!ph) goto out; offset += sizeof(_ph); if (ph->version != app->app->version) goto out; spin_lock(&app->lock); while (skb->len > offset) { if (mrp_pdu_parse_end_mark(skb, &offset) < 0) break; if (mrp_pdu_parse_msg(app, skb, &offset) < 0) break; } spin_unlock(&app->lock); out: kfree_skb(skb); return 0; } static int mrp_init_port(struct net_device *dev) { struct mrp_port *port; port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) return -ENOMEM; rcu_assign_pointer(dev->mrp_port, port); return 0; } static void mrp_release_port(struct net_device *dev) { struct mrp_port *port = rtnl_dereference(dev->mrp_port); unsigned int i; for (i = 0; i <= MRP_APPLICATION_MAX; i++) { if (rtnl_dereference(port->applicants[i])) return; } RCU_INIT_POINTER(dev->mrp_port, NULL); kfree_rcu(port, rcu); } int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) { struct mrp_applicant *app; int err; ASSERT_RTNL(); if (!rtnl_dereference(dev->mrp_port)) { err = mrp_init_port(dev); if (err < 0) goto err1; } err = -ENOMEM; app = kzalloc(sizeof(*app), GFP_KERNEL); if (!app) goto err2; err = dev_mc_add(dev, appl->group_address); if (err < 0) goto err3; app->dev = dev; app->app = appl; app->mad = RB_ROOT; app->active = true; spin_lock_init(&app->lock); skb_queue_head_init(&app->queue); rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); timer_setup(&app->join_timer, mrp_join_timer, 0); mrp_join_timer_arm(app); 
timer_setup(&app->periodic_timer, mrp_periodic_timer, 0); mrp_periodic_timer_arm(app); return 0; err3: kfree(app); err2: mrp_release_port(dev); err1: return err; } EXPORT_SYMBOL_GPL(mrp_init_applicant); void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) { struct mrp_port *port = rtnl_dereference(dev->mrp_port); struct mrp_applicant *app = rtnl_dereference( port->applicants[appl->type]); ASSERT_RTNL(); RCU_INIT_POINTER(port->applicants[appl->type], NULL); spin_lock_bh(&app->lock); app->active = false; spin_unlock_bh(&app->lock); /* Delete timer and generate a final TX event to flush out * all pending messages before the applicant is gone. */ timer_shutdown_sync(&app->join_timer); timer_shutdown_sync(&app->periodic_timer); spin_lock_bh(&app->lock); mrp_mad_event(app, MRP_EVENT_TX); mrp_attr_destroy_all(app); mrp_pdu_queue(app); spin_unlock_bh(&app->lock); mrp_queue_xmit(app); dev_mc_del(dev, appl->group_address); kfree_rcu(app, rcu); mrp_release_port(dev); } EXPORT_SYMBOL_GPL(mrp_uninit_applicant); int mrp_register_application(struct mrp_application *appl) { appl->pkttype.func = mrp_rcv; dev_add_pack(&appl->pkttype); return 0; } EXPORT_SYMBOL_GPL(mrp_register_application); void mrp_unregister_application(struct mrp_application *appl) { dev_remove_pack(&appl->pkttype); } EXPORT_SYMBOL_GPL(mrp_unregister_application);
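/*
 * Illustrative usage sketch (not part of the file above): how a protocol
 * built on this applicant state machine, e.g. 802.1Q MVRP, might register
 * itself and declare an attribute.  MRP_APPLICATION_MVRP, ETH_P_MVRP, the
 * group address bytes and every demo_* name are assumptions for the example;
 * only the mrp_*() calls and the mrp_application fields referenced in the
 * code above are taken from it.  The rtnl_dereference()/ASSERT_RTNL() calls
 * above imply the per-port functions run under the RTNL lock.
 */
static struct mrp_application demo_mrp_app __read_mostly = {
	.type		= MRP_APPLICATION_MVRP,
	.maxattr	= 1,				/* one attribute type */
	.pkttype.type	= htons(ETH_P_MVRP),
	.group_address	= { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 },
	.version	= 0,
};

static int __init demo_mrp_module_init(void)
{
	/* Hooks up mrp_rcv() for this EtherType/group address. */
	return mrp_register_application(&demo_mrp_app);
}

static int demo_mrp_port_init(struct net_device *dev)
{
	/* Per port, under RTNL: allocates the applicant, arms the timers. */
	return mrp_init_applicant(dev, &demo_mrp_app);
}

static int demo_mrp_declare_vid(struct net_device *dev, u16 vid)
{
	__be16 value = htons(vid);

	/* MRP_EVENT_JOIN: the join timer transmits the declaration later.
	 * Attribute type 1 is an assumed value for this sketch.
	 */
	return mrp_request_join(dev, &demo_mrp_app, &value, sizeof(value), 1);
}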
// SPDX-License-Identifier: GPL-2.0-only /* * net/sunrpc/rpc_pipe.c * * Userland/kernel interface for rpcauth_gss.
* Code shamelessly plagiarized from fs/nfsd/nfsctl.c * and fs/sysfs/inode.c * * Copyright (c) 2002, Trond Myklebust <trond.myklebust@fys.uio.no> * */ #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/pagemap.h> #include <linux/mount.h> #include <linux/fs_context.h> #include <linux/namei.h> #include <linux/fsnotify.h> #include <linux/kernel.h> #include <linux/rcupdate.h> #include <linux/utsname.h> #include <asm/ioctls.h> #include <linux/poll.h> #include <linux/wait.h> #include <linux/seq_file.h> #include <linux/sunrpc/clnt.h> #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/cache.h> #include <linux/nsproxy.h> #include <linux/notifier.h> #include "netns.h" #include "sunrpc.h" #define RPCDBG_FACILITY RPCDBG_DEBUG #define NET_NAME(net) ((net == &init_net) ? " (init_net)" : "") static struct file_system_type rpc_pipe_fs_type; static const struct rpc_pipe_ops gssd_dummy_pipe_ops; static struct kmem_cache *rpc_inode_cachep __read_mostly; #define RPC_UPCALL_TIMEOUT (30*HZ) static BLOCKING_NOTIFIER_HEAD(rpc_pipefs_notifier_list); int rpc_pipefs_notifier_register(struct notifier_block *nb) { return blocking_notifier_chain_register(&rpc_pipefs_notifier_list, nb); } EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_register); void rpc_pipefs_notifier_unregister(struct notifier_block *nb) { blocking_notifier_chain_unregister(&rpc_pipefs_notifier_list, nb); } EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_unregister); static void rpc_purge_list(wait_queue_head_t *waitq, struct list_head *head, void (*destroy_msg)(struct rpc_pipe_msg *), int err) { struct rpc_pipe_msg *msg; if (list_empty(head)) return; do { msg = list_entry(head->next, struct rpc_pipe_msg, list); list_del_init(&msg->list); msg->errno = err; destroy_msg(msg); } while (!list_empty(head)); if (waitq) wake_up(waitq); } static void rpc_timeout_upcall_queue(struct work_struct *work) { LIST_HEAD(free_list); struct rpc_pipe *pipe = container_of(work, struct rpc_pipe, queue_timeout.work); void (*destroy_msg)(struct rpc_pipe_msg *); struct dentry *dentry; spin_lock(&pipe->lock); destroy_msg = pipe->ops->destroy_msg; if (pipe->nreaders == 0) { list_splice_init(&pipe->pipe, &free_list); pipe->pipelen = 0; } dentry = dget(pipe->dentry); spin_unlock(&pipe->lock); rpc_purge_list(dentry ? &RPC_I(d_inode(dentry))->waitq : NULL, &free_list, destroy_msg, -ETIMEDOUT); dput(dentry); } ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg, char __user *dst, size_t buflen) { char *data = (char *)msg->data + msg->copied; size_t mlen = min(msg->len - msg->copied, buflen); unsigned long left; left = copy_to_user(dst, data, mlen); if (left == mlen) { msg->errno = -EFAULT; return -EFAULT; } mlen -= left; msg->copied += mlen; msg->errno = 0; return mlen; } EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall); /** * rpc_queue_upcall - queue an upcall message to userspace * @pipe: upcall pipe on which to queue given message * @msg: message to queue * * Call with an @inode created by rpc_mkpipe() to queue an upcall. * A userspace process may then later read the upcall by performing a * read on an open file for this inode. It is up to the caller to * initialize the fields of @msg (other than @msg->list) appropriately. 
*/ int rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg) { int res = -EPIPE; struct dentry *dentry; spin_lock(&pipe->lock); if (pipe->nreaders) { list_add_tail(&msg->list, &pipe->pipe); pipe->pipelen += msg->len; res = 0; } else if (pipe->flags & RPC_PIPE_WAIT_FOR_OPEN) { if (list_empty(&pipe->pipe)) queue_delayed_work(rpciod_workqueue, &pipe->queue_timeout, RPC_UPCALL_TIMEOUT); list_add_tail(&msg->list, &pipe->pipe); pipe->pipelen += msg->len; res = 0; } dentry = dget(pipe->dentry); spin_unlock(&pipe->lock); if (dentry) { wake_up(&RPC_I(d_inode(dentry))->waitq); dput(dentry); } return res; } EXPORT_SYMBOL_GPL(rpc_queue_upcall); static inline void rpc_inode_setowner(struct inode *inode, void *private) { RPC_I(inode)->private = private; } static void rpc_close_pipes(struct inode *inode) { struct rpc_pipe *pipe = RPC_I(inode)->pipe; int need_release; LIST_HEAD(free_list); inode_lock(inode); spin_lock(&pipe->lock); need_release = pipe->nreaders != 0 || pipe->nwriters != 0; pipe->nreaders = 0; list_splice_init(&pipe->in_upcall, &free_list); list_splice_init(&pipe->pipe, &free_list); pipe->pipelen = 0; pipe->dentry = NULL; spin_unlock(&pipe->lock); rpc_purge_list(&RPC_I(inode)->waitq, &free_list, pipe->ops->destroy_msg, -EPIPE); pipe->nwriters = 0; if (need_release && pipe->ops->release_pipe) pipe->ops->release_pipe(inode); cancel_delayed_work_sync(&pipe->queue_timeout); rpc_inode_setowner(inode, NULL); RPC_I(inode)->pipe = NULL; inode_unlock(inode); } static struct inode * rpc_alloc_inode(struct super_block *sb) { struct rpc_inode *rpci; rpci = alloc_inode_sb(sb, rpc_inode_cachep, GFP_KERNEL); if (!rpci) return NULL; return &rpci->vfs_inode; } static void rpc_free_inode(struct inode *inode) { kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); } static int rpc_pipe_open(struct inode *inode, struct file *filp) { struct rpc_pipe *pipe; int first_open; int res = -ENXIO; inode_lock(inode); pipe = RPC_I(inode)->pipe; if (pipe == NULL) goto out; first_open = pipe->nreaders == 0 && pipe->nwriters == 0; if (first_open && pipe->ops->open_pipe) { res = pipe->ops->open_pipe(inode); if (res) goto out; } if (filp->f_mode & FMODE_READ) pipe->nreaders++; if (filp->f_mode & FMODE_WRITE) pipe->nwriters++; res = 0; out: inode_unlock(inode); return res; } static int rpc_pipe_release(struct inode *inode, struct file *filp) { struct rpc_pipe *pipe; struct rpc_pipe_msg *msg; int last_close; inode_lock(inode); pipe = RPC_I(inode)->pipe; if (pipe == NULL) goto out; msg = filp->private_data; if (msg != NULL) { spin_lock(&pipe->lock); msg->errno = -EAGAIN; list_del_init(&msg->list); spin_unlock(&pipe->lock); pipe->ops->destroy_msg(msg); } if (filp->f_mode & FMODE_WRITE) pipe->nwriters --; if (filp->f_mode & FMODE_READ) { pipe->nreaders --; if (pipe->nreaders == 0) { LIST_HEAD(free_list); spin_lock(&pipe->lock); list_splice_init(&pipe->pipe, &free_list); pipe->pipelen = 0; spin_unlock(&pipe->lock); rpc_purge_list(&RPC_I(inode)->waitq, &free_list, pipe->ops->destroy_msg, -EAGAIN); } } last_close = pipe->nwriters == 0 && pipe->nreaders == 0; if (last_close && pipe->ops->release_pipe) pipe->ops->release_pipe(inode); out: inode_unlock(inode); return 0; } static ssize_t rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) { struct inode *inode = file_inode(filp); struct rpc_pipe *pipe; struct rpc_pipe_msg *msg; int res = 0; inode_lock(inode); pipe = RPC_I(inode)->pipe; if (pipe == NULL) { res = -EPIPE; goto out_unlock; } msg = filp->private_data; if (msg == NULL) { 
spin_lock(&pipe->lock); if (!list_empty(&pipe->pipe)) { msg = list_entry(pipe->pipe.next, struct rpc_pipe_msg, list); list_move(&msg->list, &pipe->in_upcall); pipe->pipelen -= msg->len; filp->private_data = msg; msg->copied = 0; } spin_unlock(&pipe->lock); if (msg == NULL) goto out_unlock; } /* NOTE: it is up to the callback to update msg->copied */ res = pipe->ops->upcall(filp, msg, buf, len); if (res < 0 || msg->len == msg->copied) { filp->private_data = NULL; spin_lock(&pipe->lock); list_del_init(&msg->list); spin_unlock(&pipe->lock); pipe->ops->destroy_msg(msg); } out_unlock: inode_unlock(inode); return res; } static ssize_t rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset) { struct inode *inode = file_inode(filp); int res; inode_lock(inode); res = -EPIPE; if (RPC_I(inode)->pipe != NULL) res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len); inode_unlock(inode); return res; } static __poll_t rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) { struct inode *inode = file_inode(filp); struct rpc_inode *rpci = RPC_I(inode); __poll_t mask = EPOLLOUT | EPOLLWRNORM; poll_wait(filp, &rpci->waitq, wait); inode_lock(inode); if (rpci->pipe == NULL) mask |= EPOLLERR | EPOLLHUP; else if (filp->private_data || !list_empty(&rpci->pipe->pipe)) mask |= EPOLLIN | EPOLLRDNORM; inode_unlock(inode); return mask; } static long rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); struct rpc_pipe *pipe; int len; switch (cmd) { case FIONREAD: inode_lock(inode); pipe = RPC_I(inode)->pipe; if (pipe == NULL) { inode_unlock(inode); return -EPIPE; } spin_lock(&pipe->lock); len = pipe->pipelen; if (filp->private_data) { struct rpc_pipe_msg *msg; msg = filp->private_data; len += msg->len - msg->copied; } spin_unlock(&pipe->lock); inode_unlock(inode); return put_user(len, (int __user *)arg); default: return -EINVAL; } } static const struct file_operations rpc_pipe_fops = { .owner = THIS_MODULE, .read = rpc_pipe_read, .write = rpc_pipe_write, .poll = rpc_pipe_poll, .unlocked_ioctl = rpc_pipe_ioctl, .open = rpc_pipe_open, .release = rpc_pipe_release, }; static int rpc_show_info(struct seq_file *m, void *v) { struct rpc_clnt *clnt = m->private; rcu_read_lock(); seq_printf(m, "RPC server: %s\n", rcu_dereference(clnt->cl_xprt)->servername); seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_program->name, clnt->cl_prog, clnt->cl_vers); seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT)); rcu_read_unlock(); return 0; } static int rpc_info_open(struct inode *inode, struct file *file) { struct rpc_clnt *clnt = NULL; int ret = single_open(file, rpc_show_info, NULL); if (!ret) { struct seq_file *m = file->private_data; spin_lock(&file->f_path.dentry->d_lock); if (!d_unhashed(file->f_path.dentry)) clnt = RPC_I(inode)->private; if (clnt != NULL && refcount_inc_not_zero(&clnt->cl_count)) { spin_unlock(&file->f_path.dentry->d_lock); m->private = clnt; } else { spin_unlock(&file->f_path.dentry->d_lock); single_release(inode, file); ret = -EINVAL; } } return ret; } static int rpc_info_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; struct rpc_clnt *clnt = (struct rpc_clnt *)m->private; if (clnt) rpc_release_client(clnt); return single_release(inode, file); } static const struct file_operations 
rpc_info_operations = { .owner = THIS_MODULE, .open = rpc_info_open, .read = seq_read, .llseek = seq_lseek, .release = rpc_info_release, }; /* * Description of fs contents. */ struct rpc_filelist { const char *name; const struct file_operations *i_fop; umode_t mode; }; static struct inode * rpc_get_inode(struct super_block *sb, umode_t mode) { struct inode *inode = new_inode(sb); if (!inode) return NULL; inode->i_ino = get_next_ino(); inode->i_mode = mode; simple_inode_init_ts(inode); switch (mode & S_IFMT) { case S_IFDIR: inode->i_fop = &simple_dir_operations; inode->i_op = &simple_dir_inode_operations; inc_nlink(inode); break; default: break; } return inode; } static int __rpc_create_common(struct inode *dir, struct dentry *dentry, umode_t mode, const struct file_operations *i_fop, void *private) { struct inode *inode; d_drop(dentry); inode = rpc_get_inode(dir->i_sb, mode); if (!inode) goto out_err; inode->i_ino = iunique(dir->i_sb, 100); if (i_fop) inode->i_fop = i_fop; if (private) rpc_inode_setowner(inode, private); d_add(dentry, inode); return 0; out_err: printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %pd\n", __FILE__, __func__, dentry); dput(dentry); return -ENOMEM; } static int __rpc_create(struct inode *dir, struct dentry *dentry, umode_t mode, const struct file_operations *i_fop, void *private) { int err; err = __rpc_create_common(dir, dentry, S_IFREG | mode, i_fop, private); if (err) return err; fsnotify_create(dir, dentry); return 0; } static int __rpc_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, const struct file_operations *i_fop, void *private) { int err; err = __rpc_create_common(dir, dentry, S_IFDIR | mode, i_fop, private); if (err) return err; inc_nlink(dir); fsnotify_mkdir(dir, dentry); return 0; } static void init_pipe(struct rpc_pipe *pipe) { pipe->nreaders = 0; pipe->nwriters = 0; INIT_LIST_HEAD(&pipe->in_upcall); INIT_LIST_HEAD(&pipe->in_downcall); INIT_LIST_HEAD(&pipe->pipe); pipe->pipelen = 0; INIT_DELAYED_WORK(&pipe->queue_timeout, rpc_timeout_upcall_queue); pipe->ops = NULL; spin_lock_init(&pipe->lock); pipe->dentry = NULL; } void rpc_destroy_pipe_data(struct rpc_pipe *pipe) { kfree(pipe); } EXPORT_SYMBOL_GPL(rpc_destroy_pipe_data); struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags) { struct rpc_pipe *pipe; pipe = kzalloc(sizeof(struct rpc_pipe), GFP_KERNEL); if (!pipe) return ERR_PTR(-ENOMEM); init_pipe(pipe); pipe->ops = ops; pipe->flags = flags; return pipe; } EXPORT_SYMBOL_GPL(rpc_mkpipe_data); static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry, umode_t mode, const struct file_operations *i_fop, void *private, struct rpc_pipe *pipe) { struct rpc_inode *rpci; int err; err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private); if (err) return err; rpci = RPC_I(d_inode(dentry)); rpci->private = private; rpci->pipe = pipe; fsnotify_create(dir, dentry); return 0; } static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) { int ret; dget(dentry); ret = simple_rmdir(dir, dentry); d_drop(dentry); if (!ret) fsnotify_rmdir(dir, dentry); dput(dentry); return ret; } static int __rpc_unlink(struct inode *dir, struct dentry *dentry) { int ret; dget(dentry); ret = simple_unlink(dir, dentry); d_drop(dentry); if (!ret) fsnotify_unlink(dir, dentry); dput(dentry); return ret; } static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); rpc_close_pipes(inode); return __rpc_unlink(dir, dentry); } static struct dentry 
*__rpc_lookup_create_exclusive(struct dentry *parent, const char *name) { struct qstr q = QSTR_INIT(name, strlen(name)); struct dentry *dentry = d_hash_and_lookup(parent, &q); if (!dentry) { dentry = d_alloc(parent, &q); if (!dentry) return ERR_PTR(-ENOMEM); } if (d_really_is_negative(dentry)) return dentry; dput(dentry); return ERR_PTR(-EEXIST); } /* * FIXME: This probably has races. */ static void __rpc_depopulate(struct dentry *parent, const struct rpc_filelist *files, int start, int eof) { struct inode *dir = d_inode(parent); struct dentry *dentry; struct qstr name; int i; for (i = start; i < eof; i++) { name.name = files[i].name; name.len = strlen(files[i].name); dentry = d_hash_and_lookup(parent, &name); if (dentry == NULL) continue; if (d_really_is_negative(dentry)) goto next; switch (d_inode(dentry)->i_mode & S_IFMT) { default: BUG(); case S_IFREG: __rpc_unlink(dir, dentry); break; case S_IFDIR: __rpc_rmdir(dir, dentry); } next: dput(dentry); } } static void rpc_depopulate(struct dentry *parent, const struct rpc_filelist *files, int start, int eof) { struct inode *dir = d_inode(parent); inode_lock_nested(dir, I_MUTEX_CHILD); __rpc_depopulate(parent, files, start, eof); inode_unlock(dir); } static int rpc_populate(struct dentry *parent, const struct rpc_filelist *files, int start, int eof, void *private) { struct inode *dir = d_inode(parent); struct dentry *dentry; int i, err; inode_lock(dir); for (i = start; i < eof; i++) { dentry = __rpc_lookup_create_exclusive(parent, files[i].name); err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_bad; switch (files[i].mode & S_IFMT) { default: BUG(); case S_IFREG: err = __rpc_create(dir, dentry, files[i].mode, files[i].i_fop, private); break; case S_IFDIR: err = __rpc_mkdir(dir, dentry, files[i].mode, NULL, private); } if (err != 0) goto out_bad; } inode_unlock(dir); return 0; out_bad: __rpc_depopulate(parent, files, start, eof); inode_unlock(dir); printk(KERN_WARNING "%s: %s failed to populate directory %pd\n", __FILE__, __func__, parent); return err; } static struct dentry *rpc_mkdir_populate(struct dentry *parent, const char *name, umode_t mode, void *private, int (*populate)(struct dentry *, void *), void *args_populate) { struct dentry *dentry; struct inode *dir = d_inode(parent); int error; inode_lock_nested(dir, I_MUTEX_PARENT); dentry = __rpc_lookup_create_exclusive(parent, name); if (IS_ERR(dentry)) goto out; error = __rpc_mkdir(dir, dentry, mode, NULL, private); if (error != 0) goto out_err; if (populate != NULL) { error = populate(dentry, args_populate); if (error) goto err_rmdir; } out: inode_unlock(dir); return dentry; err_rmdir: __rpc_rmdir(dir, dentry); out_err: dentry = ERR_PTR(error); goto out; } static int rpc_rmdir_depopulate(struct dentry *dentry, void (*depopulate)(struct dentry *)) { struct dentry *parent; struct inode *dir; int error; parent = dget_parent(dentry); dir = d_inode(parent); inode_lock_nested(dir, I_MUTEX_PARENT); if (depopulate != NULL) depopulate(dentry); error = __rpc_rmdir(dir, dentry); inode_unlock(dir); dput(parent); return error; } /** * rpc_mkpipe_dentry - make an rpc_pipefs file for kernel<->userspace * communication * @parent: dentry of directory to create new "pipe" in * @name: name of pipe * @private: private data to associate with the pipe, for the caller's use * @pipe: &rpc_pipe containing input parameters * * Data is made available for userspace to read by calls to * rpc_queue_upcall(). 
The actual reads will result in calls to * @ops->upcall, which will be called with the file pointer, * message, and userspace buffer to copy to. * * Writes can come at any time, and do not necessarily have to be * responses to upcalls. They will result in calls to @msg->downcall. * * The @private argument passed here will be available to all these methods * from the file pointer, via RPC_I(file_inode(file))->private. */ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, void *private, struct rpc_pipe *pipe) { struct dentry *dentry; struct inode *dir = d_inode(parent); umode_t umode = S_IFIFO | 0600; int err; if (pipe->ops->upcall == NULL) umode &= ~0444; if (pipe->ops->downcall == NULL) umode &= ~0222; inode_lock_nested(dir, I_MUTEX_PARENT); dentry = __rpc_lookup_create_exclusive(parent, name); if (IS_ERR(dentry)) goto out; err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops, private, pipe); if (err) goto out_err; out: inode_unlock(dir); return dentry; out_err: dentry = ERR_PTR(err); printk(KERN_WARNING "%s: %s() failed to create pipe %pd/%s (errno = %d)\n", __FILE__, __func__, parent, name, err); goto out; } EXPORT_SYMBOL_GPL(rpc_mkpipe_dentry); /** * rpc_unlink - remove a pipe * @dentry: dentry for the pipe, as returned from rpc_mkpipe * * After this call, lookups will no longer find the pipe, and any * attempts to read or write using preexisting opens of the pipe will * return -EPIPE. */ int rpc_unlink(struct dentry *dentry) { struct dentry *parent; struct inode *dir; int error = 0; parent = dget_parent(dentry); dir = d_inode(parent); inode_lock_nested(dir, I_MUTEX_PARENT); error = __rpc_rmpipe(dir, dentry); inode_unlock(dir); dput(parent); return error; } EXPORT_SYMBOL_GPL(rpc_unlink); /** * rpc_init_pipe_dir_head - initialise a struct rpc_pipe_dir_head * @pdh: pointer to struct rpc_pipe_dir_head */ void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh) { INIT_LIST_HEAD(&pdh->pdh_entries); pdh->pdh_dentry = NULL; } EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_head); /** * rpc_init_pipe_dir_object - initialise a struct rpc_pipe_dir_object * @pdo: pointer to struct rpc_pipe_dir_object * @pdo_ops: pointer to const struct rpc_pipe_dir_object_ops * @pdo_data: pointer to caller-defined data */ void rpc_init_pipe_dir_object(struct rpc_pipe_dir_object *pdo, const struct rpc_pipe_dir_object_ops *pdo_ops, void *pdo_data) { INIT_LIST_HEAD(&pdo->pdo_head); pdo->pdo_ops = pdo_ops; pdo->pdo_data = pdo_data; } EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_object); static int rpc_add_pipe_dir_object_locked(struct net *net, struct rpc_pipe_dir_head *pdh, struct rpc_pipe_dir_object *pdo) { int ret = 0; if (pdh->pdh_dentry) ret = pdo->pdo_ops->create(pdh->pdh_dentry, pdo); if (ret == 0) list_add_tail(&pdo->pdo_head, &pdh->pdh_entries); return ret; } static void rpc_remove_pipe_dir_object_locked(struct net *net, struct rpc_pipe_dir_head *pdh, struct rpc_pipe_dir_object *pdo) { if (pdh->pdh_dentry) pdo->pdo_ops->destroy(pdh->pdh_dentry, pdo); list_del_init(&pdo->pdo_head); } /** * rpc_add_pipe_dir_object - associate a rpc_pipe_dir_object to a directory * @net: pointer to struct net * @pdh: pointer to struct rpc_pipe_dir_head * @pdo: pointer to struct rpc_pipe_dir_object * */ int rpc_add_pipe_dir_object(struct net *net, struct rpc_pipe_dir_head *pdh, struct rpc_pipe_dir_object *pdo) { int ret = 0; if (list_empty(&pdo->pdo_head)) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); mutex_lock(&sn->pipefs_sb_lock); ret = rpc_add_pipe_dir_object_locked(net, pdh, pdo); 
mutex_unlock(&sn->pipefs_sb_lock); } return ret; } EXPORT_SYMBOL_GPL(rpc_add_pipe_dir_object); /** * rpc_remove_pipe_dir_object - remove a rpc_pipe_dir_object from a directory * @net: pointer to struct net * @pdh: pointer to struct rpc_pipe_dir_head * @pdo: pointer to struct rpc_pipe_dir_object * */ void rpc_remove_pipe_dir_object(struct net *net, struct rpc_pipe_dir_head *pdh, struct rpc_pipe_dir_object *pdo) { if (!list_empty(&pdo->pdo_head)) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); mutex_lock(&sn->pipefs_sb_lock); rpc_remove_pipe_dir_object_locked(net, pdh, pdo); mutex_unlock(&sn->pipefs_sb_lock); } } EXPORT_SYMBOL_GPL(rpc_remove_pipe_dir_object); /** * rpc_find_or_alloc_pipe_dir_object * @net: pointer to struct net * @pdh: pointer to struct rpc_pipe_dir_head * @match: match struct rpc_pipe_dir_object to data * @alloc: allocate a new struct rpc_pipe_dir_object * @data: user defined data for match() and alloc() * */ struct rpc_pipe_dir_object * rpc_find_or_alloc_pipe_dir_object(struct net *net, struct rpc_pipe_dir_head *pdh, int (*match)(struct rpc_pipe_dir_object *, void *), struct rpc_pipe_dir_object *(*alloc)(void *), void *data) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct rpc_pipe_dir_object *pdo; mutex_lock(&sn->pipefs_sb_lock); list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) { if (!match(pdo, data)) continue; goto out; } pdo = alloc(data); if (!pdo) goto out; rpc_add_pipe_dir_object_locked(net, pdh, pdo); out: mutex_unlock(&sn->pipefs_sb_lock); return pdo; } EXPORT_SYMBOL_GPL(rpc_find_or_alloc_pipe_dir_object); static void rpc_create_pipe_dir_objects(struct rpc_pipe_dir_head *pdh) { struct rpc_pipe_dir_object *pdo; struct dentry *dir = pdh->pdh_dentry; list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) pdo->pdo_ops->create(dir, pdo); } static void rpc_destroy_pipe_dir_objects(struct rpc_pipe_dir_head *pdh) { struct rpc_pipe_dir_object *pdo; struct dentry *dir = pdh->pdh_dentry; list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) pdo->pdo_ops->destroy(dir, pdo); } enum { RPCAUTH_info, RPCAUTH_EOF }; static const struct rpc_filelist authfiles[] = { [RPCAUTH_info] = { .name = "info", .i_fop = &rpc_info_operations, .mode = S_IFREG | 0400, }, }; static int rpc_clntdir_populate(struct dentry *dentry, void *private) { return rpc_populate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF, private); } static void rpc_clntdir_depopulate(struct dentry *dentry) { rpc_depopulate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF); } /** * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs * @dentry: the parent of new directory * @name: the name of new directory * @rpc_client: rpc client to associate with this directory * * This creates a directory at the given @path associated with * @rpc_clnt, which will contain a file named "info" with some basic * information about the client, together with any "pipes" that may * later be created using rpc_mkpipe(). 
*/ struct dentry *rpc_create_client_dir(struct dentry *dentry, const char *name, struct rpc_clnt *rpc_client) { struct dentry *ret; ret = rpc_mkdir_populate(dentry, name, 0555, NULL, rpc_clntdir_populate, rpc_client); if (!IS_ERR(ret)) { rpc_client->cl_pipedir_objects.pdh_dentry = ret; rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects); } return ret; } /** * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() * @rpc_client: rpc_client for the pipe */ int rpc_remove_client_dir(struct rpc_clnt *rpc_client) { struct dentry *dentry = rpc_client->cl_pipedir_objects.pdh_dentry; if (dentry == NULL) return 0; rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects); rpc_client->cl_pipedir_objects.pdh_dentry = NULL; return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate); } static const struct rpc_filelist cache_pipefs_files[3] = { [0] = { .name = "channel", .i_fop = &cache_file_operations_pipefs, .mode = S_IFREG | 0600, }, [1] = { .name = "content", .i_fop = &content_file_operations_pipefs, .mode = S_IFREG | 0400, }, [2] = { .name = "flush", .i_fop = &cache_flush_operations_pipefs, .mode = S_IFREG | 0600, }, }; static int rpc_cachedir_populate(struct dentry *dentry, void *private) { return rpc_populate(dentry, cache_pipefs_files, 0, 3, private); } static void rpc_cachedir_depopulate(struct dentry *dentry) { rpc_depopulate(dentry, cache_pipefs_files, 0, 3); } struct dentry *rpc_create_cache_dir(struct dentry *parent, const char *name, umode_t umode, struct cache_detail *cd) { return rpc_mkdir_populate(parent, name, umode, NULL, rpc_cachedir_populate, cd); } void rpc_remove_cache_dir(struct dentry *dentry) { rpc_rmdir_depopulate(dentry, rpc_cachedir_depopulate); } /* * populate the filesystem */ static const struct super_operations s_ops = { .alloc_inode = rpc_alloc_inode, .free_inode = rpc_free_inode, .statfs = simple_statfs, }; #define RPCAUTH_GSSMAGIC 0x67596969 /* * We have a single directory with 1 node in it. */ enum { RPCAUTH_lockd, RPCAUTH_mount, RPCAUTH_nfs, RPCAUTH_portmap, RPCAUTH_statd, RPCAUTH_nfsd4_cb, RPCAUTH_cache, RPCAUTH_nfsd, RPCAUTH_gssd, RPCAUTH_RootEOF }; static const struct rpc_filelist files[] = { [RPCAUTH_lockd] = { .name = "lockd", .mode = S_IFDIR | 0555, }, [RPCAUTH_mount] = { .name = "mount", .mode = S_IFDIR | 0555, }, [RPCAUTH_nfs] = { .name = "nfs", .mode = S_IFDIR | 0555, }, [RPCAUTH_portmap] = { .name = "portmap", .mode = S_IFDIR | 0555, }, [RPCAUTH_statd] = { .name = "statd", .mode = S_IFDIR | 0555, }, [RPCAUTH_nfsd4_cb] = { .name = "nfsd4_cb", .mode = S_IFDIR | 0555, }, [RPCAUTH_cache] = { .name = "cache", .mode = S_IFDIR | 0555, }, [RPCAUTH_nfsd] = { .name = "nfsd", .mode = S_IFDIR | 0555, }, [RPCAUTH_gssd] = { .name = "gssd", .mode = S_IFDIR | 0555, }, }; /* * This call can be used only in RPC pipefs mount notification hooks. 
*/ struct dentry *rpc_d_lookup_sb(const struct super_block *sb, const unsigned char *dir_name) { struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name)); return d_hash_and_lookup(sb->s_root, &dir); } EXPORT_SYMBOL_GPL(rpc_d_lookup_sb); int rpc_pipefs_init_net(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); sn->gssd_dummy = rpc_mkpipe_data(&gssd_dummy_pipe_ops, 0); if (IS_ERR(sn->gssd_dummy)) return PTR_ERR(sn->gssd_dummy); mutex_init(&sn->pipefs_sb_lock); sn->pipe_version = -1; return 0; } void rpc_pipefs_exit_net(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); rpc_destroy_pipe_data(sn->gssd_dummy); } /* * This call will be used for per network namespace operations calls. * Note: Function will be returned with pipefs_sb_lock taken if superblock was * found. This lock have to be released by rpc_put_sb_net() when all operations * will be completed. */ struct super_block *rpc_get_sb_net(const struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); mutex_lock(&sn->pipefs_sb_lock); if (sn->pipefs_sb) return sn->pipefs_sb; mutex_unlock(&sn->pipefs_sb_lock); return NULL; } EXPORT_SYMBOL_GPL(rpc_get_sb_net); void rpc_put_sb_net(const struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); WARN_ON(sn->pipefs_sb == NULL); mutex_unlock(&sn->pipefs_sb_lock); } EXPORT_SYMBOL_GPL(rpc_put_sb_net); static const struct rpc_filelist gssd_dummy_clnt_dir[] = { [0] = { .name = "clntXX", .mode = S_IFDIR | 0555, }, }; static ssize_t dummy_downcall(struct file *filp, const char __user *src, size_t len) { return -EINVAL; } static const struct rpc_pipe_ops gssd_dummy_pipe_ops = { .upcall = rpc_pipe_generic_upcall, .downcall = dummy_downcall, }; /* * Here we present a bogus "info" file to keep rpc.gssd happy. We don't expect * that it will ever use this info to handle an upcall, but rpc.gssd expects * that this file will be there and have a certain format. */ static int rpc_dummy_info_show(struct seq_file *m, void *v) { seq_printf(m, "RPC server: %s\n", utsname()->nodename); seq_printf(m, "service: foo (1) version 0\n"); seq_printf(m, "address: 127.0.0.1\n"); seq_printf(m, "protocol: tcp\n"); seq_printf(m, "port: 0\n"); return 0; } DEFINE_SHOW_ATTRIBUTE(rpc_dummy_info); static const struct rpc_filelist gssd_dummy_info_file[] = { [0] = { .name = "info", .i_fop = &rpc_dummy_info_fops, .mode = S_IFREG | 0400, }, }; /** * rpc_gssd_dummy_populate - create a dummy gssd pipe * @root: root of the rpc_pipefs filesystem * @pipe_data: pipe data created when netns is initialized * * Create a dummy set of directories and a pipe that gssd can hold open to * indicate that it is up and running. 
*/ static struct dentry * rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data) { int ret = 0; struct dentry *gssd_dentry; struct dentry *clnt_dentry = NULL; struct dentry *pipe_dentry = NULL; struct qstr q = QSTR_INIT(files[RPCAUTH_gssd].name, strlen(files[RPCAUTH_gssd].name)); /* We should never get this far if "gssd" doesn't exist */ gssd_dentry = d_hash_and_lookup(root, &q); if (!gssd_dentry) return ERR_PTR(-ENOENT); ret = rpc_populate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1, NULL); if (ret) { pipe_dentry = ERR_PTR(ret); goto out; } q.name = gssd_dummy_clnt_dir[0].name; q.len = strlen(gssd_dummy_clnt_dir[0].name); clnt_dentry = d_hash_and_lookup(gssd_dentry, &q); if (!clnt_dentry) { __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); pipe_dentry = ERR_PTR(-ENOENT); goto out; } ret = rpc_populate(clnt_dentry, gssd_dummy_info_file, 0, 1, NULL); if (ret) { __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); pipe_dentry = ERR_PTR(ret); goto out; } pipe_dentry = rpc_mkpipe_dentry(clnt_dentry, "gssd", NULL, pipe_data); if (IS_ERR(pipe_dentry)) { __rpc_depopulate(clnt_dentry, gssd_dummy_info_file, 0, 1); __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); } out: dput(clnt_dentry); dput(gssd_dentry); return pipe_dentry; } static void rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry) { struct dentry *clnt_dir = pipe_dentry->d_parent; struct dentry *gssd_dir = clnt_dir->d_parent; dget(pipe_dentry); __rpc_rmpipe(d_inode(clnt_dir), pipe_dentry); __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1); __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1); dput(pipe_dentry); } static int rpc_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; struct dentry *root, *gssd_dentry; struct net *net = sb->s_fs_info; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int err; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = RPCAUTH_GSSMAGIC; sb->s_op = &s_ops; sb->s_d_op = &simple_dentry_operations; sb->s_time_gran = 1; inode = rpc_get_inode(sb, S_IFDIR | 0555); sb->s_root = root = d_make_root(inode); if (!root) return -ENOMEM; if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) return -ENOMEM; gssd_dentry = rpc_gssd_dummy_populate(root, sn->gssd_dummy); if (IS_ERR(gssd_dentry)) { __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF); return PTR_ERR(gssd_dentry); } dprintk("RPC: sending pipefs MOUNT notification for net %x%s\n", net->ns.inum, NET_NAME(net)); mutex_lock(&sn->pipefs_sb_lock); sn->pipefs_sb = sb; err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_MOUNT, sb); if (err) goto err_depopulate; mutex_unlock(&sn->pipefs_sb_lock); return 0; err_depopulate: rpc_gssd_dummy_depopulate(gssd_dentry); blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_UMOUNT, sb); sn->pipefs_sb = NULL; __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF); mutex_unlock(&sn->pipefs_sb_lock); return err; } bool gssd_running(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct rpc_pipe *pipe = sn->gssd_dummy; return pipe->nreaders || pipe->nwriters; } EXPORT_SYMBOL_GPL(gssd_running); static int rpc_fs_get_tree(struct fs_context *fc) { return get_tree_keyed(fc, rpc_fill_super, get_net(fc->net_ns)); } static void rpc_fs_free_fc(struct fs_context *fc) { if (fc->s_fs_info) put_net(fc->s_fs_info); } static const struct fs_context_operations rpc_fs_context_ops = { .free = rpc_fs_free_fc, .get_tree = rpc_fs_get_tree, }; static int 
rpc_init_fs_context(struct fs_context *fc) { put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(fc->net_ns->user_ns); fc->ops = &rpc_fs_context_ops; return 0; } static void rpc_kill_sb(struct super_block *sb) { struct net *net = sb->s_fs_info; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); mutex_lock(&sn->pipefs_sb_lock); if (sn->pipefs_sb != sb) { mutex_unlock(&sn->pipefs_sb_lock); goto out; } sn->pipefs_sb = NULL; dprintk("RPC: sending pipefs UMOUNT notification for net %x%s\n", net->ns.inum, NET_NAME(net)); blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_UMOUNT, sb); mutex_unlock(&sn->pipefs_sb_lock); out: kill_litter_super(sb); put_net(net); } static struct file_system_type rpc_pipe_fs_type = { .owner = THIS_MODULE, .name = "rpc_pipefs", .init_fs_context = rpc_init_fs_context, .kill_sb = rpc_kill_sb, }; MODULE_ALIAS_FS("rpc_pipefs"); MODULE_ALIAS("rpc_pipefs"); static void init_once(void *foo) { struct rpc_inode *rpci = (struct rpc_inode *) foo; inode_init_once(&rpci->vfs_inode); rpci->private = NULL; rpci->pipe = NULL; init_waitqueue_head(&rpci->waitq); } int register_rpc_pipefs(void) { int err; rpc_inode_cachep = kmem_cache_create("rpc_inode_cache", sizeof(struct rpc_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_ACCOUNT), init_once); if (!rpc_inode_cachep) return -ENOMEM; err = rpc_clients_notifier_register(); if (err) goto err_notifier; err = register_filesystem(&rpc_pipe_fs_type); if (err) goto err_register; return 0; err_register: rpc_clients_notifier_unregister(); err_notifier: kmem_cache_destroy(rpc_inode_cachep); return err; } void unregister_rpc_pipefs(void) { rpc_clients_notifier_unregister(); unregister_filesystem(&rpc_pipe_fs_type); kmem_cache_destroy(rpc_inode_cachep); }
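/*
 * Illustrative sketch (not part of the file above): the kernel-side
 * producer of an rpc_pipefs pipe.  Every demo_* name is hypothetical;
 * only the rpc_* calls and the rpc_pipe_ops/rpc_pipe_msg fields used
 * here appear in the code above.  @payload is assumed to be a
 * kmalloc()ed buffer that the message takes ownership of.
 */
static ssize_t demo_downcall(struct file *filp, const char __user *src,
			     size_t len)
{
	/* A real pipe would parse the userspace reply here. */
	return len;
}

static void demo_destroy_msg(struct rpc_pipe_msg *msg)
{
	kfree(msg->data);
	kfree(msg);
}

static const struct rpc_pipe_ops demo_pipe_ops = {
	.upcall		= rpc_pipe_generic_upcall,
	.downcall	= demo_downcall,
	.destroy_msg	= demo_destroy_msg,
};

static int demo_send_upcall(struct dentry *parent, void *payload, size_t len)
{
	struct rpc_pipe *pipe;
	struct dentry *dentry;
	struct rpc_pipe_msg *msg;
	int err;

	/* Hold queued messages until a reader opens the pipe. */
	pipe = rpc_mkpipe_data(&demo_pipe_ops, RPC_PIPE_WAIT_FOR_OPEN);
	if (IS_ERR(pipe))
		return PTR_ERR(pipe);

	dentry = rpc_mkpipe_dentry(parent, "demo", NULL, pipe);
	if (IS_ERR(dentry)) {
		rpc_destroy_pipe_data(pipe);
		return PTR_ERR(dentry);
	}

	msg = kzalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;
	msg->data = payload;
	msg->len = len;

	/* A reader of the pipe receives this via demo_pipe_ops.upcall(). */
	err = rpc_queue_upcall(pipe, msg);
	if (err < 0)
		demo_destroy_msg(msg);

	/* Teardown (not shown) would rpc_unlink(dentry) followed by
	 * rpc_destroy_pipe_data(pipe).
	 */
	return err;
}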
#ifndef _LINUX_HASH_H
#define _LINUX_HASH_H
/* Fast hashing routine for ints, longs and pointers.
   (C) 2002 Nadia Yvette Chambers, IBM */

#include <asm/types.h>
#include <linux/compiler.h>

/*
 * The "GOLDEN_RATIO_PRIME" is used in fs/btrfs/btrfs_inode.h and
 * fs/inode.c.  It's not actually prime any more (the previous primes
 * were actively bad for hashing), but the name remains.
 */
#if BITS_PER_LONG == 32
#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32
#define hash_long(val, bits) hash_32(val, bits)
#elif BITS_PER_LONG == 64
#define hash_long(val, bits) hash_64(val, bits)
#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64
#else
#error Wordsize not 32 or 64
#endif

/*
 * This hash multiplies the input by a large odd number and takes the
 * high bits.  Since multiplication propagates changes to the most
 * significant end only, it is essential that the high bits of the
 * product be used for the hash value.
 *
 * Chuck Lever verified the effectiveness of this technique:
 * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
 *
 * Although a random odd number will do, it turns out that the golden
 * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
 * properties.  (See Knuth vol 3, section 6.4, exercise 9.)
 *
 * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2,
 * which is very slightly easier to multiply by and makes no
 * difference to the hash distribution.
 */
#define GOLDEN_RATIO_32 0x61C88647
#define GOLDEN_RATIO_64 0x61C8864680B583EBull

#ifdef CONFIG_HAVE_ARCH_HASH
/* This header may use the GOLDEN_RATIO_xx constants */
#include <asm/hash.h>
#endif

/*
 * The _generic versions exist only so lib/test_hash.c can compare
 * the arch-optimized versions with the generic.
 *
 * Note that if you change these, any <asm/hash.h> that aren't updated
 * to match need to have their HAVE_ARCH_* define values updated so the
 * self-test will not false-positive.
 */
#ifndef HAVE_ARCH__HASH_32
#define __hash_32 __hash_32_generic
#endif
static inline u32 __hash_32_generic(u32 val)
{
	return val * GOLDEN_RATIO_32;
}

static inline u32 hash_32(u32 val, unsigned int bits)
{
	/* High bits are more random, so use them. */
	return __hash_32(val) >> (32 - bits);
}

#ifndef HAVE_ARCH_HASH_64
#define hash_64 hash_64_generic
#endif
static __always_inline u32 hash_64_generic(u64 val, unsigned int bits)
{
#if BITS_PER_LONG == 64
	/* 64x64-bit multiply is efficient on all 64-bit processors */
	return val * GOLDEN_RATIO_64 >> (64 - bits);
#else
	/* Hash 64 bits using only 32x32-bit multiply. */
	return hash_32((u32)val ^ __hash_32(val >> 32), bits);
#endif
}

static inline u32 hash_ptr(const void *ptr, unsigned int bits)
{
	return hash_long((unsigned long)ptr, bits);
}

/* This really should be called fold32_ptr; it does no hashing to speak of. */
static inline u32 hash32_ptr(const void *ptr)
{
	unsigned long val = (unsigned long)ptr;

#if BITS_PER_LONG == 64
	val ^= (val >> 32);
#endif
	return (u32)val;
}

#endif /* _LINUX_HASH_H */
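/*
 * Illustrative sketch (not part of hash.h): picking a bucket in a
 * power-of-two hash table with the helpers above.  DEMO_HASH_BITS and
 * the demo_* names are assumptions for the example.
 */
#define DEMO_HASH_BITS	6		/* 1 << 6 = 64 buckets */

static inline u32 demo_bucket_of_id(u32 id)
{
	/* Multiplies by GOLDEN_RATIO_32 and keeps the top 6 bits: 0..63. */
	return hash_32(id, DEMO_HASH_BITS);
}

static inline u32 demo_bucket_of_ptr(const void *obj)
{
	/* hash_long() on the pointer value, again folded to 6 bits. */
	return hash_ptr(obj, DEMO_HASH_BITS);
}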
// SPDX-License-Identifier: GPL-2.0-only /* * mac80211 configuration hooks for cfg80211 * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH * Copyright (C) 2018-2024 Intel Corporation */ #include <linux/ieee80211.h> #include <linux/nl80211.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <linux/rcupdate.h> #include <linux/fips.h> #include <linux/if_ether.h> #include <net/cfg80211.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" #include "mesh.h" #include "wme.h" static struct ieee80211_link_data * ieee80211_link_or_deflink(struct ieee80211_sub_if_data *sdata, int link_id, bool require_valid) { struct ieee80211_link_data *link; if (link_id < 0) { /* * For keys, if sdata is not an MLD, we might not use * the return value at all (if it's not a pairwise key), * so in that case (require_valid==false) don't error. */ if (require_valid && ieee80211_vif_is_mld(&sdata->vif)) return ERR_PTR(-EINVAL); return &sdata->deflink; } link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return ERR_PTR(-ENOLINK); return link; } static void ieee80211_set_mu_mimo_follow(struct ieee80211_sub_if_data *sdata, struct vif_params *params) { bool mu_mimo_groups = false; bool mu_mimo_follow = false; if (params->vht_mumimo_groups) { u64 membership; BUILD_BUG_ON(sizeof(membership) != WLAN_MEMBERSHIP_LEN); memcpy(sdata->vif.bss_conf.mu_group.membership, params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN); memcpy(sdata->vif.bss_conf.mu_group.position, params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN, WLAN_USER_POSITION_LEN); ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_MU_GROUPS); /* don't care about endianness - just check for 0 */ memcpy(&membership, params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN); mu_mimo_groups = membership != 0; } if (params->vht_mumimo_follow_addr) { mu_mimo_follow = is_valid_ether_addr(params->vht_mumimo_follow_addr); ether_addr_copy(sdata->u.mntr.mu_follow_addr, params->vht_mumimo_follow_addr); } sdata->vif.bss_conf.mu_mimo_owner = mu_mimo_groups || mu_mimo_follow; } static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, struct vif_params *params) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *monitor_sdata; /* check flags first */ if (params->flags && ieee80211_sdata_running(sdata)) { u32 mask = MONITOR_FLAG_COOK_FRAMES | MONITOR_FLAG_ACTIVE; /* * Prohibit MONITOR_FLAG_COOK_FRAMES and * MONITOR_FLAG_ACTIVE to be changed while the * interface is up.
* Else we would need to add a lot of cruft * to update everything: * cooked_mntrs, monitor and all fif_* counters * reconfigure hardware */ if ((params->flags & mask) != (sdata->u.mntr.flags & mask)) return -EBUSY; } /* also validate MU-MIMO change */ monitor_sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!monitor_sdata && (params->vht_mumimo_groups || params->vht_mumimo_follow_addr)) return -EOPNOTSUPP; /* apply all changes now - no failures allowed */ if (monitor_sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) ieee80211_set_mu_mimo_follow(monitor_sdata, params); if (params->flags) { if (ieee80211_sdata_running(sdata)) { ieee80211_adjust_monitor_flags(sdata, -1); sdata->u.mntr.flags = params->flags; ieee80211_adjust_monitor_flags(sdata, 1); ieee80211_configure_filter(local); } else { /* * Because the interface is down, ieee80211_do_stop * and ieee80211_do_open take care of "everything" * mentioned in the comment above. */ sdata->u.mntr.flags = params->flags; } } return 0; } static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, struct cfg80211_mbssid_config params, struct ieee80211_bss_conf *link_conf) { struct ieee80211_sub_if_data *tx_sdata; sdata->vif.mbssid_tx_vif = NULL; link_conf->bssid_index = 0; link_conf->nontransmitted = false; link_conf->ema_ap = false; link_conf->bssid_indicator = 0; if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev) return -EINVAL; tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params.tx_wdev); if (!tx_sdata) return -EINVAL; if (tx_sdata == sdata) { sdata->vif.mbssid_tx_vif = &sdata->vif; } else { sdata->vif.mbssid_tx_vif = &tx_sdata->vif; link_conf->nontransmitted = true; link_conf->bssid_index = params.index; } if (params.ema) link_conf->ema_ap = true; return 0; } static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, const char *name, unsigned char name_assign_type, enum nl80211_iftype type, struct vif_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct wireless_dev *wdev; struct ieee80211_sub_if_data *sdata; int err; err = ieee80211_if_add(local, name, name_assign_type, &wdev, type, params); if (err) return ERR_PTR(err); sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (type == NL80211_IFTYPE_MONITOR) { err = ieee80211_set_mon_options(sdata, params); if (err) { ieee80211_if_remove(sdata); return NULL; } } return wdev; } static int ieee80211_del_iface(struct wiphy *wiphy, struct wireless_dev *wdev) { ieee80211_if_remove(IEEE80211_WDEV_TO_SUB_IF(wdev)); return 0; } static int ieee80211_change_iface(struct wiphy *wiphy, struct net_device *dev, enum nl80211_iftype type, struct vif_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret; lockdep_assert_wiphy(local->hw.wiphy); ret = ieee80211_if_change_type(sdata, type); if (ret) return ret; if (type == NL80211_IFTYPE_AP_VLAN && params->use_4addr == 0) { RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); ieee80211_check_fast_rx_iface(sdata); } else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; if (params->use_4addr == ifmgd->use_4addr) return 0; /* FIXME: no support for 4-addr MLO yet */ if (ieee80211_vif_is_mld(&sdata->vif)) return -EOPNOTSUPP; sdata->u.mgd.use_4addr = params->use_4addr; if (!ifmgd->associated) return 0; sta = sta_info_get(sdata, sdata->deflink.u.mgd.bssid); if (sta) drv_sta_set_4addr(local, sdata, &sta->sta, params->use_4addr); if 
(params->use_4addr) ieee80211_send_4addr_nullfunc(local, sdata); } if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { ret = ieee80211_set_mon_options(sdata, params); if (ret) return ret; } return 0; } static int ieee80211_start_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; lockdep_assert_wiphy(sdata->local->hw.wiphy); ret = ieee80211_check_combinations(sdata, NULL, 0, 0, -1); if (ret < 0) return ret; return ieee80211_do_open(wdev, true); } static void ieee80211_stop_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { ieee80211_sdata_stop(IEEE80211_WDEV_TO_SUB_IF(wdev)); } static int ieee80211_start_nan(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; lockdep_assert_wiphy(sdata->local->hw.wiphy); ret = ieee80211_check_combinations(sdata, NULL, 0, 0, -1); if (ret < 0) return ret; ret = ieee80211_do_open(wdev, true); if (ret) return ret; ret = drv_start_nan(sdata->local, sdata, conf); if (ret) ieee80211_sdata_stop(sdata); sdata->u.nan.conf = *conf; return ret; } static void ieee80211_stop_nan(struct wiphy *wiphy, struct wireless_dev *wdev) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); drv_stop_nan(sdata->local, sdata); ieee80211_sdata_stop(sdata); } static int ieee80211_nan_change_conf(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct cfg80211_nan_conf new_conf; int ret = 0; if (sdata->vif.type != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; new_conf = sdata->u.nan.conf; if (changes & CFG80211_NAN_CONF_CHANGED_PREF) new_conf.master_pref = conf->master_pref; if (changes & CFG80211_NAN_CONF_CHANGED_BANDS) new_conf.bands = conf->bands; ret = drv_nan_change_conf(sdata->local, sdata, &new_conf, changes); if (!ret) sdata->u.nan.conf = new_conf; return ret; } static int ieee80211_add_nan_func(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_func *nan_func) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; if (sdata->vif.type != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; spin_lock_bh(&sdata->u.nan.func_lock); ret = idr_alloc(&sdata->u.nan.function_inst_ids, nan_func, 1, sdata->local->hw.max_nan_de_entries + 1, GFP_ATOMIC); spin_unlock_bh(&sdata->u.nan.func_lock); if (ret < 0) return ret; nan_func->instance_id = ret; WARN_ON(nan_func->instance_id == 0); ret = drv_add_nan_func(sdata->local, sdata, nan_func); if (ret) { spin_lock_bh(&sdata->u.nan.func_lock); idr_remove(&sdata->u.nan.function_inst_ids, nan_func->instance_id); spin_unlock_bh(&sdata->u.nan.func_lock); } return ret; } static struct cfg80211_nan_func * ieee80211_find_nan_func_by_cookie(struct ieee80211_sub_if_data *sdata, u64 cookie) { struct cfg80211_nan_func *func; int id; lockdep_assert_held(&sdata->u.nan.func_lock); idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) { if (func->cookie == cookie) return func; } return NULL; } static void ieee80211_del_nan_func(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct cfg80211_nan_func *func; u8 instance_id = 0; if (sdata->vif.type != NL80211_IFTYPE_NAN || !ieee80211_sdata_running(sdata)) return; 
spin_lock_bh(&sdata->u.nan.func_lock); func = ieee80211_find_nan_func_by_cookie(sdata, cookie); if (func) instance_id = func->instance_id; spin_unlock_bh(&sdata->u.nan.func_lock); if (instance_id) drv_del_nan_func(sdata->local, sdata, instance_id); } static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); sdata->noack_map = noack_map; ieee80211_check_fast_xmit_iface(sdata); return 0; } static int ieee80211_set_tx(struct ieee80211_sub_if_data *sdata, const u8 *mac_addr, u8 key_idx) { struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; struct sta_info *sta; int ret = -EINVAL; if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID)) return -EINVAL; sta = sta_info_get_bss(sdata, mac_addr); if (!sta) return -EINVAL; if (sta->ptk_idx == key_idx) return 0; key = wiphy_dereference(local->hw.wiphy, sta->ptk[key_idx]); if (key && key->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) ret = ieee80211_set_tx_key(key); return ret; } static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, struct key_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, false); struct ieee80211_local *local = sdata->local; struct sta_info *sta = NULL; struct ieee80211_key *key; int err; lockdep_assert_wiphy(local->hw.wiphy); if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; if (IS_ERR(link)) return PTR_ERR(link); if (pairwise && params->mode == NL80211_KEY_SET_TX) return ieee80211_set_tx(sdata, mac_addr, key_idx); /* reject WEP and TKIP keys if WEP failed to initialize */ switch (params->cipher) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_WEP104: if (link_id >= 0) return -EINVAL; if (WARN_ON_ONCE(fips_enabled)) return -EINVAL; break; default: break; } key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len, params->key, params->seq_len, params->seq); if (IS_ERR(key)) return PTR_ERR(key); key->conf.link_id = link_id; if (pairwise) key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; if (params->mode == NL80211_KEY_NO_TX) key->conf.flags |= IEEE80211_KEY_FLAG_NO_AUTO_TX; if (mac_addr) { sta = sta_info_get_bss(sdata, mac_addr); /* * The ASSOC test makes sure the driver is ready to * receive the key. When wpa_supplicant has roamed * using FT, it attempts to set the key before * association has completed, this rejects that attempt * so it will set the key again after association. * * TODO: accept the key if we have a station entry and * add it to the device after the station. 
*/ if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) { ieee80211_key_free_unused(key); return -ENOENT; } } switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: /* Keys without a station are used for TX only */ if (sta && test_sta_flag(sta, WLAN_STA_MFP)) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; case NL80211_IFTYPE_ADHOC: /* no MFP (yet) */ break; case NL80211_IFTYPE_MESH_POINT: #ifdef CONFIG_MAC80211_MESH if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; #endif case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_OCB: /* shouldn't happen */ WARN_ON_ONCE(1); break; } err = ieee80211_key_link(key, link, sta); /* KRACK protection, shouldn't happen but just silently accept key */ if (err == -EALREADY) err = 0; return err; } static struct ieee80211_key * ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr) { struct ieee80211_local *local __maybe_unused = sdata->local; struct ieee80211_link_data *link = &sdata->deflink; struct ieee80211_key *key; if (link_id >= 0) { link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return NULL; } if (mac_addr) { struct sta_info *sta; struct link_sta_info *link_sta; sta = sta_info_get_bss(sdata, mac_addr); if (!sta) return NULL; if (link_id >= 0) { link_sta = rcu_dereference_check(sta->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); if (!link_sta) return NULL; } else { link_sta = &sta->deflink; } if (pairwise && key_idx < NUM_DEFAULT_KEYS) return wiphy_dereference(local->hw.wiphy, sta->ptk[key_idx]); if (!pairwise && key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS) return wiphy_dereference(local->hw.wiphy, link_sta->gtk[key_idx]); return NULL; } if (pairwise && key_idx < NUM_DEFAULT_KEYS) return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]); key = wiphy_dereference(local->hw.wiphy, link->gtk[key_idx]); if (key) return key; /* or maybe it was a WEP key */ if (key_idx < NUM_DEFAULT_KEYS) return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]); return NULL; } static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; lockdep_assert_wiphy(local->hw.wiphy); key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr); if (!key) return -ENOENT; ieee80211_key_free(key, sdata->vif.type == NL80211_IFTYPE_STATION); return 0; } static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params *params)) { struct ieee80211_sub_if_data *sdata; u8 seq[6] = {0}; struct key_params params; struct ieee80211_key *key; u64 pn64; u32 iv32; u16 iv16; int err = -ENOENT; struct ieee80211_key_seq kseq = {}; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr); if (!key) goto out; memset(&params, 0, 
sizeof(params)); params.cipher = key->conf.cipher; switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_TKIP: pn64 = atomic64_read(&key->conf.tx_pn); iv32 = TKIP_PN_TO_IV32(pn64); iv16 = TKIP_PN_TO_IV16(pn64); if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { drv_get_key_seq(sdata->local, key, &kseq); iv32 = kseq.tkip.iv32; iv16 = kseq.tkip.iv16; } seq[0] = iv16 & 0xff; seq[1] = (iv16 >> 8) & 0xff; seq[2] = iv32 & 0xff; seq[3] = (iv32 >> 8) & 0xff; seq[4] = (iv32 >> 16) & 0xff; seq[5] = (iv32 >> 24) & 0xff; params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), aes_cmac)); fallthrough; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), aes_gmac)); fallthrough; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), gcmp)); if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { drv_get_key_seq(sdata->local, key, &kseq); memcpy(seq, kseq.ccmp.pn, 6); } else { pn64 = atomic64_read(&key->conf.tx_pn); seq[0] = pn64; seq[1] = pn64 >> 8; seq[2] = pn64 >> 16; seq[3] = pn64 >> 24; seq[4] = pn64 >> 32; seq[5] = pn64 >> 40; } params.seq = seq; params.seq_len = 6; break; default: if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) break; if (WARN_ON(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) break; drv_get_key_seq(sdata->local, key, &kseq); params.seq = kseq.hw.seq; params.seq_len = kseq.hw.seq_len; break; } callback(cookie, &params); err = 0; out: rcu_read_unlock(); return err; } static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool uni, bool multi) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, false); if (IS_ERR(link)) return PTR_ERR(link); ieee80211_set_default_key(link, key_idx, uni, multi); return 0; } static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, true); if (IS_ERR(link)) return PTR_ERR(link); ieee80211_set_default_mgmt_key(link, key_idx); return 0; } static int ieee80211_config_default_beacon_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, true); if (IS_ERR(link)) return PTR_ERR(link); ieee80211_set_default_beacon_key(link, key_idx); return 0; } void sta_set_rate_info_tx(struct sta_info *sta, const struct ieee80211_tx_rate *rate, struct rate_info *rinfo) { rinfo->flags = 0; if (rate->flags & IEEE80211_TX_RC_MCS) { rinfo->flags |= RATE_INFO_FLAGS_MCS; rinfo->mcs = rate->idx; } else if (rate->flags & IEEE80211_TX_RC_VHT_MCS) { rinfo->flags |= RATE_INFO_FLAGS_VHT_MCS; rinfo->mcs = ieee80211_rate_get_vht_mcs(rate); rinfo->nss = ieee80211_rate_get_vht_nss(rate); } else { struct ieee80211_supported_band *sband; sband = ieee80211_get_sband(sta->sdata); WARN_ON_ONCE(sband && !sband->bitrates); if (sband && 
sband->bitrates) rinfo->legacy = sband->bitrates[rate->idx].bitrate; } if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_40; else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_80; else if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_160; else rinfo->bw = RATE_INFO_BW_20; if (rate->flags & IEEE80211_TX_RC_SHORT_GI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; } static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_by_idx(sdata, idx); if (sta) { ret = 0; memcpy(mac, sta->sta.addr, ETH_ALEN); sta_set_sinfo(sta, sinfo, true); } return ret; } static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, int idx, struct survey_info *survey) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); return drv_get_survey(local, idx, survey); } static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mac); if (sta) { ret = 0; sta_set_sinfo(sta, sinfo, true); } return ret; } static int ieee80211_set_monitor_channel(struct wiphy *wiphy, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct ieee80211_chan_req chanreq = { .oper = *chandef }; int ret; lockdep_assert_wiphy(local->hw.wiphy); if (cfg80211_chandef_identical(&local->monitor_chanreq.oper, &chanreq.oper)) return 0; sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!sdata) goto done; if (cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper, &chanreq.oper)) return 0; ieee80211_link_release_channel(&sdata->deflink); ret = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_EXCLUSIVE); if (ret) return ret; done: local->monitor_chanreq = chanreq; return 0; } static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, const u8 *resp, size_t resp_len, const struct ieee80211_csa_settings *csa, const struct ieee80211_color_change_settings *cca, struct ieee80211_link_data *link) { struct probe_resp *new, *old; if (!resp || !resp_len) return 1; old = sdata_dereference(link->u.ap.probe_resp, sdata); new = kzalloc(sizeof(struct probe_resp) + resp_len, GFP_KERNEL); if (!new) return -ENOMEM; new->len = resp_len; memcpy(new->data, resp, resp_len); if (csa) memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_presp, csa->n_counter_offsets_presp * sizeof(new->cntdwn_counter_offsets[0])); else if (cca) new->cntdwn_counter_offsets[0] = cca->counter_offset_presp; rcu_assign_pointer(link->u.ap.probe_resp, new); if (old) kfree_rcu(old, rcu_head); return 0; } static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata, struct cfg80211_fils_discovery *params, struct ieee80211_link_data *link, struct ieee80211_bss_conf *link_conf, u64 *changed) { struct fils_discovery_data *new, *old = NULL; struct ieee80211_fils_discovery *fd; if (!params->update) return 0; fd = &link_conf->fils_discovery; fd->min_interval = params->min_interval; fd->max_interval = 
params->max_interval; old = sdata_dereference(link->u.ap.fils_discovery, sdata); if (old) kfree_rcu(old, rcu_head); if (params->tmpl && params->tmpl_len) { new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); if (!new) return -ENOMEM; new->len = params->tmpl_len; memcpy(new->data, params->tmpl, params->tmpl_len); rcu_assign_pointer(link->u.ap.fils_discovery, new); } else { RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL); } *changed |= BSS_CHANGED_FILS_DISCOVERY; return 0; } static int ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata, struct cfg80211_unsol_bcast_probe_resp *params, struct ieee80211_link_data *link, struct ieee80211_bss_conf *link_conf, u64 *changed) { struct unsol_bcast_probe_resp_data *new, *old = NULL; if (!params->update) return 0; link_conf->unsol_bcast_probe_resp_interval = params->interval; old = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata); if (old) kfree_rcu(old, rcu_head); if (params->tmpl && params->tmpl_len) { new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); if (!new) return -ENOMEM; new->len = params->tmpl_len; memcpy(new->data, params->tmpl, params->tmpl_len); rcu_assign_pointer(link->u.ap.unsol_bcast_probe_resp, new); } else { RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL); } *changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP; return 0; } static int ieee80211_set_ftm_responder_params( struct ieee80211_sub_if_data *sdata, const u8 *lci, size_t lci_len, const u8 *civicloc, size_t civicloc_len, struct ieee80211_bss_conf *link_conf) { struct ieee80211_ftm_responder_params *new, *old; u8 *pos; int len; if (!lci_len && !civicloc_len) return 0; old = link_conf->ftmr_params; len = lci_len + civicloc_len; new = kzalloc(sizeof(*new) + len, GFP_KERNEL); if (!new) return -ENOMEM; pos = (u8 *)(new + 1); if (lci_len) { new->lci_len = lci_len; new->lci = pos; memcpy(pos, lci, lci_len); pos += lci_len; } if (civicloc_len) { new->civicloc_len = civicloc_len; new->civicloc = pos; memcpy(pos, civicloc, civicloc_len); pos += civicloc_len; } link_conf->ftmr_params = new; kfree(old); return 0; } static int ieee80211_copy_mbssid_beacon(u8 *pos, struct cfg80211_mbssid_elems *dst, struct cfg80211_mbssid_elems *src) { int i, offset = 0; for (i = 0; i < src->cnt; i++) { memcpy(pos + offset, src->elem[i].data, src->elem[i].len); dst->elem[i].len = src->elem[i].len; dst->elem[i].data = pos + offset; offset += dst->elem[i].len; } dst->cnt = src->cnt; return offset; } static int ieee80211_copy_rnr_beacon(u8 *pos, struct cfg80211_rnr_elems *dst, struct cfg80211_rnr_elems *src) { int i, offset = 0; for (i = 0; i < src->cnt; i++) { memcpy(pos + offset, src->elem[i].data, src->elem[i].len); dst->elem[i].len = src->elem[i].len; dst->elem[i].data = pos + offset; offset += dst->elem[i].len; } dst->cnt = src->cnt; return offset; } static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, struct cfg80211_beacon_data *params, const struct ieee80211_csa_settings *csa, const struct ieee80211_color_change_settings *cca, u64 *changed) { struct cfg80211_mbssid_elems *mbssid = NULL; struct cfg80211_rnr_elems *rnr = NULL; struct beacon_data *new, *old; int new_head_len, new_tail_len; int size, err; u64 _changed = BSS_CHANGED_BEACON; struct ieee80211_bss_conf *link_conf = link->conf; old = sdata_dereference(link->u.ap.beacon, sdata); /* Need to have a beacon head if we don't have one yet */ if (!params->head && !old) return -EINVAL; /* new or old head? 
*/ if (params->head) new_head_len = params->head_len; else new_head_len = old->head_len; /* new or old tail? */ if (params->tail || !old) /* params->tail_len will be zero for !params->tail */ new_tail_len = params->tail_len; else new_tail_len = old->tail_len; size = sizeof(*new) + new_head_len + new_tail_len; /* new or old multiple BSSID elements? */ if (params->mbssid_ies) { mbssid = params->mbssid_ies; size += struct_size(new->mbssid_ies, elem, mbssid->cnt); if (params->rnr_ies) { rnr = params->rnr_ies; size += struct_size(new->rnr_ies, elem, rnr->cnt); } size += ieee80211_get_mbssid_beacon_len(mbssid, rnr, mbssid->cnt); } else if (old && old->mbssid_ies) { mbssid = old->mbssid_ies; size += struct_size(new->mbssid_ies, elem, mbssid->cnt); if (old && old->rnr_ies) { rnr = old->rnr_ies; size += struct_size(new->rnr_ies, elem, rnr->cnt); } size += ieee80211_get_mbssid_beacon_len(mbssid, rnr, mbssid->cnt); } new = kzalloc(size, GFP_KERNEL); if (!new) return -ENOMEM; /* start filling the new info now */ /* * pointers go into the block we allocated, * memory is | beacon_data | head | tail | mbssid_ies | rnr_ies */ new->head = ((u8 *) new) + sizeof(*new); new->tail = new->head + new_head_len; new->head_len = new_head_len; new->tail_len = new_tail_len; /* copy in optional mbssid_ies */ if (mbssid) { u8 *pos = new->tail + new->tail_len; new->mbssid_ies = (void *)pos; pos += struct_size(new->mbssid_ies, elem, mbssid->cnt); pos += ieee80211_copy_mbssid_beacon(pos, new->mbssid_ies, mbssid); if (rnr) { new->rnr_ies = (void *)pos; pos += struct_size(new->rnr_ies, elem, rnr->cnt); ieee80211_copy_rnr_beacon(pos, new->rnr_ies, rnr); } /* update bssid_indicator */ link_conf->bssid_indicator = ilog2(__roundup_pow_of_two(mbssid->cnt + 1)); } if (csa) { new->cntdwn_current_counter = csa->count; memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_beacon, csa->n_counter_offsets_beacon * sizeof(new->cntdwn_counter_offsets[0])); } else if (cca) { new->cntdwn_current_counter = cca->count; new->cntdwn_counter_offsets[0] = cca->counter_offset_beacon; } /* copy in head */ if (params->head) memcpy(new->head, params->head, new_head_len); else memcpy(new->head, old->head, new_head_len); /* copy in optional tail */ if (params->tail) memcpy(new->tail, params->tail, new_tail_len); else if (old) memcpy(new->tail, old->tail, new_tail_len); err = ieee80211_set_probe_resp(sdata, params->probe_resp, params->probe_resp_len, csa, cca, link); if (err < 0) { kfree(new); return err; } if (err == 0) _changed |= BSS_CHANGED_AP_PROBE_RESP; if (params->ftm_responder != -1) { link_conf->ftm_responder = params->ftm_responder; err = ieee80211_set_ftm_responder_params(sdata, params->lci, params->lci_len, params->civicloc, params->civicloc_len, link_conf); if (err < 0) { kfree(new); return err; } _changed |= BSS_CHANGED_FTM_RESPONDER; } rcu_assign_pointer(link->u.ap.beacon, new); sdata->u.ap.active = true; if (old) kfree_rcu(old, rcu_head); *changed |= _changed; return 0; } static u8 ieee80211_num_beaconing_links(struct ieee80211_sub_if_data *sdata) { struct ieee80211_link_data *link; u8 link_id, num = 0; if (sdata->vif.type != NL80211_IFTYPE_AP && sdata->vif.type != NL80211_IFTYPE_P2P_GO) return num; if (!sdata->vif.valid_links) return num; for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { link = sdata_dereference(sdata->link[link_id], sdata); if (!link) continue; if (sdata_dereference(link->u.ap.beacon, sdata)) num++; } return num; } static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, struct 
cfg80211_ap_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct beacon_data *old; struct ieee80211_sub_if_data *vlan; u64 changed = BSS_CHANGED_BEACON_INT | BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_BEACON | BSS_CHANGED_P2P_PS | BSS_CHANGED_TXPOWER | BSS_CHANGED_TWT; int i, err; int prev_beacon_int; unsigned int link_id = params->beacon.link_id; struct ieee80211_link_data *link; struct ieee80211_bss_conf *link_conf; struct ieee80211_chan_req chanreq = { .oper = params->chandef }; lockdep_assert_wiphy(local->hw.wiphy); link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return -ENOLINK; link_conf = link->conf; old = sdata_dereference(link->u.ap.beacon, sdata); if (old) return -EALREADY; if (params->smps_mode != NL80211_SMPS_OFF) return -EOPNOTSUPP; link->smps_mode = IEEE80211_SMPS_OFF; link->needed_rx_chains = sdata->local->rx_chains; prev_beacon_int = link_conf->beacon_int; link_conf->beacon_int = params->beacon_interval; if (params->ht_cap) link_conf->ht_ldpc = params->ht_cap->cap_info & cpu_to_le16(IEEE80211_HT_CAP_LDPC_CODING); if (params->vht_cap) { link_conf->vht_ldpc = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC); link_conf->vht_su_beamformer = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE); link_conf->vht_su_beamformee = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE); link_conf->vht_mu_beamformer = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE); link_conf->vht_mu_beamformee = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE); } if (params->he_cap && params->he_oper) { link_conf->he_support = true; link_conf->htc_trig_based_pkt_ext = le32_get_bits(params->he_oper->he_oper_params, IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK); link_conf->frame_time_rts_th = le32_get_bits(params->he_oper->he_oper_params, IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK); changed |= BSS_CHANGED_HE_OBSS_PD; if (params->beacon.he_bss_color.enabled) changed |= BSS_CHANGED_HE_BSS_COLOR; } if (params->he_cap) { link_conf->he_ldpc = params->he_cap->phy_cap_info[1] & IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD; link_conf->he_su_beamformer = params->he_cap->phy_cap_info[3] & IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER; link_conf->he_su_beamformee = params->he_cap->phy_cap_info[4] & IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE; link_conf->he_mu_beamformer = params->he_cap->phy_cap_info[4] & IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER; link_conf->he_full_ul_mumimo = params->he_cap->phy_cap_info[2] & IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO; } if (params->eht_cap) { if (!link_conf->he_support) return -EOPNOTSUPP; link_conf->eht_support = true; link_conf->eht_su_beamformer = params->eht_cap->fixed.phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER; link_conf->eht_su_beamformee = params->eht_cap->fixed.phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE; link_conf->eht_mu_beamformer = params->eht_cap->fixed.phy_cap_info[7] & (IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ); link_conf->eht_80mhz_full_bw_ul_mumimo = params->eht_cap->fixed.phy_cap_info[7] & (IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_80MHZ | IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_160MHZ | IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_320MHZ); } else { link_conf->eht_su_beamformer = false; 
link_conf->eht_su_beamformee = false; link_conf->eht_mu_beamformer = false; } if (sdata->vif.type == NL80211_IFTYPE_AP && params->mbssid_config.tx_wdev) { err = ieee80211_set_ap_mbssid_options(sdata, params->mbssid_config, link_conf); if (err) return err; } err = ieee80211_link_use_channel(link, &chanreq, IEEE80211_CHANCTX_SHARED); if (!err) ieee80211_link_copy_chanctx_to_vlans(link, false); if (err) { link_conf->beacon_int = prev_beacon_int; return err; } /* * Apply control port protocol, this allows us to * not encrypt dynamic WEP control frames. */ sdata->control_port_protocol = params->crypto.control_port_ethertype; sdata->control_port_no_encrypt = params->crypto.control_port_no_encrypt; sdata->control_port_over_nl80211 = params->crypto.control_port_over_nl80211; sdata->control_port_no_preauth = params->crypto.control_port_no_preauth; list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { vlan->control_port_protocol = params->crypto.control_port_ethertype; vlan->control_port_no_encrypt = params->crypto.control_port_no_encrypt; vlan->control_port_over_nl80211 = params->crypto.control_port_over_nl80211; vlan->control_port_no_preauth = params->crypto.control_port_no_preauth; } link_conf->dtim_period = params->dtim_period; link_conf->enable_beacon = true; link_conf->allow_p2p_go_ps = sdata->vif.p2p; link_conf->twt_responder = params->twt_responder; link_conf->he_obss_pd = params->he_obss_pd; link_conf->he_bss_color = params->beacon.he_bss_color; sdata->vif.cfg.s1g = params->chandef.chan->band == NL80211_BAND_S1GHZ; sdata->vif.cfg.ssid_len = params->ssid_len; if (params->ssid_len) memcpy(sdata->vif.cfg.ssid, params->ssid, params->ssid_len); link_conf->hidden_ssid = (params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE); memset(&link_conf->p2p_noa_attr, 0, sizeof(link_conf->p2p_noa_attr)); link_conf->p2p_noa_attr.oppps_ctwindow = params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; if (params->p2p_opp_ps) link_conf->p2p_noa_attr.oppps_ctwindow |= IEEE80211_P2P_OPPPS_ENABLE_BIT; sdata->beacon_rate_set = false; if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) { for (i = 0; i < NUM_NL80211_BANDS; i++) { sdata->beacon_rateidx_mask[i] = params->beacon_rate.control[i].legacy; if (sdata->beacon_rateidx_mask[i]) sdata->beacon_rate_set = true; } } if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) link_conf->beacon_tx_rate = params->beacon_rate; err = ieee80211_assign_beacon(sdata, link, &params->beacon, NULL, NULL, &changed); if (err < 0) goto error; err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery, link, link_conf, &changed); if (err < 0) goto error; err = ieee80211_set_unsol_bcast_probe_resp(sdata, &params->unsol_bcast_probe_resp, link, link_conf, &changed); if (err < 0) goto error; err = drv_start_ap(sdata->local, sdata, link_conf); if (err) { old = sdata_dereference(link->u.ap.beacon, sdata); if (old) kfree_rcu(old, rcu_head); RCU_INIT_POINTER(link->u.ap.beacon, NULL); if (ieee80211_num_beaconing_links(sdata) == 0) sdata->u.ap.active = false; goto error; } ieee80211_recalc_dtim(local, sdata); ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_SSID); ieee80211_link_info_change_notify(sdata, link, changed); if (ieee80211_num_beaconing_links(sdata) <= 1) netif_carrier_on(dev); list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) netif_carrier_on(vlan->dev); return 0; error: ieee80211_link_release_channel(link); return err; } static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, struct 
cfg80211_ap_update *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; struct cfg80211_beacon_data *beacon = &params->beacon; struct beacon_data *old; int err; struct ieee80211_bss_conf *link_conf; u64 changed = 0; lockdep_assert_wiphy(wiphy); link = sdata_dereference(sdata->link[beacon->link_id], sdata); if (!link) return -ENOLINK; link_conf = link->conf; /* don't allow changing the beacon while a countdown is in place - offset * of channel switch counter may change */ if (link_conf->csa_active || link_conf->color_change_active) return -EBUSY; old = sdata_dereference(link->u.ap.beacon, sdata); if (!old) return -ENOENT; err = ieee80211_assign_beacon(sdata, link, beacon, NULL, NULL, &changed); if (err < 0) return err; err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery, link, link_conf, &changed); if (err < 0) return err; err = ieee80211_set_unsol_bcast_probe_resp(sdata, &params->unsol_bcast_probe_resp, link, link_conf, &changed); if (err < 0) return err; if (beacon->he_bss_color_valid && beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) { link_conf->he_bss_color.enabled = beacon->he_bss_color.enabled; changed |= BSS_CHANGED_HE_BSS_COLOR; } ieee80211_link_info_change_notify(sdata, link, changed); return 0; } static void ieee80211_free_next_beacon(struct ieee80211_link_data *link) { if (!link->u.ap.next_beacon) return; kfree(link->u.ap.next_beacon->mbssid_ies); kfree(link->u.ap.next_beacon->rnr_ies); kfree(link->u.ap.next_beacon); link->u.ap.next_beacon = NULL; } static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_sub_if_data *vlan; struct ieee80211_local *local = sdata->local; struct beacon_data *old_beacon; struct probe_resp *old_probe_resp; struct fils_discovery_data *old_fils_discovery; struct unsol_bcast_probe_resp_data *old_unsol_bcast_probe_resp; struct cfg80211_chan_def chandef; struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); struct ieee80211_bss_conf *link_conf = link->conf; LIST_HEAD(keys); lockdep_assert_wiphy(local->hw.wiphy); old_beacon = sdata_dereference(link->u.ap.beacon, sdata); if (!old_beacon) return -ENOENT; old_probe_resp = sdata_dereference(link->u.ap.probe_resp, sdata); old_fils_discovery = sdata_dereference(link->u.ap.fils_discovery, sdata); old_unsol_bcast_probe_resp = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata); /* abort any running channel switch or color change */ link_conf->csa_active = false; link_conf->color_change_active = false; ieee80211_vif_unblock_queues_csa(sdata); ieee80211_free_next_beacon(link); /* turn off carrier for this interface and dependent VLANs */ list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) netif_carrier_off(vlan->dev); if (ieee80211_num_beaconing_links(sdata) <= 1) { netif_carrier_off(dev); sdata->u.ap.active = false; } /* remove beacon and probe response */ RCU_INIT_POINTER(link->u.ap.beacon, NULL); RCU_INIT_POINTER(link->u.ap.probe_resp, NULL); RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL); RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL); kfree_rcu(old_beacon, rcu_head); if (old_probe_resp) kfree_rcu(old_probe_resp, rcu_head); if (old_fils_discovery) kfree_rcu(old_fils_discovery, rcu_head); if (old_unsol_bcast_probe_resp) kfree_rcu(old_unsol_bcast_probe_resp, rcu_head); kfree(link_conf->ftmr_params); link_conf->ftmr_params = NULL; 
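/*
 * Editorial note: the beacon, probe response, FILS discovery and
 * unsolicited broadcast probe response templates above are retired with
 * the usual RCU pattern -- the __rcu pointer is cleared first
 * (RCU_INIT_POINTER) and the old object is only freed after a grace
 * period (kfree_rcu), so concurrent readers under rcu_read_lock() see
 * either the old template or NULL, never freed memory.
 */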
sdata->vif.mbssid_tx_vif = NULL; link_conf->bssid_index = 0; link_conf->nontransmitted = false; link_conf->ema_ap = false; link_conf->bssid_indicator = 0; __sta_info_flush(sdata, true, link_id); ieee80211_remove_link_keys(link, &keys); if (!list_empty(&keys)) { synchronize_net(); ieee80211_free_key_list(local, &keys); } link_conf->enable_beacon = false; sdata->beacon_rate_set = false; sdata->vif.cfg.ssid_len = 0; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_BEACON_ENABLED); if (sdata->wdev.links[link_id].cac_started) { chandef = link_conf->chanreq.oper; wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_ABORTED, GFP_KERNEL, link_id); } drv_stop_ap(sdata->local, sdata, link_conf); /* free all potentially still buffered bcast frames */ local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf); ieee80211_link_copy_chanctx_to_vlans(link, true); ieee80211_link_release_channel(link); return 0; } static int sta_apply_auth_flags(struct ieee80211_local *local, struct sta_info *sta, u32 mask, u32 set) { int ret; if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && !test_sta_flag(sta, WLAN_STA_AUTH)) { ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && set & BIT(NL80211_STA_FLAG_ASSOCIATED) && !test_sta_flag(sta, WLAN_STA_ASSOC)) { /* * When peer becomes associated, init rate control as * well. Some drivers require rate control initialized * before drv_sta_state() is called. */ if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) rate_control_rate_init(sta); ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); else ret = 0; if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && !(set & BIT(NL80211_STA_FLAG_ASSOCIATED)) && test_sta_flag(sta, WLAN_STA_ASSOC)) { ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && !(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && test_sta_flag(sta, WLAN_STA_AUTH)) { ret = sta_info_move_state(sta, IEEE80211_STA_NONE); if (ret) return ret; } return 0; } static void sta_apply_mesh_params(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) { #ifdef CONFIG_MAC80211_MESH struct ieee80211_sub_if_data *sdata = sta->sdata; u64 changed = 0; if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) { switch (params->plink_state) { case NL80211_PLINK_ESTAB: if (sta->mesh->plink_state != NL80211_PLINK_ESTAB) changed = mesh_plink_inc_estab_count(sdata); sta->mesh->plink_state = params->plink_state; sta->mesh->aid = params->peer_aid; ieee80211_mps_sta_status_update(sta); changed |= ieee80211_mps_set_sta_local_pm(sta, sdata->u.mesh.mshcfg.power_mode); ewma_mesh_tx_rate_avg_init(&sta->mesh->tx_rate_avg); /* init at low value */ ewma_mesh_tx_rate_avg_add(&sta->mesh->tx_rate_avg, 10); break; case NL80211_PLINK_LISTEN: case NL80211_PLINK_BLOCKED: case NL80211_PLINK_OPN_SNT: case NL80211_PLINK_OPN_RCVD: case NL80211_PLINK_CNF_RCVD: case NL80211_PLINK_HOLDING: if 
(sta->mesh->plink_state == NL80211_PLINK_ESTAB) changed = mesh_plink_dec_estab_count(sdata); sta->mesh->plink_state = params->plink_state; ieee80211_mps_sta_status_update(sta); changed |= ieee80211_mps_set_sta_local_pm(sta, NL80211_MESH_POWER_UNKNOWN); break; default: /* nothing */ break; } } switch (params->plink_action) { case NL80211_PLINK_ACTION_NO_ACTION: /* nothing */ break; case NL80211_PLINK_ACTION_OPEN: changed |= mesh_plink_open(sta); break; case NL80211_PLINK_ACTION_BLOCK: changed |= mesh_plink_block(sta); break; } if (params->local_pm) changed |= ieee80211_mps_set_sta_local_pm(sta, params->local_pm); ieee80211_mbss_info_change_notify(sdata, changed); #endif } enum sta_link_apply_mode { STA_LINK_MODE_NEW, STA_LINK_MODE_STA_MODIFY, STA_LINK_MODE_LINK_MODIFY, }; static int sta_link_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, enum sta_link_apply_mode mode, struct link_station_parameters *params) { struct ieee80211_supported_band *sband; struct ieee80211_sub_if_data *sdata = sta->sdata; u32 link_id = params->link_id < 0 ? 0 : params->link_id; struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); struct link_sta_info *link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); bool changes = params->link_mac || params->txpwr_set || params->supported_rates_len || params->ht_capa || params->vht_capa || params->he_capa || params->eht_capa || params->opmode_notif_used; switch (mode) { case STA_LINK_MODE_NEW: if (!params->link_mac) return -EINVAL; break; case STA_LINK_MODE_LINK_MODIFY: break; case STA_LINK_MODE_STA_MODIFY: if (params->link_id >= 0) break; if (!changes) return 0; break; } if (!link || !link_sta) return -EINVAL; sband = ieee80211_get_link_sband(link); if (!sband) return -EINVAL; if (params->link_mac) { if (mode == STA_LINK_MODE_NEW) { memcpy(link_sta->addr, params->link_mac, ETH_ALEN); memcpy(link_sta->pub->addr, params->link_mac, ETH_ALEN); } else if (!ether_addr_equal(link_sta->addr, params->link_mac)) { return -EINVAL; } } if (params->txpwr_set) { int ret; link_sta->pub->txpwr.type = params->txpwr.type; if (params->txpwr.type == NL80211_TX_POWER_LIMITED) link_sta->pub->txpwr.power = params->txpwr.power; ret = drv_sta_set_txpwr(local, sdata, sta); if (ret) return ret; } if (params->supported_rates && params->supported_rates_len) { ieee80211_parse_bitrates(link->conf->chanreq.oper.width, sband, params->supported_rates, params->supported_rates_len, &link_sta->pub->supp_rates[sband->band]); } if (params->ht_capa) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, params->ht_capa, link_sta); /* VHT can override some HT caps such as the A-MSDU max length */ if (params->vht_capa) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, params->vht_capa, NULL, link_sta); if (params->he_capa) ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, (void *)params->he_capa, params->he_capa_len, (void *)params->he_6ghz_capa, link_sta); if (params->he_capa && params->eht_capa) ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband, (u8 *)params->he_capa, params->he_capa_len, params->eht_capa, params->eht_capa_len, link_sta); if (params->opmode_notif_used) { /* returned value is only needed for rc update, but the * rc isn't initialized here yet, so ignore it */ __ieee80211_vht_handle_opmode(sdata, link_sta, params->opmode_notif, sband->band); } ieee80211_sta_init_nss(link_sta); return 0; } static int sta_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) { struct 
ieee80211_sub_if_data *sdata = sta->sdata; u32 mask, set; int ret = 0; mask = params->sta_flags_mask; set = params->sta_flags_set; if (ieee80211_vif_is_mesh(&sdata->vif)) { /* * In mesh mode, ASSOCIATED isn't part of the nl80211 * API but must follow AUTHENTICATED for driver state. */ if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) set |= BIT(NL80211_STA_FLAG_ASSOCIATED); } else if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { /* * TDLS -- everything follows authorized, but * only becoming authorized is possible, not * going back */ if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) { set |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED); mask |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED); } } if (mask & BIT(NL80211_STA_FLAG_WME) && local->hw.queues >= IEEE80211_NUM_ACS) sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME); /* auth flags will be set later for TDLS, * and for unassociated stations that move to associated */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !((mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) && (set & BIT(NL80211_STA_FLAG_ASSOCIATED)))) { ret = sta_apply_auth_flags(local, sta, mask, set); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) set_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE); else clear_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE); } if (mask & BIT(NL80211_STA_FLAG_MFP)) { sta->sta.mfp = !!(set & BIT(NL80211_STA_FLAG_MFP)); if (set & BIT(NL80211_STA_FLAG_MFP)) set_sta_flag(sta, WLAN_STA_MFP); else clear_sta_flag(sta, WLAN_STA_MFP); } if (mask & BIT(NL80211_STA_FLAG_TDLS_PEER)) { if (set & BIT(NL80211_STA_FLAG_TDLS_PEER)) set_sta_flag(sta, WLAN_STA_TDLS_PEER); else clear_sta_flag(sta, WLAN_STA_TDLS_PEER); } if (mask & BIT(NL80211_STA_FLAG_SPP_AMSDU)) sta->sta.spp_amsdu = set & BIT(NL80211_STA_FLAG_SPP_AMSDU); /* mark TDLS channel switch support, if the AP allows it */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !sdata->deflink.u.mgd.tdls_chan_switch_prohibited && params->ext_capab_len >= 4 && params->ext_capab[3] & WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH) set_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !sdata->u.mgd.tdls_wider_bw_prohibited && ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) && params->ext_capab_len >= 8 && params->ext_capab[7] & WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED) set_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW); if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) { sta->sta.uapsd_queues = params->uapsd_queues; sta->sta.max_sp = params->max_sp; } ieee80211_sta_set_max_amsdu_subframes(sta, params->ext_capab, params->ext_capab_len); /* * cfg80211 validates this (1-2007) and allows setting the AID * only when creating a new station entry */ if (params->aid) sta->sta.aid = params->aid; /* * Some of the following updates would be racy if called on an * existing station, via ieee80211_change_station(). However, * all such changes are rejected by cfg80211 except for updates * changing the supported rates on an existing but not yet used * TDLS peer. 
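 * (Editorial note: that validation happens on the cfg80211 side; see the
 * cfg80211_check_station_change() call in ieee80211_change_station()
 * further below.)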
*/ if (params->listen_interval >= 0) sta->listen_interval = params->listen_interval; ret = sta_link_apply_parameters(local, sta, STA_LINK_MODE_STA_MODIFY, &params->link_sta_params); if (ret) return ret; if (params->support_p2p_ps >= 0) sta->sta.support_p2p_ps = params->support_p2p_ps; if (ieee80211_vif_is_mesh(&sdata->vif)) sta_apply_mesh_params(local, sta, params); if (params->airtime_weight) sta->airtime_weight = params->airtime_weight; /* set the STA state after all sta info from usermode has been set */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) || set & BIT(NL80211_STA_FLAG_ASSOCIATED)) { ret = sta_apply_auth_flags(local, sta, mask, set); if (ret) return ret; } /* Mark the STA as MLO if MLD MAC address is available */ if (params->link_sta_params.mld_mac) sta->sta.mlo = true; return 0; } static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_parameters *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *sdata; int err; lockdep_assert_wiphy(local->hw.wiphy); if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; } else sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (ether_addr_equal(mac, sdata->vif.addr)) return -EINVAL; if (!is_valid_ether_addr(mac)) return -EINVAL; if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER) && sdata->vif.type == NL80211_IFTYPE_STATION && !sdata->u.mgd.associated) return -EINVAL; /* * If we have a link ID, it can be a non-MLO station on an AP MLD, * but we need to have a link_mac in that case as well, so use the * STA's MAC address in that case. */ if (params->link_sta_params.link_id >= 0) sta = sta_info_alloc_with_link(sdata, mac, params->link_sta_params.link_id, params->link_sta_params.link_mac ?: mac, GFP_KERNEL); else sta = sta_info_alloc(sdata, mac, GFP_KERNEL); if (!sta) return -ENOMEM; if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) sta->sta.tdls = true; /* Though the mutex is not needed here (since the station is not * visible yet), sta_apply_parameters (and inner functions) require * the mutex due to other paths. 
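 * (Editorial note: the mutex in question is the wiphy mutex -- this
 * handler asserts it via lockdep_assert_wiphy() above, and
 * sta_link_apply_parameters() dereferences sta->link[] with
 * lockdep_is_held(&local->hw.wiphy->mtx).)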
*/ err = sta_apply_parameters(local, sta, params); if (err) { sta_info_free(local, sta); return err; } /* * for TDLS and for unassociated station, rate control should be * initialized only when rates are known and station is marked * authorized/associated */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && test_sta_flag(sta, WLAN_STA_ASSOC)) rate_control_rate_init(sta); return sta_info_insert(sta); } static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, struct station_del_parameters *params) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (params->mac) return sta_info_destroy_addr_bss(sdata, params->mac); sta_info_flush(sdata, params->link_id); return 0; } static int ieee80211_change_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; enum cfg80211_station_type statype; int err; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mac); if (!sta) return -ENOENT; switch (sdata->vif.type) { case NL80211_IFTYPE_MESH_POINT: if (sdata->u.mesh.user_mpm) statype = CFG80211_STA_MESH_PEER_USER; else statype = CFG80211_STA_MESH_PEER_KERNEL; break; case NL80211_IFTYPE_ADHOC: statype = CFG80211_STA_IBSS; break; case NL80211_IFTYPE_STATION: if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { statype = CFG80211_STA_AP_STA; break; } if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) statype = CFG80211_STA_TDLS_PEER_ACTIVE; else statype = CFG80211_STA_TDLS_PEER_SETUP; break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: if (test_sta_flag(sta, WLAN_STA_ASSOC)) statype = CFG80211_STA_AP_CLIENT; else statype = CFG80211_STA_AP_CLIENT_UNASSOC; break; default: return -EOPNOTSUPP; } err = cfg80211_check_station_change(wiphy, params, statype); if (err) return err; if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (params->vlan->ieee80211_ptr->use_4addr) { if (vlansdata->u.vlan.sta) return -EBUSY; rcu_assign_pointer(vlansdata->u.vlan.sta, sta); __ieee80211_check_fast_rx_iface(vlansdata); drv_sta_set_4addr(local, sta->sdata, &sta->sta, true); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sta->sdata->u.vlan.sta) RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ieee80211_vif_dec_num_mcast(sta->sdata); sta->sdata = vlansdata; ieee80211_check_fast_rx(sta); ieee80211_check_fast_xmit(sta); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { ieee80211_vif_inc_num_mcast(sta->sdata); cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); } } err = sta_apply_parameters(local, sta, params); if (err) return err; if (sdata->vif.type == NL80211_IFTYPE_STATION && params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); } return 0; } #ifdef CONFIG_MAC80211_MESH static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); sta = sta_info_get(sdata, next_hop); if (!sta) { rcu_read_unlock(); return -ENOENT; } mpath = mesh_path_add(sdata, dst); if (IS_ERR(mpath)) { rcu_read_unlock(); return PTR_ERR(mpath); } mesh_path_fix_nexthop(mpath, sta); rcu_read_unlock(); return 0; } 
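/*
 * Editorial sketch (not part of mac80211): a minimal, self-contained
 * illustration of the RCU read-side pattern the mesh path handlers around
 * here rely on.  The helper name is hypothetical and it is not called
 * anywhere; it only assumes the mesh_path_lookup()/next_hop conventions
 * already visible in ieee80211_change_mpath() and mpath_set_pinfo().
 */
static int example_copy_mpath_next_hop(struct ieee80211_sub_if_data *sdata,
					const u8 *dst, u8 *next_hop)
{
	struct mesh_path *mpath;
	struct sta_info *next_hop_sta;
	int ret = -ENOENT;

	rcu_read_lock();
	/* the looked-up path is only valid while the RCU read lock is held */
	mpath = mesh_path_lookup(sdata, dst);
	if (mpath) {
		next_hop_sta = rcu_dereference(mpath->next_hop);
		if (next_hop_sta) {
			/* copy the data out before dropping the lock */
			memcpy(next_hop, next_hop_sta->sta.addr, ETH_ALEN);
			ret = 0;
		}
	}
	rcu_read_unlock();
	return ret;
}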
static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, const u8 *dst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (dst) return mesh_path_del(sdata, dst); mesh_path_flush_by_iface(sdata); return 0; } static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev, const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); sta = sta_info_get(sdata, next_hop); if (!sta) { rcu_read_unlock(); return -ENOENT; } mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; } mesh_path_fix_nexthop(mpath, sta); rcu_read_unlock(); return 0; } static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, struct mpath_info *pinfo) { struct sta_info *next_hop_sta = rcu_dereference(mpath->next_hop); if (next_hop_sta) memcpy(next_hop, next_hop_sta->sta.addr, ETH_ALEN); else eth_zero_addr(next_hop); memset(pinfo, 0, sizeof(*pinfo)); pinfo->generation = mpath->sdata->u.mesh.mesh_paths_generation; pinfo->filled = MPATH_INFO_FRAME_QLEN | MPATH_INFO_SN | MPATH_INFO_METRIC | MPATH_INFO_EXPTIME | MPATH_INFO_DISCOVERY_TIMEOUT | MPATH_INFO_DISCOVERY_RETRIES | MPATH_INFO_FLAGS | MPATH_INFO_HOP_COUNT | MPATH_INFO_PATH_CHANGE; pinfo->frame_qlen = mpath->frame_queue.qlen; pinfo->sn = mpath->sn; pinfo->metric = mpath->metric; if (time_before(jiffies, mpath->exp_time)) pinfo->exptime = jiffies_to_msecs(mpath->exp_time - jiffies); pinfo->discovery_timeout = jiffies_to_msecs(mpath->discovery_timeout); pinfo->discovery_retries = mpath->discovery_retries; if (mpath->flags & MESH_PATH_ACTIVE) pinfo->flags |= NL80211_MPATH_FLAG_ACTIVE; if (mpath->flags & MESH_PATH_RESOLVING) pinfo->flags |= NL80211_MPATH_FLAG_RESOLVING; if (mpath->flags & MESH_PATH_SN_VALID) pinfo->flags |= NL80211_MPATH_FLAG_SN_VALID; if (mpath->flags & MESH_PATH_FIXED) pinfo->flags |= NL80211_MPATH_FLAG_FIXED; if (mpath->flags & MESH_PATH_RESOLVED) pinfo->flags |= NL80211_MPATH_FLAG_RESOLVED; pinfo->hop_count = mpath->hop_count; pinfo->path_change_count = mpath->path_change_count; } static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpath_set_pinfo(mpath, next_hop, pinfo); rcu_read_unlock(); return 0; } static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mesh_path_lookup_by_idx(sdata, idx); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpath_set_pinfo(mpath, next_hop, pinfo); rcu_read_unlock(); return 0; } static void mpp_set_pinfo(struct mesh_path *mpath, u8 *mpp, struct mpath_info *pinfo) { memset(pinfo, 0, sizeof(*pinfo)); memcpy(mpp, mpath->mpp, ETH_ALEN); pinfo->generation = mpath->sdata->u.mesh.mpp_paths_generation; } static int ieee80211_get_mpp(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *mpp, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mpp_path_lookup(sdata, dst); 
if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpp_set_pinfo(mpath, mpp, pinfo); rcu_read_unlock(); return 0; } static int ieee80211_dump_mpp(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *mpp, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mpp_path_lookup_by_idx(sdata, idx); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpp_set_pinfo(mpath, mpp, pinfo); rcu_read_unlock(); return 0; } static int ieee80211_get_mesh_config(struct wiphy *wiphy, struct net_device *dev, struct mesh_config *conf) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); memcpy(conf, &(sdata->u.mesh.mshcfg), sizeof(struct mesh_config)); return 0; } static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask) { return (mask >> (parm-1)) & 0x1; } static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, const struct mesh_setup *setup) { u8 *new_ie; struct ieee80211_sub_if_data *sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); int i; /* allocate information elements */ new_ie = NULL; if (setup->ie_len) { new_ie = kmemdup(setup->ie, setup->ie_len, GFP_KERNEL); if (!new_ie) return -ENOMEM; } ifmsh->ie_len = setup->ie_len; ifmsh->ie = new_ie; /* now copy the rest of the setup parameters */ ifmsh->mesh_id_len = setup->mesh_id_len; memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len); ifmsh->mesh_sp_id = setup->sync_method; ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; ifmsh->user_mpm = setup->user_mpm; ifmsh->mesh_auth_id = setup->auth_id; ifmsh->security = IEEE80211_MESH_SEC_NONE; ifmsh->userspace_handles_dfs = setup->userspace_handles_dfs; if (setup->is_authenticated) ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; if (setup->is_secure) ifmsh->security |= IEEE80211_MESH_SEC_SECURED; /* mcast rate setting in Mesh Node */ memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate, sizeof(setup->mcast_rate)); sdata->vif.bss_conf.basic_rates = setup->basic_rates; sdata->vif.bss_conf.beacon_int = setup->beacon_interval; sdata->vif.bss_conf.dtim_period = setup->dtim_period; sdata->beacon_rate_set = false; if (wiphy_ext_feature_isset(sdata->local->hw.wiphy, NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) { for (i = 0; i < NUM_NL80211_BANDS; i++) { sdata->beacon_rateidx_mask[i] = setup->beacon_rate.control[i].legacy; if (sdata->beacon_rateidx_mask[i]) sdata->beacon_rate_set = true; } } return 0; } static int ieee80211_update_mesh_config(struct wiphy *wiphy, struct net_device *dev, u32 mask, const struct mesh_config *nconf) { struct mesh_config *conf; struct ieee80211_sub_if_data *sdata; struct ieee80211_if_mesh *ifmsh; sdata = IEEE80211_DEV_TO_SUB_IF(dev); ifmsh = &sdata->u.mesh; /* Set the config options which we are interested in setting */ conf = &(sdata->u.mesh.mshcfg); if (_chg_mesh_attr(NL80211_MESHCONF_RETRY_TIMEOUT, mask)) conf->dot11MeshRetryTimeout = nconf->dot11MeshRetryTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_CONFIRM_TIMEOUT, mask)) conf->dot11MeshConfirmTimeout = nconf->dot11MeshConfirmTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_HOLDING_TIMEOUT, mask)) conf->dot11MeshHoldingTimeout = nconf->dot11MeshHoldingTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_MAX_PEER_LINKS, mask)) conf->dot11MeshMaxPeerLinks = nconf->dot11MeshMaxPeerLinks; if (_chg_mesh_attr(NL80211_MESHCONF_MAX_RETRIES, mask)) conf->dot11MeshMaxRetries = 
nconf->dot11MeshMaxRetries; if (_chg_mesh_attr(NL80211_MESHCONF_TTL, mask)) conf->dot11MeshTTL = nconf->dot11MeshTTL; if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask)) conf->element_ttl = nconf->element_ttl; if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) { if (ifmsh->user_mpm) return -EBUSY; conf->auto_open_plinks = nconf->auto_open_plinks; } if (_chg_mesh_attr(NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, mask)) conf->dot11MeshNbrOffsetMaxNeighbor = nconf->dot11MeshNbrOffsetMaxNeighbor; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, mask)) conf->dot11MeshHWMPmaxPREQretries = nconf->dot11MeshHWMPmaxPREQretries; if (_chg_mesh_attr(NL80211_MESHCONF_PATH_REFRESH_TIME, mask)) conf->path_refresh_time = nconf->path_refresh_time; if (_chg_mesh_attr(NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, mask)) conf->min_discovery_timeout = nconf->min_discovery_timeout; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, mask)) conf->dot11MeshHWMPactivePathTimeout = nconf->dot11MeshHWMPactivePathTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, mask)) conf->dot11MeshHWMPpreqMinInterval = nconf->dot11MeshHWMPpreqMinInterval; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, mask)) conf->dot11MeshHWMPperrMinInterval = nconf->dot11MeshHWMPperrMinInterval; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, mask)) conf->dot11MeshHWMPnetDiameterTraversalTime = nconf->dot11MeshHWMPnetDiameterTraversalTime; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOTMODE, mask)) { conf->dot11MeshHWMPRootMode = nconf->dot11MeshHWMPRootMode; ieee80211_mesh_root_setup(ifmsh); } if (_chg_mesh_attr(NL80211_MESHCONF_GATE_ANNOUNCEMENTS, mask)) { /* our current gate announcement implementation rides on root * announcements, so require this ifmsh to also be a root node * */ if (nconf->dot11MeshGateAnnouncementProtocol && !(conf->dot11MeshHWMPRootMode > IEEE80211_ROOTMODE_ROOT)) { conf->dot11MeshHWMPRootMode = IEEE80211_PROACTIVE_RANN; ieee80211_mesh_root_setup(ifmsh); } conf->dot11MeshGateAnnouncementProtocol = nconf->dot11MeshGateAnnouncementProtocol; } if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_RANN_INTERVAL, mask)) conf->dot11MeshHWMPRannInterval = nconf->dot11MeshHWMPRannInterval; if (_chg_mesh_attr(NL80211_MESHCONF_FORWARDING, mask)) conf->dot11MeshForwarding = nconf->dot11MeshForwarding; if (_chg_mesh_attr(NL80211_MESHCONF_RSSI_THRESHOLD, mask)) { /* our RSSI threshold implementation is supported only for * devices that report signal in dBm. 
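 * (Editorial note: this is what the ieee80211_hw_check(..., SIGNAL_DBM)
 * test directly below enforces -- hardware reporting signal in other
 * units gets -EOPNOTSUPP.)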
*/ if (!ieee80211_hw_check(&sdata->local->hw, SIGNAL_DBM)) return -EOPNOTSUPP; conf->rssi_threshold = nconf->rssi_threshold; } if (_chg_mesh_attr(NL80211_MESHCONF_HT_OPMODE, mask)) { conf->ht_opmode = nconf->ht_opmode; sdata->vif.bss_conf.ht_operation_mode = nconf->ht_opmode; ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_HT); } if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, mask)) conf->dot11MeshHWMPactivePathToRootTimeout = nconf->dot11MeshHWMPactivePathToRootTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOT_INTERVAL, mask)) conf->dot11MeshHWMProotInterval = nconf->dot11MeshHWMProotInterval; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, mask)) conf->dot11MeshHWMPconfirmationInterval = nconf->dot11MeshHWMPconfirmationInterval; if (_chg_mesh_attr(NL80211_MESHCONF_POWER_MODE, mask)) { conf->power_mode = nconf->power_mode; ieee80211_mps_local_status_update(sdata); } if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) conf->dot11MeshAwakeWindowDuration = nconf->dot11MeshAwakeWindowDuration; if (_chg_mesh_attr(NL80211_MESHCONF_PLINK_TIMEOUT, mask)) conf->plink_timeout = nconf->plink_timeout; if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_GATE, mask)) conf->dot11MeshConnectedToMeshGate = nconf->dot11MeshConnectedToMeshGate; if (_chg_mesh_attr(NL80211_MESHCONF_NOLEARN, mask)) conf->dot11MeshNolearn = nconf->dot11MeshNolearn; if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_AS, mask)) conf->dot11MeshConnectedToAuthServer = nconf->dot11MeshConnectedToAuthServer; ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON); return 0; } static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, const struct mesh_config *conf, const struct mesh_setup *setup) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_chan_req chanreq = { .oper = setup->chandef }; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; int err; lockdep_assert_wiphy(sdata->local->hw.wiphy); memcpy(&ifmsh->mshcfg, conf, sizeof(struct mesh_config)); err = copy_mesh_setup(ifmsh, setup); if (err) return err; sdata->control_port_over_nl80211 = setup->control_port_over_nl80211; /* can mesh use other SMPS modes? 
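 * (Editorial note: for now the answer is effectively "no" -- the code
 * below forces the default link to IEEE80211_SMPS_OFF and requests all
 * available RX chains before taking the channel.)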
*/ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; sdata->deflink.needed_rx_chains = sdata->local->rx_chains; err = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_SHARED); if (err) return err; return ieee80211_start_mesh(sdata); } static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); lockdep_assert_wiphy(sdata->local->hw.wiphy); ieee80211_stop_mesh(sdata); ieee80211_link_release_channel(&sdata->deflink); kfree(sdata->u.mesh.ie); return 0; } #endif static int ieee80211_change_bss(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; struct ieee80211_supported_band *sband; u64 changed = 0; link = ieee80211_link_or_deflink(sdata, params->link_id, true); if (IS_ERR(link)) return PTR_ERR(link); if (!sdata_dereference(link->u.ap.beacon, sdata)) return -ENOENT; sband = ieee80211_get_link_sband(link); if (!sband) return -EINVAL; if (params->basic_rates) { if (!ieee80211_parse_bitrates(link->conf->chanreq.oper.width, wiphy->bands[sband->band], params->basic_rates, params->basic_rates_len, &link->conf->basic_rates)) return -EINVAL; changed |= BSS_CHANGED_BASIC_RATES; ieee80211_check_rate_mask(link); } if (params->use_cts_prot >= 0) { link->conf->use_cts_prot = params->use_cts_prot; changed |= BSS_CHANGED_ERP_CTS_PROT; } if (params->use_short_preamble >= 0) { link->conf->use_short_preamble = params->use_short_preamble; changed |= BSS_CHANGED_ERP_PREAMBLE; } if (!link->conf->use_short_slot && (sband->band == NL80211_BAND_5GHZ || sband->band == NL80211_BAND_6GHZ)) { link->conf->use_short_slot = true; changed |= BSS_CHANGED_ERP_SLOT; } if (params->use_short_slot_time >= 0) { link->conf->use_short_slot = params->use_short_slot_time; changed |= BSS_CHANGED_ERP_SLOT; } if (params->ap_isolate >= 0) { if (params->ap_isolate) sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS; else sdata->flags &= ~IEEE80211_SDATA_DONT_BRIDGE_PACKETS; ieee80211_check_fast_rx_iface(sdata); } if (params->ht_opmode >= 0) { link->conf->ht_operation_mode = (u16)params->ht_opmode; changed |= BSS_CHANGED_HT; } if (params->p2p_ctwindow >= 0) { link->conf->p2p_noa_attr.oppps_ctwindow &= ~IEEE80211_P2P_OPPPS_CTWINDOW_MASK; link->conf->p2p_noa_attr.oppps_ctwindow |= params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; changed |= BSS_CHANGED_P2P_PS; } if (params->p2p_opp_ps > 0) { link->conf->p2p_noa_attr.oppps_ctwindow |= IEEE80211_P2P_OPPPS_ENABLE_BIT; changed |= BSS_CHANGED_P2P_PS; } else if (params->p2p_opp_ps == 0) { link->conf->p2p_noa_attr.oppps_ctwindow &= ~IEEE80211_P2P_OPPPS_ENABLE_BIT; changed |= BSS_CHANGED_P2P_PS; } ieee80211_link_info_change_notify(sdata, link, changed); return 0; } static int ieee80211_set_txq_params(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_txq_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, params->link_id, true); struct ieee80211_tx_queue_params p; if (!local->ops->conf_tx) return -EOPNOTSUPP; if (local->hw.queues < IEEE80211_NUM_ACS) return -EOPNOTSUPP; if (IS_ERR(link)) return PTR_ERR(link); memset(&p, 0, sizeof(p)); p.aifs = params->aifs; p.cw_max = params->cwmax; p.cw_min = params->cwmin; p.txop = params->txop; /* * Setting tx queue params disables u-apsd because it's only * called 
in master mode. */ p.uapsd = false; ieee80211_regulatory_limit_wmm_params(sdata, &p, params->ac); link->tx_conf[params->ac] = p; if (drv_conf_tx(local, link, params->ac, &p)) { wiphy_debug(local->hw.wiphy, "failed to set TX queue parameters for AC %d\n", params->ac); return -EINVAL; } ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_QOS); return 0; } #ifdef CONFIG_PM static int ieee80211_suspend(struct wiphy *wiphy, struct cfg80211_wowlan *wowlan) { return __ieee80211_suspend(wiphy_priv(wiphy), wowlan); } static int ieee80211_resume(struct wiphy *wiphy) { return __ieee80211_resume(wiphy_priv(wiphy)); } #else #define ieee80211_suspend NULL #define ieee80211_resume NULL #endif static int ieee80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *req) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_WDEV_TO_SUB_IF(req->wdev); switch (ieee80211_vif_type_p2p(&sdata->vif)) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_DEVICE: break; case NL80211_IFTYPE_P2P_GO: if (sdata->local->ops->hw_scan) break; /* * FIXME: implement NoA while scanning in software, * for now fall through to allow scanning only when * beaconing hasn't been configured yet */ fallthrough; case NL80211_IFTYPE_AP: /* * If the scan has been forced (and the driver supports * forcing), don't care about being beaconing already. * This will create problems to the attached stations (e.g. all * the frames sent while scanning on other channel will be * lost) */ if (sdata->deflink.u.ap.beacon && (!(wiphy->features & NL80211_FEATURE_AP_SCAN) || !(req->flags & NL80211_SCAN_FLAG_AP))) return -EOPNOTSUPP; break; case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } return ieee80211_request_scan(sdata, req); } static void ieee80211_abort_scan(struct wiphy *wiphy, struct wireless_dev *wdev) { ieee80211_scan_cancel(wiphy_priv(wiphy)); } static int ieee80211_sched_scan_start(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_sched_scan_request *req) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (!sdata->local->ops->sched_scan_start) return -EOPNOTSUPP; return ieee80211_request_sched_scan_start(sdata, req); } static int ieee80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev, u64 reqid) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->sched_scan_stop) return -EOPNOTSUPP; return ieee80211_request_sched_scan_stop(local); } static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_auth_request *req) { return ieee80211_mgd_auth(IEEE80211_DEV_TO_SUB_IF(dev), req); } static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_assoc_request *req) { return ieee80211_mgd_assoc(IEEE80211_DEV_TO_SUB_IF(dev), req); } static int ieee80211_deauth(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_deauth_request *req) { return ieee80211_mgd_deauth(IEEE80211_DEV_TO_SUB_IF(dev), req); } static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_disassoc_request *req) { return ieee80211_mgd_disassoc(IEEE80211_DEV_TO_SUB_IF(dev), req); } static int ieee80211_join_ibss(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ibss_params *params) { return ieee80211_ibss_join(IEEE80211_DEV_TO_SUB_IF(dev), params); } static int ieee80211_leave_ibss(struct wiphy *wiphy, struct net_device *dev) { return ieee80211_ibss_leave(IEEE80211_DEV_TO_SUB_IF(dev)); } static int 
ieee80211_join_ocb(struct wiphy *wiphy, struct net_device *dev, struct ocb_setup *setup) { return ieee80211_ocb_join(IEEE80211_DEV_TO_SUB_IF(dev), setup); } static int ieee80211_leave_ocb(struct wiphy *wiphy, struct net_device *dev) { return ieee80211_ocb_leave(IEEE80211_DEV_TO_SUB_IF(dev)); } static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev, int rate[NUM_NL80211_BANDS]) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); memcpy(sdata->vif.bss_conf.mcast_rate, rate, sizeof(int) * NUM_NL80211_BANDS); if (ieee80211_sdata_running(sdata)) ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_MCAST_RATE); return 0; } static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) { struct ieee80211_local *local = wiphy_priv(wiphy); int err; if (changed & WIPHY_PARAM_FRAG_THRESHOLD) { ieee80211_check_fast_xmit_all(local); err = drv_set_frag_threshold(local, wiphy->frag_threshold); if (err) { ieee80211_check_fast_xmit_all(local); return err; } } if ((changed & WIPHY_PARAM_COVERAGE_CLASS) || (changed & WIPHY_PARAM_DYN_ACK)) { s16 coverage_class; coverage_class = changed & WIPHY_PARAM_COVERAGE_CLASS ? wiphy->coverage_class : -1; err = drv_set_coverage_class(local, coverage_class); if (err) return err; } if (changed & WIPHY_PARAM_RTS_THRESHOLD) { err = drv_set_rts_threshold(local, wiphy->rts_threshold); if (err) return err; } if (changed & WIPHY_PARAM_RETRY_SHORT) { if (wiphy->retry_short > IEEE80211_MAX_TX_RETRY) return -EINVAL; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; } if (changed & WIPHY_PARAM_RETRY_LONG) { if (wiphy->retry_long > IEEE80211_MAX_TX_RETRY) return -EINVAL; local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; } if (changed & (WIPHY_PARAM_RETRY_SHORT | WIPHY_PARAM_RETRY_LONG)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_RETRY_LIMITS); if (changed & (WIPHY_PARAM_TXQ_LIMIT | WIPHY_PARAM_TXQ_MEMORY_LIMIT | WIPHY_PARAM_TXQ_QUANTUM)) ieee80211_txq_set_params(local); return 0; } static int ieee80211_set_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; enum nl80211_tx_power_setting txp_type = type; bool update_txp_type = false; bool has_monitor = false; lockdep_assert_wiphy(local->hw.wiphy); if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) return -EOPNOTSUPP; sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!sdata) return -EOPNOTSUPP; } switch (type) { case NL80211_TX_POWER_AUTOMATIC: sdata->deflink.user_power_level = IEEE80211_UNSET_POWER_LEVEL; txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: if (mbm < 0 || (mbm % 100)) return -EOPNOTSUPP; sdata->deflink.user_power_level = MBM_TO_DBM(mbm); break; } if (txp_type != sdata->vif.bss_conf.txpower_type) { update_txp_type = true; sdata->vif.bss_conf.txpower_type = txp_type; } ieee80211_recalc_txpower(sdata, update_txp_type); return 0; } switch (type) { case NL80211_TX_POWER_AUTOMATIC: local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: if (mbm < 0 || (mbm % 100)) return -EOPNOTSUPP; local->user_power_level = MBM_TO_DBM(mbm); break; } list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == 
NL80211_IFTYPE_MONITOR) { has_monitor = true; continue; } sdata->deflink.user_power_level = local->user_power_level; if (txp_type != sdata->vif.bss_conf.txpower_type) update_txp_type = true; sdata->vif.bss_conf.txpower_type = txp_type; } list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; ieee80211_recalc_txpower(sdata, update_txp_type); } if (has_monitor) { sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { sdata->deflink.user_power_level = local->user_power_level; if (txp_type != sdata->vif.bss_conf.txpower_type) update_txp_type = true; sdata->vif.bss_conf.txpower_type = txp_type; ieee80211_recalc_txpower(sdata, update_txp_type); } } return 0; } static int ieee80211_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, int *dbm) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (local->ops->get_txpower) return drv_get_txpower(local, sdata, dbm); if (local->emulate_chanctx) *dbm = local->hw.conf.power_level; else *dbm = sdata->vif.bss_conf.txpower; /* INT_MIN indicates no power level was set yet */ if (*dbm == INT_MIN) return -EINVAL; return 0; } static void ieee80211_rfkill_poll(struct wiphy *wiphy) { struct ieee80211_local *local = wiphy_priv(wiphy); drv_rfkill_poll(local); } #ifdef CONFIG_NL80211_TESTMODE static int ieee80211_testmode_cmd(struct wiphy *wiphy, struct wireless_dev *wdev, void *data, int len) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_vif *vif = NULL; if (!local->ops->testmode_cmd) return -EOPNOTSUPP; if (wdev) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (sdata->flags & IEEE80211_SDATA_IN_DRIVER) vif = &sdata->vif; } return local->ops->testmode_cmd(&local->hw, vif, data, len); } static int ieee80211_testmode_dump(struct wiphy *wiphy, struct sk_buff *skb, struct netlink_callback *cb, void *data, int len) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->testmode_dump) return -EOPNOTSUPP; return local->ops->testmode_dump(&local->hw, skb, cb, data, len); } #endif int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, enum ieee80211_smps_mode smps_mode) { const u8 *ap; enum ieee80211_smps_mode old_req; int err; struct sta_info *sta; bool tdls_peer_found = false; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION)) return -EINVAL; if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) return 0; old_req = link->u.mgd.req_smps; link->u.mgd.req_smps = smps_mode; /* The driver indicated that EML is enabled for the interface, which * implies that SMPS flows towards the AP should be stopped. */ if (sdata->vif.driver_flags & IEEE80211_VIF_EML_ACTIVE) return 0; if (old_req == smps_mode && smps_mode != IEEE80211_SMPS_AUTOMATIC) return 0; /* * If not associated, or current association is not an HT * association, there's no need to do anything, just store * the new value until we associate. 
*/ if (!sdata->u.mgd.associated || link->conf->chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT) return 0; ap = sdata->vif.cfg.ap_addr; rcu_read_lock(); list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded || !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) continue; tdls_peer_found = true; break; } rcu_read_unlock(); if (smps_mode == IEEE80211_SMPS_AUTOMATIC) { if (tdls_peer_found || !sdata->u.mgd.powersave) smps_mode = IEEE80211_SMPS_OFF; else smps_mode = IEEE80211_SMPS_DYNAMIC; } /* send SM PS frame to AP */ err = ieee80211_send_smps_action(sdata, smps_mode, ap, ap, ieee80211_vif_is_mld(&sdata->vif) ? link->link_id : -1); if (err) link->u.mgd.req_smps = old_req; else if (smps_mode != IEEE80211_SMPS_OFF && tdls_peer_found) ieee80211_teardown_tdls_peers(link); return err; } static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); unsigned int link_id; if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) return -EOPNOTSUPP; if (enabled == sdata->u.mgd.powersave && timeout == local->dynamic_ps_forced_timeout) return 0; sdata->u.mgd.powersave = enabled; local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link; link = sdata_dereference(sdata->link[link_id], sdata); if (!link) continue; __ieee80211_request_smps_mgd(sdata, link, link->u.mgd.req_smps); } if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); ieee80211_check_fast_rx_iface(sdata); return 0; } static void ieee80211_set_cqm_rssi_link(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, s32 rssi_thold, u32 rssi_hyst, s32 rssi_low, s32 rssi_high) { struct ieee80211_bss_conf *conf; if (!link || !link->conf) return; conf = link->conf; if (rssi_thold && rssi_hyst && rssi_thold == conf->cqm_rssi_thold && rssi_hyst == conf->cqm_rssi_hyst) return; conf->cqm_rssi_thold = rssi_thold; conf->cqm_rssi_hyst = rssi_hyst; conf->cqm_rssi_low = rssi_low; conf->cqm_rssi_high = rssi_high; link->u.mgd.last_cqm_event_signal = 0; if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) return; if (sdata->u.mgd.associated && (sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_CQM); } static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy, struct net_device *dev, s32 rssi_thold, u32 rssi_hyst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_vif *vif = &sdata->vif; int link_id; if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER && !(vif->driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) return -EOPNOTSUPP; /* For MLD, handle CQM change on all the active links */ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); ieee80211_set_cqm_rssi_link(sdata, link, rssi_thold, rssi_hyst, 0, 0); } return 0; } static int ieee80211_set_cqm_rssi_range_config(struct wiphy *wiphy, struct net_device *dev, s32 rssi_low, s32 rssi_high) { struct ieee80211_sub_if_data *sdata = 
IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_vif *vif = &sdata->vif; int link_id; if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER) return -EOPNOTSUPP; /* For MLD, handle CQM change on all the active links */ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); ieee80211_set_cqm_rssi_link(sdata, link, 0, 0, rssi_low, rssi_high); } return 0; } static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id, const u8 *addr, const struct cfg80211_bitrate_mask *mask) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int i, ret; if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; /* * If active validate the setting and reject it if it doesn't leave * at least one basic rate usable, since we really have to be able * to send something, and if we're an AP we have to be able to do * so at a basic rate so that all clients can receive it. */ if (rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) && sdata->vif.bss_conf.chanreq.oper.chan) { u32 basic_rates = sdata->vif.bss_conf.basic_rates; enum nl80211_band band; band = sdata->vif.bss_conf.chanreq.oper.chan->band; if (!(mask->control[band].legacy & basic_rates)) return -EINVAL; } if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { ret = drv_set_bitrate_mask(local, sdata, mask); if (ret) return ret; } for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband = wiphy->bands[i]; int j; sdata->rc_rateidx_mask[i] = mask->control[i].legacy; memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].ht_mcs, sizeof(mask->control[i].ht_mcs)); memcpy(sdata->rc_rateidx_vht_mcs_mask[i], mask->control[i].vht_mcs, sizeof(mask->control[i].vht_mcs)); sdata->rc_has_mcs_mask[i] = false; sdata->rc_has_vht_mcs_mask[i] = false; if (!sband) continue; for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) { if (sdata->rc_rateidx_mcs_mask[i][j] != 0xff) { sdata->rc_has_mcs_mask[i] = true; break; } } for (j = 0; j < NL80211_VHT_NSS_MAX; j++) { if (sdata->rc_rateidx_vht_mcs_mask[i][j] != 0xffff) { sdata->rc_has_vht_mcs_mask[i] = true; break; } } } return 0; } static int ieee80211_start_radar_detection(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef, u32 cac_time_ms, int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_chan_req chanreq = { .oper = *chandef }; struct ieee80211_local *local = sdata->local; struct ieee80211_link_data *link_data; int err; lockdep_assert_wiphy(local->hw.wiphy); if (!list_empty(&local->roc_list) || local->scanning) return -EBUSY; link_data = sdata_dereference(sdata->link[link_id], sdata); if (!link_data) return -ENOLINK; /* whatever, but channel contexts should not complain about that one */ link_data->smps_mode = IEEE80211_SMPS_OFF; link_data->needed_rx_chains = local->rx_chains; err = ieee80211_link_use_channel(link_data, &chanreq, IEEE80211_CHANCTX_SHARED); if (err) return err; wiphy_delayed_work_queue(wiphy, &link_data->dfs_cac_timer_work, msecs_to_jiffies(cac_time_ms)); return 0; } static void ieee80211_end_cac(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_link_data *link_data; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sdata, &local->interfaces, list) { 
link_data = sdata_dereference(sdata->link[link_id], sdata); if (!link_data) continue; wiphy_delayed_work_cancel(wiphy, &link_data->dfs_cac_timer_work); if (sdata->wdev.links[link_id].cac_started) { ieee80211_link_release_channel(link_data); sdata->wdev.links[link_id].cac_started = false; } } } static struct cfg80211_beacon_data * cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) { struct cfg80211_beacon_data *new_beacon; u8 *pos; int len; len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len + beacon->proberesp_ies_len + beacon->assocresp_ies_len + beacon->probe_resp_len + beacon->lci_len + beacon->civicloc_len; if (beacon->mbssid_ies) len += ieee80211_get_mbssid_beacon_len(beacon->mbssid_ies, beacon->rnr_ies, beacon->mbssid_ies->cnt); new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL); if (!new_beacon) return NULL; if (beacon->mbssid_ies && beacon->mbssid_ies->cnt) { new_beacon->mbssid_ies = kzalloc(struct_size(new_beacon->mbssid_ies, elem, beacon->mbssid_ies->cnt), GFP_KERNEL); if (!new_beacon->mbssid_ies) { kfree(new_beacon); return NULL; } if (beacon->rnr_ies && beacon->rnr_ies->cnt) { new_beacon->rnr_ies = kzalloc(struct_size(new_beacon->rnr_ies, elem, beacon->rnr_ies->cnt), GFP_KERNEL); if (!new_beacon->rnr_ies) { kfree(new_beacon->mbssid_ies); kfree(new_beacon); return NULL; } } } pos = (u8 *)(new_beacon + 1); if (beacon->head_len) { new_beacon->head_len = beacon->head_len; new_beacon->head = pos; memcpy(pos, beacon->head, beacon->head_len); pos += beacon->head_len; } if (beacon->tail_len) { new_beacon->tail_len = beacon->tail_len; new_beacon->tail = pos; memcpy(pos, beacon->tail, beacon->tail_len); pos += beacon->tail_len; } if (beacon->beacon_ies_len) { new_beacon->beacon_ies_len = beacon->beacon_ies_len; new_beacon->beacon_ies = pos; memcpy(pos, beacon->beacon_ies, beacon->beacon_ies_len); pos += beacon->beacon_ies_len; } if (beacon->proberesp_ies_len) { new_beacon->proberesp_ies_len = beacon->proberesp_ies_len; new_beacon->proberesp_ies = pos; memcpy(pos, beacon->proberesp_ies, beacon->proberesp_ies_len); pos += beacon->proberesp_ies_len; } if (beacon->assocresp_ies_len) { new_beacon->assocresp_ies_len = beacon->assocresp_ies_len; new_beacon->assocresp_ies = pos; memcpy(pos, beacon->assocresp_ies, beacon->assocresp_ies_len); pos += beacon->assocresp_ies_len; } if (beacon->probe_resp_len) { new_beacon->probe_resp_len = beacon->probe_resp_len; new_beacon->probe_resp = pos; memcpy(pos, beacon->probe_resp, beacon->probe_resp_len); pos += beacon->probe_resp_len; } if (beacon->mbssid_ies && beacon->mbssid_ies->cnt) { pos += ieee80211_copy_mbssid_beacon(pos, new_beacon->mbssid_ies, beacon->mbssid_ies); if (beacon->rnr_ies && beacon->rnr_ies->cnt) pos += ieee80211_copy_rnr_beacon(pos, new_beacon->rnr_ies, beacon->rnr_ies); } /* might copy -1, meaning no changes requested */ new_beacon->ftm_responder = beacon->ftm_responder; if (beacon->lci) { new_beacon->lci_len = beacon->lci_len; new_beacon->lci = pos; memcpy(pos, beacon->lci, beacon->lci_len); pos += beacon->lci_len; } if (beacon->civicloc) { new_beacon->civicloc_len = beacon->civicloc_len; new_beacon->civicloc = pos; memcpy(pos, beacon->civicloc, beacon->civicloc_len); pos += beacon->civicloc_len; } return new_beacon; } void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct ieee80211_link_data *link_data; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return; 
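/*
 * Editorial note: this exported helper is typically invoked by the driver
 * once its channel-switch countdown completes, which appears to be why the
 * link is looked up under rcu_read_lock() below rather than under the
 * wiphy mutex.  For an MBSSID transmitting interface the loop below also
 * queues the CSA finalize work on every non-transmitting interface that
 * points at this vif.
 */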
rcu_read_lock(); link_data = rcu_dereference(sdata->link[link_id]); if (WARN_ON(!link_data)) { rcu_read_unlock(); return; } /* TODO: MBSSID with MLO changes */ if (vif->mbssid_tx_vif == vif) { /* Trigger ieee80211_csa_finish() on the non-transmitting * interfaces when channel switch is received on * transmitting interface */ struct ieee80211_sub_if_data *iter; list_for_each_entry_rcu(iter, &local->interfaces, list) { if (!ieee80211_sdata_running(iter)) continue; if (iter == sdata || iter->vif.mbssid_tx_vif != vif) continue; wiphy_work_queue(iter->local->hw.wiphy, &iter->deflink.csa.finalize_work); } } wiphy_work_queue(local->hw.wiphy, &link_data->csa.finalize_work); rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_csa_finish); void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_tx) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; sdata->csa_blocked_queues = block_tx; sdata_info(sdata, "channel switch failed, disconnecting\n"); wiphy_work_queue(local->hw.wiphy, &ifmgd->csa_connection_drop_work); } EXPORT_SYMBOL(ieee80211_channel_switch_disconnect); static int ieee80211_set_after_csa_beacon(struct ieee80211_link_data *link_data, u64 *changed) { struct ieee80211_sub_if_data *sdata = link_data->sdata; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: if (!link_data->u.ap.next_beacon) return -EINVAL; err = ieee80211_assign_beacon(sdata, link_data, link_data->u.ap.next_beacon, NULL, NULL, changed); ieee80211_free_next_beacon(link_data); if (err < 0) return err; break; case NL80211_IFTYPE_ADHOC: err = ieee80211_ibss_finish_csa(sdata, changed); if (err < 0) return err; break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: err = ieee80211_mesh_finish_csa(sdata, changed); if (err < 0) return err; break; #endif default: WARN_ON(1); return -EINVAL; } return 0; } static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) { struct ieee80211_sub_if_data *sdata = link_data->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_bss_conf *link_conf = link_data->conf; u64 changed = 0; int err; lockdep_assert_wiphy(local->hw.wiphy); /* * using reservation isn't immediate as it may be deferred until later * with multi-vif. 
once reservation is complete it will re-schedule the * work with no reserved_chanctx so verify chandef to check if it * completed successfully */ if (link_data->reserved_chanctx) { /* * with multi-vif csa driver may call ieee80211_csa_finish() * many times while waiting for other interfaces to use their * reservations */ if (link_data->reserved_ready) return 0; return ieee80211_link_use_reserved_context(link_data); } if (!cfg80211_chandef_identical(&link_conf->chanreq.oper, &link_data->csa.chanreq.oper)) return -EINVAL; link_conf->csa_active = false; err = ieee80211_set_after_csa_beacon(link_data, &changed); if (err) return err; ieee80211_link_info_change_notify(sdata, link_data, changed); ieee80211_vif_unblock_queues_csa(sdata); err = drv_post_channel_switch(link_data); if (err) return err; cfg80211_ch_switch_notify(sdata->dev, &link_data->csa.chanreq.oper, link_data->link_id); return 0; } static void ieee80211_csa_finalize(struct ieee80211_link_data *link_data) { struct ieee80211_sub_if_data *sdata = link_data->sdata; if (__ieee80211_csa_finalize(link_data)) { sdata_info(sdata, "failed to finalize CSA on link %d, disconnecting\n", link_data->link_id); cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev, GFP_KERNEL); } } void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_link_data *link = container_of(work, struct ieee80211_link_data, csa.finalize_work); struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. */ if (!link->conf->csa_active) return; if (!ieee80211_sdata_running(sdata)) return; ieee80211_csa_finalize(link); } static int ieee80211_set_csa_beacon(struct ieee80211_link_data *link_data, struct cfg80211_csa_settings *params, u64 *changed) { struct ieee80211_sub_if_data *sdata = link_data->sdata; struct ieee80211_csa_settings csa = {}; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: link_data->u.ap.next_beacon = cfg80211_beacon_dup(&params->beacon_after); if (!link_data->u.ap.next_beacon) return -ENOMEM; /* * With a count of 0, we don't have to wait for any * TBTT before switching, so complete the CSA * immediately. In theory, with a count == 1 we * should delay the switch until just before the next * TBTT, but that would complicate things so we switch * immediately too. If we would delay the switch * until the next TBTT, we would have to set the probe * response here. * * TODO: A channel switch with count <= 1 without * sending a CSA action frame is kind of useless, * because the clients won't know we're changing * channels. The action frame must be implemented * either here or in the userspace. 
*/ if (params->count <= 1) break; if ((params->n_counter_offsets_beacon > IEEE80211_MAX_CNTDWN_COUNTERS_NUM) || (params->n_counter_offsets_presp > IEEE80211_MAX_CNTDWN_COUNTERS_NUM)) { ieee80211_free_next_beacon(link_data); return -EINVAL; } csa.counter_offsets_beacon = params->counter_offsets_beacon; csa.counter_offsets_presp = params->counter_offsets_presp; csa.n_counter_offsets_beacon = params->n_counter_offsets_beacon; csa.n_counter_offsets_presp = params->n_counter_offsets_presp; csa.count = params->count; err = ieee80211_assign_beacon(sdata, link_data, &params->beacon_csa, &csa, NULL, changed); if (err < 0) { ieee80211_free_next_beacon(link_data); return err; } break; case NL80211_IFTYPE_ADHOC: if (!sdata->vif.cfg.ibss_joined) return -EINVAL; if (params->chandef.width != sdata->u.ibss.chandef.width) return -EINVAL; switch (params->chandef.width) { case NL80211_CHAN_WIDTH_40: if (cfg80211_get_chandef_type(&params->chandef) != cfg80211_get_chandef_type(&sdata->u.ibss.chandef)) return -EINVAL; break; case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: break; default: return -EINVAL; } /* changes into another band are not supported */ if (sdata->u.ibss.chandef.chan->band != params->chandef.chan->band) return -EINVAL; /* see comments in the NL80211_IFTYPE_AP block */ if (params->count > 1) { err = ieee80211_ibss_csa_beacon(sdata, params, changed); if (err < 0) return err; } ieee80211_send_action_csa(sdata, params); break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; /* changes into another band are not supported */ if (sdata->vif.bss_conf.chanreq.oper.chan->band != params->chandef.chan->band) return -EINVAL; if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_NONE) { ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_INIT; if (!ifmsh->pre_value) ifmsh->pre_value = 1; else ifmsh->pre_value++; } /* see comments in the NL80211_IFTYPE_AP block */ if (params->count > 1) { err = ieee80211_mesh_csa_beacon(sdata, params, changed); if (err < 0) { ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE; return err; } } if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_INIT) ieee80211_send_action_csa(sdata, params); break; } #endif default: return -EOPNOTSUPP; } return 0; } static void ieee80211_color_change_abort(struct ieee80211_link_data *link) { link->conf->color_change_active = false; ieee80211_free_next_beacon(link); cfg80211_color_change_aborted_notify(link->sdata->dev, link->link_id); } static int __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_chan_req chanreq = { .oper = params->chandef }; struct ieee80211_local *local = sdata->local; struct ieee80211_channel_switch ch_switch = { .link_id = params->link_id, }; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; struct ieee80211_bss_conf *link_conf; struct ieee80211_link_data *link_data; u64 changed = 0; u8 link_id = params->link_id; int err; lockdep_assert_wiphy(local->hw.wiphy); if (!list_empty(&local->roc_list) || local->scanning) return -EBUSY; if (sdata->wdev.links[link_id].cac_started) return -EBUSY; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return -EINVAL; link_data = wiphy_dereference(wiphy, sdata->link[link_id]); if (!link_data) return -ENOLINK; link_conf = link_data->conf; if (chanreq.oper.punctured && !link_conf->eht_support) return -EINVAL; /* don't 
allow another channel switch if one is already active. */ if (link_conf->csa_active) return -EBUSY; conf = wiphy_dereference(wiphy, link_conf->chanctx_conf); if (!conf) { err = -EBUSY; goto out; } if (params->chandef.chan->freq_offset) { /* this may work, but is untested */ err = -EOPNOTSUPP; goto out; } chanctx = container_of(conf, struct ieee80211_chanctx, conf); ch_switch.timestamp = 0; ch_switch.device_timestamp = 0; ch_switch.block_tx = params->block_tx; ch_switch.chandef = chanreq.oper; ch_switch.count = params->count; err = drv_pre_channel_switch(sdata, &ch_switch); if (err) goto out; err = ieee80211_link_reserve_chanctx(link_data, &chanreq, chanctx->mode, params->radar_required); if (err) goto out; /* if reservation is invalid then this will fail */ err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0, -1); if (err) { ieee80211_link_unreserve_chanctx(link_data); goto out; } /* if there is a color change in progress, abort it */ if (link_conf->color_change_active) ieee80211_color_change_abort(link_data); err = ieee80211_set_csa_beacon(link_data, params, &changed); if (err) { ieee80211_link_unreserve_chanctx(link_data); goto out; } link_data->csa.chanreq = chanreq; link_conf->csa_active = true; if (params->block_tx) ieee80211_vif_block_queues_csa(sdata); cfg80211_ch_switch_started_notify(sdata->dev, &link_data->csa.chanreq.oper, link_id, params->count, params->block_tx); if (changed) { ieee80211_link_info_change_notify(sdata, link_data, changed); drv_channel_switch_beacon(sdata, &link_data->csa.chanreq.oper); } else { /* if the beacon didn't change, we can finalize immediately */ ieee80211_csa_finalize(link_data); } out: return err; } int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); return __ieee80211_channel_switch(wiphy, dev, params); } u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local) { lockdep_assert_wiphy(local->hw.wiphy); local->roc_cookie_counter++; /* wow, you wrapped 64 bits ... 
more likely a bug */ if (WARN_ON(local->roc_cookie_counter == 0)) local->roc_cookie_counter++; return local->roc_cookie_counter; } int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, u64 *cookie, gfp_t gfp) { unsigned long spin_flags; struct sk_buff *ack_skb; int id; ack_skb = skb_copy(skb, gfp); if (!ack_skb) return -ENOMEM; spin_lock_irqsave(&local->ack_status_lock, spin_flags); id = idr_alloc(&local->ack_status_frames, ack_skb, 1, 0x2000, GFP_ATOMIC); spin_unlock_irqrestore(&local->ack_status_lock, spin_flags); if (id < 0) { kfree_skb(ack_skb); return -ENOMEM; } IEEE80211_SKB_CB(skb)->status_data_idr = 1; IEEE80211_SKB_CB(skb)->status_data = id; *cookie = ieee80211_mgmt_tx_cookie(local); IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie; return 0; } static void ieee80211_update_mgmt_frame_registrations(struct wiphy *wiphy, struct wireless_dev *wdev, struct mgmt_frame_regs *upd) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); u32 preq_mask = BIT(IEEE80211_STYPE_PROBE_REQ >> 4); u32 action_mask = BIT(IEEE80211_STYPE_ACTION >> 4); bool global_change, intf_change; global_change = (local->probe_req_reg != !!(upd->global_stypes & preq_mask)) || (local->rx_mcast_action_reg != !!(upd->global_mcast_stypes & action_mask)); local->probe_req_reg = upd->global_stypes & preq_mask; local->rx_mcast_action_reg = upd->global_mcast_stypes & action_mask; intf_change = (sdata->vif.probe_req_reg != !!(upd->interface_stypes & preq_mask)) || (sdata->vif.rx_mcast_action_reg != !!(upd->interface_mcast_stypes & action_mask)); sdata->vif.probe_req_reg = upd->interface_stypes & preq_mask; sdata->vif.rx_mcast_action_reg = upd->interface_mcast_stypes & action_mask; if (!local->open_count) return; if (intf_change && ieee80211_sdata_running(sdata)) drv_config_iface_filter(local, sdata, sdata->vif.probe_req_reg ? 
FIF_PROBE_REQ : 0, FIF_PROBE_REQ); if (global_change) ieee80211_configure_filter(local); } static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant) { struct ieee80211_local *local = wiphy_priv(wiphy); int ret; if (local->started) return -EOPNOTSUPP; ret = drv_set_antenna(local, tx_ant, rx_ant); if (ret) return ret; local->rx_chains = hweight8(rx_ant); return 0; } static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) { struct ieee80211_local *local = wiphy_priv(wiphy); return drv_get_antenna(local, tx_ant, rx_ant); } static int ieee80211_set_rekey_data(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_gtk_rekey_data *data) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (!local->ops->set_rekey_data) return -EOPNOTSUPP; drv_set_rekey_data(local, sdata, data); return 0; } static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; struct sk_buff *skb; int size = sizeof(*nullfunc); __le16 fc; bool qos; struct ieee80211_tx_info *info; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; enum nl80211_band band; int ret; /* the lock is needed to assign the cookie later */ lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); sta = sta_info_get_bss(sdata, peer); if (!sta) { ret = -ENOLINK; goto unlock; } qos = sta->sta.wme; chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { ret = -EINVAL; goto unlock; } band = chanctx_conf->def.chan->band; if (qos) { fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC | IEEE80211_FCTL_FROMDS); } else { size -= 2; fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | IEEE80211_FCTL_FROMDS); } skb = dev_alloc_skb(local->hw.extra_tx_headroom + size); if (!skb) { ret = -ENOMEM; goto unlock; } skb->dev = dev; skb_reserve(skb, local->hw.extra_tx_headroom); nullfunc = skb_put(skb, size); nullfunc->frame_control = fc; nullfunc->duration_id = 0; memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); nullfunc->seq_ctrl = 0; info = IEEE80211_SKB_CB(skb); info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_INTFL_NL80211_FRAME_TX; info->band = band; skb_set_queue_mapping(skb, IEEE80211_AC_VO); skb->priority = 7; if (qos) nullfunc->qos_ctrl = cpu_to_le16(7); ret = ieee80211_attach_ack_skb(local, skb, cookie, GFP_ATOMIC); if (ret) { kfree_skb(skb); goto unlock; } local_bh_disable(); ieee80211_xmit(sdata, sta, skb); local_bh_enable(); ret = 0; unlock: rcu_read_unlock(); return ret; } static int ieee80211_cfg_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_link_data *link; int ret = -ENODATA; rcu_read_lock(); link = rcu_dereference(sdata->link[link_id]); if (!link) { ret = -ENOLINK; goto out; } chanctx_conf = rcu_dereference(link->conf->chanctx_conf); if (chanctx_conf) { *chandef = link->conf->chanreq.oper; ret = 0; } else if (local->open_count > 0 && local->open_count == local->monitors && sdata->vif.type == 
NL80211_IFTYPE_MONITOR) { *chandef = local->monitor_chanreq.oper; ret = 0; } out: rcu_read_unlock(); return ret; } #ifdef CONFIG_PM static void ieee80211_set_wakeup(struct wiphy *wiphy, bool enabled) { drv_set_wakeup(wiphy_priv(wiphy), enabled); } #endif static int ieee80211_set_qos_map(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_qos_map *qos_map) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct mac80211_qos_map *new_qos_map, *old_qos_map; if (qos_map) { new_qos_map = kzalloc(sizeof(*new_qos_map), GFP_KERNEL); if (!new_qos_map) return -ENOMEM; memcpy(&new_qos_map->qos_map, qos_map, sizeof(*qos_map)); } else { /* A NULL qos_map was passed to disable QoS mapping */ new_qos_map = NULL; } old_qos_map = sdata_dereference(sdata->qos_map, sdata); rcu_assign_pointer(sdata->qos_map, new_qos_map); if (old_qos_map) kfree_rcu(old_qos_map, rcu_head); return 0; } static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; struct ieee80211_chan_req chanreq = { .oper = *chandef }; int ret; u64 changed = 0; link = sdata_dereference(sdata->link[link_id], sdata); ret = ieee80211_link_change_chanreq(link, &chanreq, &changed); if (ret == 0) ieee80211_link_info_change_notify(sdata, link, changed); return ret; } static int ieee80211_add_tx_ts(struct wiphy *wiphy, struct net_device *dev, u8 tsid, const u8 *peer, u8 up, u16 admitted_time) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; int ac = ieee802_1d_to_ac[up]; if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!(sdata->wmm_acm & BIT(up))) return -EINVAL; if (ifmgd->tx_tspec[ac].admitted_time) return -EBUSY; if (admitted_time) { ifmgd->tx_tspec[ac].admitted_time = 32 * admitted_time; ifmgd->tx_tspec[ac].tsid = tsid; ifmgd->tx_tspec[ac].up = up; } return 0; } static int ieee80211_del_tx_ts(struct wiphy *wiphy, struct net_device *dev, u8 tsid, const u8 *peer) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = wiphy_priv(wiphy); int ac; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac]; /* skip unused entries */ if (!tx_tspec->admitted_time) continue; if (tx_tspec->tsid != tsid) continue; /* due to this new packets will be reassigned to non-ACM ACs */ tx_tspec->up = -1; /* Make sure that all packets have been sent to avoid to * restore the QoS params on packets that are still on the * queues. 
*/ synchronize_net(); ieee80211_flush_queues(local, sdata, false); /* restore the normal QoS parameters * (unconditionally to avoid races) */ tx_tspec->action = TX_TSPEC_ACTION_STOP_DOWNGRADE; tx_tspec->downgraded = false; ieee80211_sta_handle_tspec_ac_params(sdata); /* finally clear all the data */ memset(tx_tspec, 0, sizeof(*tx_tspec)); return 0; } return -ENOENT; } void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, u8 inst_id, enum nl80211_nan_func_term_reason reason, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct cfg80211_nan_func *func; u64 cookie; if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) return; spin_lock_bh(&sdata->u.nan.func_lock); func = idr_find(&sdata->u.nan.function_inst_ids, inst_id); if (WARN_ON(!func)) { spin_unlock_bh(&sdata->u.nan.func_lock); return; } cookie = func->cookie; idr_remove(&sdata->u.nan.function_inst_ids, inst_id); spin_unlock_bh(&sdata->u.nan.func_lock); cfg80211_free_nan_func(func); cfg80211_nan_func_terminated(ieee80211_vif_to_wdev(vif), inst_id, reason, cookie, gfp); } EXPORT_SYMBOL(ieee80211_nan_func_terminated); void ieee80211_nan_func_match(struct ieee80211_vif *vif, struct cfg80211_nan_match_params *match, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct cfg80211_nan_func *func; if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) return; spin_lock_bh(&sdata->u.nan.func_lock); func = idr_find(&sdata->u.nan.function_inst_ids, match->inst_id); if (WARN_ON(!func)) { spin_unlock_bh(&sdata->u.nan.func_lock); return; } match->cookie = func->cookie; spin_unlock_bh(&sdata->u.nan.func_lock); cfg80211_nan_match(ieee80211_vif_to_wdev(vif), match, gfp); } EXPORT_SYMBOL(ieee80211_nan_func_match); static int ieee80211_set_multicast_to_unicast(struct wiphy *wiphy, struct net_device *dev, const bool enabled) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); sdata->u.ap.multicast_to_unicast = enabled; return 0; } void ieee80211_fill_txq_stats(struct cfg80211_txq_stats *txqstats, struct txq_info *txqi) { if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_BACKLOG_BYTES))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_BACKLOG_BYTES); txqstats->backlog_bytes = txqi->tin.backlog_bytes; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_BACKLOG_PACKETS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_BACKLOG_PACKETS); txqstats->backlog_packets = txqi->tin.backlog_packets; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_FLOWS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_FLOWS); txqstats->flows = txqi->tin.flows; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_DROPS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_DROPS); txqstats->drops = txqi->cstats.drop_count; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_ECN_MARKS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_ECN_MARKS); txqstats->ecn_marks = txqi->cstats.ecn_mark; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_OVERLIMIT))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_OVERLIMIT); txqstats->overlimit = txqi->tin.overlimit; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_COLLISIONS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_COLLISIONS); txqstats->collisions = txqi->tin.collisions; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_TX_BYTES))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_TX_BYTES); txqstats->tx_bytes = txqi->tin.tx_bytes; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_TX_PACKETS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_TX_PACKETS); txqstats->tx_packets = txqi->tin.tx_packets; } } static int 
ieee80211_get_txq_stats(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_txq_stats *txqstats) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; int ret = 0; spin_lock_bh(&local->fq.lock); rcu_read_lock(); if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (!sdata->vif.txq) { ret = 1; goto out; } ieee80211_fill_txq_stats(txqstats, to_txq_info(sdata->vif.txq)); } else { /* phy stats */ txqstats->filled |= BIT(NL80211_TXQ_STATS_BACKLOG_PACKETS) | BIT(NL80211_TXQ_STATS_BACKLOG_BYTES) | BIT(NL80211_TXQ_STATS_OVERLIMIT) | BIT(NL80211_TXQ_STATS_OVERMEMORY) | BIT(NL80211_TXQ_STATS_COLLISIONS) | BIT(NL80211_TXQ_STATS_MAX_FLOWS); txqstats->backlog_packets = local->fq.backlog; txqstats->backlog_bytes = local->fq.memory_usage; txqstats->overlimit = local->fq.overlimit; txqstats->overmemory = local->fq.overmemory; txqstats->collisions = local->fq.collisions; txqstats->max_flows = local->fq.flows_cnt; } out: rcu_read_unlock(); spin_unlock_bh(&local->fq.lock); return ret; } static int ieee80211_get_ftm_responder_stats(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ftm_responder_stats *ftm_stats) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); return drv_get_ftm_responder_stats(local, sdata, ftm_stats); } static int ieee80211_start_pmsr(struct wiphy *wiphy, struct wireless_dev *dev, struct cfg80211_pmsr_request *request) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(dev); return drv_start_pmsr(local, sdata, request); } static void ieee80211_abort_pmsr(struct wiphy *wiphy, struct wireless_dev *dev, struct cfg80211_pmsr_request *request) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(dev); return drv_abort_pmsr(local, sdata, request); } static int ieee80211_set_tid_config(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_tid_config *tid_conf) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!sdata->local->ops->set_tid_config) return -EOPNOTSUPP; if (!tid_conf->peer) return drv_set_tid_config(sdata->local, sdata, NULL, tid_conf); sta = sta_info_get_bss(sdata, tid_conf->peer); if (!sta) return -ENOENT; return drv_set_tid_config(sdata->local, sdata, &sta->sta, tid_conf); } static int ieee80211_reset_tid_config(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u8 tids) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!sdata->local->ops->reset_tid_config) return -EOPNOTSUPP; if (!peer) return drv_reset_tid_config(sdata->local, sdata, NULL, tids); sta = sta_info_get_bss(sdata, peer); if (!sta) return -ENOENT; return drv_reset_tid_config(sdata->local, sdata, &sta->sta, tids); } static int ieee80211_set_sar_specs(struct wiphy *wiphy, struct cfg80211_sar_specs *sar) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->set_sar_specs) return -EOPNOTSUPP; return local->ops->set_sar_specs(&local->hw, sar); } static int ieee80211_set_after_color_change_beacon(struct ieee80211_link_data *link, u64 *changed) { struct ieee80211_sub_if_data *sdata = link->sdata; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: { int ret; if (!link->u.ap.next_beacon) return -EINVAL; ret = ieee80211_assign_beacon(sdata, link, 
link->u.ap.next_beacon, NULL, NULL, changed); ieee80211_free_next_beacon(link); if (ret < 0) return ret; break; } default: WARN_ON_ONCE(1); return -EINVAL; } return 0; } static int ieee80211_set_color_change_beacon(struct ieee80211_link_data *link, struct cfg80211_color_change_settings *params, u64 *changed) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_color_change_settings color_change = {}; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: link->u.ap.next_beacon = cfg80211_beacon_dup(&params->beacon_next); if (!link->u.ap.next_beacon) return -ENOMEM; if (params->count <= 1) break; color_change.counter_offset_beacon = params->counter_offset_beacon; color_change.counter_offset_presp = params->counter_offset_presp; color_change.count = params->count; err = ieee80211_assign_beacon(sdata, link, &params->beacon_color_change, NULL, &color_change, changed); if (err < 0) { ieee80211_free_next_beacon(link); return err; } break; default: return -EOPNOTSUPP; } return 0; } static void ieee80211_color_change_bss_config_notify(struct ieee80211_link_data *link, u8 color, int enable, u64 changed) { struct ieee80211_sub_if_data *sdata = link->sdata; lockdep_assert_wiphy(sdata->local->hw.wiphy); link->conf->he_bss_color.color = color; link->conf->he_bss_color.enabled = enable; changed |= BSS_CHANGED_HE_BSS_COLOR; ieee80211_link_info_change_notify(sdata, link, changed); if (!sdata->vif.bss_conf.nontransmitted && sdata->vif.mbssid_tx_vif) { struct ieee80211_sub_if_data *child; list_for_each_entry(child, &sdata->local->interfaces, list) { if (child != sdata && child->vif.mbssid_tx_vif == &sdata->vif) { child->vif.bss_conf.he_bss_color.color = color; child->vif.bss_conf.he_bss_color.enabled = enable; ieee80211_link_info_change_notify(child, &child->deflink, BSS_CHANGED_HE_BSS_COLOR); } } } } static int ieee80211_color_change_finalize(struct ieee80211_link_data *link) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; u64 changed = 0; int err; lockdep_assert_wiphy(local->hw.wiphy); link->conf->color_change_active = false; err = ieee80211_set_after_color_change_beacon(link, &changed); if (err) { cfg80211_color_change_aborted_notify(sdata->dev, link->link_id); return err; } ieee80211_color_change_bss_config_notify(link, link->conf->color_change_color, 1, changed); cfg80211_color_change_notify(sdata->dev, link->link_id); return 0; } void ieee80211_color_change_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_link_data *link = container_of(work, struct ieee80211_link_data, color_change_finalize_work); struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_bss_conf *link_conf = link->conf; struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. 
*/ if (!link_conf->color_change_active) return; if (!ieee80211_sdata_running(sdata)) return; ieee80211_color_change_finalize(link); } void ieee80211_color_collision_detection_work(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); struct ieee80211_link_data *link = container_of(delayed_work, struct ieee80211_link_data, color_collision_detect_work); struct ieee80211_sub_if_data *sdata = link->sdata; cfg80211_obss_color_collision_notify(sdata->dev, link->color_bitmap, link->link_id); } void ieee80211_color_change_finish(struct ieee80211_vif *vif, u8 link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_link_data *link; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return; rcu_read_lock(); link = rcu_dereference(sdata->link[link_id]); if (WARN_ON(!link)) { rcu_read_unlock(); return; } wiphy_work_queue(sdata->local->hw.wiphy, &link->color_change_finalize_work); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_color_change_finish); void ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif, u64 color_bitmap, u8 link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_link_data *link; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return; rcu_read_lock(); link = rcu_dereference(sdata->link[link_id]); if (WARN_ON(!link)) { rcu_read_unlock(); return; } if (link->conf->color_change_active || link->conf->csa_active) { rcu_read_unlock(); return; } if (delayed_work_pending(&link->color_collision_detect_work)) { rcu_read_unlock(); return; } link->color_bitmap = color_bitmap; /* queue the color collision detection event every 500 ms in order to * avoid sending too much netlink messages to userspace. */ ieee80211_queue_delayed_work(&sdata->local->hw, &link->color_collision_detect_work, msecs_to_jiffies(500)); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_obss_color_collision_notify); static int ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_color_change_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_bss_conf *link_conf; struct ieee80211_link_data *link; u8 link_id = params->link_id; u64 changed = 0; int err; lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return -EINVAL; link = wiphy_dereference(wiphy, sdata->link[link_id]); if (!link) return -ENOLINK; link_conf = link->conf; if (link_conf->nontransmitted) return -EINVAL; /* don't allow another color change if one is already active or if csa * is active */ if (link_conf->color_change_active || link_conf->csa_active) { err = -EBUSY; goto out; } err = ieee80211_set_color_change_beacon(link, params, &changed); if (err) goto out; link_conf->color_change_active = true; link_conf->color_change_color = params->color; cfg80211_color_change_started_notify(sdata->dev, params->count, link_id); if (changed) ieee80211_color_change_bss_config_notify(link, 0, 0, changed); else /* if the beacon didn't change, we can finalize immediately */ ieee80211_color_change_finalize(link); out: return err; } static int ieee80211_set_radar_background(struct wiphy *wiphy, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->set_radar_background) return -EOPNOTSUPP; return local->ops->set_radar_background(&local->hw, chandef); } static int ieee80211_add_intf_link(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id) { 
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); lockdep_assert_wiphy(sdata->local->hw.wiphy); if (wdev->use_4addr) return -EOPNOTSUPP; return ieee80211_vif_set_links(sdata, wdev->valid_links, 0); } static void ieee80211_del_intf_link(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); lockdep_assert_wiphy(sdata->local->hw.wiphy); ieee80211_vif_set_links(sdata, wdev->valid_links, 0); } static int ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; int ret; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; if (!sta->sta.valid_links) return -EINVAL; if (sta->sta.valid_links & BIT(params->link_id)) return -EALREADY; ret = ieee80211_sta_allocate_link(sta, params->link_id); if (ret) return ret; ret = sta_link_apply_parameters(local, sta, STA_LINK_MODE_NEW, params); if (ret) { ieee80211_sta_free_link(sta, params->link_id); return ret; } /* ieee80211_sta_activate_link frees the link upon failure */ return ieee80211_sta_activate_link(sta, params->link_id); } static int ieee80211_mod_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; if (!(sta->sta.valid_links & BIT(params->link_id))) return -EINVAL; return sta_link_apply_parameters(local, sta, STA_LINK_MODE_LINK_MODIFY, params); } static int ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_del_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); sta = sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; if (!(sta->sta.valid_links & BIT(params->link_id))) return -EINVAL; /* must not create a STA without links */ if (sta->sta.valid_links == BIT(params->link_id)) return -EINVAL; ieee80211_sta_remove_link(sta, params->link_id); return 0; } static int ieee80211_set_hw_timestamp(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_set_hw_timestamp *hwts) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; if (!local->ops->set_hw_timestamp) return -EOPNOTSUPP; if (!check_sdata_in_driver(sdata)) return -EIO; return local->ops->set_hw_timestamp(&local->hw, &sdata->vif, hwts); } static int ieee80211_set_ttlm(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ttlm_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); lockdep_assert_wiphy(sdata->local->hw.wiphy); return ieee80211_req_neg_ttlm(sdata, params); } const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, .change_virtual_intf = ieee80211_change_iface, .start_p2p_device = ieee80211_start_p2p_device, .stop_p2p_device = ieee80211_stop_p2p_device, .add_key = ieee80211_add_key, .del_key = ieee80211_del_key, .get_key = ieee80211_get_key, .set_default_key = ieee80211_config_default_key, 
.set_default_mgmt_key = ieee80211_config_default_mgmt_key, .set_default_beacon_key = ieee80211_config_default_beacon_key, .start_ap = ieee80211_start_ap, .change_beacon = ieee80211_change_beacon, .stop_ap = ieee80211_stop_ap, .add_station = ieee80211_add_station, .del_station = ieee80211_del_station, .change_station = ieee80211_change_station, .get_station = ieee80211_get_station, .dump_station = ieee80211_dump_station, .dump_survey = ieee80211_dump_survey, #ifdef CONFIG_MAC80211_MESH .add_mpath = ieee80211_add_mpath, .del_mpath = ieee80211_del_mpath, .change_mpath = ieee80211_change_mpath, .get_mpath = ieee80211_get_mpath, .dump_mpath = ieee80211_dump_mpath, .get_mpp = ieee80211_get_mpp, .dump_mpp = ieee80211_dump_mpp, .update_mesh_config = ieee80211_update_mesh_config, .get_mesh_config = ieee80211_get_mesh_config, .join_mesh = ieee80211_join_mesh, .leave_mesh = ieee80211_leave_mesh, #endif .join_ocb = ieee80211_join_ocb, .leave_ocb = ieee80211_leave_ocb, .change_bss = ieee80211_change_bss, .inform_bss = ieee80211_inform_bss, .set_txq_params = ieee80211_set_txq_params, .set_monitor_channel = ieee80211_set_monitor_channel, .suspend = ieee80211_suspend, .resume = ieee80211_resume, .scan = ieee80211_scan, .abort_scan = ieee80211_abort_scan, .sched_scan_start = ieee80211_sched_scan_start, .sched_scan_stop = ieee80211_sched_scan_stop, .auth = ieee80211_auth, .assoc = ieee80211_assoc, .deauth = ieee80211_deauth, .disassoc = ieee80211_disassoc, .join_ibss = ieee80211_join_ibss, .leave_ibss = ieee80211_leave_ibss, .set_mcast_rate = ieee80211_set_mcast_rate, .set_wiphy_params = ieee80211_set_wiphy_params, .set_tx_power = ieee80211_set_tx_power, .get_tx_power = ieee80211_get_tx_power, .rfkill_poll = ieee80211_rfkill_poll, CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd) CFG80211_TESTMODE_DUMP(ieee80211_testmode_dump) .set_power_mgmt = ieee80211_set_power_mgmt, .set_bitrate_mask = ieee80211_set_bitrate_mask, .remain_on_channel = ieee80211_remain_on_channel, .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, .mgmt_tx = ieee80211_mgmt_tx, .mgmt_tx_cancel_wait = ieee80211_mgmt_tx_cancel_wait, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, .set_cqm_rssi_range_config = ieee80211_set_cqm_rssi_range_config, .update_mgmt_frame_registrations = ieee80211_update_mgmt_frame_registrations, .set_antenna = ieee80211_set_antenna, .get_antenna = ieee80211_get_antenna, .set_rekey_data = ieee80211_set_rekey_data, .tdls_oper = ieee80211_tdls_oper, .tdls_mgmt = ieee80211_tdls_mgmt, .tdls_channel_switch = ieee80211_tdls_channel_switch, .tdls_cancel_channel_switch = ieee80211_tdls_cancel_channel_switch, .probe_client = ieee80211_probe_client, .set_noack_map = ieee80211_set_noack_map, #ifdef CONFIG_PM .set_wakeup = ieee80211_set_wakeup, #endif .get_channel = ieee80211_cfg_get_channel, .start_radar_detection = ieee80211_start_radar_detection, .end_cac = ieee80211_end_cac, .channel_switch = ieee80211_channel_switch, .set_qos_map = ieee80211_set_qos_map, .set_ap_chanwidth = ieee80211_set_ap_chanwidth, .add_tx_ts = ieee80211_add_tx_ts, .del_tx_ts = ieee80211_del_tx_ts, .start_nan = ieee80211_start_nan, .stop_nan = ieee80211_stop_nan, .nan_change_conf = ieee80211_nan_change_conf, .add_nan_func = ieee80211_add_nan_func, .del_nan_func = ieee80211_del_nan_func, .set_multicast_to_unicast = ieee80211_set_multicast_to_unicast, .tx_control_port = ieee80211_tx_control_port, .get_txq_stats = ieee80211_get_txq_stats, .get_ftm_responder_stats = ieee80211_get_ftm_responder_stats, .start_pmsr = ieee80211_start_pmsr, 
.abort_pmsr = ieee80211_abort_pmsr, .probe_mesh_link = ieee80211_probe_mesh_link, .set_tid_config = ieee80211_set_tid_config, .reset_tid_config = ieee80211_reset_tid_config, .set_sar_specs = ieee80211_set_sar_specs, .color_change = ieee80211_color_change, .set_radar_background = ieee80211_set_radar_background, .add_intf_link = ieee80211_add_intf_link, .del_intf_link = ieee80211_del_intf_link, .add_link_station = ieee80211_add_link_station, .mod_link_station = ieee80211_mod_link_station, .del_link_station = ieee80211_del_link_station, .set_hw_timestamp = ieee80211_set_hw_timestamp, .set_ttlm = ieee80211_set_ttlm, .get_radio_mask = ieee80211_get_radio_mask, };
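The mac80211_config_ops table above is the dispatch surface cfg80211 uses: each nl80211 request (start_ap, channel_switch, color_change, ...) ends up in one of these callbacks. As context only, here is a minimal sketch of how such an ops table is bound to a wiphy at allocation and registration time; the real mac80211 call site lives elsewhere (main.c) and is not part of this excerpt, and my_hw_priv/my_wiphy_setup are hypothetical names used purely for illustration.

#include <net/cfg80211.h>

/* Hypothetical driver-private area stored behind the wiphy. */
struct my_hw_priv {
	int placeholder;
};

static struct wiphy *my_wiphy_setup(void)
{
	struct wiphy *wiphy;

	/*
	 * cfg80211 keeps the ops pointer and later invokes entries such as
	 * .channel_switch or .color_change when the matching nl80211
	 * command arrives for this wiphy.
	 */
	wiphy = wiphy_new_nm(&mac80211_config_ops, sizeof(struct my_hw_priv), NULL);
	if (!wiphy)
		return NULL;

	/* bands, interface modes and feature flags would be filled in here */

	if (wiphy_register(wiphy) < 0) {
		wiphy_free(wiphy);
		return NULL;
	}

	return wiphy;
}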
// SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018, Linaro Ltd */ #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/poll.h> #include <linux/skbuff.h> #include <linux/uaccess.h> #include "qrtr.h" struct qrtr_tun { struct qrtr_endpoint ep; struct sk_buff_head queue; wait_queue_head_t readq; }; static int qrtr_tun_send(struct qrtr_endpoint *ep, struct sk_buff *skb) { struct qrtr_tun *tun = container_of(ep, struct qrtr_tun, ep); skb_queue_tail(&tun->queue, skb); /* wake up any blocking processes, waiting for new data */ wake_up_interruptible(&tun->readq); return 0; } static int qrtr_tun_open(struct inode *inode, struct file *filp) { struct qrtr_tun *tun; int ret; tun = kzalloc(sizeof(*tun), GFP_KERNEL); if (!tun) return -ENOMEM; skb_queue_head_init(&tun->queue); init_waitqueue_head(&tun->readq); tun->ep.xmit = qrtr_tun_send; filp->private_data = tun; ret = qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO); if (ret) goto out; return 0; out: filp->private_data = NULL; kfree(tun); return ret; } static ssize_t qrtr_tun_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *filp = iocb->ki_filp; struct qrtr_tun *tun = filp->private_data; struct sk_buff *skb; int count; while (!(skb = skb_dequeue(&tun->queue))) { if (filp->f_flags & O_NONBLOCK) return -EAGAIN; /* Wait until we get data or the endpoint goes away */ if (wait_event_interruptible(tun->readq, !skb_queue_empty(&tun->queue))) return -ERESTARTSYS; } count = min_t(size_t, iov_iter_count(to), skb->len); if (copy_to_iter(skb->data, count, to) != count) count = -EFAULT; kfree_skb(skb); return count; } static ssize_t qrtr_tun_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *filp = iocb->ki_filp; struct qrtr_tun *tun = filp->private_data; size_t len = iov_iter_count(from); ssize_t ret; void *kbuf; if (!len) return -EINVAL; if (len > KMALLOC_MAX_SIZE) return -ENOMEM; kbuf = kzalloc(len, GFP_KERNEL); if (!kbuf) return -ENOMEM; if (!copy_from_iter_full(kbuf, len, from)) { kfree(kbuf); return -EFAULT; } ret = qrtr_endpoint_post(&tun->ep, kbuf, len); kfree(kbuf); return ret < 0 ?
ret : len; } static __poll_t qrtr_tun_poll(struct file *filp, poll_table *wait) { struct qrtr_tun *tun = filp->private_data; __poll_t mask = 0; poll_wait(filp, &tun->readq, wait); if (!skb_queue_empty(&tun->queue)) mask |= EPOLLIN | EPOLLRDNORM; return mask; } static int qrtr_tun_release(struct inode *inode, struct file *filp) { struct qrtr_tun *tun = filp->private_data; qrtr_endpoint_unregister(&tun->ep); /* Discard all SKBs */ skb_queue_purge(&tun->queue); kfree(tun); return 0; } static const struct file_operations qrtr_tun_ops = { .owner = THIS_MODULE, .open = qrtr_tun_open, .poll = qrtr_tun_poll, .read_iter = qrtr_tun_read_iter, .write_iter = qrtr_tun_write_iter, .release = qrtr_tun_release, }; static struct miscdevice qrtr_tun_miscdev = { MISC_DYNAMIC_MINOR, "qrtr-tun", &qrtr_tun_ops, }; static int __init qrtr_tun_init(void) { int ret; ret = misc_register(&qrtr_tun_miscdev); if (ret) pr_err("failed to register Qualcomm IPC Router tun device\n"); return ret; } static void __exit qrtr_tun_exit(void) { misc_deregister(&qrtr_tun_miscdev); } module_init(qrtr_tun_init); module_exit(qrtr_tun_exit); MODULE_DESCRIPTION("Qualcomm IPC Router TUN device"); MODULE_LICENSE("GPL v2");
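Since qrtr-tun registers a misc character device, it normally shows up as /dev/qrtr-tun (assuming the usual devtmpfs naming): each write() is handed to the router as one packet via qrtr_endpoint_post(), and each read() dequeues exactly one queued packet, with poll()/EPOLLIN signalling availability and O_NONBLOCK turning an empty queue into -EAGAIN. A minimal userspace sketch of that flow, with error handling trimmed and the QRTR wire header left out because it is not defined in this file:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[65536];
	struct pollfd pfd;
	ssize_t n;
	int fd;

	fd = open("/dev/qrtr-tun", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Block until the endpoint has queued at least one packet ... */
	pfd.fd = fd;
	pfd.events = POLLIN;
	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
		/* ... then read it; one read() returns one packet (one skb). */
		n = read(fd, buf, sizeof(buf));
		if (n > 0)
			printf("received %zd byte QRTR packet\n", n);
	}

	close(fd);
	return 0;
}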
/* * slcan.c - serial line CAN interface driver (using tty line discipline) * * This file is derived from linux/drivers/net/slip/slip.c and got * inspiration from linux/drivers/net/can/can327.c for the rework made * on the line discipline code. * * slip.c Authors : Laurence Culhane <loz@holmes.demon.co.uk> * Fred N. van Kempen <waltje@uwalt.nl.mugnet.org> * slcan.c Author : Oliver Hartkopp <socketcan@hartkopp.net> * can327.c Author : Max Staudt <max-linux@enpas.org> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, see http://www.gnu.org/licenses/gpl.html * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/string.h> #include <linux/tty.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/workqueue.h> #include <linux/can.h> #include <linux/can/dev.h> #include <linux/can/skb.h> #include "slcan.h" MODULE_ALIAS_LDISC(N_SLCAN); MODULE_DESCRIPTION("serial line CAN interface"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Oliver Hartkopp <socketcan@hartkopp.net>"); MODULE_AUTHOR("Dario Binacchi <dario.binacchi@amarulasolutions.com>"); /* maximum rx buffer len: extended CAN frame with timestamp */ #define SLCAN_MTU (sizeof("T1111222281122334455667788EA5F\r") + 1) #define SLCAN_CMD_LEN 1 #define SLCAN_SFF_ID_LEN 3 #define SLCAN_EFF_ID_LEN 8 #define SLCAN_STATE_LEN 1 #define SLCAN_STATE_BE_RXCNT_LEN 3 #define SLCAN_STATE_BE_TXCNT_LEN 3 #define SLCAN_STATE_FRAME_LEN (1 + SLCAN_CMD_LEN + \ SLCAN_STATE_BE_RXCNT_LEN + \ SLCAN_STATE_BE_TXCNT_LEN) struct slcan { struct can_priv can; /* Various fields. */ struct tty_struct *tty; /* ptr to TTY structure */ struct net_device *dev; /* easy for intr handling */ spinlock_t lock; struct work_struct tx_work; /* Flushes transmit buffer */ /* These are pointers to the malloc()ed frame buffers.
*/ unsigned char rbuff[SLCAN_MTU]; /* receiver buffer */ int rcount; /* received chars counter */ unsigned char xbuff[SLCAN_MTU]; /* transmitter buffer*/ unsigned char *xhead; /* pointer to next XMIT byte */ int xleft; /* bytes left in XMIT queue */ unsigned long flags; /* Flag values/ mode etc */ #define SLF_ERROR 0 /* Parity, etc. error */ #define SLF_XCMD 1 /* Command transmission */ unsigned long cmd_flags; /* Command flags */ #define CF_ERR_RST 0 /* Reset errors on open */ wait_queue_head_t xcmd_wait; /* Wait queue for commands */ /* transmission */ }; static const u32 slcan_bitrate_const[] = { 10000, 20000, 50000, 100000, 125000, 250000, 500000, 800000, 1000000 }; bool slcan_err_rst_on_open(struct net_device *ndev) { struct slcan *sl = netdev_priv(ndev); return !!test_bit(CF_ERR_RST, &sl->cmd_flags); } int slcan_enable_err_rst_on_open(struct net_device *ndev, bool on) { struct slcan *sl = netdev_priv(ndev); if (netif_running(ndev)) return -EBUSY; if (on) set_bit(CF_ERR_RST, &sl->cmd_flags); else clear_bit(CF_ERR_RST, &sl->cmd_flags); return 0; } /************************************************************************* * SLCAN ENCAPSULATION FORMAT * *************************************************************************/ /* A CAN frame has a can_id (11 bit standard frame format OR 29 bit extended * frame format) a data length code (len) which can be from 0 to 8 * and up to <len> data bytes as payload. * Additionally a CAN frame may become a remote transmission frame if the * RTR-bit is set. This causes another ECU to send a CAN frame with the * given can_id. * * The SLCAN ASCII representation of these different frame types is: * <type> <id> <dlc> <data>* * * Extended frames (29 bit) are defined by capital characters in the type. * RTR frames are defined as 'r' types - normal frames have 't' type: * t => 11 bit data frame * r => 11 bit RTR frame * T => 29 bit data frame * R => 29 bit RTR frame * * The <id> is 3 (standard) or 8 (extended) bytes in ASCII Hex (base64). 
* The <dlc> is a one byte ASCII number ('0' - '8') * The <data> section has at much ASCII Hex bytes as defined by the <dlc> * * Examples: * * t1230 : can_id 0x123, len 0, no data * t4563112233 : can_id 0x456, len 3, data 0x11 0x22 0x33 * T12ABCDEF2AA55 : extended can_id 0x12ABCDEF, len 2, data 0xAA 0x55 * r1230 : can_id 0x123, len 0, no data, remote transmission request * */ /************************************************************************* * STANDARD SLCAN DECAPSULATION * *************************************************************************/ /* Send one completely decapsulated can_frame to the network layer */ static void slcan_bump_frame(struct slcan *sl) { struct sk_buff *skb; struct can_frame *cf; int i, tmp; u32 tmpid; char *cmd = sl->rbuff; skb = alloc_can_skb(sl->dev, &cf); if (unlikely(!skb)) { sl->dev->stats.rx_dropped++; return; } switch (*cmd) { case 'r': cf->can_id = CAN_RTR_FLAG; fallthrough; case 't': /* store dlc ASCII value and terminate SFF CAN ID string */ cf->len = sl->rbuff[SLCAN_CMD_LEN + SLCAN_SFF_ID_LEN]; sl->rbuff[SLCAN_CMD_LEN + SLCAN_SFF_ID_LEN] = 0; /* point to payload data behind the dlc */ cmd += SLCAN_CMD_LEN + SLCAN_SFF_ID_LEN + 1; break; case 'R': cf->can_id = CAN_RTR_FLAG; fallthrough; case 'T': cf->can_id |= CAN_EFF_FLAG; /* store dlc ASCII value and terminate EFF CAN ID string */ cf->len = sl->rbuff[SLCAN_CMD_LEN + SLCAN_EFF_ID_LEN]; sl->rbuff[SLCAN_CMD_LEN + SLCAN_EFF_ID_LEN] = 0; /* point to payload data behind the dlc */ cmd += SLCAN_CMD_LEN + SLCAN_EFF_ID_LEN + 1; break; default: goto decode_failed; } if (kstrtou32(sl->rbuff + SLCAN_CMD_LEN, 16, &tmpid)) goto decode_failed; cf->can_id |= tmpid; /* get len from sanitized ASCII value */ if (cf->len >= '0' && cf->len < '9') cf->len -= '0'; else goto decode_failed; /* RTR frames may have a dlc > 0 but they never have any data bytes */ if (!(cf->can_id & CAN_RTR_FLAG)) { for (i = 0; i < cf->len; i++) { tmp = hex_to_bin(*cmd++); if (tmp < 0) goto decode_failed; cf->data[i] = (tmp << 4); tmp = hex_to_bin(*cmd++); if (tmp < 0) goto decode_failed; cf->data[i] |= tmp; } } sl->dev->stats.rx_packets++; if (!(cf->can_id & CAN_RTR_FLAG)) sl->dev->stats.rx_bytes += cf->len; netif_rx(skb); return; decode_failed: sl->dev->stats.rx_errors++; dev_kfree_skb(skb); } /* A change state frame must contain state info and receive and transmit * error counters. * * Examples: * * sb256256 : state bus-off: rx counter 256, tx counter 256 * sa057033 : state active, rx counter 57, tx counter 33 */ static void slcan_bump_state(struct slcan *sl) { struct net_device *dev = sl->dev; struct sk_buff *skb; struct can_frame *cf; char *cmd = sl->rbuff; u32 rxerr, txerr; enum can_state state, rx_state, tx_state; switch (cmd[1]) { case 'a': state = CAN_STATE_ERROR_ACTIVE; break; case 'w': state = CAN_STATE_ERROR_WARNING; break; case 'p': state = CAN_STATE_ERROR_PASSIVE; break; case 'b': state = CAN_STATE_BUS_OFF; break; default: return; } if (state == sl->can.state || sl->rcount < SLCAN_STATE_FRAME_LEN) return; cmd += SLCAN_STATE_BE_RXCNT_LEN + SLCAN_CMD_LEN + 1; cmd[SLCAN_STATE_BE_TXCNT_LEN] = 0; if (kstrtou32(cmd, 10, &txerr)) return; *cmd = 0; cmd -= SLCAN_STATE_BE_RXCNT_LEN; if (kstrtou32(cmd, 10, &rxerr)) return; skb = alloc_can_err_skb(dev, &cf); tx_state = txerr >= rxerr ? state : 0; rx_state = txerr <= rxerr ? 
state : 0; can_change_state(dev, cf, tx_state, rx_state); if (state == CAN_STATE_BUS_OFF) { can_bus_off(dev); } else if (skb) { cf->can_id |= CAN_ERR_CNT; cf->data[6] = txerr; cf->data[7] = rxerr; } if (skb) netif_rx(skb); } /* An error frame can contain more than one type of error. * * Examples: * * e1a : len 1, errors: ACK error * e3bcO: len 3, errors: Bit0 error, CRC error, Tx overrun error */ static void slcan_bump_err(struct slcan *sl) { struct net_device *dev = sl->dev; struct sk_buff *skb; struct can_frame *cf; char *cmd = sl->rbuff; bool rx_errors = false, tx_errors = false, rx_over_errors = false; int i, len; /* get len from sanitized ASCII value */ len = cmd[1]; if (len >= '0' && len < '9') len -= '0'; else return; if ((len + SLCAN_CMD_LEN + 1) > sl->rcount) return; skb = alloc_can_err_skb(dev, &cf); if (skb) cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; cmd += SLCAN_CMD_LEN + 1; for (i = 0; i < len; i++, cmd++) { switch (*cmd) { case 'a': netdev_dbg(dev, "ACK error\n"); tx_errors = true; if (skb) { cf->can_id |= CAN_ERR_ACK; cf->data[3] = CAN_ERR_PROT_LOC_ACK; } break; case 'b': netdev_dbg(dev, "Bit0 error\n"); tx_errors = true; if (skb) cf->data[2] |= CAN_ERR_PROT_BIT0; break; case 'B': netdev_dbg(dev, "Bit1 error\n"); tx_errors = true; if (skb) cf->data[2] |= CAN_ERR_PROT_BIT1; break; case 'c': netdev_dbg(dev, "CRC error\n"); rx_errors = true; if (skb) { cf->data[2] |= CAN_ERR_PROT_BIT; cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; } break; case 'f': netdev_dbg(dev, "Form Error\n"); rx_errors = true; if (skb) cf->data[2] |= CAN_ERR_PROT_FORM; break; case 'o': netdev_dbg(dev, "Rx overrun error\n"); rx_over_errors = true; rx_errors = true; if (skb) { cf->can_id |= CAN_ERR_CRTL; cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; } break; case 'O': netdev_dbg(dev, "Tx overrun error\n"); tx_errors = true; if (skb) { cf->can_id |= CAN_ERR_CRTL; cf->data[1] = CAN_ERR_CRTL_TX_OVERFLOW; } break; case 's': netdev_dbg(dev, "Stuff error\n"); rx_errors = true; if (skb) cf->data[2] |= CAN_ERR_PROT_STUFF; break; default: if (skb) dev_kfree_skb(skb); return; } } if (rx_errors) dev->stats.rx_errors++; if (rx_over_errors) dev->stats.rx_over_errors++; if (tx_errors) dev->stats.tx_errors++; if (skb) netif_rx(skb); } static void slcan_bump(struct slcan *sl) { switch (sl->rbuff[0]) { case 'r': fallthrough; case 't': fallthrough; case 'R': fallthrough; case 'T': return slcan_bump_frame(sl); case 'e': return slcan_bump_err(sl); case 's': return slcan_bump_state(sl); default: return; } } /* parse tty input stream */ static void slcan_unesc(struct slcan *sl, unsigned char s) { if ((s == '\r') || (s == '\a')) { /* CR or BEL ends the pdu */ if (!test_and_clear_bit(SLF_ERROR, &sl->flags) && sl->rcount > 4) slcan_bump(sl); sl->rcount = 0; } else { if (!test_bit(SLF_ERROR, &sl->flags)) { if (sl->rcount < SLCAN_MTU) { sl->rbuff[sl->rcount++] = s; return; } sl->dev->stats.rx_over_errors++; set_bit(SLF_ERROR, &sl->flags); } } } /************************************************************************* * STANDARD SLCAN ENCAPSULATION * *************************************************************************/ /* Encapsulate one can_frame and stuff into a TTY queue. 
*/ static void slcan_encaps(struct slcan *sl, struct can_frame *cf) { int actual, i; unsigned char *pos; unsigned char *endpos; canid_t id = cf->can_id; pos = sl->xbuff; if (cf->can_id & CAN_RTR_FLAG) *pos = 'R'; /* becomes 'r' in standard frame format (SFF) */ else *pos = 'T'; /* becomes 't' in standard frame format (SSF) */ /* determine number of chars for the CAN-identifier */ if (cf->can_id & CAN_EFF_FLAG) { id &= CAN_EFF_MASK; endpos = pos + SLCAN_EFF_ID_LEN; } else { *pos |= 0x20; /* convert R/T to lower case for SFF */ id &= CAN_SFF_MASK; endpos = pos + SLCAN_SFF_ID_LEN; } /* build 3 (SFF) or 8 (EFF) digit CAN identifier */ pos++; while (endpos >= pos) { *endpos-- = hex_asc_upper[id & 0xf]; id >>= 4; } pos += (cf->can_id & CAN_EFF_FLAG) ? SLCAN_EFF_ID_LEN : SLCAN_SFF_ID_LEN; *pos++ = cf->len + '0'; /* RTR frames may have a dlc > 0 but they never have any data bytes */ if (!(cf->can_id & CAN_RTR_FLAG)) { for (i = 0; i < cf->len; i++) pos = hex_byte_pack_upper(pos, cf->data[i]); sl->dev->stats.tx_bytes += cf->len; } *pos++ = '\r'; /* Order of next two lines is *very* important. * When we are sending a little amount of data, * the transfer may be completed inside the ops->write() * routine, because it's running with interrupts enabled. * In this case we *never* got WRITE_WAKEUP event, * if we did not request it before write operation. * 14 Oct 1994 Dmitry Gorodchanin. */ set_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); actual = sl->tty->ops->write(sl->tty, sl->xbuff, pos - sl->xbuff); sl->xleft = (pos - sl->xbuff) - actual; sl->xhead = sl->xbuff + actual; } /* Write out any remaining transmit buffer. Scheduled when tty is writable */ static void slcan_transmit(struct work_struct *work) { struct slcan *sl = container_of(work, struct slcan, tx_work); int actual; spin_lock_bh(&sl->lock); /* First make sure we're connected. */ if (unlikely(!netif_running(sl->dev)) && likely(!test_bit(SLF_XCMD, &sl->flags))) { spin_unlock_bh(&sl->lock); return; } if (sl->xleft <= 0) { if (unlikely(test_bit(SLF_XCMD, &sl->flags))) { clear_bit(SLF_XCMD, &sl->flags); clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); spin_unlock_bh(&sl->lock); wake_up(&sl->xcmd_wait); return; } /* Now serial buffer is almost free & we can start * transmission of another packet */ sl->dev->stats.tx_packets++; clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); spin_unlock_bh(&sl->lock); netif_wake_queue(sl->dev); return; } actual = sl->tty->ops->write(sl->tty, sl->xhead, sl->xleft); sl->xleft -= actual; sl->xhead += actual; spin_unlock_bh(&sl->lock); } /* Called by the driver when there's room for more data. * Schedule the transmit. */ static void slcan_write_wakeup(struct tty_struct *tty) { struct slcan *sl = tty->disc_data; schedule_work(&sl->tx_work); } /* Send a can_frame to a TTY queue. */ static netdev_tx_t slcan_netdev_xmit(struct sk_buff *skb, struct net_device *dev) { struct slcan *sl = netdev_priv(dev); if (can_dev_dropped_skb(dev, skb)) return NETDEV_TX_OK; spin_lock(&sl->lock); if (!netif_running(dev)) { spin_unlock(&sl->lock); netdev_warn(dev, "xmit: iface is down\n"); goto out; } if (!sl->tty) { spin_unlock(&sl->lock); goto out; } netif_stop_queue(sl->dev); slcan_encaps(sl, (struct can_frame *)skb->data); /* encaps & send */ spin_unlock(&sl->lock); skb_tx_timestamp(skb); out: kfree_skb(skb); return NETDEV_TX_OK; } /****************************************** * Routines looking at netdevice side. 
******************************************/ static int slcan_transmit_cmd(struct slcan *sl, const unsigned char *cmd) { int ret, actual, n; spin_lock(&sl->lock); if (!sl->tty) { spin_unlock(&sl->lock); return -ENODEV; } n = scnprintf(sl->xbuff, sizeof(sl->xbuff), "%s", cmd); set_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); actual = sl->tty->ops->write(sl->tty, sl->xbuff, n); sl->xleft = n - actual; sl->xhead = sl->xbuff + actual; set_bit(SLF_XCMD, &sl->flags); spin_unlock(&sl->lock); ret = wait_event_interruptible_timeout(sl->xcmd_wait, !test_bit(SLF_XCMD, &sl->flags), HZ); clear_bit(SLF_XCMD, &sl->flags); if (ret == -ERESTARTSYS) return ret; if (ret == 0) return -ETIMEDOUT; return 0; } /* Netdevice UP -> DOWN routine */ static int slcan_netdev_close(struct net_device *dev) { struct slcan *sl = netdev_priv(dev); int err; if (sl->can.bittiming.bitrate && sl->can.bittiming.bitrate != CAN_BITRATE_UNKNOWN) { err = slcan_transmit_cmd(sl, "C\r"); if (err) netdev_warn(dev, "failed to send close command 'C\\r'\n"); } /* TTY discipline is running. */ clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); flush_work(&sl->tx_work); netif_stop_queue(dev); sl->rcount = 0; sl->xleft = 0; close_candev(dev); sl->can.state = CAN_STATE_STOPPED; if (sl->can.bittiming.bitrate == CAN_BITRATE_UNKNOWN) sl->can.bittiming.bitrate = CAN_BITRATE_UNSET; return 0; } /* Netdevice DOWN -> UP routine */ static int slcan_netdev_open(struct net_device *dev) { struct slcan *sl = netdev_priv(dev); unsigned char cmd[SLCAN_MTU]; int err, s; /* The baud rate is not set with the command * `ip link set <iface> type can bitrate <baud>' and therefore * can.bittiming.bitrate is CAN_BITRATE_UNSET (0), causing * open_candev() to fail. So let's set it to a fake value. */ if (sl->can.bittiming.bitrate == CAN_BITRATE_UNSET) sl->can.bittiming.bitrate = CAN_BITRATE_UNKNOWN; err = open_candev(dev); if (err) { netdev_err(dev, "failed to open can device\n"); return err; } if (sl->can.bittiming.bitrate != CAN_BITRATE_UNKNOWN) { for (s = 0; s < ARRAY_SIZE(slcan_bitrate_const); s++) { if (sl->can.bittiming.bitrate == slcan_bitrate_const[s]) break; } /* The CAN framework has already validated the bitrate value, * so we don't need to check whether `s' has been properly set. */ snprintf(cmd, sizeof(cmd), "C\rS%d\r", s); err = slcan_transmit_cmd(sl, cmd); if (err) { netdev_err(dev, "failed to send bitrate command 'C\\rS%d\\r'\n", s); goto cmd_transmit_failed; } if (test_bit(CF_ERR_RST, &sl->cmd_flags)) { err = slcan_transmit_cmd(sl, "F\r"); if (err) { netdev_err(dev, "failed to send error command 'F\\r'\n"); goto cmd_transmit_failed; } } if (sl->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) { err = slcan_transmit_cmd(sl, "L\r"); if (err) { netdev_err(dev, "failed to send listen-only command 'L\\r'\n"); goto cmd_transmit_failed; } } else { err = slcan_transmit_cmd(sl, "O\r"); if (err) { netdev_err(dev, "failed to send open command 'O\\r'\n"); goto cmd_transmit_failed; } } } sl->can.state = CAN_STATE_ERROR_ACTIVE; netif_start_queue(dev); return 0; cmd_transmit_failed: close_candev(dev); return err; } static const struct net_device_ops slcan_netdev_ops = { .ndo_open = slcan_netdev_open, .ndo_stop = slcan_netdev_close, .ndo_start_xmit = slcan_netdev_xmit, .ndo_change_mtu = can_change_mtu, }; /****************************************** * Routines looking at TTY side. ******************************************/ /* Handle the 'receiver data ready' interrupt.
* This function is called by the 'tty_io' module in the kernel when * a block of SLCAN data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. This will not * be re-entered while running but other ldisc functions may be called * in parallel */ static void slcan_receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct slcan *sl = tty->disc_data; if (!netif_running(sl->dev)) return; /* Read the characters out of the buffer */ while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; cp++; continue; } slcan_unesc(sl, *cp++); } } /* Open the high-level part of the SLCAN channel. * This function is called by the TTY module when the * SLCAN line discipline is called for. * * Called in process context serialized from other ldisc calls. */ static int slcan_open(struct tty_struct *tty) { struct net_device *dev; struct slcan *sl; int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (!tty->ops->write) return -EOPNOTSUPP; dev = alloc_candev(sizeof(*sl), 1); if (!dev) return -ENFILE; sl = netdev_priv(dev); /* Configure TTY interface */ tty->receive_room = 65536; /* We don't flow control */ sl->rcount = 0; sl->xleft = 0; spin_lock_init(&sl->lock); INIT_WORK(&sl->tx_work, slcan_transmit); init_waitqueue_head(&sl->xcmd_wait); /* Configure CAN metadata */ sl->can.bitrate_const = slcan_bitrate_const; sl->can.bitrate_const_cnt = ARRAY_SIZE(slcan_bitrate_const); sl->can.ctrlmode_supported = CAN_CTRLMODE_LISTENONLY; /* Configure netdev interface */ sl->dev = dev; dev->netdev_ops = &slcan_netdev_ops; dev->ethtool_ops = &slcan_ethtool_ops; /* Mark ldisc channel as alive */ sl->tty = tty; tty->disc_data = sl; err = register_candev(dev); if (err) { free_candev(dev); pr_err("can't register candev\n"); return err; } netdev_info(dev, "slcan on %s.\n", tty->name); /* TTY layer expects 0 on success */ return 0; } /* Close down a SLCAN channel. * This means flushing out any pending queues, and then returning. This * call is serialized against other ldisc functions. * Once this is called, no other ldisc function of ours is entered. * * We also use this method for a hangup event. */ static void slcan_close(struct tty_struct *tty) { struct slcan *sl = tty->disc_data; unregister_candev(sl->dev); /* * The netdev needn't be UP (so .ndo_stop() is not called). Hence make * sure this is not running before freeing it up. */ flush_work(&sl->tx_work); /* Mark channel as dead */ spin_lock_bh(&sl->lock); tty->disc_data = NULL; sl->tty = NULL; spin_unlock_bh(&sl->lock); netdev_info(sl->dev, "slcan off %s.\n", tty->name); free_candev(sl->dev); } /* Perform I/O control on an active SLCAN channel. 
*/ static int slcan_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct slcan *sl = tty->disc_data; unsigned int tmp; switch (cmd) { case SIOCGIFNAME: tmp = strlen(sl->dev->name) + 1; if (copy_to_user((void __user *)arg, sl->dev->name, tmp)) return -EFAULT; return 0; case SIOCSIFHWADDR: return -EINVAL; default: return tty_mode_ioctl(tty, cmd, arg); } } static struct tty_ldisc_ops slcan_ldisc = { .owner = THIS_MODULE, .num = N_SLCAN, .name = KBUILD_MODNAME, .open = slcan_open, .close = slcan_close, .ioctl = slcan_ioctl, .receive_buf = slcan_receive_buf, .write_wakeup = slcan_write_wakeup, }; static int __init slcan_init(void) { int status; pr_info("serial line CAN interface driver\n"); /* Fill in our line protocol discipline, and register it */ status = tty_register_ldisc(&slcan_ldisc); if (status) pr_err("can't register line discipline\n"); return status; } static void __exit slcan_exit(void) { /* This will only be called when all channels have been closed by * userspace - tty_ldisc.c takes care of the module's refcount. */ tty_unregister_ldisc(&slcan_ldisc); } module_init(slcan_init); module_exit(slcan_exit);
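The ASCII framing that slcan_bump_frame() and slcan_encaps() above work with is simple enough to exercise outside the kernel. The following stand-alone user-space sketch (illustrative only; parse_slcan() and hexval() are invented names, not part of the driver) parses one SLCAN frame string of the form described in the format comment at the top of the file:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int hexval(char c)
{
	if (c >= '0' && c <= '9') return c - '0';
	if (c >= 'a' && c <= 'f') return c - 'a' + 10;
	if (c >= 'A' && c <= 'F') return c - 'A' + 10;
	return -1;
}

/* 't'/'r': 3 hex ID digits (SFF); 'T'/'R': 8 hex ID digits (EFF);
 * then one DLC digit and two hex digits per data byte. */
static int parse_slcan(const char *s, unsigned int *id, int *len, unsigned char *data)
{
	int idlen = (s[0] == 'T' || s[0] == 'R') ? 8 : 3;
	char idbuf[9] = { 0 };
	int i;

	if (!s[0] || !strchr("tTrR", s[0]) || strlen(s) < (size_t)(1 + idlen + 1))
		return -1;
	memcpy(idbuf, s + 1, idlen);
	*id = (unsigned int)strtoul(idbuf, NULL, 16);
	*len = s[1 + idlen] - '0';
	if (*len < 0 || *len > 8)
		return -1;
	/* RTR frames may carry a DLC > 0 but never any data bytes */
	if (s[0] == 'r' || s[0] == 'R')
		return 0;
	if (strlen(s) < (size_t)(1 + idlen + 1 + 2 * *len))
		return -1;
	for (i = 0; i < *len; i++) {
		int hi = hexval(s[2 + idlen + 2 * i]);
		int lo = hexval(s[2 + idlen + 2 * i + 1]);

		if (hi < 0 || lo < 0)
			return -1;
		data[i] = (unsigned char)(hi << 4 | lo);
	}
	return 0;
}

int main(void)
{
	unsigned char data[8];
	unsigned int id;
	int len, i;

	if (parse_slcan("t4563112233", &id, &len, data) == 0) {
		printf("can_id 0x%03x len %d data:", id, len);
		for (i = 0; i < len; i++)
			printf(" %02x", data[i]);
		printf("\n");	/* expected: can_id 0x456 len 3 data: 11 22 33 */
	}
	return 0;
}

For the second example frame from the format comment this prints the ID, length and payload the driver would put into the can_frame.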
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2001 Jean-Fredric Clere, Nikolas Zimmermann, Georg Acher * Mark Cave-Ayland, Carlo E Prelz, Dick Streefland * Copyright (c) 2002, 2003 Tuukka Toivonen * Copyright (c) 2008 Erik Andrén * * P/N 861037: Sensor HDCS1000 ASIC STV0600 * P/N 861050-0010: Sensor HDCS1000 ASIC STV0600 * P/N 861050-0020: Sensor Photobit PB100 ASIC STV0600-1 - QuickCam Express * P/N 861055: Sensor ST VV6410 ASIC STV0610 - LEGO cam * P/N 861075-0040: Sensor HDCS1000 ASIC * P/N 961179-0700: Sensor ST VV6410 ASIC STV0602 - Dexxa WebCam USB * P/N 861040-0000: Sensor ST VV6410 ASIC STV0610 - QuickCam Web */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/input.h> #include "stv06xx_sensor.h" MODULE_AUTHOR("Erik Andrén"); MODULE_DESCRIPTION("STV06XX USB Camera Driver"); MODULE_LICENSE("GPL"); static bool dump_bridge; static bool dump_sensor; int stv06xx_write_bridge(struct sd *sd, u16 address, u16 i2c_data) { int err; struct gspca_dev
*gspca_dev = (struct gspca_dev *)sd; struct usb_device *udev = sd->gspca_dev.dev; __u8 *buf = sd->gspca_dev.usb_buf; u8 len = (i2c_data > 0xff) ? 2 : 1; buf[0] = i2c_data & 0xff; buf[1] = (i2c_data >> 8) & 0xff; err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x04, 0x40, address, 0, buf, len, STV06XX_URB_MSG_TIMEOUT); gspca_dbg(gspca_dev, D_CONF, "Written 0x%x to address 0x%x, status: %d\n", i2c_data, address, err); return (err < 0) ? err : 0; } int stv06xx_read_bridge(struct sd *sd, u16 address, u8 *i2c_data) { int err; struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; struct usb_device *udev = sd->gspca_dev.dev; __u8 *buf = sd->gspca_dev.usb_buf; err = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x04, 0xc0, address, 0, buf, 1, STV06XX_URB_MSG_TIMEOUT); *i2c_data = buf[0]; gspca_dbg(gspca_dev, D_CONF, "Reading 0x%x from address 0x%x, status %d\n", *i2c_data, address, err); return (err < 0) ? err : 0; } /* Wraps the normal write sensor bytes / words functions for writing a single value */ int stv06xx_write_sensor(struct sd *sd, u8 address, u16 value) { if (sd->sensor->i2c_len == 2) { u16 data[2] = { address, value }; return stv06xx_write_sensor_words(sd, data, 1); } else { u8 data[2] = { address, value }; return stv06xx_write_sensor_bytes(sd, data, 1); } } static int stv06xx_write_sensor_finish(struct sd *sd) { int err = 0; if (sd->bridge == BRIDGE_STV610) { struct usb_device *udev = sd->gspca_dev.dev; __u8 *buf = sd->gspca_dev.usb_buf; buf[0] = 0; err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x04, 0x40, 0x1704, 0, buf, 1, STV06XX_URB_MSG_TIMEOUT); } return (err < 0) ? err : 0; } int stv06xx_write_sensor_bytes(struct sd *sd, const u8 *data, u8 len) { int err, i, j; struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; struct usb_device *udev = sd->gspca_dev.dev; __u8 *buf = sd->gspca_dev.usb_buf; gspca_dbg(gspca_dev, D_CONF, "I2C: Command buffer contains %d entries\n", len); for (i = 0; i < len;) { /* Build the command buffer */ memset(buf, 0, I2C_BUFFER_LENGTH); for (j = 0; j < I2C_MAX_BYTES && i < len; j++, i++) { buf[j] = data[2*i]; buf[0x10 + j] = data[2*i+1]; gspca_dbg(gspca_dev, D_CONF, "I2C: Writing 0x%02x to reg 0x%02x\n", data[2*i+1], data[2*i]); } buf[0x20] = sd->sensor->i2c_addr; buf[0x21] = j - 1; /* Number of commands to send - 1 */ buf[0x22] = I2C_WRITE_CMD; err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x04, 0x40, 0x0400, 0, buf, I2C_BUFFER_LENGTH, STV06XX_URB_MSG_TIMEOUT); if (err < 0) return err; } return stv06xx_write_sensor_finish(sd); } int stv06xx_write_sensor_words(struct sd *sd, const u16 *data, u8 len) { int err, i, j; struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; struct usb_device *udev = sd->gspca_dev.dev; __u8 *buf = sd->gspca_dev.usb_buf; gspca_dbg(gspca_dev, D_CONF, "I2C: Command buffer contains %d entries\n", len); for (i = 0; i < len;) { /* Build the command buffer */ memset(buf, 0, I2C_BUFFER_LENGTH); for (j = 0; j < I2C_MAX_WORDS && i < len; j++, i++) { buf[j] = data[2*i]; buf[0x10 + j * 2] = data[2*i+1]; buf[0x10 + j * 2 + 1] = data[2*i+1] >> 8; gspca_dbg(gspca_dev, D_CONF, "I2C: Writing 0x%04x to reg 0x%02x\n", data[2*i+1], data[2*i]); } buf[0x20] = sd->sensor->i2c_addr; buf[0x21] = j - 1; /* Number of commands to send - 1 */ buf[0x22] = I2C_WRITE_CMD; err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x04, 0x40, 0x0400, 0, buf, I2C_BUFFER_LENGTH, STV06XX_URB_MSG_TIMEOUT); if (err < 0) return err; } return stv06xx_write_sensor_finish(sd); } int stv06xx_read_sensor(struct sd *sd, const u8 address, u16 *value) { 
int err; struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; struct usb_device *udev = sd->gspca_dev.dev; __u8 *buf = sd->gspca_dev.usb_buf; err = stv06xx_write_bridge(sd, STV_I2C_FLUSH, sd->sensor->i2c_flush); if (err < 0) return err; /* Clear mem */ memset(buf, 0, I2C_BUFFER_LENGTH); buf[0] = address; buf[0x20] = sd->sensor->i2c_addr; buf[0x21] = 0; /* Read I2C register */ buf[0x22] = I2C_READ_CMD; err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x04, 0x40, 0x1400, 0, buf, I2C_BUFFER_LENGTH, STV06XX_URB_MSG_TIMEOUT); if (err < 0) { pr_err("I2C: Read error writing address: %d\n", err); return err; } err = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x04, 0xc0, 0x1410, 0, buf, sd->sensor->i2c_len, STV06XX_URB_MSG_TIMEOUT); if (sd->sensor->i2c_len == 2) *value = buf[0] | (buf[1] << 8); else *value = buf[0]; gspca_dbg(gspca_dev, D_CONF, "I2C: Read 0x%x from address 0x%x, status: %d\n", *value, address, err); return (err < 0) ? err : 0; } /* Dumps all bridge registers */ static void stv06xx_dump_bridge(struct sd *sd) { int i; u8 data, buf; pr_info("Dumping all stv06xx bridge registers\n"); for (i = 0x1400; i < 0x160f; i++) { stv06xx_read_bridge(sd, i, &data); pr_info("Read 0x%x from address 0x%x\n", data, i); } pr_info("Testing stv06xx bridge registers for writability\n"); for (i = 0x1400; i < 0x160f; i++) { stv06xx_read_bridge(sd, i, &data); buf = data; stv06xx_write_bridge(sd, i, 0xff); stv06xx_read_bridge(sd, i, &data); if (data == 0xff) pr_info("Register 0x%x is read/write\n", i); else if (data != buf) pr_info("Register 0x%x is read/write, but only partially\n", i); else pr_info("Register 0x%x is read-only\n", i); stv06xx_write_bridge(sd, i, buf); } } /* this function is called at probe and resume time */ static int stv06xx_init(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int err; gspca_dbg(gspca_dev, D_PROBE, "Initializing camera\n"); /* Let the usb init settle for a bit before performing the initialization */ msleep(250); err = sd->sensor->init(sd); if (dump_sensor && sd->sensor->dump) sd->sensor->dump(sd); return (err < 0) ? err : 0; } /* this function is called at probe time */ static int stv06xx_init_controls(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; gspca_dbg(gspca_dev, D_PROBE, "Initializing controls\n"); gspca_dev->vdev.ctrl_handler = &gspca_dev->ctrl_handler; return sd->sensor->init_controls(sd); } /* Start the camera */ static int stv06xx_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; struct usb_host_interface *alt; struct usb_interface *intf; int err, packet_size; intf = usb_ifnum_to_if(sd->gspca_dev.dev, sd->gspca_dev.iface); alt = usb_altnum_to_altsetting(intf, sd->gspca_dev.alt); if (!alt) { gspca_err(gspca_dev, "Couldn't get altsetting\n"); return -EIO; } if (alt->desc.bNumEndpoints < 1) return -ENODEV; packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); err = stv06xx_write_bridge(sd, STV_ISO_SIZE_L, packet_size); if (err < 0) return err; /* Prepare the sensor for start */ err = sd->sensor->start(sd); if (err < 0) goto out; /* Start isochronous streaming */ err = stv06xx_write_bridge(sd, STV_ISO_ENABLE, 1); out: if (err < 0) gspca_dbg(gspca_dev, D_STREAM, "Starting stream failed\n"); else gspca_dbg(gspca_dev, D_STREAM, "Started streaming\n"); return (err < 0) ? 
err : 0; } static int stv06xx_isoc_init(struct gspca_dev *gspca_dev) { struct usb_interface_cache *intfc; struct usb_host_interface *alt; struct sd *sd = (struct sd *) gspca_dev; intfc = gspca_dev->dev->actconfig->intf_cache[0]; if (intfc->num_altsetting < 2) return -ENODEV; alt = &intfc->altsetting[1]; if (alt->desc.bNumEndpoints < 1) return -ENODEV; /* Start isoc bandwidth "negotiation" at max isoc bandwidth */ alt->endpoint[0].desc.wMaxPacketSize = cpu_to_le16(sd->sensor->max_packet_size[gspca_dev->curr_mode]); return 0; } static int stv06xx_isoc_nego(struct gspca_dev *gspca_dev) { int ret, packet_size, min_packet_size; struct usb_host_interface *alt; struct sd *sd = (struct sd *) gspca_dev; /* * Existence of altsetting and endpoint was verified in * stv06xx_isoc_init() */ alt = &gspca_dev->dev->actconfig->intf_cache[0]->altsetting[1]; packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); min_packet_size = sd->sensor->min_packet_size[gspca_dev->curr_mode]; if (packet_size <= min_packet_size) return -EIO; packet_size -= 100; if (packet_size < min_packet_size) packet_size = min_packet_size; alt->endpoint[0].desc.wMaxPacketSize = cpu_to_le16(packet_size); ret = usb_set_interface(gspca_dev->dev, gspca_dev->iface, 1); if (ret < 0) gspca_err(gspca_dev, "set alt 1 err %d\n", ret); return ret; } static void stv06xx_stopN(struct gspca_dev *gspca_dev) { int err; struct sd *sd = (struct sd *) gspca_dev; /* stop ISO-streaming */ err = stv06xx_write_bridge(sd, STV_ISO_ENABLE, 0); if (err < 0) goto out; err = sd->sensor->stop(sd); out: if (err < 0) gspca_dbg(gspca_dev, D_STREAM, "Failed to stop stream\n"); else gspca_dbg(gspca_dev, D_STREAM, "Stopped streaming\n"); } /* * Analyse an USB packet of the data stream and store it appropriately. * Each packet contains an integral number of chunks. Each chunk has * 2-bytes identification, followed by 2-bytes that describe the chunk * length. Known/guessed chunk identifications are: * 8001/8005/C001/C005 - Begin new frame * 8002/8006/C002/C006 - End frame * 0200/4200 - Contains actual image data, bayer or compressed * 0005 - 11 bytes of unknown data * 0100 - 2 bytes of unknown data * The 0005 and 0100 chunks seem to appear only in compressed stream. */ static void stv06xx_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; gspca_dbg(gspca_dev, D_PACK, "Packet of length %d arrived\n", len); /* A packet may contain several frames loop until the whole packet is reached */ while (len) { int id, chunk_len; if (len < 4) { gspca_dbg(gspca_dev, D_PACK, "Packet is smaller than 4 bytes\n"); return; } /* Capture the id */ id = (data[0] << 8) | data[1]; /* Capture the chunk length */ chunk_len = (data[2] << 8) | data[3]; gspca_dbg(gspca_dev, D_PACK, "Chunk id: %x, length: %d\n", id, chunk_len); data += 4; len -= 4; if (len < chunk_len) { gspca_err(gspca_dev, "URB packet length is smaller than the specified chunk length\n"); gspca_dev->last_packet_type = DISCARD_PACKET; return; } /* First byte seem to be 02=data 2nd byte is unknown??? */ if (sd->bridge == BRIDGE_ST6422 && (id & 0xff00) == 0x0200) goto frame_data; switch (id) { case 0x0200: case 0x4200: frame_data: gspca_dbg(gspca_dev, D_PACK, "Frame data packet detected\n"); if (sd->to_skip) { int skip = (sd->to_skip < chunk_len) ? 
sd->to_skip : chunk_len; data += skip; len -= skip; chunk_len -= skip; sd->to_skip -= skip; } gspca_frame_add(gspca_dev, INTER_PACKET, data, chunk_len); break; case 0x8001: case 0x8005: case 0xc001: case 0xc005: gspca_dbg(gspca_dev, D_PACK, "Starting new frame\n"); /* Create a new frame, chunk length should be zero */ gspca_frame_add(gspca_dev, FIRST_PACKET, NULL, 0); if (sd->bridge == BRIDGE_ST6422) sd->to_skip = gspca_dev->pixfmt.width * 4; if (chunk_len) gspca_err(gspca_dev, "Chunk length is non-zero on a SOF\n"); break; case 0x8002: case 0x8006: case 0xc002: gspca_dbg(gspca_dev, D_PACK, "End of frame detected\n"); /* Complete the last frame (if any) */ gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); if (chunk_len) gspca_err(gspca_dev, "Chunk length is non-zero on a EOF\n"); break; case 0x0005: gspca_dbg(gspca_dev, D_PACK, "Chunk 0x005 detected\n"); /* Unknown chunk with 11 bytes of data, occurs just before end of each frame in compressed mode */ break; case 0x0100: gspca_dbg(gspca_dev, D_PACK, "Chunk 0x0100 detected\n"); /* Unknown chunk with 2 bytes of data, occurs 2-3 times per USB interrupt */ break; case 0x42ff: gspca_dbg(gspca_dev, D_PACK, "Chunk 0x42ff detected\n"); /* Special chunk seen sometimes on the ST6422 */ break; default: gspca_dbg(gspca_dev, D_PACK, "Unknown chunk 0x%04x detected\n", id); /* Unknown chunk */ } data += chunk_len; len -= chunk_len; } } #if IS_ENABLED(CONFIG_INPUT) static int sd_int_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* interrupt packet data */ int len) /* interrupt packet length */ { int ret = -EINVAL; if (len == 1 && (data[0] == 0x80 || data[0] == 0x10)) { input_report_key(gspca_dev->input_dev, KEY_CAMERA, 1); input_sync(gspca_dev->input_dev); ret = 0; } if (len == 1 && (data[0] == 0x88 || data[0] == 0x11)) { input_report_key(gspca_dev->input_dev, KEY_CAMERA, 0); input_sync(gspca_dev->input_dev); ret = 0; } return ret; } #endif static int stv06xx_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id); static void stv06xx_probe_error(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *)gspca_dev; kfree(sd->sensor_priv); sd->sensor_priv = NULL; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = stv06xx_config, .init = stv06xx_init, .init_controls = stv06xx_init_controls, .probe_error = stv06xx_probe_error, .start = stv06xx_start, .stopN = stv06xx_stopN, .pkt_scan = stv06xx_pkt_scan, .isoc_init = stv06xx_isoc_init, .isoc_nego = stv06xx_isoc_nego, #if IS_ENABLED(CONFIG_INPUT) .int_pkt_scan = sd_int_pkt_scan, #endif }; /* This function is called at probe time */ static int stv06xx_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; gspca_dbg(gspca_dev, D_PROBE, "Configuring camera\n"); sd->bridge = id->driver_info; gspca_dev->sd_desc = &sd_desc; if (dump_bridge) stv06xx_dump_bridge(sd); sd->sensor = &stv06xx_sensor_st6422; if (!sd->sensor->probe(sd)) return 0; sd->sensor = &stv06xx_sensor_vv6410; if (!sd->sensor->probe(sd)) return 0; sd->sensor = &stv06xx_sensor_hdcs1x00; if (!sd->sensor->probe(sd)) return 0; sd->sensor = &stv06xx_sensor_hdcs1020; if (!sd->sensor->probe(sd)) return 0; sd->sensor = &stv06xx_sensor_pb0100; if (!sd->sensor->probe(sd)) return 0; sd->sensor = NULL; return -ENODEV; } /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x046d, 0x0840), .driver_info = BRIDGE_STV600 }, /* QuickCam Express */ {USB_DEVICE(0x046d, 0x0850), .driver_info = 
BRIDGE_STV610 }, /* LEGO cam / QuickCam Web */ {USB_DEVICE(0x046d, 0x0870), .driver_info = BRIDGE_STV602 }, /* Dexxa WebCam USB */ {USB_DEVICE(0x046D, 0x08F0), .driver_info = BRIDGE_ST6422 }, /* QuickCam Messenger */ {USB_DEVICE(0x046D, 0x08F5), .driver_info = BRIDGE_ST6422 }, /* QuickCam Communicate */ {USB_DEVICE(0x046D, 0x08F6), .driver_info = BRIDGE_ST6422 }, /* QuickCam Messenger (new) */ {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static void sd_disconnect(struct usb_interface *intf) { struct gspca_dev *gspca_dev = usb_get_intfdata(intf); struct sd *sd = (struct sd *) gspca_dev; void *priv = sd->sensor_priv; gspca_dbg(gspca_dev, D_PROBE, "Disconnecting the stv06xx device\n"); sd->sensor = NULL; gspca_disconnect(intf); kfree(priv); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = sd_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver); module_param(dump_bridge, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(dump_bridge, "Dumps all usb bridge registers at startup"); module_param(dump_sensor, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(dump_sensor, "Dumps all sensor registers at startup");
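The isochronous payload that stv06xx_pkt_scan() above walks is a sequence of chunks, each consisting of a 2-byte big-endian id and a 2-byte big-endian length followed by that many payload bytes. The stand-alone sketch below (illustrative only; walk_chunks() and the sample buffer are invented for the example) shows just that framing logic:

#include <stdio.h>

static void walk_chunks(const unsigned char *data, int len)
{
	while (len >= 4) {
		int id = (data[0] << 8) | data[1];
		int chunk_len = (data[2] << 8) | data[3];

		data += 4;
		len -= 4;
		if (chunk_len > len)	/* truncated chunk: discard the rest */
			return;
		printf("chunk id 0x%04x, length %d\n", id, chunk_len);
		data += chunk_len;
		len -= chunk_len;
	}
}

int main(void)
{
	/* 0x8001 start-of-frame (length 0), then a 0x0200 data chunk with 2 bytes */
	const unsigned char pkt[] = { 0x80, 0x01, 0x00, 0x00,
				      0x02, 0x00, 0x00, 0x02, 0xaa, 0x55 };

	walk_chunks(pkt, (int)sizeof(pkt));
	return 0;
}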
13 13 1 12 13 1 13 9 9 7 7 7 7 6 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 // SPDX-License-Identifier: GPL-2.0-only #include "netlink.h" #include "common.h" struct linkinfo_req_info { struct ethnl_req_info base; }; struct linkinfo_reply_data { struct ethnl_reply_data base; struct ethtool_link_ksettings ksettings; struct ethtool_link_settings *lsettings; }; #define LINKINFO_REPDATA(__reply_base) \ container_of(__reply_base, struct linkinfo_reply_data, base) const struct nla_policy ethnl_linkinfo_get_policy[] = { [ETHTOOL_A_LINKINFO_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; static int linkinfo_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct linkinfo_reply_data *data = LINKINFO_REPDATA(reply_base); struct net_device *dev = reply_base->dev; int ret; data->lsettings = &data->ksettings.base; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; ret = __ethtool_get_link_ksettings(dev, &data->ksettings); if (ret < 0) GENL_SET_ERR_MSG(info, "failed to retrieve link settings"); ethnl_ops_complete(dev); return ret; } static int linkinfo_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { return nla_total_size(sizeof(u8)) /* LINKINFO_PORT */ + nla_total_size(sizeof(u8)) /* LINKINFO_PHYADDR */ + nla_total_size(sizeof(u8)) /* LINKINFO_TP_MDIX */ + nla_total_size(sizeof(u8)) /* LINKINFO_TP_MDIX_CTRL */ + nla_total_size(sizeof(u8)) /* LINKINFO_TRANSCEIVER */ + 0; } static int linkinfo_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct linkinfo_reply_data *data = LINKINFO_REPDATA(reply_base); if (nla_put_u8(skb, ETHTOOL_A_LINKINFO_PORT, data->lsettings->port) || nla_put_u8(skb, ETHTOOL_A_LINKINFO_PHYADDR, data->lsettings->phy_address) || nla_put_u8(skb, ETHTOOL_A_LINKINFO_TP_MDIX, data->lsettings->eth_tp_mdix) || nla_put_u8(skb, ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, data->lsettings->eth_tp_mdix_ctrl) || nla_put_u8(skb, ETHTOOL_A_LINKINFO_TRANSCEIVER, data->lsettings->transceiver)) return -EMSGSIZE; return 0; } /* LINKINFO_SET */ const struct nla_policy ethnl_linkinfo_set_policy[] = { [ETHTOOL_A_LINKINFO_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_LINKINFO_PORT] = { .type = NLA_U8 }, [ETHTOOL_A_LINKINFO_PHYADDR] = { .type = NLA_U8 }, [ETHTOOL_A_LINKINFO_TP_MDIX_CTRL] = { .type = NLA_U8 }, }; static int ethnl_set_linkinfo_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; if (!ops->get_link_ksettings || !ops->set_link_ksettings) return -EOPNOTSUPP; return 1; } static int ethnl_set_linkinfo(struct ethnl_req_info *req_info, struct genl_info *info) { struct ethtool_link_ksettings ksettings = {}; struct ethtool_link_settings *lsettings; struct net_device *dev = req_info->dev; struct nlattr **tb = info->attrs; bool mod = false; int ret; ret = __ethtool_get_link_ksettings(dev, &ksettings); if (ret < 0) { GENL_SET_ERR_MSG(info, "failed to retrieve link settings"); return ret; } lsettings = 
&ksettings.base; ethnl_update_u8(&lsettings->port, tb[ETHTOOL_A_LINKINFO_PORT], &mod); ethnl_update_u8(&lsettings->phy_address, tb[ETHTOOL_A_LINKINFO_PHYADDR], &mod); ethnl_update_u8(&lsettings->eth_tp_mdix_ctrl, tb[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL], &mod); if (!mod) return 0; ret = dev->ethtool_ops->set_link_ksettings(dev, &ksettings); if (ret < 0) { GENL_SET_ERR_MSG(info, "link settings update failed"); return ret; } return 1; } const struct ethnl_request_ops ethnl_linkinfo_request_ops = { .request_cmd = ETHTOOL_MSG_LINKINFO_GET, .reply_cmd = ETHTOOL_MSG_LINKINFO_GET_REPLY, .hdr_attr = ETHTOOL_A_LINKINFO_HEADER, .req_info_size = sizeof(struct linkinfo_req_info), .reply_data_size = sizeof(struct linkinfo_reply_data), .prepare_data = linkinfo_prepare_data, .reply_size = linkinfo_reply_size, .fill_reply = linkinfo_fill_reply, .set_validate = ethnl_set_linkinfo_validate, .set = ethnl_set_linkinfo, .set_ntf_cmd = ETHTOOL_MSG_LINKINFO_NTF, };
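ethnl_set_linkinfo() above only calls set_link_ksettings() when at least one of the optional request attributes actually changed a field, which is what the mod flag tracks. The user-space sketch below (illustrative only; struct opt_u8 and update_u8() are invented and merely an analogue of the pattern, not the kernel helper itself) shows the update-and-track-modification idea:

#include <stdbool.h>
#include <stdio.h>

struct opt_u8 {
	bool present;		/* was the attribute included in the request? */
	unsigned char val;
};

static void update_u8(unsigned char *dst, const struct opt_u8 *attr, bool *mod)
{
	/* only overwrite when the attribute is present and differs */
	if (!attr->present || *dst == attr->val)
		return;
	*dst = attr->val;
	*mod = true;
}

int main(void)
{
	unsigned char port = 0, phy_address = 1;
	struct opt_u8 req_port = { true, 4 };	/* request carries a new port value */
	struct opt_u8 req_phy = { false, 0 };	/* attribute not sent */
	bool mod = false;

	update_u8(&port, &req_port, &mod);
	update_u8(&phy_address, &req_phy, &mod);
	/* mod stays false when nothing changed, so the driver call can be skipped */
	printf("port=%d phy=%d mod=%d\n", port, phy_address, mod);
	return 0;
}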
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NAMEI_H #define _LINUX_NAMEI_H #include <linux/fs.h> #include <linux/kernel.h> #include <linux/path.h> #include <linux/fcntl.h> #include <linux/errno.h> enum { MAX_NESTED_LINKS = 8 }; #define MAXSYMLINKS 40 /* * Type of the last component on LOOKUP_PARENT */ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; /* pathwalk mode */ #define LOOKUP_FOLLOW 0x0001 /* follow links at the end */ #define LOOKUP_DIRECTORY 0x0002 /* require a directory */ #define LOOKUP_AUTOMOUNT 0x0004 /* force terminal automount */ #define LOOKUP_EMPTY 0x4000 /* accept empty path [user_... only] */ #define LOOKUP_DOWN 0x8000 /* follow mounts in the starting point */ #define LOOKUP_MOUNTPOINT 0x0080 /* follow mounts in the end */ #define LOOKUP_REVAL 0x0020 /* tell ->d_revalidate() to trust no cache */ #define LOOKUP_RCU 0x0040 /* RCU pathwalk mode; semi-internal */ /* These tell filesystem methods that we are dealing with the final component... */ #define LOOKUP_OPEN 0x0100 /* ... in open */ #define LOOKUP_CREATE 0x0200 /* ... in object creation */ #define LOOKUP_EXCL 0x0400 /* ... in exclusive creation */ #define LOOKUP_RENAME_TARGET 0x0800 /* ... in destination of rename() */ /* internal use only */ #define LOOKUP_PARENT 0x0010 /* Scoping flags for lookup. */ #define LOOKUP_NO_SYMLINKS 0x010000 /* No symlink crossing. */ #define LOOKUP_NO_MAGICLINKS 0x020000 /* No nd_jump_link() crossing. */ #define LOOKUP_NO_XDEV 0x040000 /* No mountpoint crossing. */ #define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */ #define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */ #define LOOKUP_CACHED 0x200000 /* Only do cached lookup */ #define LOOKUP_LINKAT_EMPTY 0x400000 /* Linkat request with empty path. */ /* LOOKUP_* flags which do scope-related checks based on the dirfd.
*/ #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) extern int path_pts(struct path *path); extern int user_path_at(int, const char __user *, unsigned, struct path *); struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct dentry *base, unsigned int flags); extern int kern_path(const char *, unsigned, struct path *); extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root); int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); struct dentry *lookup_one(struct mnt_idmap *, const char *, struct dentry *, int); struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len); struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len); extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern struct dentry *lock_rename_child(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); /** * mode_strip_umask - handle vfs umask stripping * @dir: parent directory of the new inode * @mode: mode of the new inode to be created in @dir * * In most filesystems, umask stripping depends on whether or not the * filesystem supports POSIX ACLs. If the filesystem doesn't support it umask * stripping is done directly in here. If the filesystem does support POSIX * ACLs umask stripping is deferred until the filesystem calls * posix_acl_create(). * * Some filesystems (like NFSv4) also want to avoid umask stripping by the * VFS, but don't support POSIX ACLs. Those filesystems can set SB_I_NOUMASK * to get this effect without declaring that they support POSIX ACLs. * * Returns: mode */ static inline umode_t __must_check mode_strip_umask(const struct inode *dir, umode_t mode) { if (!IS_POSIXACL(dir) && !(dir->i_sb->s_iflags & SB_I_NOUMASK)) mode &= ~current_umask(); return mode; } extern int __must_check nd_jump_link(const struct path *path); static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) { ((char *) name)[min(len, maxlen)] = '\0'; } /** * retry_estale - determine whether the caller should retry an operation * @error: the error that would currently be returned * @flags: flags being used for next lookup attempt * * Check to see if the error code was -ESTALE, and then determine whether * to retry the call based on whether "flags" already has LOOKUP_REVAL set. * * Returns true if the caller should try the operation again. 
*/ static inline bool retry_estale(const long error, const unsigned int flags) { return unlikely(error == -ESTALE && !(flags & LOOKUP_REVAL)); } #endif /* _LINUX_NAMEI_H */
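The kernel-doc for retry_estale() above describes the usual pattern: attempt a path operation with the default lookup flags and, if it fails with -ESTALE, retry once with LOOKUP_REVAL set so the dcache is not trusted. A stand-alone simulation of that control flow (illustrative only; the SIM_* constants and fake_lookup() are invented, and in real code the retried call would be the actual lookup) might look like:

#include <stdbool.h>
#include <stdio.h>

#define SIM_ESTALE		116	/* value of ESTALE on Linux */
#define SIM_LOOKUP_REVAL	0x0020

static bool retry_estale_sim(long error, unsigned int flags)
{
	return error == -SIM_ESTALE && !(flags & SIM_LOOKUP_REVAL);
}

/* Pretend the cached dentry was stale on the first pass only. */
static long fake_lookup(unsigned int flags)
{
	return (flags & SIM_LOOKUP_REVAL) ? 0 : -SIM_ESTALE;
}

int main(void)
{
	unsigned int flags = 0;
	long err;

retry:
	err = fake_lookup(flags);
	if (retry_estale_sim(err, flags)) {
		flags |= SIM_LOOKUP_REVAL;	/* distrust the cache and try again */
		goto retry;
	}
	printf("final result: %ld (flags 0x%x)\n", err, flags);
	return 0;
}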
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2017 Pablo Neira Ayuso <pablo@netfilter.org> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/list.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> struct nft_bitmap_elem { struct nft_elem_priv priv; struct list_head head; struct nft_set_ext ext; }; /* This bitmap uses two bits to represent one element. These two bits determine * the element state in the current and the future generation. * * An element can be in three states. The generation cursor is represented using * the ^ character, note that this cursor shifts on every successful transaction. * If no transaction is going on, we observe that all elements are in one of the * following states: * * 11 = this element is active in the current generation. In case of no updates, * ^ it stays active in the next generation. * 00 = this element is inactive in the current generation. In case of no * ^ updates, it stays inactive in the next generation. * * On transaction handling, we observe these two temporary states: * * 01 = this element is inactive in the current generation and it becomes active * ^ in the next one. This happens when the element is inserted but commit * path has not been executed yet, so activation is still pending. On * transaction abortion, the element is removed. * 10 = this element is active in the current generation and it becomes inactive * ^ in the next one. This happens when the element is deactivated but commit * path has not been executed yet, so removal is still pending. On * transaction abortion, the next generation bit is reset to * restore its previous state. */ struct nft_bitmap { struct list_head list; u16 bitmap_size; u8 bitmap[]; }; static inline void nft_bitmap_location(const struct nft_set *set, const void *key, u32 *idx, u32 *off) { u32 k; if (set->klen == 2) k = *(u16 *)key; else k = *(u8 *)key; k <<= 1; *idx = k / BITS_PER_BYTE; *off = k % BITS_PER_BYTE; } /* Fetch the two bits that represent the element and check if it is active based * on the generation mask.
*/ static inline bool nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask) { return (bitmap[idx] & (0x3 << off)) & (genmask << off); } INDIRECT_CALLABLE_SCOPE bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { const struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_cur(net); u32 idx, off; nft_bitmap_location(set, key, &idx, &off); return nft_bitmap_active(priv->bitmap, idx, off, genmask); } static struct nft_bitmap_elem * nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this, u8 genmask) { const struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be; list_for_each_entry_rcu(be, &priv->list, head) { if (memcmp(nft_set_ext_key(&be->ext), nft_set_ext_key(&this->ext), set->klen) || !nft_set_elem_active(&be->ext, genmask)) continue; return be; } return NULL; } static struct nft_elem_priv * nft_bitmap_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { const struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_cur(net); struct nft_bitmap_elem *be; list_for_each_entry_rcu(be, &priv->list, head) { if (memcmp(nft_set_ext_key(&be->ext), elem->key.val.data, set->klen) || !nft_set_elem_active(&be->ext, genmask)) continue; return &be->priv; } return ERR_PTR(-ENOENT); } static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_elem_priv **elem_priv) { struct nft_bitmap_elem *new = nft_elem_priv_cast(elem->priv), *be; struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); u32 idx, off; be = nft_bitmap_elem_find(set, new, genmask); if (be) { *elem_priv = &be->priv; return -EEXIST; } nft_bitmap_location(set, nft_set_ext_key(&new->ext), &idx, &off); /* Enter 01 state. */ priv->bitmap[idx] |= (genmask << off); list_add_tail_rcu(&new->head, &priv->list); return 0; } static void nft_bitmap_remove(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv); struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); u32 idx, off; nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 00 state. */ priv->bitmap[idx] &= ~(genmask << off); list_del_rcu(&be->head); } static void nft_bitmap_activate(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv); struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); u32 idx, off; nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 11 state. */ priv->bitmap[idx] |= (genmask << off); nft_clear(net, &be->ext); } static void nft_bitmap_flush(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv); struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); u32 idx, off; nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 10 state, similar to deactivation. 
*/ priv->bitmap[idx] &= ~(genmask << off); nft_set_elem_change_active(net, set, &be->ext); } static struct nft_elem_priv * nft_bitmap_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_bitmap_elem *this = nft_elem_priv_cast(elem->priv), *be; struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); u32 idx, off; nft_bitmap_location(set, elem->key.val.data, &idx, &off); be = nft_bitmap_elem_find(set, this, genmask); if (!be) return NULL; /* Enter 10 state. */ priv->bitmap[idx] &= ~(genmask << off); nft_set_elem_change_active(net, set, &be->ext); return &be->priv; } static void nft_bitmap_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { const struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be; list_for_each_entry_rcu(be, &priv->list, head) { if (iter->count < iter->skip) goto cont; iter->err = iter->fn(ctx, set, iter, &be->priv); if (iter->err < 0) return; cont: iter->count++; } } /* The bitmap size is pow(2, key length in bits) / bits per byte. This is * multiplied by two since each element takes two bits. For 8 bit keys, the * bitmap consumes 66 bytes. For 16 bit keys, 16388 bytes. */ static inline u32 nft_bitmap_size(u32 klen) { return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1; } static inline u64 nft_bitmap_total_size(u32 klen) { return sizeof(struct nft_bitmap) + nft_bitmap_size(klen); } static u64 nft_bitmap_privsize(const struct nlattr * const nla[], const struct nft_set_desc *desc) { u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); return nft_bitmap_total_size(klen); } static int nft_bitmap_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const nla[]) { struct nft_bitmap *priv = nft_set_priv(set); BUILD_BUG_ON(offsetof(struct nft_bitmap_elem, priv) != 0); INIT_LIST_HEAD(&priv->list); priv->bitmap_size = nft_bitmap_size(set->klen); return 0; } static void nft_bitmap_destroy(const struct nft_ctx *ctx, const struct nft_set *set) { struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be, *n; list_for_each_entry_safe(be, n, &priv->list, head) nf_tables_set_elem_destroy(ctx, set, &be->priv); } static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est) { /* Make sure bitmaps we don't get bitmaps larger than 16 Kbytes. */ if (desc->klen > 2) return false; else if (desc->expr) return false; est->size = nft_bitmap_total_size(desc->klen); est->lookup = NFT_SET_CLASS_O_1; est->space = NFT_SET_CLASS_O_1; return true; } const struct nft_set_type nft_set_bitmap_type = { .ops = { .privsize = nft_bitmap_privsize, .elemsize = offsetof(struct nft_bitmap_elem, ext), .estimate = nft_bitmap_estimate, .init = nft_bitmap_init, .destroy = nft_bitmap_destroy, .insert = nft_bitmap_insert, .remove = nft_bitmap_remove, .deactivate = nft_bitmap_deactivate, .flush = nft_bitmap_flush, .activate = nft_bitmap_activate, .lookup = nft_bitmap_lookup, .walk = nft_bitmap_walk, .get = nft_bitmap_get, }, };
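The comment block at the top of nft_set_bitmap.c above explains that every possible key owns two adjacent bits and that an element counts as active when those bits intersect the current generation mask. The stand-alone sketch below (illustrative only; fixed to one-byte keys, with invented helper names) reproduces just the location and activity math used by nft_bitmap_location() and nft_bitmap_active():

#include <stdio.h>
#include <string.h>

#define BITS_PER_BYTE 8

static void location(unsigned char key, unsigned int *idx, unsigned int *off)
{
	unsigned int k = (unsigned int)key << 1;	/* two bits per element */

	*idx = k / BITS_PER_BYTE;
	*off = k % BITS_PER_BYTE;
}

static int active(const unsigned char *bitmap, unsigned int idx,
		  unsigned int off, unsigned char genmask)
{
	return (bitmap[idx] & (0x3 << off)) & (genmask << off) ? 1 : 0;
}

int main(void)
{
	unsigned char bitmap[64];		/* enough for all 256 one-byte keys at two bits each */
	unsigned char cur_genmask = 0x1;	/* pretend bit 0 is the current generation */
	unsigned int idx, off;

	memset(bitmap, 0, sizeof(bitmap));

	location(0x2a, &idx, &off);
	bitmap[idx] |= (0x3 << off);		/* 11 = active now and in the next generation */
	printf("key 0x2a active: %d\n", active(bitmap, idx, off, cur_genmask));

	location(0x2b, &idx, &off);		/* never inserted, stays 00 */
	printf("key 0x2b active: %d\n", active(bitmap, idx, off, cur_genmask));
	return 0;
}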
// SPDX-License-Identifier: GPL-2.0-only /* * IBSS mode implementation * Copyright 2003-2008, Jouni Malinen <j@w1.fi> * Copyright 2004, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2009, Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH * Copyright(c) 2018-2024 Intel Corporation */ #include <linux/delay.h> #include <linux/slab.h> #include <linux/if_ether.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" #define IEEE80211_SCAN_INTERVAL (2 * HZ) #define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) #define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ) #define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ) #define IEEE80211_IBSS_RSN_INACTIVITY_LIMIT (10 * HZ) #define IEEE80211_IBSS_MAX_STA_ENTRIES 128 static struct beacon_data * ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, const int beacon_int, const u32 basic_rates, const u16 capability, u64 tsf, struct cfg80211_chan_def *chandef, bool *have_higher_than_11mbit, struct cfg80211_csa_settings *csa_settings) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; int rates_n = 0, i, ri; struct ieee80211_mgmt *mgmt; u8 *pos; struct ieee80211_supported_band *sband; u32 rate_flags, rates = 0, rates_added = 0; struct beacon_data *presp; int frame_len; /* Build IBSS probe response */ frame_len = sizeof(struct ieee80211_hdr_3addr) + 12 /* struct ieee80211_mgmt.u.beacon */ + 2 + IEEE80211_MAX_SSID_LEN /* max SSID */ + 2 + 8 /* max Supported Rates */ + 3 /* max DS params */ + 4 /* IBSS params */ + 5 /* Channel Switch Announcement */ + 2 + (IEEE80211_MAX_SUPP_RATES - 8) + 2 + sizeof(struct ieee80211_ht_cap) + 2 + sizeof(struct ieee80211_ht_operation) + 2 + sizeof(struct ieee80211_vht_cap) + 2 + sizeof(struct ieee80211_vht_operation) + ifibss->ie_len; presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL); if (!presp) return NULL;
presp->head = (void *)(presp + 1); mgmt = (void *) presp->head; mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_RESP); eth_broadcast_addr(mgmt->da); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN); mgmt->u.beacon.beacon_int = cpu_to_le16(beacon_int); mgmt->u.beacon.timestamp = cpu_to_le64(tsf); mgmt->u.beacon.capab_info = cpu_to_le16(capability); pos = (u8 *)mgmt + offsetof(struct ieee80211_mgmt, u.beacon.variable); *pos++ = WLAN_EID_SSID; *pos++ = ifibss->ssid_len; memcpy(pos, ifibss->ssid, ifibss->ssid_len); pos += ifibss->ssid_len; sband = local->hw.wiphy->bands[chandef->chan->band]; rate_flags = ieee80211_chandef_rate_flags(chandef); rates_n = 0; if (have_higher_than_11mbit) *have_higher_than_11mbit = false; for (i = 0; i < sband->n_bitrates; i++) { if ((rate_flags & sband->bitrates[i].flags) != rate_flags) continue; if (sband->bitrates[i].bitrate > 110 && have_higher_than_11mbit) *have_higher_than_11mbit = true; rates |= BIT(i); rates_n++; } *pos++ = WLAN_EID_SUPP_RATES; *pos++ = min_t(int, 8, rates_n); for (ri = 0; ri < sband->n_bitrates; ri++) { int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate, 5); u8 basic = 0; if (!(rates & BIT(ri))) continue; if (basic_rates & BIT(ri)) basic = 0x80; *pos++ = basic | (u8) rate; if (++rates_added == 8) { ri++; /* continue at next rate for EXT_SUPP_RATES */ break; } } if (sband->band == NL80211_BAND_2GHZ) { *pos++ = WLAN_EID_DS_PARAMS; *pos++ = 1; *pos++ = ieee80211_frequency_to_channel( chandef->chan->center_freq); } *pos++ = WLAN_EID_IBSS_PARAMS; *pos++ = 2; /* FIX: set ATIM window based on scan results */ *pos++ = 0; *pos++ = 0; if (csa_settings) { *pos++ = WLAN_EID_CHANNEL_SWITCH; *pos++ = 3; *pos++ = csa_settings->block_tx ? 1 : 0; *pos++ = ieee80211_frequency_to_channel( csa_settings->chandef.chan->center_freq); presp->cntdwn_counter_offsets[0] = (pos - presp->head); *pos++ = csa_settings->count; presp->cntdwn_current_counter = csa_settings->count; } /* put the remaining rates in WLAN_EID_EXT_SUPP_RATES */ if (rates_n > 8) { *pos++ = WLAN_EID_EXT_SUPP_RATES; *pos++ = rates_n - 8; for (; ri < sband->n_bitrates; ri++) { int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate, 5); u8 basic = 0; if (!(rates & BIT(ri))) continue; if (basic_rates & BIT(ri)) basic = 0x80; *pos++ = basic | (u8) rate; } } if (ifibss->ie_len) { memcpy(pos, ifibss->ie, ifibss->ie_len); pos += ifibss->ie_len; } /* add HT capability and information IEs */ if (chandef->width != NL80211_CHAN_WIDTH_20_NOHT && chandef->width != NL80211_CHAN_WIDTH_5 && chandef->width != NL80211_CHAN_WIDTH_10 && sband->ht_cap.ht_supported) { struct ieee80211_sta_ht_cap ht_cap; memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap)); ieee80211_apply_htcap_overrides(sdata, &ht_cap); pos = ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); /* * Note: According to 802.11n-2009 9.13.3.1, HT Protection * field and RIFS Mode are reserved in IBSS mode, therefore * keep them at 0 */ pos = ieee80211_ie_build_ht_oper(pos, &sband->ht_cap, chandef, 0, false); /* add VHT capability and information IEs */ if (chandef->width != NL80211_CHAN_WIDTH_20 && chandef->width != NL80211_CHAN_WIDTH_40 && sband->vht_cap.vht_supported) { pos = ieee80211_ie_build_vht_cap(pos, &sband->vht_cap, sband->vht_cap.cap); pos = ieee80211_ie_build_vht_oper(pos, &sband->vht_cap, chandef); } } if (local->hw.queues >= IEEE80211_NUM_ACS) pos = ieee80211_add_wmm_info_ie(pos, 0); /* U-APSD not in use */ presp->head_len = pos - presp->head; if (WARN_ON(presp->head_len > 
frame_len)) goto error; return presp; error: kfree(presp); return NULL; } static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, const u8 *bssid, const int beacon_int, struct cfg80211_chan_def *req_chandef, const u32 basic_rates, const u16 capability, u64 tsf, bool creator) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct cfg80211_bss *bss; u64 bss_change; struct ieee80211_chan_req chanreq = {}; struct ieee80211_channel *chan; struct beacon_data *presp; struct cfg80211_inform_bss bss_meta = {}; bool have_higher_than_11mbit; bool radar_required; int err; lockdep_assert_wiphy(local->hw.wiphy); /* Reset own TSF to allow time synchronization work. */ drv_reset_tsf(local, sdata); if (!ether_addr_equal(ifibss->bssid, bssid)) sta_info_flush(sdata, -1); /* if merging, indicate to driver that we leave the old IBSS */ if (sdata->vif.cfg.ibss_joined) { sdata->vif.cfg.ibss_joined = false; sdata->vif.cfg.ibss_creator = false; sdata->vif.bss_conf.enable_beacon = false; netif_carrier_off(sdata->dev); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IBSS | BSS_CHANGED_BEACON_ENABLED); drv_leave_ibss(local, sdata); } presp = sdata_dereference(ifibss->presp, sdata); RCU_INIT_POINTER(ifibss->presp, NULL); if (presp) kfree_rcu(presp, rcu_head); /* make a copy of the chandef, it could be modified below. */ chanreq.oper = *req_chandef; chan = chanreq.oper.chan; if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chanreq.oper, NL80211_IFTYPE_ADHOC)) { if (chanreq.oper.width == NL80211_CHAN_WIDTH_5 || chanreq.oper.width == NL80211_CHAN_WIDTH_10 || chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || chanreq.oper.width == NL80211_CHAN_WIDTH_20) { sdata_info(sdata, "Failed to join IBSS, beacons forbidden\n"); return; } chanreq.oper.width = NL80211_CHAN_WIDTH_20; chanreq.oper.center_freq1 = chan->center_freq; /* check again for downgraded chandef */ if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chanreq.oper, NL80211_IFTYPE_ADHOC)) { sdata_info(sdata, "Failed to join IBSS, beacons forbidden\n"); return; } } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, &chanreq.oper, NL80211_IFTYPE_ADHOC); if (err < 0) { sdata_info(sdata, "Failed to join IBSS, invalid chandef\n"); return; } if (err > 0 && !ifibss->userspace_handles_dfs) { sdata_info(sdata, "Failed to join IBSS, DFS channel without control program\n"); return; } radar_required = err; if (ieee80211_link_use_channel(&sdata->deflink, &chanreq, ifibss->fixed_channel ? 
IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE)) { sdata_info(sdata, "Failed to join IBSS, no channel context\n"); return; } sdata->deflink.radar_required = radar_required; memcpy(ifibss->bssid, bssid, ETH_ALEN); presp = ieee80211_ibss_build_presp(sdata, beacon_int, basic_rates, capability, tsf, &chanreq.oper, &have_higher_than_11mbit, NULL); if (!presp) return; rcu_assign_pointer(ifibss->presp, presp); mgmt = (void *)presp->head; sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.beacon_int = beacon_int; sdata->vif.bss_conf.basic_rates = basic_rates; sdata->vif.cfg.ssid_len = ifibss->ssid_len; memcpy(sdata->vif.cfg.ssid, ifibss->ssid, ifibss->ssid_len); bss_change = BSS_CHANGED_BEACON_INT; bss_change |= ieee80211_reset_erp_info(sdata); bss_change |= BSS_CHANGED_BSSID; bss_change |= BSS_CHANGED_BEACON; bss_change |= BSS_CHANGED_BEACON_ENABLED; bss_change |= BSS_CHANGED_BASIC_RATES; bss_change |= BSS_CHANGED_HT; bss_change |= BSS_CHANGED_IBSS; bss_change |= BSS_CHANGED_SSID; /* * In 5 GHz/802.11a, we can always use short slot time. * (IEEE 802.11-2012 18.3.8.7) * * In 2.4GHz, we must always use long slots in IBSS for compatibility * reasons. * (IEEE 802.11-2012 19.4.5) * * HT follows these specifications (IEEE 802.11-2012 20.3.18) */ sdata->vif.bss_conf.use_short_slot = chan->band == NL80211_BAND_5GHZ; bss_change |= BSS_CHANGED_ERP_SLOT; /* cf. IEEE 802.11 9.2.12 */ sdata->deflink.operating_11g_mode = chan->band == NL80211_BAND_2GHZ && have_higher_than_11mbit; ieee80211_set_wmm_default(&sdata->deflink, true, false); sdata->vif.cfg.ibss_joined = true; sdata->vif.cfg.ibss_creator = creator; err = drv_join_ibss(local, sdata); if (err) { sdata->vif.cfg.ibss_joined = false; sdata->vif.cfg.ibss_creator = false; sdata->vif.bss_conf.enable_beacon = false; sdata->vif.cfg.ssid_len = 0; RCU_INIT_POINTER(ifibss->presp, NULL); kfree_rcu(presp, rcu_head); ieee80211_link_release_channel(&sdata->deflink); sdata_info(sdata, "Failed to join IBSS, driver failure: %d\n", err); return; } ieee80211_bss_info_change_notify(sdata, bss_change); ifibss->state = IEEE80211_IBSS_MLME_JOINED; mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); bss_meta.chan = chan; bss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta, mgmt, presp->head_len, GFP_KERNEL); cfg80211_put_bss(local->hw.wiphy, bss); netif_carrier_on(sdata->dev); cfg80211_ibss_joined(sdata->dev, ifibss->bssid, chan, GFP_KERNEL); } static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss) { struct cfg80211_bss *cbss = container_of((void *)bss, struct cfg80211_bss, priv); struct ieee80211_supported_band *sband; struct cfg80211_chan_def chandef; u32 basic_rates; int i, j; u16 beacon_int = cbss->beacon_interval; const struct cfg80211_bss_ies *ies; enum nl80211_channel_type chan_type; u64 tsf; u32 rate_flags; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (beacon_int < 10) beacon_int = 10; switch (sdata->u.ibss.chandef.width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_40: chan_type = cfg80211_get_chandef_type(&sdata->u.ibss.chandef); cfg80211_chandef_create(&chandef, cbss->channel, chan_type); break; case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: cfg80211_chandef_create(&chandef, cbss->channel, NL80211_CHAN_NO_HT); chandef.width = sdata->u.ibss.chandef.width; break; case NL80211_CHAN_WIDTH_80: case NL80211_CHAN_WIDTH_80P80: case NL80211_CHAN_WIDTH_160: chandef = sdata->u.ibss.chandef; chandef.chan = 
cbss->channel; break; default: /* fall back to 20 MHz for unsupported modes */ cfg80211_chandef_create(&chandef, cbss->channel, NL80211_CHAN_NO_HT); break; } sband = sdata->local->hw.wiphy->bands[cbss->channel->band]; rate_flags = ieee80211_chandef_rate_flags(&sdata->u.ibss.chandef); basic_rates = 0; for (i = 0; i < bss->supp_rates_len; i++) { int rate = bss->supp_rates[i] & 0x7f; bool is_basic = !!(bss->supp_rates[i] & 0x80); for (j = 0; j < sband->n_bitrates; j++) { int brate; if ((rate_flags & sband->bitrates[j].flags) != rate_flags) continue; brate = DIV_ROUND_UP(sband->bitrates[j].bitrate, 5); if (brate == rate) { if (is_basic) basic_rates |= BIT(j); break; } } } rcu_read_lock(); ies = rcu_dereference(cbss->ies); tsf = ies->tsf; rcu_read_unlock(); __ieee80211_sta_join_ibss(sdata, cbss->bssid, beacon_int, &chandef, basic_rates, cbss->capability, tsf, false); } int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings *csa_settings, u64 *changed) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct beacon_data *presp, *old_presp; struct cfg80211_bss *cbss; const struct cfg80211_bss_ies *ies; u16 capability = WLAN_CAPABILITY_IBSS; u64 tsf; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (ifibss->privacy) capability |= WLAN_CAPABILITY_PRIVACY; cbss = cfg80211_get_bss(sdata->local->hw.wiphy, ifibss->chandef.chan, ifibss->bssid, ifibss->ssid, ifibss->ssid_len, IEEE80211_BSS_TYPE_IBSS, IEEE80211_PRIVACY(ifibss->privacy)); if (unlikely(!cbss)) return -EINVAL; rcu_read_lock(); ies = rcu_dereference(cbss->ies); tsf = ies->tsf; rcu_read_unlock(); cfg80211_put_bss(sdata->local->hw.wiphy, cbss); old_presp = sdata_dereference(ifibss->presp, sdata); presp = ieee80211_ibss_build_presp(sdata, sdata->vif.bss_conf.beacon_int, sdata->vif.bss_conf.basic_rates, capability, tsf, &ifibss->chandef, NULL, csa_settings); if (!presp) return -ENOMEM; rcu_assign_pointer(ifibss->presp, presp); if (old_presp) kfree_rcu(old_presp, rcu_head); *changed |= BSS_CHANGED_BEACON; return 0; } int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct cfg80211_bss *cbss; lockdep_assert_wiphy(sdata->local->hw.wiphy); /* When not connected/joined, sending CSA doesn't make sense. 
*/ if (ifibss->state != IEEE80211_IBSS_MLME_JOINED) return -ENOLINK; /* update cfg80211 bss information with the new channel */ if (!is_zero_ether_addr(ifibss->bssid)) { cbss = cfg80211_get_bss(sdata->local->hw.wiphy, ifibss->chandef.chan, ifibss->bssid, ifibss->ssid, ifibss->ssid_len, IEEE80211_BSS_TYPE_IBSS, IEEE80211_PRIVACY(ifibss->privacy)); /* XXX: should not really modify cfg80211 data */ if (cbss) { cbss->channel = sdata->deflink.csa.chanreq.oper.chan; cfg80211_put_bss(sdata->local->hw.wiphy, cbss); } } ifibss->chandef = sdata->deflink.csa.chanreq.oper; /* generate the beacon */ return ieee80211_ibss_csa_beacon(sdata, NULL, changed); } void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; wiphy_work_cancel(sdata->local->hw.wiphy, &ifibss->csa_connection_drop_work); } static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta) __acquires(RCU) { struct ieee80211_sub_if_data *sdata = sta->sdata; u8 addr[ETH_ALEN]; memcpy(addr, sta->sta.addr, ETH_ALEN); ibss_dbg(sdata, "Adding new IBSS station %pM\n", addr); sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); /* authorize the station only if the network is not RSN protected. If * not wait for the userspace to authorize it */ if (!sta->sdata->u.ibss.control_port) sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED); rate_control_rate_init(sta); /* If it fails, maybe we raced another insertion? */ if (sta_info_insert_rcu(sta)) return sta_info_get(sdata, addr); return sta; } static struct sta_info * ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, const u8 *addr, u32 supp_rates) __acquires(RCU) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_supported_band *sband; int band; /* * XXX: Consider removing the least recently used entry and * allow new one to be added. 
*/ if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { net_info_ratelimited("%s: No room for a new IBSS STA entry %pM\n", sdata->name, addr); rcu_read_lock(); return NULL; } if (ifibss->state == IEEE80211_IBSS_MLME_SEARCH) { rcu_read_lock(); return NULL; } if (!ether_addr_equal(bssid, sdata->u.ibss.bssid)) { rcu_read_lock(); return NULL; } rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON_ONCE(!chanctx_conf)) return NULL; band = chanctx_conf->def.chan->band; rcu_read_unlock(); sta = sta_info_alloc(sdata, addr, GFP_KERNEL); if (!sta) { rcu_read_lock(); return NULL; } /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; sta->sta.deflink.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(sband); return ieee80211_ibss_finish_sta(sta); } static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; int active = 0; struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { unsigned long last_active = ieee80211_sta_last_active(sta); if (sta->sdata == sdata && time_is_after_jiffies(last_active + IEEE80211_IBSS_MERGE_INTERVAL)) { active++; break; } } rcu_read_unlock(); return active; } static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct cfg80211_bss *cbss; struct beacon_data *presp; struct sta_info *sta; lockdep_assert_wiphy(local->hw.wiphy); if (!is_zero_ether_addr(ifibss->bssid)) { cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan, ifibss->bssid, ifibss->ssid, ifibss->ssid_len, IEEE80211_BSS_TYPE_IBSS, IEEE80211_PRIVACY(ifibss->privacy)); if (cbss) { cfg80211_unlink_bss(local->hw.wiphy, cbss); cfg80211_put_bss(sdata->local->hw.wiphy, cbss); } } ifibss->state = IEEE80211_IBSS_MLME_SEARCH; sta_info_flush(sdata, -1); spin_lock_bh(&ifibss->incomplete_lock); while (!list_empty(&ifibss->incomplete_stations)) { sta = list_first_entry(&ifibss->incomplete_stations, struct sta_info, list); list_del(&sta->list); spin_unlock_bh(&ifibss->incomplete_lock); sta_info_free(local, sta); spin_lock_bh(&ifibss->incomplete_lock); } spin_unlock_bh(&ifibss->incomplete_lock); netif_carrier_off(sdata->dev); sdata->vif.cfg.ibss_joined = false; sdata->vif.cfg.ibss_creator = false; sdata->vif.bss_conf.enable_beacon = false; sdata->vif.cfg.ssid_len = 0; /* remove beacon */ presp = sdata_dereference(ifibss->presp, sdata); RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); if (presp) kfree_rcu(presp, rcu_head); clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); drv_leave_ibss(local, sdata); ieee80211_link_release_channel(&sdata->deflink); } static void ieee80211_csa_connection_drop_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, u.ibss.csa_connection_drop_work); ieee80211_ibss_disconnect(sdata); synchronize_rcu(); skb_queue_purge(&sdata->skb_queue); /* trigger a scan to find another IBSS network to join */ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } static void ieee80211_ibss_csa_mark_radar(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; int err; /* if the current channel is a DFS channel, mark the channel as * unavailable. 
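 *
 * cfg80211_chandef_dfs_required() returns a positive value when the chandef
 * requires radar detection, so the radar event is only reported for DFS
 * channels.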
*/ err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, &ifibss->chandef, NL80211_IFTYPE_ADHOC); if (err > 0) cfg80211_radar_event(sdata->local->hw.wiphy, &ifibss->chandef, GFP_ATOMIC); } static bool ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, bool beacon) { struct cfg80211_csa_settings params; struct ieee80211_csa_ie csa_ie; struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; enum nl80211_channel_type ch_type; int err; struct ieee80211_conn_settings conn = { .mode = IEEE80211_CONN_MODE_HT, .bw_limit = IEEE80211_CONN_BW_LIMIT_40, }; u32 vht_cap_info = 0; lockdep_assert_wiphy(sdata->local->hw.wiphy); switch (ifibss->chandef.width) { case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20_NOHT: conn.mode = IEEE80211_CONN_MODE_LEGACY; fallthrough; case NL80211_CHAN_WIDTH_20: conn.bw_limit = IEEE80211_CONN_BW_LIMIT_20; break; default: break; } if (elems->vht_cap_elem) vht_cap_info = le32_to_cpu(elems->vht_cap_elem->vht_cap_info); memset(&params, 0, sizeof(params)); err = ieee80211_parse_ch_switch_ie(sdata, elems, ifibss->chandef.chan->band, vht_cap_info, &conn, ifibss->bssid, false, &csa_ie); /* can't switch to destination channel, fail */ if (err < 0) goto disconnect; /* did not contain a CSA */ if (err) return false; /* channel switch is not supported, disconnect */ if (!(sdata->local->hw.wiphy->flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) goto disconnect; params.count = csa_ie.count; params.chandef = csa_ie.chanreq.oper; switch (ifibss->chandef.width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_40: /* keep our current HT mode (HT20/HT40+/HT40-), even if * another mode has been announced. The mode is not adopted * within the beacon while doing CSA and we should therefore * keep the mode which we announce. */ ch_type = cfg80211_get_chandef_type(&ifibss->chandef); cfg80211_chandef_create(&params.chandef, params.chandef.chan, ch_type); break; case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: if (params.chandef.width != ifibss->chandef.width) { sdata_info(sdata, "IBSS %pM received channel switch from incompatible channel width (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", ifibss->bssid, params.chandef.chan->center_freq, params.chandef.width, params.chandef.center_freq1, params.chandef.center_freq2); goto disconnect; } break; default: /* should not happen, conn_flags should prevent VHT modes. */ WARN_ON(1); goto disconnect; } if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, &params.chandef, NL80211_IFTYPE_ADHOC)) { sdata_info(sdata, "IBSS %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", ifibss->bssid, params.chandef.chan->center_freq, params.chandef.width, params.chandef.center_freq1, params.chandef.center_freq2); goto disconnect; } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, &params.chandef, NL80211_IFTYPE_ADHOC); if (err < 0) goto disconnect; if (err > 0 && !ifibss->userspace_handles_dfs) { /* IBSS-DFS only allowed with a control program */ goto disconnect; } params.radar_required = err; if (cfg80211_chandef_identical(&params.chandef, &sdata->vif.bss_conf.chanreq.oper)) { ibss_dbg(sdata, "received csa with an identical chandef, ignoring\n"); return true; } /* all checks done, now perform the channel switch. 
*/ ibss_dbg(sdata, "received channel switch announcement to go to channel %d MHz\n", params.chandef.chan->center_freq); params.block_tx = !!csa_ie.mode; if (ieee80211_channel_switch(sdata->local->hw.wiphy, sdata->dev, &params)) goto disconnect; ieee80211_ibss_csa_mark_radar(sdata); return true; disconnect: ibss_dbg(sdata, "Can't handle channel switch, disconnect\n"); wiphy_work_queue(sdata->local->hw.wiphy, &ifibss->csa_connection_drop_work); ieee80211_ibss_csa_mark_radar(sdata); return true; } static void ieee80211_rx_mgmt_spectrum_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, struct ieee802_11_elems *elems) { int required_len; if (len < IEEE80211_MIN_ACTION_SIZE + 1) return; /* CSA is the only action we handle for now */ if (mgmt->u.action.u.measurement.action_code != WLAN_ACTION_SPCT_CHL_SWITCH) return; required_len = IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.chan_switch); if (len < required_len) return; if (!sdata->vif.bss_conf.csa_active) ieee80211_ibss_process_chanswitch(sdata, elems, false); } static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { u16 reason = le16_to_cpu(mgmt->u.deauth.reason_code); if (len < IEEE80211_DEAUTH_FRAME_LEN) return; ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da); ibss_dbg(sdata, "\tBSSID=%pM (reason: %d)\n", mgmt->bssid, reason); sta_info_destroy_addr(sdata, mgmt->sa); } static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { u16 auth_alg, auth_transaction; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (len < 24 + 6) return; auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); ibss_dbg(sdata, "RX Auth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da); ibss_dbg(sdata, "\tBSSID=%pM (auth_transaction=%d)\n", mgmt->bssid, auth_transaction); if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; /* * IEEE 802.11 standard does not require authentication in IBSS * networks and most implementations do not seem to use it. * However, try to reply to authentication attempts if someone * has actually implemented this. 
*/ ieee80211_send_auth(sdata, 2, WLAN_AUTH_OPEN, 0, NULL, 0, mgmt->sa, sdata->u.ibss.bssid, NULL, 0, 0, 0); } static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, struct ieee802_11_elems *elems, struct ieee80211_channel *channel) { struct sta_info *sta; enum nl80211_band band = rx_status->band; struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; bool rates_updated = false; u32 supp_rates = 0; if (sdata->vif.type != NL80211_IFTYPE_ADHOC) return; if (!ether_addr_equal(mgmt->bssid, sdata->u.ibss.bssid)) return; sband = local->hw.wiphy->bands[band]; if (WARN_ON(!sband)) return; rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); if (elems->supp_rates) { supp_rates = ieee80211_sta_get_rates(sdata, elems, band, NULL); if (sta) { u32 prev_rates; prev_rates = sta->sta.deflink.supp_rates[band]; sta->sta.deflink.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(sband); if (sta->sta.deflink.supp_rates[band] != prev_rates) { ibss_dbg(sdata, "updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", sta->sta.addr, prev_rates, sta->sta.deflink.supp_rates[band]); rates_updated = true; } } else { rcu_read_unlock(); sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); } } if (sta && !sta->sta.wme && (elems->wmm_info || elems->s1g_capab) && local->hw.queues >= IEEE80211_NUM_ACS) { sta->sta.wme = true; ieee80211_check_fast_xmit(sta); } if (sta && elems->ht_operation && elems->ht_cap_elem && sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_5 && sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_10) { /* we both use HT */ struct ieee80211_ht_cap htcap_ie; struct cfg80211_chan_def chandef; enum ieee80211_sta_rx_bandwidth bw = sta->sta.deflink.bandwidth; cfg80211_chandef_create(&chandef, channel, NL80211_CHAN_NO_HT); ieee80211_chandef_ht_oper(elems->ht_operation, &chandef); memcpy(&htcap_ie, elems->ht_cap_elem, sizeof(htcap_ie)); rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, &htcap_ie, &sta->deflink); if (elems->vht_operation && elems->vht_cap_elem && sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20 && sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_40) { /* we both use VHT */ struct ieee80211_vht_cap cap_ie; struct ieee80211_sta_vht_cap cap = sta->sta.deflink.vht_cap; u32 vht_cap_info = le32_to_cpu(elems->vht_cap_elem->vht_cap_info); ieee80211_chandef_vht_oper(&local->hw, vht_cap_info, elems->vht_operation, elems->ht_operation, &chandef); memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie)); ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, &cap_ie, NULL, &sta->deflink); if (memcmp(&cap, &sta->sta.deflink.vht_cap, sizeof(cap))) rates_updated |= true; } if (bw != sta->sta.deflink.bandwidth) rates_updated |= true; if (!cfg80211_chandef_compatible(&sdata->u.ibss.chandef, &chandef)) WARN_ON_ONCE(1); } if (sta && rates_updated) { u32 changed = IEEE80211_RC_SUPP_RATES_CHANGED; u8 rx_nss = sta->sta.deflink.rx_nss; /* Force rx_nss recalculation */ sta->sta.deflink.rx_nss = 0; rate_control_rate_init(sta); if (sta->sta.deflink.rx_nss != rx_nss) changed |= IEEE80211_RC_NSS_CHANGED; drv_sta_rc_update(local, sdata, &sta->sta, changed); } rcu_read_unlock(); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, struct ieee802_11_elems *elems) { struct 
ieee80211_local *local = sdata->local; struct cfg80211_bss *cbss; struct ieee80211_bss *bss; struct ieee80211_channel *channel; u64 beacon_timestamp, rx_timestamp; u32 supp_rates = 0; enum nl80211_band band = rx_status->band; channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq); if (!channel) return; ieee80211_update_sta_info(sdata, mgmt, len, rx_status, elems, channel); bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, channel); if (!bss) return; cbss = container_of((void *)bss, struct cfg80211_bss, priv); /* same for beacon and probe response */ beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp); /* check if we need to merge IBSS */ /* not an IBSS */ if (!(cbss->capability & WLAN_CAPABILITY_IBSS)) goto put_bss; /* different channel */ if (sdata->u.ibss.fixed_channel && sdata->u.ibss.chandef.chan != cbss->channel) goto put_bss; /* different SSID */ if (elems->ssid_len != sdata->u.ibss.ssid_len || memcmp(elems->ssid, sdata->u.ibss.ssid, sdata->u.ibss.ssid_len)) goto put_bss; /* process channel switch */ if (sdata->vif.bss_conf.csa_active || ieee80211_ibss_process_chanswitch(sdata, elems, true)) goto put_bss; /* same BSSID */ if (ether_addr_equal(cbss->bssid, sdata->u.ibss.bssid)) goto put_bss; /* we use a fixed BSSID */ if (sdata->u.ibss.fixed_bssid) goto put_bss; if (ieee80211_have_rx_timestamp(rx_status)) { /* time when timestamp field was received */ rx_timestamp = ieee80211_calculate_rx_timestamp(local, rx_status, len + FCS_LEN, 24); } else { /* * second best option: get current TSF * (will return -1 if not supported) */ rx_timestamp = drv_get_tsf(local, sdata); } ibss_dbg(sdata, "RX beacon SA=%pM BSSID=%pM TSF=0x%llx\n", mgmt->sa, mgmt->bssid, (unsigned long long)rx_timestamp); ibss_dbg(sdata, "\tBCN=0x%llx diff=%lld @%lu\n", (unsigned long long)beacon_timestamp, (unsigned long long)(rx_timestamp - beacon_timestamp), jiffies); if (beacon_timestamp > rx_timestamp) { ibss_dbg(sdata, "beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n", mgmt->bssid); ieee80211_sta_join_ibss(sdata, bss); supp_rates = ieee80211_sta_get_rates(sdata, elems, band, NULL); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); rcu_read_unlock(); } put_bss: ieee80211_rx_bss_put(local, bss); } void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, const u8 *addr, u32 supp_rates) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_supported_band *sband; int band; /* * XXX: Consider removing the least recently used entry and * allow new one to be added. 
*/ if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { net_info_ratelimited("%s: No room for a new IBSS STA entry %pM\n", sdata->name, addr); return; } if (ifibss->state == IEEE80211_IBSS_MLME_SEARCH) return; if (!ether_addr_equal(bssid, sdata->u.ibss.bssid)) return; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON_ONCE(!chanctx_conf)) { rcu_read_unlock(); return; } band = chanctx_conf->def.chan->band; rcu_read_unlock(); sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); if (!sta) return; /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; sta->sta.deflink.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(sband); spin_lock(&ifibss->incomplete_lock); list_add(&sta->list, &ifibss->incomplete_stations); spin_unlock(&ifibss->incomplete_lock); wiphy_work_queue(local->hw.wiphy, &sdata->work); } static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT; unsigned long exp_rsn = IEEE80211_IBSS_RSN_INACTIVITY_LIMIT; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { unsigned long last_active = ieee80211_sta_last_active(sta); if (sdata != sta->sdata) continue; if (time_is_before_jiffies(last_active + exp_time) || (time_is_before_jiffies(last_active + exp_rsn) && sta->sta_state != IEEE80211_STA_AUTHORIZED)) { u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; sta_dbg(sta->sdata, "expiring inactive %sSTA %pM\n", sta->sta_state != IEEE80211_STA_AUTHORIZED ? "not authorized " : "", sta->sta.addr); ieee80211_send_deauth_disassoc(sdata, sta->sta.addr, ifibss->bssid, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, true, frame_buf); WARN_ON(__sta_info_destroy(sta)); } } } /* * This function is called with state == IEEE80211_IBSS_MLME_JOINED */ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; lockdep_assert_wiphy(sdata->local->hw.wiphy); mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); ieee80211_ibss_sta_expire(sdata); if (time_before(jiffies, ifibss->last_scan_completed + IEEE80211_IBSS_MERGE_INTERVAL)) return; if (ieee80211_sta_active_ibss(sdata)) return; if (ifibss->fixed_channel) return; sdata_info(sdata, "No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n"); ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len, NULL, 0); } static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; u8 bssid[ETH_ALEN]; u16 capability; int i; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (ifibss->fixed_bssid) { memcpy(bssid, ifibss->bssid, ETH_ALEN); } else { /* Generate random, not broadcast, locally administered BSSID. Mix in * own MAC address to make sure that devices that do not have proper * random number generator get different BSSID. 
*/ get_random_bytes(bssid, ETH_ALEN); for (i = 0; i < ETH_ALEN; i++) bssid[i] ^= sdata->vif.addr[i]; bssid[0] &= ~0x01; bssid[0] |= 0x02; } sdata_info(sdata, "Creating new IBSS network, BSSID %pM\n", bssid); capability = WLAN_CAPABILITY_IBSS; if (ifibss->privacy) capability |= WLAN_CAPABILITY_PRIVACY; __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int, &ifibss->chandef, ifibss->basic_rates, capability, 0, true); } static unsigned int ibss_setup_channels(struct wiphy *wiphy, struct ieee80211_channel **channels, unsigned int channels_max, u32 center_freq, u32 width) { struct ieee80211_channel *chan = NULL; unsigned int n_chan = 0; u32 start_freq, end_freq, freq; if (width <= 20) { start_freq = center_freq; end_freq = center_freq; } else { start_freq = center_freq - width / 2 + 10; end_freq = center_freq + width / 2 - 10; } for (freq = start_freq; freq <= end_freq; freq += 20) { chan = ieee80211_get_channel(wiphy, freq); if (!chan) continue; if (n_chan >= channels_max) return n_chan; channels[n_chan] = chan; n_chan++; } return n_chan; } static unsigned int ieee80211_ibss_setup_scan_channels(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, struct ieee80211_channel **channels, unsigned int channels_max) { unsigned int n_chan = 0; u32 width, cf1, cf2 = 0; switch (chandef->width) { case NL80211_CHAN_WIDTH_40: width = 40; break; case NL80211_CHAN_WIDTH_80P80: cf2 = chandef->center_freq2; fallthrough; case NL80211_CHAN_WIDTH_80: width = 80; break; case NL80211_CHAN_WIDTH_160: width = 160; break; default: width = 20; break; } cf1 = chandef->center_freq1; n_chan = ibss_setup_channels(wiphy, channels, channels_max, cf1, width); if (cf2) n_chan += ibss_setup_channels(wiphy, &channels[n_chan], channels_max - n_chan, cf2, width); return n_chan; } /* * This function is called with state == IEEE80211_IBSS_MLME_SEARCH */ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct cfg80211_bss *cbss; struct ieee80211_channel *chan = NULL; const u8 *bssid = NULL; int active_ibss; lockdep_assert_wiphy(sdata->local->hw.wiphy); active_ibss = ieee80211_sta_active_ibss(sdata); ibss_dbg(sdata, "sta_find_ibss (active_ibss=%d)\n", active_ibss); if (active_ibss) return; if (ifibss->fixed_bssid) bssid = ifibss->bssid; if (ifibss->fixed_channel) chan = ifibss->chandef.chan; if (!is_zero_ether_addr(ifibss->bssid)) bssid = ifibss->bssid; cbss = cfg80211_get_bss(local->hw.wiphy, chan, bssid, ifibss->ssid, ifibss->ssid_len, IEEE80211_BSS_TYPE_IBSS, IEEE80211_PRIVACY(ifibss->privacy)); if (cbss) { struct ieee80211_bss *bss; bss = (void *)cbss->priv; ibss_dbg(sdata, "sta_find_ibss: selected %pM current %pM\n", cbss->bssid, ifibss->bssid); sdata_info(sdata, "Selected IBSS BSSID %pM based on configured SSID\n", cbss->bssid); ieee80211_sta_join_ibss(sdata, bss); ieee80211_rx_bss_put(local, bss); return; } /* if a fixed bssid and a fixed freq have been provided create the IBSS * directly and do not waste time scanning */ if (ifibss->fixed_bssid && ifibss->fixed_channel) { sdata_info(sdata, "Created IBSS using preconfigured BSSID %pM\n", bssid); ieee80211_sta_create_ibss(sdata); return; } ibss_dbg(sdata, "sta_find_ibss: did not try to join ibss\n"); /* Selected IBSS not found in current scan results - try to scan */ if (time_after(jiffies, ifibss->last_scan_completed + IEEE80211_SCAN_INTERVAL)) { struct ieee80211_channel *channels[8]; unsigned int num; sdata_info(sdata, "Trigger 
new scan to find an IBSS to join\n"); if (ifibss->fixed_channel) { num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy, &ifibss->chandef, channels, ARRAY_SIZE(channels)); ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len, channels, num); } else { ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len, NULL, 0); } } else { int interval = IEEE80211_SCAN_INTERVAL; if (time_after(jiffies, ifibss->ibss_join_req + IEEE80211_IBSS_JOIN_TIMEOUT)) ieee80211_sta_create_ibss(sdata); mod_timer(&ifibss->timer, round_jiffies(jiffies + interval)); } } static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct sk_buff *req) { struct ieee80211_mgmt *mgmt = (void *)req->data; struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; int tx_last_beacon, len = req->len; struct sk_buff *skb; struct beacon_data *presp; u8 *pos, *end; lockdep_assert_wiphy(sdata->local->hw.wiphy); presp = sdata_dereference(ifibss->presp, sdata); if (ifibss->state != IEEE80211_IBSS_MLME_JOINED || len < 24 + 2 || !presp) return; tx_last_beacon = drv_tx_last_beacon(local); ibss_dbg(sdata, "RX ProbeReq SA=%pM DA=%pM\n", mgmt->sa, mgmt->da); ibss_dbg(sdata, "\tBSSID=%pM (tx_last_beacon=%d)\n", mgmt->bssid, tx_last_beacon); if (!tx_last_beacon && is_multicast_ether_addr(mgmt->da)) return; if (!ether_addr_equal(mgmt->bssid, ifibss->bssid) && !is_broadcast_ether_addr(mgmt->bssid)) return; end = ((u8 *) mgmt) + len; pos = mgmt->u.probe_req.variable; if (pos[0] != WLAN_EID_SSID || pos + 2 + pos[1] > end) { ibss_dbg(sdata, "Invalid SSID IE in ProbeReq from %pM\n", mgmt->sa); return; } if (pos[1] != 0 && (pos[1] != ifibss->ssid_len || memcmp(pos + 2, ifibss->ssid, ifibss->ssid_len))) { /* Ignore ProbeReq for foreign SSID */ return; } /* Reply with ProbeResp */ skb = dev_alloc_skb(local->tx_headroom + presp->head_len); if (!skb) return; skb_reserve(skb, local->tx_headroom); skb_put_data(skb, presp->head, presp->head_len); memcpy(((struct ieee80211_mgmt *) skb->data)->da, mgmt->sa, ETH_ALEN); ibss_dbg(sdata, "Sending ProbeResp to %pM\n", mgmt->sa); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; /* avoid excessive retries for probe request to wildcard SSIDs */ if (pos[1] == 0) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_NO_ACK; ieee80211_tx_skb(sdata, skb); } static void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { size_t baselen; struct ieee802_11_elems *elems; BUILD_BUG_ON(offsetof(typeof(mgmt->u.probe_resp), variable) != offsetof(typeof(mgmt->u.beacon), variable)); /* * either beacon or probe_resp but the variable field is at the * same offset */ baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; if (baselen > len) return; elems = ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, false, NULL); if (elems) { ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, elems); kfree(elems); } } void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; u16 fc; struct ieee802_11_elems *elems; int ies_len; rx_status = IEEE80211_SKB_RXCB(skb); mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); if (!sdata->u.ibss.ssid_len) return; /* not ready to merge yet */ switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_REQ: ieee80211_rx_mgmt_probe_req(sdata, skb); break; case 
IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: ieee80211_rx_mgmt_probe_beacon(sdata, mgmt, skb->len, rx_status); break; case IEEE80211_STYPE_AUTH: ieee80211_rx_mgmt_auth_ibss(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_DEAUTH: ieee80211_rx_mgmt_deauth_ibss(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ACTION: switch (mgmt->u.action.category) { case WLAN_CATEGORY_SPECTRUM_MGMT: ies_len = skb->len - offsetof(struct ieee80211_mgmt, u.action.u.chan_switch.variable); if (ies_len < 0) break; elems = ieee802_11_parse_elems( mgmt->u.action.u.chan_switch.variable, ies_len, true, NULL); if (elems && !elems->parse_error) ieee80211_rx_mgmt_spectrum_mgmt(sdata, mgmt, skb->len, rx_status, elems); kfree(elems); break; } } } void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct sta_info *sta; /* * Work could be scheduled after scan or similar * when we aren't even joined (or trying) with a * network. */ if (!ifibss->ssid_len) return; spin_lock_bh(&ifibss->incomplete_lock); while (!list_empty(&ifibss->incomplete_stations)) { sta = list_first_entry(&ifibss->incomplete_stations, struct sta_info, list); list_del(&sta->list); spin_unlock_bh(&ifibss->incomplete_lock); ieee80211_ibss_finish_sta(sta); rcu_read_unlock(); spin_lock_bh(&ifibss->incomplete_lock); } spin_unlock_bh(&ifibss->incomplete_lock); switch (ifibss->state) { case IEEE80211_IBSS_MLME_SEARCH: ieee80211_sta_find_ibss(sdata); break; case IEEE80211_IBSS_MLME_JOINED: ieee80211_sta_merge_ibss(sdata); break; default: WARN_ON(1); break; } } static void ieee80211_ibss_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = from_timer(sdata, t, u.ibss.timer); wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; timer_setup(&ifibss->timer, ieee80211_ibss_timer, 0); INIT_LIST_HEAD(&ifibss->incomplete_stations); spin_lock_init(&ifibss->incomplete_lock); wiphy_work_init(&ifibss->csa_connection_drop_work, ieee80211_csa_connection_drop_work); } /* scan finished notification */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; if (sdata->vif.type != NL80211_IFTYPE_ADHOC) continue; sdata->u.ibss.last_scan_completed = jiffies; } } int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params) { u64 changed = 0; u32 rate_flags; struct ieee80211_supported_band *sband; enum ieee80211_chanctx_mode chanmode; struct ieee80211_local *local = sdata->local; int radar_detect_width = 0; int i; int ret; lockdep_assert_wiphy(local->hw.wiphy); if (params->chandef.chan->freq_offset) { /* this may work, but is untested */ return -EOPNOTSUPP; } ret = cfg80211_chandef_dfs_required(local->hw.wiphy, &params->chandef, sdata->wdev.iftype); if (ret < 0) return ret; if (ret > 0) { if (!params->userspace_handles_dfs) return -EINVAL; radar_detect_width = BIT(params->chandef.width); } chanmode = (params->channel_fixed && !ret) ? 
IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE; ret = ieee80211_check_combinations(sdata, &params->chandef, chanmode, radar_detect_width, -1); if (ret < 0) return ret; if (params->bssid) { memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN); sdata->u.ibss.fixed_bssid = true; } else sdata->u.ibss.fixed_bssid = false; sdata->u.ibss.privacy = params->privacy; sdata->u.ibss.control_port = params->control_port; sdata->u.ibss.userspace_handles_dfs = params->userspace_handles_dfs; sdata->u.ibss.basic_rates = params->basic_rates; sdata->u.ibss.last_scan_completed = jiffies; /* fix basic_rates if channel does not support these rates */ rate_flags = ieee80211_chandef_rate_flags(&params->chandef); sband = local->hw.wiphy->bands[params->chandef.chan->band]; for (i = 0; i < sband->n_bitrates; i++) { if ((rate_flags & sband->bitrates[i].flags) != rate_flags) sdata->u.ibss.basic_rates &= ~BIT(i); } memcpy(sdata->vif.bss_conf.mcast_rate, params->mcast_rate, sizeof(params->mcast_rate)); sdata->vif.bss_conf.beacon_int = params->beacon_interval; sdata->u.ibss.chandef = params->chandef; sdata->u.ibss.fixed_channel = params->channel_fixed; if (params->ie) { sdata->u.ibss.ie = kmemdup(params->ie, params->ie_len, GFP_KERNEL); if (sdata->u.ibss.ie) sdata->u.ibss.ie_len = params->ie_len; } sdata->u.ibss.state = IEEE80211_IBSS_MLME_SEARCH; sdata->u.ibss.ibss_join_req = jiffies; memcpy(sdata->u.ibss.ssid, params->ssid, params->ssid_len); sdata->u.ibss.ssid_len = params->ssid_len; memcpy(&sdata->u.ibss.ht_capa, &params->ht_capa, sizeof(sdata->u.ibss.ht_capa)); memcpy(&sdata->u.ibss.ht_capa_mask, &params->ht_capa_mask, sizeof(sdata->u.ibss.ht_capa_mask)); /* * 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is * reserved, but an HT STA shall protect HT transmissions as though * the HT Protection field were set to non-HT mixed mode. * * In an IBSS, the RIFS Mode field of the HT Operation element is * also reserved, but an HT STA shall operate as though this field * were set to 1. */ sdata->vif.bss_conf.ht_operation_mode |= IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED | IEEE80211_HT_PARAM_RIFS_MODE; changed |= BSS_CHANGED_HT | BSS_CHANGED_MCAST_RATE; ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed); sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; sdata->deflink.needed_rx_chains = local->rx_chains; sdata->control_port_over_nl80211 = params->control_port_over_nl80211; wiphy_work_queue(local->hw.wiphy, &sdata->work); return 0; } int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; ieee80211_ibss_disconnect(sdata); ifibss->ssid_len = 0; eth_zero_addr(ifibss->bssid); /* remove beacon */ kfree(sdata->u.ibss.ie); sdata->u.ibss.ie = NULL; sdata->u.ibss.ie_len = 0; /* on the next join, re-program HT parameters */ memset(&ifibss->ht_capa, 0, sizeof(ifibss->ht_capa)); memset(&ifibss->ht_capa_mask, 0, sizeof(ifibss->ht_capa_mask)); synchronize_rcu(); skb_queue_purge(&sdata->skb_queue); del_timer_sync(&sdata->u.ibss.timer); return 0; }
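/*
 * Illustrative sketch (not part of the kernel sources above): the BSSID
 * derivation performed by ieee80211_sta_create_ibss() when no fixed BSSID
 * is configured, written as a small self-contained userspace-style helper.
 * The function name and the use of rand() are assumptions made for this
 * example only; the kernel itself uses get_random_bytes() and the interface
 * address sdata->vif.addr.
 */
#include <stdint.h>
#include <stdlib.h>

static void ibss_random_bssid(uint8_t bssid[6], const uint8_t own_mac[6])
{
	int i;

	/* Random bytes mixed with the own MAC address, so that devices
	 * without a proper random number generator still end up with
	 * different BSSIDs. */
	for (i = 0; i < 6; i++)
		bssid[i] = (uint8_t)rand() ^ own_mac[i];

	bssid[0] &= ~0x01;	/* not a group (multicast/broadcast) address */
	bssid[0] |= 0x02;	/* locally administered */
}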
// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner * * NOHZ implementation for low and high resolution timers * * Started by: Thomas Gleixner and Ingo Molnar */ #include <linux/compiler.h> #include <linux/cpu.h> #include <linux/err.h> #include <linux/hrtimer.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <linux/percpu.h> #include <linux/nmi.h> #include <linux/profile.h> #include <linux/sched/signal.h> #include <linux/sched/clock.h> #include <linux/sched/stat.h> #include <linux/sched/nohz.h> #include <linux/sched/loadavg.h> #include <linux/module.h> #include <linux/irq_work.h> #include <linux/posix-timers.h> #include <linux/context_tracking.h> #include <linux/mm.h> #include <asm/irq_regs.h> #include "tick-internal.h" #include <trace/events/timer.h> /* * Per-CPU nohz control structure */ static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); struct tick_sched *tick_get_tick_sched(int cpu) { return &per_cpu(tick_cpu_sched, cpu); } /* * The time when the last jiffy update happened. Write access must hold * jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a * consistent view of jiffies and last_jiffies_update. */ static ktime_t last_jiffies_update; /* * Must be called with interrupts disabled ! */ static void tick_do_update_jiffies64(ktime_t now) { unsigned long ticks = 1; ktime_t delta, nextp; /* * 64-bit can do a quick check without holding the jiffies lock and * without looking at the sequence count. The smp_load_acquire() * pairs with the update done later in this function. * * 32-bit cannot do that because the store of 'tick_next_period' * consists of two 32-bit stores, and the first store could be * moved by the CPU to a random point in the future. */ if (IS_ENABLED(CONFIG_64BIT)) { if (ktime_before(now, smp_load_acquire(&tick_next_period))) return; } else { unsigned int seq; /* * Avoid contention on 'jiffies_lock' and protect the quick * check with the sequence count. */ do { seq = read_seqcount_begin(&jiffies_seq); nextp = tick_next_period; } while (read_seqcount_retry(&jiffies_seq, seq)); if (ktime_before(now, nextp)) return; } /* Quick check failed, i.e. update is required. */ raw_spin_lock(&jiffies_lock); /* * Re-evaluate with the lock held. Another CPU might have done the * update already. */ if (ktime_before(now, tick_next_period)) { raw_spin_unlock(&jiffies_lock); return; } write_seqcount_begin(&jiffies_seq); delta = ktime_sub(now, tick_next_period); if (unlikely(delta >= TICK_NSEC)) { /* Slow path for long idle sleep times */ s64 incr = TICK_NSEC; ticks += ktime_divns(delta, incr); last_jiffies_update = ktime_add_ns(last_jiffies_update, incr * ticks); } else { last_jiffies_update = ktime_add_ns(last_jiffies_update, TICK_NSEC); } /* Advance jiffies to complete the 'jiffies_seq' protected job */ jiffies_64 += ticks; /* Keep the tick_next_period variable up to date */ nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC); if (IS_ENABLED(CONFIG_64BIT)) { /* * Pairs with smp_load_acquire() in the lockless quick * check above, and ensures that the update to 'jiffies_64' is * not reordered vs.
the store to 'tick_next_period', neither * by the compiler nor by the CPU. */ smp_store_release(&tick_next_period, nextp); } else { /* * A plain store is good enough on 32-bit, as the quick check * above is protected by the sequence count. */ tick_next_period = nextp; } /* * Release the sequence count. calc_global_load() below is not * protected by it, but 'jiffies_lock' needs to be held to prevent * concurrent invocations. */ write_seqcount_end(&jiffies_seq); calc_global_load(); raw_spin_unlock(&jiffies_lock); update_wall_time(); } /* * Initialize and return retrieve the jiffies update. */ static ktime_t tick_init_jiffy_update(void) { ktime_t period; raw_spin_lock(&jiffies_lock); write_seqcount_begin(&jiffies_seq); /* Have we started the jiffies update yet ? */ if (last_jiffies_update == 0) { u32 rem; /* * Ensure that the tick is aligned to a multiple of * TICK_NSEC. */ div_u64_rem(tick_next_period, TICK_NSEC, &rem); if (rem) tick_next_period += TICK_NSEC - rem; last_jiffies_update = tick_next_period; } period = last_jiffies_update; write_seqcount_end(&jiffies_seq); raw_spin_unlock(&jiffies_lock); return period; } static inline int tick_sched_flag_test(struct tick_sched *ts, unsigned long flag) { return !!(ts->flags & flag); } static inline void tick_sched_flag_set(struct tick_sched *ts, unsigned long flag) { lockdep_assert_irqs_disabled(); ts->flags |= flag; } static inline void tick_sched_flag_clear(struct tick_sched *ts, unsigned long flag) { lockdep_assert_irqs_disabled(); ts->flags &= ~flag; } #define MAX_STALLED_JIFFIES 5 static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) { int tick_cpu, cpu = smp_processor_id(); /* * Check if the do_timer duty was dropped. We don't care about * concurrency: This happens only when the CPU in charge went * into a long sleep. If two CPUs happen to assign themselves to * this duty, then the jiffies update is still serialized by * 'jiffies_lock'. * * If nohz_full is enabled, this should not happen because the * 'tick_do_timer_cpu' CPU never relinquishes. */ tick_cpu = READ_ONCE(tick_do_timer_cpu); if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) { #ifdef CONFIG_NO_HZ_FULL WARN_ON_ONCE(tick_nohz_full_running); #endif WRITE_ONCE(tick_do_timer_cpu, cpu); tick_cpu = cpu; } /* Check if jiffies need an update */ if (tick_cpu == cpu) tick_do_update_jiffies64(now); /* * If the jiffies update stalled for too long (timekeeper in stop_machine() * or VMEXIT'ed for several msecs), force an update. */ if (ts->last_tick_jiffies != jiffies) { ts->stalled_jiffies = 0; ts->last_tick_jiffies = READ_ONCE(jiffies); } else { if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) { tick_do_update_jiffies64(now); ts->stalled_jiffies = 0; ts->last_tick_jiffies = READ_ONCE(jiffies); } } if (tick_sched_flag_test(ts, TS_FLAG_INIDLE)) ts->got_idle_tick = 1; } static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) { /* * When we are idle and the tick is stopped, we have to touch * the watchdog as we might not schedule for a really long * time. This happens on completely idle SMP systems while * waiting on the login prompt. We also increment the "start of * idle" jiffy stamp so the idle accounting adjustment we do * when we go busy again does not account too many ticks. 
*/ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { touch_softlockup_watchdog_sched(); if (is_idle_task(current)) ts->idle_jiffies++; /* * In case the current tick fired too early past its expected * expiration, make sure we don't bypass the next clock reprogramming * to the same deadline. */ ts->next_tick = 0; } update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING); } /* * We rearm the timer until we get disabled by the idle code. * Called with interrupts disabled. */ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer) { struct tick_sched *ts = container_of(timer, struct tick_sched, sched_timer); struct pt_regs *regs = get_irq_regs(); ktime_t now = ktime_get(); tick_sched_do_timer(ts, now); /* * Do not call when we are not in IRQ context and have * no valid 'regs' pointer */ if (regs) tick_sched_handle(ts, regs); else ts->next_tick = 0; /* * In dynticks mode, tick reprogram is deferred: * - to the idle task if in dynticks-idle * - to IRQ exit if in full-dynticks. */ if (unlikely(tick_sched_flag_test(ts, TS_FLAG_STOPPED))) return HRTIMER_NORESTART; hrtimer_forward(timer, now, TICK_NSEC); return HRTIMER_RESTART; } static void tick_sched_timer_cancel(struct tick_sched *ts) { if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) hrtimer_cancel(&ts->sched_timer); else if (tick_sched_flag_test(ts, TS_FLAG_NOHZ)) tick_program_event(KTIME_MAX, 1); } #ifdef CONFIG_NO_HZ_FULL cpumask_var_t tick_nohz_full_mask; EXPORT_SYMBOL_GPL(tick_nohz_full_mask); bool tick_nohz_full_running; EXPORT_SYMBOL_GPL(tick_nohz_full_running); static atomic_t tick_dep_mask; static bool check_tick_dependency(atomic_t *dep) { int val = atomic_read(dep); if (val & TICK_DEP_MASK_POSIX_TIMER) { trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER); return true; } if (val & TICK_DEP_MASK_PERF_EVENTS) { trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS); return true; } if (val & TICK_DEP_MASK_SCHED) { trace_tick_stop(0, TICK_DEP_MASK_SCHED); return true; } if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) { trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE); return true; } if (val & TICK_DEP_MASK_RCU) { trace_tick_stop(0, TICK_DEP_MASK_RCU); return true; } if (val & TICK_DEP_MASK_RCU_EXP) { trace_tick_stop(0, TICK_DEP_MASK_RCU_EXP); return true; } return false; } static bool can_stop_full_tick(int cpu, struct tick_sched *ts) { lockdep_assert_irqs_disabled(); if (unlikely(!cpu_online(cpu))) return false; if (check_tick_dependency(&tick_dep_mask)) return false; if (check_tick_dependency(&ts->tick_dep_mask)) return false; if (check_tick_dependency(&current->tick_dep_mask)) return false; if (check_tick_dependency(&current->signal->tick_dep_mask)) return false; return true; } static void nohz_full_kick_func(struct irq_work *work) { /* Empty, the tick restart happens on tick_nohz_irq_exit() */ } static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = IRQ_WORK_INIT_HARD(nohz_full_kick_func); /* * Kick this CPU if it's full dynticks in order to force it to * re-evaluate its dependency on the tick and restart it if necessary. * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(), * is NMI safe. */ static void tick_nohz_full_kick(void) { if (!tick_nohz_full_cpu(smp_processor_id())) return; irq_work_queue(this_cpu_ptr(&nohz_full_kick_work)); } /* * Kick the CPU if it's full dynticks in order to force it to * re-evaluate its dependency on the tick and restart it if necessary. 
*/ void tick_nohz_full_kick_cpu(int cpu) { if (!tick_nohz_full_cpu(cpu)) return; irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu); } static void tick_nohz_kick_task(struct task_struct *tsk) { int cpu; /* * If the task is not running, run_posix_cpu_timers() * has nothing to elapse, and an IPI can then be optimized out. * * activate_task() STORE p->tick_dep_mask * STORE p->on_rq * __schedule() (switch to task 'p') smp_mb() (atomic_fetch_or()) * LOCK rq->lock LOAD p->on_rq * smp_mb__after_spin_lock() * tick_nohz_task_switch() * LOAD p->tick_dep_mask */ if (!sched_task_on_rq(tsk)) return; /* * If the task concurrently migrates to another CPU, * we guarantee it sees the new tick dependency upon * schedule. * * set_task_cpu(p, cpu); * STORE p->cpu = @cpu * __schedule() (switch to task 'p') * LOCK rq->lock * smp_mb__after_spin_lock() STORE p->tick_dep_mask * tick_nohz_task_switch() smp_mb() (atomic_fetch_or()) * LOAD p->tick_dep_mask LOAD p->cpu */ cpu = task_cpu(tsk); preempt_disable(); if (cpu_online(cpu)) tick_nohz_full_kick_cpu(cpu); preempt_enable(); } /* * Kick all full dynticks CPUs in order to force these to re-evaluate * their dependency on the tick and restart it if necessary. */ static void tick_nohz_full_kick_all(void) { int cpu; if (!tick_nohz_full_running) return; preempt_disable(); for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask) tick_nohz_full_kick_cpu(cpu); preempt_enable(); } static void tick_nohz_dep_set_all(atomic_t *dep, enum tick_dep_bits bit) { int prev; prev = atomic_fetch_or(BIT(bit), dep); if (!prev) tick_nohz_full_kick_all(); } /* * Set a global tick dependency. Used by perf events that rely on freq and * unstable clocks. */ void tick_nohz_dep_set(enum tick_dep_bits bit) { tick_nohz_dep_set_all(&tick_dep_mask, bit); } void tick_nohz_dep_clear(enum tick_dep_bits bit) { atomic_andnot(BIT(bit), &tick_dep_mask); } /* * Set per-CPU tick dependency. Used by scheduler and perf events in order to * manage event-throttling. */ void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { int prev; struct tick_sched *ts; ts = per_cpu_ptr(&tick_cpu_sched, cpu); prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask); if (!prev) { preempt_disable(); /* Perf needs local kick that is NMI safe */ if (cpu == smp_processor_id()) { tick_nohz_full_kick(); } else { /* Remote IRQ work not NMI-safe */ if (!WARN_ON_ONCE(in_nmi())) tick_nohz_full_kick_cpu(cpu); } preempt_enable(); } } EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu); void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu); atomic_andnot(BIT(bit), &ts->tick_dep_mask); } EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu); /* * Set a per-task tick dependency. RCU needs this. Also posix CPU timers * in order to elapse per task timers. */ void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) { if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) tick_nohz_kick_task(tsk); } EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task); void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) { atomic_andnot(BIT(bit), &tsk->tick_dep_mask); } EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task); /* * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse * per process timers. 
*/ void tick_nohz_dep_set_signal(struct task_struct *tsk, enum tick_dep_bits bit) { int prev; struct signal_struct *sig = tsk->signal; prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask); if (!prev) { struct task_struct *t; lockdep_assert_held(&tsk->sighand->siglock); __for_each_thread(sig, t) tick_nohz_kick_task(t); } } void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit) { atomic_andnot(BIT(bit), &sig->tick_dep_mask); } /* * Re-evaluate the need for the tick as we switch the current task. * It might need the tick due to per task/process properties: * perf events, posix CPU timers, ... */ void __tick_nohz_task_switch(void) { struct tick_sched *ts; if (!tick_nohz_full_cpu(smp_processor_id())) return; ts = this_cpu_ptr(&tick_cpu_sched); if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { if (atomic_read(&current->tick_dep_mask) || atomic_read(&current->signal->tick_dep_mask)) tick_nohz_full_kick(); } } /* Get the boot-time nohz CPU list from the kernel parameters. */ void __init tick_nohz_full_setup(cpumask_var_t cpumask) { alloc_bootmem_cpumask_var(&tick_nohz_full_mask); cpumask_copy(tick_nohz_full_mask, cpumask); tick_nohz_full_running = true; } bool tick_nohz_cpu_hotpluggable(unsigned int cpu) { /* * The 'tick_do_timer_cpu' CPU handles housekeeping duty (unbound * timers, workqueues, timekeeping, ...) on behalf of full dynticks * CPUs. It must remain online when nohz full is enabled. */ if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu) return false; return true; } static int tick_nohz_cpu_down(unsigned int cpu) { return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY; } void __init tick_nohz_init(void) { int cpu, ret; if (!tick_nohz_full_running) return; /* * Full dynticks uses IRQ work to drive the tick rescheduling on safe * locking contexts. But then we need IRQ work to raise its own * interrupts to avoid circular dependency on the tick. */ if (!arch_irq_work_has_interrupt()) { pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support IRQ work self-IPIs\n"); cpumask_clear(tick_nohz_full_mask); tick_nohz_full_running = false; return; } if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) && !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) { cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { pr_warn("NO_HZ: Clearing %d from nohz_full range " "for timekeeping\n", cpu); cpumask_clear_cpu(cpu, tick_nohz_full_mask); } } for_each_cpu(cpu, tick_nohz_full_mask) ct_cpu_track_user(cpu); ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "kernel/nohz:predown", NULL, tick_nohz_cpu_down); WARN_ON(ret < 0); pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", cpumask_pr_args(tick_nohz_full_mask)); } #endif /* #ifdef CONFIG_NO_HZ_FULL */ /* * NOHZ - aka dynamic tick functionality */ #ifdef CONFIG_NO_HZ_COMMON /* * NO HZ enabled ? 
*/ bool tick_nohz_enabled __read_mostly = true; unsigned long tick_nohz_active __read_mostly; /* * Enable / Disable tickless mode */ static int __init setup_tick_nohz(char *str) { return (kstrtobool(str, &tick_nohz_enabled) == 0); } __setup("nohz=", setup_tick_nohz); bool tick_nohz_tick_stopped(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); return tick_sched_flag_test(ts, TS_FLAG_STOPPED); } bool tick_nohz_tick_stopped_cpu(int cpu) { struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu); return tick_sched_flag_test(ts, TS_FLAG_STOPPED); } /** * tick_nohz_update_jiffies - update jiffies when idle was interrupted * @now: current ktime_t * * Called from interrupt entry when the CPU was idle * * In case the sched_tick was stopped on this CPU, we have to check if jiffies * must be updated. Otherwise an interrupt handler could use a stale jiffy * value. We do this unconditionally on any CPU, as we don't know whether the * CPU, which has the update task assigned, is in a long sleep. */ static void tick_nohz_update_jiffies(ktime_t now) { unsigned long flags; __this_cpu_write(tick_cpu_sched.idle_waketime, now); local_irq_save(flags); tick_do_update_jiffies64(now); local_irq_restore(flags); touch_softlockup_watchdog_sched(); } static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now) { ktime_t delta; if (WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE))) return; delta = ktime_sub(now, ts->idle_entrytime); write_seqcount_begin(&ts->idle_sleeptime_seq); if (nr_iowait_cpu(smp_processor_id()) > 0) ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); else ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); ts->idle_entrytime = now; tick_sched_flag_clear(ts, TS_FLAG_IDLE_ACTIVE); write_seqcount_end(&ts->idle_sleeptime_seq); sched_clock_idle_wakeup_event(); } static void tick_nohz_start_idle(struct tick_sched *ts) { write_seqcount_begin(&ts->idle_sleeptime_seq); ts->idle_entrytime = ktime_get(); tick_sched_flag_set(ts, TS_FLAG_IDLE_ACTIVE); write_seqcount_end(&ts->idle_sleeptime_seq); sched_clock_idle_sleep_event(); } static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime, bool compute_delta, u64 *last_update_time) { ktime_t now, idle; unsigned int seq; if (!tick_nohz_active) return -1; now = ktime_get(); if (last_update_time) *last_update_time = ktime_to_us(now); do { seq = read_seqcount_begin(&ts->idle_sleeptime_seq); if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE) && compute_delta) { ktime_t delta = ktime_sub(now, ts->idle_entrytime); idle = ktime_add(*sleeptime, delta); } else { idle = *sleeptime; } } while (read_seqcount_retry(&ts->idle_sleeptime_seq, seq)); return ktime_to_us(idle); } /** * get_cpu_idle_time_us - get the total idle time of a CPU * @cpu: CPU number to query * @last_update_time: variable to store update time in. Do not update * counters if NULL. * * Return the cumulative idle time (since boot) for a given * CPU, in microseconds. Note that this is partially broken due to * the counter of iowait tasks that can be remotely updated without * any synchronization. Therefore it is possible to observe backward * values within two consecutive reads. * * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. 
* * Return: -1 if NOHZ is not enabled, else total idle time of the @cpu */ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); return get_cpu_sleep_time_us(ts, &ts->idle_sleeptime, !nr_iowait_cpu(cpu), last_update_time); } EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); /** * get_cpu_iowait_time_us - get the total iowait time of a CPU * @cpu: CPU number to query * @last_update_time: variable to store update time in. Do not update * counters if NULL. * * Return the cumulative iowait time (since boot) for a given * CPU, in microseconds. Note this is partially broken due to * the counter of iowait tasks that can be remotely updated without * any synchronization. Therefore it is possible to observe backward * values within two consecutive reads. * * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. * * Return: -1 if NOHZ is not enabled, else total iowait time of @cpu */ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); return get_cpu_sleep_time_us(ts, &ts->iowait_sleeptime, nr_iowait_cpu(cpu), last_update_time); } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) { hrtimer_cancel(&ts->sched_timer); hrtimer_set_expires(&ts->sched_timer, ts->last_tick); /* Forward the time to expire in the future */ hrtimer_forward(&ts->sched_timer, now, TICK_NSEC); if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) { hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD); } else { tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); } /* * Reset to make sure the next tick stop doesn't get fooled by past * cached clock deadline. */ ts->next_tick = 0; } static inline bool local_timer_softirq_pending(void) { return local_softirq_pending() & BIT(TIMER_SOFTIRQ); } /* * Read jiffies and the time when jiffies were updated last */ u64 get_jiffies_update(unsigned long *basej) { unsigned long basejiff; unsigned int seq; u64 basemono; do { seq = read_seqcount_begin(&jiffies_seq); basemono = last_jiffies_update; basejiff = jiffies; } while (read_seqcount_retry(&jiffies_seq, seq)); *basej = basejiff; return basemono; } /** * tick_nohz_next_event() - return the clock monotonic based next event * @ts: pointer to tick_sched struct * @cpu: CPU number * * Return: * *%0 - When the next event is a maximum of TICK_NSEC in the future * and the tick is not stopped yet * *%next_event - Next event based on clock monotonic */ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) { u64 basemono, next_tick, delta, expires; unsigned long basejiff; int tick_cpu; basemono = get_jiffies_update(&basejiff); ts->last_jiffies = basejiff; ts->timer_expires_base = basemono; /* * Keep the periodic tick, when RCU, architecture or irq_work * requests it. * Aside of that, check whether the local timer softirq is * pending. If so, its a bad idea to call get_next_timer_interrupt(), * because there is an already expired timer, so it will request * immediate expiry, which rearms the hardware timer with a * minimal delta, which brings us back to this place * immediately. Lather, rinse and repeat... */ if (rcu_needs_cpu() || arch_needs_cpu() || irq_work_needs_cpu() || local_timer_softirq_pending()) { next_tick = basemono + TICK_NSEC; } else { /* * Get the next pending timer. If high resolution * timers are enabled this only takes the timer wheel * timers into account. 
If high resolution timers are * disabled this also looks at the next expiring * hrtimer. */ next_tick = get_next_timer_interrupt(basejiff, basemono); ts->next_timer = next_tick; } /* Make sure next_tick is never before basemono! */ if (WARN_ON_ONCE(basemono > next_tick)) next_tick = basemono; /* * If the tick is due in the next period, keep it ticking or * force prod the timer. */ delta = next_tick - basemono; if (delta <= (u64)TICK_NSEC) { /* * We've not stopped the tick yet, and there's a timer in the * next period, so no point in stopping it either, bail. */ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { ts->timer_expires = 0; goto out; } } /* * If this CPU is the one which had the do_timer() duty last, we limit * the sleep time to the timekeeping 'max_deferment' value. * Otherwise we can sleep as long as we want. */ delta = timekeeping_max_deferment(); tick_cpu = READ_ONCE(tick_do_timer_cpu); if (tick_cpu != cpu && (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST))) delta = KTIME_MAX; /* Calculate the next expiry time */ if (delta < (KTIME_MAX - basemono)) expires = basemono + delta; else expires = KTIME_MAX; ts->timer_expires = min_t(u64, expires, next_tick); out: return ts->timer_expires; } static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); unsigned long basejiff = ts->last_jiffies; u64 basemono = ts->timer_expires_base; bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED); int tick_cpu; u64 expires; /* Make sure we won't be trying to stop it twice in a row. */ ts->timer_expires_base = 0; /* * Now the tick should be stopped definitely - so the timer base needs * to be marked idle as well to not miss a newly queued timer. */ expires = timer_base_try_to_set_idle(basejiff, basemono, &timer_idle); if (expires > ts->timer_expires) { /* * This path could only happen when the first timer was removed * between calculating the possible sleep length and now (when * high resolution mode is not active, timer could also be a * hrtimer). * * We have to stick to the original calculated expiry value to * not stop the tick for too long with a shallow C-state (which * was programmed by cpuidle because of an early next expiration * value). */ expires = ts->timer_expires; } /* If the timer base is not idle, retain the not yet stopped tick. */ if (!timer_idle) return; /* * If this CPU is the one which updates jiffies, then give up * the assignment and let it be taken by the CPU which runs * the tick timer next, which might be this CPU as well. If we * don't drop this here, the jiffies might be stale and * do_timer() never gets invoked. Keep track of the fact that it * was the one which had the do_timer() duty last. 
*/ tick_cpu = READ_ONCE(tick_do_timer_cpu); if (tick_cpu == cpu) { WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE); tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST); } else if (tick_cpu != TICK_DO_TIMER_NONE) { tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST); } /* Skip reprogram of event if it's not changed */ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED) && (expires == ts->next_tick)) { /* Sanity check: make sure clockevent is actually programmed */ if (expires == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer)) return; WARN_ONCE(1, "basemono: %llu ts->next_tick: %llu dev->next_event: %llu " "timer->active: %d timer->expires: %llu\n", basemono, ts->next_tick, dev->next_event, hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer)); } /* * tick_nohz_stop_tick() can be called several times before * tick_nohz_restart_sched_tick() is called. This happens when * interrupts arrive which do not cause a reschedule. In the first * call we save the current tick time, so we can restart the * scheduler tick in tick_nohz_restart_sched_tick(). */ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { calc_load_nohz_start(); quiet_vmstat(); ts->last_tick = hrtimer_get_expires(&ts->sched_timer); tick_sched_flag_set(ts, TS_FLAG_STOPPED); trace_tick_stop(1, TICK_DEP_MASK_NONE); } ts->next_tick = expires; /* * If the expiration time == KTIME_MAX, then we simply stop * the tick timer. */ if (unlikely(expires == KTIME_MAX)) { tick_sched_timer_cancel(ts); return; } if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) { hrtimer_start(&ts->sched_timer, expires, HRTIMER_MODE_ABS_PINNED_HARD); } else { hrtimer_set_expires(&ts->sched_timer, expires); tick_program_event(expires, 1); } } static void tick_nohz_retain_tick(struct tick_sched *ts) { ts->timer_expires_base = 0; } #ifdef CONFIG_NO_HZ_FULL static void tick_nohz_full_stop_tick(struct tick_sched *ts, int cpu) { if (tick_nohz_next_event(ts, cpu)) tick_nohz_stop_tick(ts, cpu); else tick_nohz_retain_tick(ts); } #endif /* CONFIG_NO_HZ_FULL */ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) { /* Update jiffies first */ tick_do_update_jiffies64(now); /* * Clear the timer idle flag, so we avoid IPIs on remote queueing and * the clock forward checks in the enqueue path: */ timer_clear_idle(); calc_load_nohz_stop(); touch_softlockup_watchdog_sched(); /* Cancel the scheduled timer and restore the tick: */ tick_sched_flag_clear(ts, TS_FLAG_STOPPED); tick_nohz_restart(ts, now); } static void __tick_nohz_full_update_tick(struct tick_sched *ts, ktime_t now) { #ifdef CONFIG_NO_HZ_FULL int cpu = smp_processor_id(); if (can_stop_full_tick(cpu, ts)) tick_nohz_full_stop_tick(ts, cpu); else if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) tick_nohz_restart_sched_tick(ts, now); #endif } static void tick_nohz_full_update_tick(struct tick_sched *ts) { if (!tick_nohz_full_cpu(smp_processor_id())) return; if (!tick_sched_flag_test(ts, TS_FLAG_NOHZ)) return; __tick_nohz_full_update_tick(ts, ktime_get()); } /* * A pending softirq outside an IRQ (or softirq disabled section) context * should be waiting for ksoftirqd to handle it. Therefore we shouldn't * reach this code due to the need_resched() early check in can_stop_idle_tick(). * * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the * cpu_down() process, softirqs can still be raised while ksoftirqd is parked, * triggering the code below, since wakep_softirqd() is ignored. 
* */ static bool report_idle_softirq(void) { static int ratelimit; unsigned int pending = local_softirq_pending(); if (likely(!pending)) return false; /* Some softirqs claim to be safe against hotplug and ksoftirqd parking */ if (!cpu_active(smp_processor_id())) { pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK; if (!pending) return false; } if (ratelimit >= 10) return false; /* On RT, softirq handling may be waiting on some lock */ if (local_bh_blocked()) return false; pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", pending); ratelimit++; return true; } static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { WARN_ON_ONCE(cpu_is_offline(cpu)); if (unlikely(!tick_sched_flag_test(ts, TS_FLAG_NOHZ))) return false; if (need_resched()) return false; if (unlikely(report_idle_softirq())) return false; if (tick_nohz_full_enabled()) { int tick_cpu = READ_ONCE(tick_do_timer_cpu); /* * Keep the tick alive to guarantee timekeeping progression * if there are full dynticks CPUs around */ if (tick_cpu == cpu) return false; /* Should not happen for nohz-full */ if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE)) return false; } return true; } /** * tick_nohz_idle_stop_tick - stop the idle tick from the idle task * * When the next event is more than a tick into the future, stop the idle tick */ void tick_nohz_idle_stop_tick(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); int cpu = smp_processor_id(); ktime_t expires; /* * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the * tick timer expiration time is known already. */ if (ts->timer_expires_base) expires = ts->timer_expires; else if (can_stop_idle_tick(cpu, ts)) expires = tick_nohz_next_event(ts, cpu); else return; ts->idle_calls++; if (expires > 0LL) { int was_stopped = tick_sched_flag_test(ts, TS_FLAG_STOPPED); tick_nohz_stop_tick(ts, cpu); ts->idle_sleeps++; ts->idle_expires = expires; if (!was_stopped && tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { ts->idle_jiffies = ts->last_jiffies; nohz_balance_enter_idle(cpu); } } else { tick_nohz_retain_tick(ts); } } void tick_nohz_idle_retain_tick(void) { tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched)); } /** * tick_nohz_idle_enter - prepare for entering idle on the current CPU * * Called when we start the idle loop. */ void tick_nohz_idle_enter(void) { struct tick_sched *ts; lockdep_assert_irqs_enabled(); local_irq_disable(); ts = this_cpu_ptr(&tick_cpu_sched); WARN_ON_ONCE(ts->timer_expires_base); tick_sched_flag_set(ts, TS_FLAG_INIDLE); tick_nohz_start_idle(ts); local_irq_enable(); } /** * tick_nohz_irq_exit - Notify the tick about IRQ exit * * A timer may have been added/modified/deleted either by the current IRQ, * or by another place using this IRQ as a notification. This IRQ may have * also updated the RCU callback list. These events may require a * re-evaluation of the next tick. Depending on the context: * * 1) If the CPU is idle and no resched is pending, just proceed with idle * time accounting. The next tick will be re-evaluated on the next idle * loop iteration. * * 2) If the CPU is nohz_full: * * 2.1) If there is any tick dependency, restart the tick if stopped. * * 2.2) If there is no tick dependency, (re-)evaluate the next tick and * stop/update it accordingly. 
*/ void tick_nohz_irq_exit(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); if (tick_sched_flag_test(ts, TS_FLAG_INIDLE)) tick_nohz_start_idle(ts); else tick_nohz_full_update_tick(ts); } /** * tick_nohz_idle_got_tick - Check whether or not the tick handler has run * * Return: %true if the tick handler has run, otherwise %false */ bool tick_nohz_idle_got_tick(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); if (ts->got_idle_tick) { ts->got_idle_tick = 0; return true; } return false; } /** * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer * or the tick, whichever expires first. Note that, if the tick has been * stopped, it returns the next hrtimer. * * Called from power state control code with interrupts disabled * * Return: the next expiration time */ ktime_t tick_nohz_get_next_hrtimer(void) { return __this_cpu_read(tick_cpu_device.evtdev)->next_event; } /** * tick_nohz_get_sleep_length - return the expected length of the current sleep * @delta_next: duration until the next event if the tick cannot be stopped * * Called from power state control code with interrupts disabled. * * The return value of this function and/or the value returned by it through the * @delta_next pointer can be negative which must be taken into account by its * callers. * * Return: the expected length of the current sleep */ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); int cpu = smp_processor_id(); /* * The idle entry time is expected to be a sufficient approximation of * the current time at this point. */ ktime_t now = ts->idle_entrytime; ktime_t next_event; WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_INIDLE)); *delta_next = ktime_sub(dev->next_event, now); if (!can_stop_idle_tick(cpu, ts)) return *delta_next; next_event = tick_nohz_next_event(ts, cpu); if (!next_event) return *delta_next; /* * If the next highres timer to expire is earlier than 'next_event', the * idle governor needs to know that. */ next_event = min_t(u64, next_event, hrtimer_next_event_without(&ts->sched_timer)); return ktime_sub(next_event, now); } /** * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value * for a particular CPU. * @cpu: target CPU number * * Called from the schedutil frequency scaling governor in scheduler context. * * Return: the current idle calls counter value for @cpu */ unsigned long tick_nohz_get_idle_calls_cpu(int cpu) { struct tick_sched *ts = tick_get_tick_sched(cpu); return ts->idle_calls; } static void tick_nohz_account_idle_time(struct tick_sched *ts, ktime_t now) { unsigned long ticks; ts->idle_exittime = now; if (vtime_accounting_enabled_this_cpu()) return; /* * We stopped the tick in idle. update_process_times() would miss the * time we slept, as it does only a 1 tick accounting. * Enforce that this is accounted to idle ! */ ticks = jiffies - ts->idle_jiffies; /* * We might be one off. Do not randomly account a huge number of ticks! 
*/ if (ticks && ticks < LONG_MAX) account_idle_ticks(ticks); } void tick_nohz_idle_restart_tick(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { ktime_t now = ktime_get(); tick_nohz_restart_sched_tick(ts, now); tick_nohz_account_idle_time(ts, now); } } static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now) { if (tick_nohz_full_cpu(smp_processor_id())) __tick_nohz_full_update_tick(ts, now); else tick_nohz_restart_sched_tick(ts, now); tick_nohz_account_idle_time(ts, now); } /** * tick_nohz_idle_exit - Update the tick upon idle task exit * * When the idle task exits, update the tick depending on the * following situations: * * 1) If the CPU is not in nohz_full mode (most cases), then * restart the tick. * * 2) If the CPU is in nohz_full mode (corner case): * 2.1) If the tick can be kept stopped (no tick dependencies) * then re-evaluate the next tick and try to keep it stopped * as long as possible. * 2.2) If the tick has dependencies, restart the tick. * */ void tick_nohz_idle_exit(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); bool idle_active, tick_stopped; ktime_t now; local_irq_disable(); WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_INIDLE)); WARN_ON_ONCE(ts->timer_expires_base); tick_sched_flag_clear(ts, TS_FLAG_INIDLE); idle_active = tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE); tick_stopped = tick_sched_flag_test(ts, TS_FLAG_STOPPED); if (idle_active || tick_stopped) now = ktime_get(); if (idle_active) tick_nohz_stop_idle(ts, now); if (tick_stopped) tick_nohz_idle_update_tick(ts, now); local_irq_enable(); } /* * In low-resolution mode, the tick handler must be implemented directly * at the clockevent level. hrtimer can't be used instead, because its * infrastructure actually relies on the tick itself as a backend in * low-resolution mode (see hrtimer_run_queues()). */ static void tick_nohz_lowres_handler(struct clock_event_device *dev) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); dev->next_event = KTIME_MAX; if (likely(tick_nohz_handler(&ts->sched_timer) == HRTIMER_RESTART)) tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); } static inline void tick_nohz_activate(struct tick_sched *ts) { if (!tick_nohz_enabled) return; tick_sched_flag_set(ts, TS_FLAG_NOHZ); /* One update is enough */ if (!test_and_set_bit(0, &tick_nohz_active)) timers_update_nohz(); } /** * tick_nohz_switch_to_nohz - switch to NOHZ mode */ static void tick_nohz_switch_to_nohz(void) { if (!tick_nohz_enabled) return; if (tick_switch_to_oneshot(tick_nohz_lowres_handler)) return; /* * Recycle the hrtimer in 'ts', so we can share the * highres code. */ tick_setup_sched_timer(false); } static inline void tick_nohz_irq_enter(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t now; if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED | TS_FLAG_IDLE_ACTIVE)) return; now = ktime_get(); if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE)) tick_nohz_stop_idle(ts, now); /* * If all CPUs are idle we may need to update a stale jiffies value. * Note nohz_full is a special case: a timekeeper is guaranteed to stay * alive but it might be busy looping with interrupts disabled in some * rare case (typically stop machine). So we must make sure we have a * last resort. 
*/ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) tick_nohz_update_jiffies(now); } #else static inline void tick_nohz_switch_to_nohz(void) { } static inline void tick_nohz_irq_enter(void) { } static inline void tick_nohz_activate(struct tick_sched *ts) { } #endif /* CONFIG_NO_HZ_COMMON */ /* * Called from irq_enter() to notify about the possible interruption of idle() */ void tick_irq_enter(void) { tick_check_oneshot_broadcast_this_cpu(); tick_nohz_irq_enter(); } static int sched_skew_tick; static int __init skew_tick(char *str) { get_option(&str, &sched_skew_tick); return 0; } early_param("skew_tick", skew_tick); /** * tick_setup_sched_timer - setup the tick emulation timer * @hrtimer: whether to use the hrtimer or not */ void tick_setup_sched_timer(bool hrtimer) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); /* Emulate tick processing via per-CPU hrtimers: */ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && hrtimer) { tick_sched_flag_set(ts, TS_FLAG_HIGHRES); ts->sched_timer.function = tick_nohz_handler; } /* Get the next period (per-CPU) */ hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); /* Offset the tick to avert 'jiffies_lock' contention. */ if (sched_skew_tick) { u64 offset = TICK_NSEC >> 1; do_div(offset, num_possible_cpus()); offset *= smp_processor_id(); hrtimer_add_expires_ns(&ts->sched_timer, offset); } hrtimer_forward_now(&ts->sched_timer, TICK_NSEC); if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && hrtimer) hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD); else tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); tick_nohz_activate(ts); } /* * Shut down the tick and make sure the CPU won't try to retake the timekeeping * duty before disabling IRQs in idle for the last time. */ void tick_sched_timer_dying(int cpu) { struct tick_device *td = &per_cpu(tick_cpu_device, cpu); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); struct clock_event_device *dev = td->evtdev; ktime_t idle_sleeptime, iowait_sleeptime; unsigned long idle_calls, idle_sleeps; /* This must happen before hrtimers are migrated! */ tick_sched_timer_cancel(ts); /* * If the clockevents doesn't support CLOCK_EVT_STATE_ONESHOT_STOPPED, * make sure not to call low-res tick handler. */ if (tick_sched_flag_test(ts, TS_FLAG_NOHZ)) dev->event_handler = clockevents_handle_noop; idle_sleeptime = ts->idle_sleeptime; iowait_sleeptime = ts->iowait_sleeptime; idle_calls = ts->idle_calls; idle_sleeps = ts->idle_sleeps; memset(ts, 0, sizeof(*ts)); ts->idle_sleeptime = idle_sleeptime; ts->iowait_sleeptime = iowait_sleeptime; ts->idle_calls = idle_calls; ts->idle_sleeps = idle_sleeps; } /* * Async notification about clocksource changes */ void tick_clock_notify(void) { int cpu; for_each_possible_cpu(cpu) set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks); } /* * Async notification about clock event changes */ void tick_oneshot_notify(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); set_bit(0, &ts->check_clocks); } /* * Check if a change happened, which makes oneshot possible. * * Called cyclically from the hrtimer softirq (driven by the timer * softirq). 'allow_nohz' signals that we can switch into low-res NOHZ * mode, because high resolution timers are disabled (either compile * or runtime). Called with interrupts disabled. 
*/ int tick_check_oneshot_change(int allow_nohz) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); if (!test_and_clear_bit(0, &ts->check_clocks)) return 0; if (tick_sched_flag_test(ts, TS_FLAG_NOHZ)) return 0; if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available()) return 0; if (!allow_nohz) return 1; tick_nohz_switch_to_nohz(); return 0; }
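/*
 * Illustrative sketch (not part of the kernel file above): the 'skew_tick'
 * branch in tick_setup_sched_timer() staggers each CPU's tick by
 * offset = (TICK_NSEC / 2) / num_possible_cpus() * smp_processor_id(), so
 * that the per-CPU tick timers do not all contend on 'jiffies_lock' at the
 * same instant. The small user-space program below only reproduces that
 * arithmetic; EXAMPLE_TICK_NSEC (HZ=1000) and EXAMPLE_NR_CPUS are assumed
 * values, not taken from this report.
 */
#include <stdio.h>
#include <stdint.h>

#define EXAMPLE_TICK_NSEC	1000000ULL	/* TICK_NSEC at HZ=1000: 1 ms in ns (assumption) */
#define EXAMPLE_NR_CPUS		8		/* hypothetical num_possible_cpus() */

int main(void)
{
	uint64_t offset = EXAMPLE_TICK_NSEC >> 1;	/* u64 offset = TICK_NSEC >> 1;            */
	unsigned int cpu;

	offset /= EXAMPLE_NR_CPUS;			/* do_div(offset, num_possible_cpus());     */
	for (cpu = 0; cpu < EXAMPLE_NR_CPUS; cpu++)	/* offset *= smp_processor_id();            */
		printf("cpu%u: tick skewed by %llu ns\n",
		       cpu, (unsigned long long)(offset * cpu));
	return 0;
}
/* With these assumed values the per-CPU step is 62500 ns: cpu0 0, cpu1 62500, ..., cpu7 437500. */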
// SPDX-License-Identifier: GPL-2.0-or-later /* * IPV6 GSO/GRO offload support * Linux INET6 implementation */ #include <linux/kernel.h> #include <linux/socket.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/printk.h> #include <net/protocol.h> #include <net/ipv6.h> #include <net/inet_common.h> #include <net/tcp.h> #include <net/udp.h> #include <net/gro.h> #include <net/gso.h> #include "ip6_offload.h" /* All GRO functions are always builtin, except UDP over ipv6, which lives in * the ipv6 module, as it depends on the UDPv6 lookup function, so we need special care * when ipv6 is built as a module */ #if IS_BUILTIN(CONFIG_IPV6) #define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__) #else #define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__) #endif #define indirect_call_gro_receive_l4(f2, f1, cb, head, skb) \ ({ \ unlikely(gro_recursion_inc_test(skb)) ?
\ NAPI_GRO_CB(skb)->flush |= 1, NULL : \ INDIRECT_CALL_L4(cb, f2, f1, head, skb); \ }) static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto) { const struct net_offload *ops = NULL; struct ipv6_opt_hdr *opth; for (;;) { int len; ops = rcu_dereference(inet6_offloads[proto]); if (unlikely(!ops)) break; if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) break; opth = skb_gro_header(skb, off + sizeof(*opth), off); if (unlikely(!opth)) break; len = ipv6_optlen(opth); opth = skb_gro_header(skb, off + len, off); if (unlikely(!opth)) break; proto = opth->nexthdr; off += len; } skb_gro_pull(skb, off - skb_gro_receive_network_offset(skb)); return proto; } static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) { const struct net_offload *ops = NULL; for (;;) { struct ipv6_opt_hdr *opth; int len; ops = rcu_dereference(inet6_offloads[proto]); if (unlikely(!ops)) break; if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) break; if (unlikely(!pskb_may_pull(skb, 8))) break; opth = (void *)skb->data; len = ipv6_optlen(opth); if (unlikely(!pskb_may_pull(skb, len))) break; opth = (void *)skb->data; proto = opth->nexthdr; __skb_pull(skb, len); } return proto; } static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct ipv6hdr *ipv6h; const struct net_offload *ops; int proto, err; struct frag_hdr *fptr; unsigned int payload_len; u8 *prevhdr; int offset = 0; bool encap, udpfrag; int nhoff; bool gso_partial; skb_reset_network_header(skb); err = ipv6_hopopt_jumbo_remove(skb); if (err) return ERR_PTR(err); nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) goto out; encap = SKB_GSO_CB(skb)->encap_level > 0; if (encap) features &= skb->dev->hw_enc_features; SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h); ipv6h = ipv6_hdr(skb); __skb_pull(skb, sizeof(*ipv6h)); segs = ERR_PTR(-EPROTONOSUPPORT); proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6)) udpfrag = proto == IPPROTO_UDP && encap && (skb_shinfo(skb)->gso_type & SKB_GSO_UDP); else udpfrag = proto == IPPROTO_UDP && !skb->encapsulation && (skb_shinfo(skb)->gso_type & SKB_GSO_UDP); ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); segs = ops->callbacks.gso_segment(skb, features); if (!segs) skb->network_header = skb_mac_header(skb) + nhoff - skb->head; } if (IS_ERR_OR_NULL(segs)) goto out; gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); if (gso_partial && skb_is_gso(skb)) payload_len = skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)(ipv6h + 1); else payload_len = skb->len - nhoff - sizeof(*ipv6h); ipv6h->payload_len = htons(payload_len); skb->network_header = (u8 *)ipv6h - skb->head; skb_reset_mac_len(skb); if (udpfrag) { int err = ip6_find_1stfragopt(skb, &prevhdr); if (err < 0) { kfree_skb_list(segs); return ERR_PTR(err); } fptr = (struct frag_hdr *)((u8 *)ipv6h + err); fptr->frag_off = htons(offset); if (skb->next) fptr->frag_off |= htons(IP6_MF); offset += (ntohs(ipv6h->payload_len) - sizeof(struct frag_hdr)); } if (encap) skb_reset_inner_headers(skb); } out: return segs; } /* Return the total length of all the extension hdrs, following the same * logic in ipv6_gso_pull_exthdrs() when parsing 
ext-hdrs. */ static int ipv6_exthdrs_len(struct ipv6hdr *iph, const struct net_offload **opps) { struct ipv6_opt_hdr *opth = (void *)iph; int len = 0, proto, optlen = sizeof(*iph); proto = iph->nexthdr; for (;;) { *opps = rcu_dereference(inet6_offloads[proto]); if (unlikely(!(*opps))) break; if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR)) break; opth = (void *)opth + optlen; optlen = ipv6_optlen(opth); len += optlen; proto = opth->nexthdr; } return len; } INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, struct sk_buff *skb) { const struct net_offload *ops; struct sk_buff *pp = NULL; struct sk_buff *p; struct ipv6hdr *iph; unsigned int nlen; unsigned int hlen; unsigned int off; u16 flush = 1; int proto; off = skb_gro_offset(skb); hlen = off + sizeof(*iph); iph = skb_gro_header(skb, hlen, off); if (unlikely(!iph)) goto out; NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = off; flush += ntohs(iph->payload_len) != skb->len - hlen; proto = iph->nexthdr; ops = rcu_dereference(inet6_offloads[proto]); if (!ops || !ops->callbacks.gro_receive) { proto = ipv6_gro_pull_exthdrs(skb, hlen, proto); ops = rcu_dereference(inet6_offloads[proto]); if (!ops || !ops->callbacks.gro_receive) goto out; iph = skb_gro_network_header(skb); } else { skb_gro_pull(skb, sizeof(*iph)); } skb_set_transport_header(skb, skb_gro_offset(skb)); NAPI_GRO_CB(skb)->proto = proto; flush--; nlen = skb_gro_offset(skb) - off; list_for_each_entry(p, head, list) { const struct ipv6hdr *iph2; __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */ if (!NAPI_GRO_CB(p)->same_flow) continue; iph2 = (struct ipv6hdr *)(p->data + off); first_word = *(__be32 *)iph ^ *(__be32 *)iph2; /* All fields must match except length and Traffic Class. * XXX skbs on the gro_list have all been parsed and pulled * already so we don't need to compare nlen * (nlen != (sizeof(*iph2) + ipv6_exthdrs_len(iph2, &ops))) * memcmp() alone below is sufficient, right? 
*/ if ((first_word & htonl(0xF00FFFFF)) || !ipv6_addr_equal(&iph->saddr, &iph2->saddr) || !ipv6_addr_equal(&iph->daddr, &iph2->daddr) || iph->nexthdr != iph2->nexthdr) { not_same_flow: NAPI_GRO_CB(p)->same_flow = 0; continue; } if (unlikely(nlen > sizeof(struct ipv6hdr))) { if (memcmp(iph + 1, iph2 + 1, nlen - sizeof(struct ipv6hdr))) goto not_same_flow; } } NAPI_GRO_CB(skb)->flush |= flush; skb_gro_postpull_rcsum(skb, iph, nlen); pp = indirect_call_gro_receive_l4(tcp6_gro_receive, udp6_gro_receive, ops->callbacks.gro_receive, head, skb); out: skb_gro_flush_final(skb, pp, flush); return pp; } static struct sk_buff *sit_ip6ip6_gro_receive(struct list_head *head, struct sk_buff *skb) { /* Common GRO receive for SIT and IP6IP6 */ if (NAPI_GRO_CB(skb)->encap_mark) { NAPI_GRO_CB(skb)->flush = 1; return NULL; } NAPI_GRO_CB(skb)->encap_mark = 1; return ipv6_gro_receive(head, skb); } static struct sk_buff *ip4ip6_gro_receive(struct list_head *head, struct sk_buff *skb) { /* Common GRO receive for SIT and IP6IP6 */ if (NAPI_GRO_CB(skb)->encap_mark) { NAPI_GRO_CB(skb)->flush = 1; return NULL; } NAPI_GRO_CB(skb)->encap_mark = 1; return inet_gro_receive(head, skb); } INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; struct ipv6hdr *iph; int err = -ENOSYS; u32 payload_len; if (skb->encapsulation) { skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6)); skb_set_inner_network_header(skb, nhoff); } payload_len = skb->len - nhoff - sizeof(*iph); if (unlikely(payload_len > IPV6_MAXPLEN)) { struct hop_jumbo_hdr *hop_jumbo; int hoplen = sizeof(*hop_jumbo); /* Move network header left */ memmove(skb_mac_header(skb) - hoplen, skb_mac_header(skb), skb->transport_header - skb->mac_header); skb->data -= hoplen; skb->len += hoplen; skb->mac_header -= hoplen; skb->network_header -= hoplen; iph = (struct ipv6hdr *)(skb->data + nhoff); hop_jumbo = (struct hop_jumbo_hdr *)(iph + 1); /* Build hop-by-hop options */ hop_jumbo->nexthdr = iph->nexthdr; hop_jumbo->hdrlen = 0; hop_jumbo->tlv_type = IPV6_TLV_JUMBO; hop_jumbo->tlv_len = 4; hop_jumbo->jumbo_payload_len = htonl(payload_len + hoplen); iph->nexthdr = NEXTHDR_HOP; iph->payload_len = 0; } else { iph = (struct ipv6hdr *)(skb->data + nhoff); iph->payload_len = htons(payload_len); } nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out; err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete, udp6_gro_complete, skb, nhoff); out: return err; } static int sit_gro_complete(struct sk_buff *skb, int nhoff) { skb->encapsulation = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4; return ipv6_gro_complete(skb, nhoff); } static int ip6ip6_gro_complete(struct sk_buff *skb, int nhoff) { skb->encapsulation = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6; return ipv6_gro_complete(skb, nhoff); } static int ip4ip6_gro_complete(struct sk_buff *skb, int nhoff) { skb->encapsulation = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6; return inet_gro_complete(skb, nhoff); } static struct sk_buff *sit_gso_segment(struct sk_buff *skb, netdev_features_t features) { if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4)) return ERR_PTR(-EINVAL); return ipv6_gso_segment(skb, features); } static struct sk_buff *ip4ip6_gso_segment(struct sk_buff *skb, netdev_features_t features) { if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6)) return ERR_PTR(-EINVAL); return inet_gso_segment(skb, features); } static struct sk_buff *ip6ip6_gso_segment(struct sk_buff *skb, 
netdev_features_t features) { if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6)) return ERR_PTR(-EINVAL); return ipv6_gso_segment(skb, features); } static const struct net_offload sit_offload = { .callbacks = { .gso_segment = sit_gso_segment, .gro_receive = sit_ip6ip6_gro_receive, .gro_complete = sit_gro_complete, }, }; static const struct net_offload ip4ip6_offload = { .callbacks = { .gso_segment = ip4ip6_gso_segment, .gro_receive = ip4ip6_gro_receive, .gro_complete = ip4ip6_gro_complete, }, }; static const struct net_offload ip6ip6_offload = { .callbacks = { .gso_segment = ip6ip6_gso_segment, .gro_receive = sit_ip6ip6_gro_receive, .gro_complete = ip6ip6_gro_complete, }, }; static int __init ipv6_offload_init(void) { if (tcpv6_offload_init() < 0) pr_crit("%s: Cannot add TCP protocol offload\n", __func__); if (ipv6_exthdrs_offload_init() < 0) pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__); net_hotdata.ipv6_packet_offload = (struct packet_offload) { .type = cpu_to_be16(ETH_P_IPV6), .callbacks = { .gso_segment = ipv6_gso_segment, .gro_receive = ipv6_gro_receive, .gro_complete = ipv6_gro_complete, }, }; dev_add_offload(&net_hotdata.ipv6_packet_offload); inet_add_offload(&sit_offload, IPPROTO_IPV6); inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6); inet6_add_offload(&ip4ip6_offload, IPPROTO_IPIP); return 0; } fs_initcall(ipv6_offload_init);
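/*
 * Illustrative sketch (not part of ip6_offload.c above): ipv6_gro_receive()
 * decides whether two packets can belong to the same flow by XOR-ing the
 * first 32 bits of their IPv6 headers and masking with htonl(0xF00FFFFF),
 * which keeps the Version and Flow Label fields while ignoring the Traffic
 * Class byte. The user-space program below demonstrates that check; the
 * header values are made up for the example.
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	/* Version 6, Traffic Class 0x00, Flow Label 0x12345 */
	uint32_t w1 = htonl((6u << 28) | (0x00u << 20) | 0x12345u);
	/* Same Version and Flow Label, Traffic Class remarked to 0xb8 */
	uint32_t w2 = htonl((6u << 28) | (0xb8u << 20) | 0x12345u);
	uint32_t first_word = w1 ^ w2;

	if (first_word & htonl(0xF00FFFFF))
		printf("different flow\n");
	else
		printf("same flow: only the Traffic Class differs\n");
	return 0;
}
/* The addresses, nexthdr and any extension headers are compared separately in the same loop of ipv6_gro_receive(). */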
/* * \author Rickard E. (Rik) Faith <faith@valinux.com> * \author Daryll Strauss <daryll@valinux.com> * \author Gareth Hughes <gareth@valinux.com> */ /* * Created: Mon Jan 4 08:58:31 1999 by faith@valinux.com * * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/anon_inodes.h> #include <linux/dma-fence.h> #include <linux/file.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/vga_switcheroo.h> #include <drm/drm_client.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_gem.h> #include <drm/drm_print.h> #include "drm_crtc_internal.h" #include "drm_internal.h" /* from BKL pushdown */ DEFINE_MUTEX(drm_global_mutex); bool drm_dev_needs_global_mutex(struct drm_device *dev) { /* * The deprecated ->load callback must be called after the driver is * already registered. This means such drivers rely on the BKL to make * sure an open can't proceed until the driver is actually fully set up. * Similar hilarity holds for the unload callback. */ if (dev->driver->load || dev->driver->unload) return true; return false; } /** * DOC: file operations * * Drivers must define the file operations structure that forms the DRM * userspace API entry point, even though most of those operations are * implemented in the DRM core. The resulting &struct file_operations must be * stored in the &drm_driver.fops field. The mandatory functions are drm_open(), * drm_read(), drm_ioctl() and drm_compat_ioctl() if CONFIG_COMPAT is enabled * Note that drm_compat_ioctl will be NULL if CONFIG_COMPAT=n, so there's no * need to sprinkle #ifdef into the code.
Drivers which implement private ioctls * that require 32/64 bit compatibility support must provide their own * &file_operations.compat_ioctl handler that processes private ioctls and calls * drm_compat_ioctl() for core ioctls. * * In addition drm_read() and drm_poll() provide support for DRM events. DRM * events are a generic and extensible means to send asynchronous events to * userspace through the file descriptor. They are used to send vblank event and * page flip completions by the KMS API. But drivers can also use it for their * own needs, e.g. to signal completion of rendering. * * For the driver-side event interface see drm_event_reserve_init() and * drm_send_event() as the main starting points. * * The memory mapping implementation will vary depending on how the driver * manages memory. For GEM-based drivers this is drm_gem_mmap(). * * No other file operations are supported by the DRM userspace API. Overall the * following is an example &file_operations structure:: * * static const example_drm_fops = { * .owner = THIS_MODULE, * .open = drm_open, * .release = drm_release, * .unlocked_ioctl = drm_ioctl, * .compat_ioctl = drm_compat_ioctl, // NULL if CONFIG_COMPAT=n * .poll = drm_poll, * .read = drm_read, * .mmap = drm_gem_mmap, * }; * * For plain GEM based drivers there is the DEFINE_DRM_GEM_FOPS() macro, and for * DMA based drivers there is the DEFINE_DRM_GEM_DMA_FOPS() macro to make this * simpler. * * The driver's &file_operations must be stored in &drm_driver.fops. * * For driver-private IOCTL handling see the more detailed discussion in * :ref:`IOCTL support in the userland interfaces chapter<drm_driver_ioctl>`. */ /** * drm_file_alloc - allocate file context * @minor: minor to allocate on * * This allocates a new DRM file context. It is not linked into any context and * can be used by the caller freely. Note that the context keeps a pointer to * @minor, so it must be freed before @minor is. * * RETURNS: * Pointer to newly allocated context, ERR_PTR on failure. 
*/ struct drm_file *drm_file_alloc(struct drm_minor *minor) { static atomic64_t ident = ATOMIC_INIT(0); struct drm_device *dev = minor->dev; struct drm_file *file; int ret; file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) return ERR_PTR(-ENOMEM); /* Get a unique identifier for fdinfo: */ file->client_id = atomic64_inc_return(&ident); rcu_assign_pointer(file->pid, get_pid(task_tgid(current))); file->minor = minor; /* for compatibility root is always authenticated */ file->authenticated = capable(CAP_SYS_ADMIN); INIT_LIST_HEAD(&file->lhead); INIT_LIST_HEAD(&file->fbs); mutex_init(&file->fbs_lock); INIT_LIST_HEAD(&file->blobs); INIT_LIST_HEAD(&file->pending_event_list); INIT_LIST_HEAD(&file->event_list); init_waitqueue_head(&file->event_wait); file->event_space = 4096; /* set aside 4k for event buffer */ spin_lock_init(&file->master_lookup_lock); mutex_init(&file->event_read_lock); if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_open(dev, file); if (drm_core_check_feature(dev, DRIVER_SYNCOBJ)) drm_syncobj_open(file); drm_prime_init_file_private(&file->prime); if (dev->driver->open) { ret = dev->driver->open(dev, file); if (ret < 0) goto out_prime_destroy; } return file; out_prime_destroy: drm_prime_destroy_file_private(&file->prime); if (drm_core_check_feature(dev, DRIVER_SYNCOBJ)) drm_syncobj_release(file); if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_release(dev, file); put_pid(rcu_access_pointer(file->pid)); kfree(file); return ERR_PTR(ret); } static void drm_events_release(struct drm_file *file_priv) { struct drm_device *dev = file_priv->minor->dev; struct drm_pending_event *e, *et; unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); /* Unlink pending events */ list_for_each_entry_safe(e, et, &file_priv->pending_event_list, pending_link) { list_del(&e->pending_link); e->file_priv = NULL; } /* Remove unconsumed events */ list_for_each_entry_safe(e, et, &file_priv->event_list, link) { list_del(&e->link); kfree(e); } spin_unlock_irqrestore(&dev->event_lock, flags); } /** * drm_file_free - free file context * @file: context to free, or NULL * * This destroys and deallocates a DRM file context previously allocated via * drm_file_alloc(). The caller must make sure to unlink it from any contexts * before calling this. * * If NULL is passed, this is a no-op. */ void drm_file_free(struct drm_file *file) { struct drm_device *dev; if (!file) return; dev = file->minor->dev; drm_dbg_core(dev, "comm=\"%s\", pid=%d, dev=0x%lx, open_count=%d\n", current->comm, task_pid_nr(current), (long)old_encode_dev(file->minor->kdev->devt), atomic_read(&dev->open_count)); drm_events_release(file); if (drm_core_check_feature(dev, DRIVER_MODESET)) { drm_fb_release(file); drm_property_destroy_user_blobs(dev, file); } if (drm_core_check_feature(dev, DRIVER_SYNCOBJ)) drm_syncobj_release(file); if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_release(dev, file); if (drm_is_primary_client(file)) drm_master_release(file); if (dev->driver->postclose) dev->driver->postclose(dev, file); drm_prime_destroy_file_private(&file->prime); WARN_ON(!list_empty(&file->event_list)); put_pid(rcu_access_pointer(file->pid)); kfree(file); } static void drm_close_helper(struct file *filp) { struct drm_file *file_priv = filp->private_data; struct drm_device *dev = file_priv->minor->dev; mutex_lock(&dev->filelist_mutex); list_del(&file_priv->lhead); mutex_unlock(&dev->filelist_mutex); drm_file_free(file_priv); } /* * Check whether DRI will run on this CPU. 
* * \return non-zero if the DRI will run on this CPU, or zero otherwise. */ static int drm_cpu_valid(void) { #if defined(__sparc__) && !defined(__sparc_v9__) return 0; /* No cmpxchg before v9 sparc. */ #endif return 1; } /* * Called whenever a process opens a drm node * * \param filp file pointer. * \param minor acquired minor-object. * \return zero on success or a negative number on failure. * * Creates and initializes a drm_file structure for the file private data in \p * filp and add it into the double linked list in \p dev. */ int drm_open_helper(struct file *filp, struct drm_minor *minor) { struct drm_device *dev = minor->dev; struct drm_file *priv; int ret; if (filp->f_flags & O_EXCL) return -EBUSY; /* No exclusive opens */ if (!drm_cpu_valid()) return -EINVAL; if (dev->switch_power_state != DRM_SWITCH_POWER_ON && dev->switch_power_state != DRM_SWITCH_POWER_DYNAMIC_OFF) return -EINVAL; if (WARN_ON_ONCE(!(filp->f_op->fop_flags & FOP_UNSIGNED_OFFSET))) return -EINVAL; drm_dbg_core(dev, "comm=\"%s\", pid=%d, minor=%d\n", current->comm, task_pid_nr(current), minor->index); priv = drm_file_alloc(minor); if (IS_ERR(priv)) return PTR_ERR(priv); if (drm_is_primary_client(priv)) { ret = drm_master_open(priv); if (ret) { drm_file_free(priv); return ret; } } filp->private_data = priv; priv->filp = filp; mutex_lock(&dev->filelist_mutex); list_add(&priv->lhead, &dev->filelist); mutex_unlock(&dev->filelist_mutex); return 0; } /** * drm_open - open method for DRM file * @inode: device inode * @filp: file pointer. * * This function must be used by drivers as their &file_operations.open method. * It looks up the correct DRM device and instantiates all the per-file * resources for it. It also calls the &drm_driver.open driver callback. * * RETURNS: * 0 on success or negative errno value on failure. */ int drm_open(struct inode *inode, struct file *filp) { struct drm_device *dev; struct drm_minor *minor; int retcode; minor = drm_minor_acquire(&drm_minors_xa, iminor(inode)); if (IS_ERR(minor)) return PTR_ERR(minor); dev = minor->dev; if (drm_dev_needs_global_mutex(dev)) mutex_lock(&drm_global_mutex); atomic_fetch_inc(&dev->open_count); /* share address_space across all char-devs of a single device */ filp->f_mapping = dev->anon_inode->i_mapping; retcode = drm_open_helper(filp, minor); if (retcode) goto err_undo; if (drm_dev_needs_global_mutex(dev)) mutex_unlock(&drm_global_mutex); return 0; err_undo: atomic_dec(&dev->open_count); if (drm_dev_needs_global_mutex(dev)) mutex_unlock(&drm_global_mutex); drm_minor_release(minor); return retcode; } EXPORT_SYMBOL(drm_open); static void drm_lastclose(struct drm_device *dev) { drm_client_dev_restore(dev); if (dev_is_pci(dev->dev)) vga_switcheroo_process_delayed_switch(); } /** * drm_release - release method for DRM file * @inode: device inode * @filp: file pointer. * * This function must be used by drivers as their &file_operations.release * method. It frees any resources associated with the open file. If this * is the last open file for the DRM device, it also restores the active * in-kernel DRM client. * * RETURNS: * Always succeeds and returns 0. 
*/ int drm_release(struct inode *inode, struct file *filp) { struct drm_file *file_priv = filp->private_data; struct drm_minor *minor = file_priv->minor; struct drm_device *dev = minor->dev; if (drm_dev_needs_global_mutex(dev)) mutex_lock(&drm_global_mutex); drm_dbg_core(dev, "open_count = %d\n", atomic_read(&dev->open_count)); drm_close_helper(filp); if (atomic_dec_and_test(&dev->open_count)) drm_lastclose(dev); if (drm_dev_needs_global_mutex(dev)) mutex_unlock(&drm_global_mutex); drm_minor_release(minor); return 0; } EXPORT_SYMBOL(drm_release); void drm_file_update_pid(struct drm_file *filp) { struct drm_device *dev; struct pid *pid, *old; /* * Master nodes need to keep the original ownership in order for * drm_master_check_perm to keep working correctly. (See comment in * drm_auth.c.) */ if (filp->was_master) return; pid = task_tgid(current); /* * Quick unlocked check since the model is a single handover followed by * exclusive repeated use. */ if (pid == rcu_access_pointer(filp->pid)) return; dev = filp->minor->dev; mutex_lock(&dev->filelist_mutex); get_pid(pid); old = rcu_replace_pointer(filp->pid, pid, 1); mutex_unlock(&dev->filelist_mutex); synchronize_rcu(); put_pid(old); } /** * drm_release_noglobal - release method for DRM file * @inode: device inode * @filp: file pointer. * * This function may be used by drivers as their &file_operations.release * method. It frees any resources associated with the open file prior to taking * the drm_global_mutex. If this is the last open file for the DRM device, it * then restores the active in-kernel DRM client. * * RETURNS: * Always succeeds and returns 0. */ int drm_release_noglobal(struct inode *inode, struct file *filp) { struct drm_file *file_priv = filp->private_data; struct drm_minor *minor = file_priv->minor; struct drm_device *dev = minor->dev; drm_close_helper(filp); if (atomic_dec_and_mutex_lock(&dev->open_count, &drm_global_mutex)) { drm_lastclose(dev); mutex_unlock(&drm_global_mutex); } drm_minor_release(minor); return 0; } EXPORT_SYMBOL(drm_release_noglobal); /** * drm_read - read method for DRM file * @filp: file pointer * @buffer: userspace destination pointer for the read * @count: count in bytes to read * @offset: offset to read * * This function must be used by drivers as their &file_operations.read * method if they use DRM events for asynchronous signalling to userspace. * Since events are used by the KMS API for vblank and page flip completion this * means all modern display drivers must use it. * * @offset is ignored, DRM events are read like a pipe. Polling support is * provided by drm_poll(). * * This function will only ever read a full event. Therefore userspace must * supply a big enough buffer to fit any event to ensure forward progress. Since * the maximum event space is currently 4K it's recommended to just use that for * safety. * * RETURNS: * Number of bytes read (always aligned to full events, and can be 0) or a * negative error code on failure. 
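 *
 * For illustration only (this is userspace code, not part of this file), a
 * consumer of this read method could look roughly as follows, assuming fd is
 * an open DRM file descriptor::
 *
 *     char buf[4096];  // matches the 4k of event space set aside per file
 *     ssize_t n = read(fd, buf, sizeof(buf));
 *     size_t off = 0;
 *
 *     while (n > 0 && off + sizeof(struct drm_event) <= (size_t)n) {
 *             struct drm_event *e = (struct drm_event *)&buf[off];
 *
 *             // e->type is e.g. DRM_EVENT_VBLANK or DRM_EVENT_FLIP_COMPLETE
 *             off += e->length;
 *     }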
*/ ssize_t drm_read(struct file *filp, char __user *buffer, size_t count, loff_t *offset) { struct drm_file *file_priv = filp->private_data; struct drm_device *dev = file_priv->minor->dev; ssize_t ret; ret = mutex_lock_interruptible(&file_priv->event_read_lock); if (ret) return ret; for (;;) { struct drm_pending_event *e = NULL; spin_lock_irq(&dev->event_lock); if (!list_empty(&file_priv->event_list)) { e = list_first_entry(&file_priv->event_list, struct drm_pending_event, link); file_priv->event_space += e->event->length; list_del(&e->link); } spin_unlock_irq(&dev->event_lock); if (e == NULL) { if (ret) break; if (filp->f_flags & O_NONBLOCK) { ret = -EAGAIN; break; } mutex_unlock(&file_priv->event_read_lock); ret = wait_event_interruptible(file_priv->event_wait, !list_empty(&file_priv->event_list)); if (ret >= 0) ret = mutex_lock_interruptible(&file_priv->event_read_lock); if (ret) return ret; } else { unsigned length = e->event->length; if (length > count - ret) { put_back_event: spin_lock_irq(&dev->event_lock); file_priv->event_space -= length; list_add(&e->link, &file_priv->event_list); spin_unlock_irq(&dev->event_lock); wake_up_interruptible_poll(&file_priv->event_wait, EPOLLIN | EPOLLRDNORM); break; } if (copy_to_user(buffer + ret, e->event, length)) { if (ret == 0) ret = -EFAULT; goto put_back_event; } ret += length; kfree(e); } } mutex_unlock(&file_priv->event_read_lock); return ret; } EXPORT_SYMBOL(drm_read); /** * drm_poll - poll method for DRM file * @filp: file pointer * @wait: poll waiter table * * This function must be used by drivers as their &file_operations.read method * if they use DRM events for asynchronous signalling to userspace. Since * events are used by the KMS API for vblank and page flip completion this means * all modern display drivers must use it. * * See also drm_read(). * * RETURNS: * Mask of POLL flags indicating the current status of the file. */ __poll_t drm_poll(struct file *filp, struct poll_table_struct *wait) { struct drm_file *file_priv = filp->private_data; __poll_t mask = 0; poll_wait(filp, &file_priv->event_wait, wait); if (!list_empty(&file_priv->event_list)) mask |= EPOLLIN | EPOLLRDNORM; return mask; } EXPORT_SYMBOL(drm_poll); /** * drm_event_reserve_init_locked - init a DRM event and reserve space for it * @dev: DRM device * @file_priv: DRM file private data * @p: tracking structure for the pending event * @e: actual event data to deliver to userspace * * This function prepares the passed in event for eventual delivery. If the event * doesn't get delivered (because the IOCTL fails later on, before queuing up * anything) then the even must be cancelled and freed using * drm_event_cancel_free(). Successfully initialized events should be sent out * using drm_send_event() or drm_send_event_locked() to signal completion of the * asynchronous event to userspace. * * If callers embedded @p into a larger structure it must be allocated with * kmalloc and @p must be the first member element. * * This is the locked version of drm_event_reserve_init() for callers which * already hold &drm_device.event_lock. * * RETURNS: * 0 on success or a negative error code on failure. 
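 *
 * A minimal sketch of the expected embedding and lifecycle (the foo_event
 * structure is hypothetical, error handling omitted)::
 *
 *     struct foo_event {
 *             struct drm_pending_event base;   // must be the first member
 *             struct drm_event_vblank event;
 *     };
 *
 *     struct foo_event *e = kzalloc(sizeof(*e), GFP_KERNEL);
 *
 *     e->event.base.type = DRM_EVENT_FLIP_COMPLETE;
 *     e->event.base.length = sizeof(e->event);
 *     ret = drm_event_reserve_init_locked(dev, file_priv, &e->base,
 *                                         &e->event.base);
 *     // later: drm_send_event_locked(dev, &e->base) on success, or
 *     // drm_event_cancel_free(dev, &e->base) if the operation is aborted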
*/ int drm_event_reserve_init_locked(struct drm_device *dev, struct drm_file *file_priv, struct drm_pending_event *p, struct drm_event *e) { if (file_priv->event_space < e->length) return -ENOMEM; file_priv->event_space -= e->length; p->event = e; list_add(&p->pending_link, &file_priv->pending_event_list); p->file_priv = file_priv; return 0; } EXPORT_SYMBOL(drm_event_reserve_init_locked); /** * drm_event_reserve_init - init a DRM event and reserve space for it * @dev: DRM device * @file_priv: DRM file private data * @p: tracking structure for the pending event * @e: actual event data to deliver to userspace * * This function prepares the passed in event for eventual delivery. If the event * doesn't get delivered (because the IOCTL fails later on, before queuing up * anything) then the even must be cancelled and freed using * drm_event_cancel_free(). Successfully initialized events should be sent out * using drm_send_event() or drm_send_event_locked() to signal completion of the * asynchronous event to userspace. * * If callers embedded @p into a larger structure it must be allocated with * kmalloc and @p must be the first member element. * * Callers which already hold &drm_device.event_lock should use * drm_event_reserve_init_locked() instead. * * RETURNS: * 0 on success or a negative error code on failure. */ int drm_event_reserve_init(struct drm_device *dev, struct drm_file *file_priv, struct drm_pending_event *p, struct drm_event *e) { unsigned long flags; int ret; spin_lock_irqsave(&dev->event_lock, flags); ret = drm_event_reserve_init_locked(dev, file_priv, p, e); spin_unlock_irqrestore(&dev->event_lock, flags); return ret; } EXPORT_SYMBOL(drm_event_reserve_init); /** * drm_event_cancel_free - free a DRM event and release its space * @dev: DRM device * @p: tracking structure for the pending event * * This function frees the event @p initialized with drm_event_reserve_init() * and releases any allocated space. It is used to cancel an event when the * nonblocking operation could not be submitted and needed to be aborted. */ void drm_event_cancel_free(struct drm_device *dev, struct drm_pending_event *p) { unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); if (p->file_priv) { p->file_priv->event_space += p->event->length; list_del(&p->pending_link); } spin_unlock_irqrestore(&dev->event_lock, flags); if (p->fence) dma_fence_put(p->fence); kfree(p); } EXPORT_SYMBOL(drm_event_cancel_free); static void drm_send_event_helper(struct drm_device *dev, struct drm_pending_event *e, ktime_t timestamp) { assert_spin_locked(&dev->event_lock); if (e->completion) { complete_all(e->completion); e->completion_release(e->completion); e->completion = NULL; } if (e->fence) { if (timestamp) dma_fence_signal_timestamp(e->fence, timestamp); else dma_fence_signal(e->fence); dma_fence_put(e->fence); } if (!e->file_priv) { kfree(e); return; } list_del(&e->pending_link); list_add_tail(&e->link, &e->file_priv->event_list); wake_up_interruptible_poll(&e->file_priv->event_wait, EPOLLIN | EPOLLRDNORM); } /** * drm_send_event_timestamp_locked - send DRM event to file descriptor * @dev: DRM device * @e: DRM event to deliver * @timestamp: timestamp to set for the fence event in kernel's CLOCK_MONOTONIC * time domain * * This function sends the event @e, initialized with drm_event_reserve_init(), * to its associated userspace DRM file. Callers must already hold * &drm_device.event_lock. * * Note that the core will take care of unlinking and disarming events when the * corresponding DRM file is closed. 
Drivers need not worry about whether the * DRM file for this event still exists and can call this function upon * completion of the asynchronous work unconditionally. */ void drm_send_event_timestamp_locked(struct drm_device *dev, struct drm_pending_event *e, ktime_t timestamp) { drm_send_event_helper(dev, e, timestamp); } EXPORT_SYMBOL(drm_send_event_timestamp_locked); /** * drm_send_event_locked - send DRM event to file descriptor * @dev: DRM device * @e: DRM event to deliver * * This function sends the event @e, initialized with drm_event_reserve_init(), * to its associated userspace DRM file. Callers must already hold * &drm_device.event_lock, see drm_send_event() for the unlocked version. * * Note that the core will take care of unlinking and disarming events when the * corresponding DRM file is closed. Drivers need not worry about whether the * DRM file for this event still exists and can call this function upon * completion of the asynchronous work unconditionally. */ void drm_send_event_locked(struct drm_device *dev, struct drm_pending_event *e) { drm_send_event_helper(dev, e, 0); } EXPORT_SYMBOL(drm_send_event_locked); /** * drm_send_event - send DRM event to file descriptor * @dev: DRM device * @e: DRM event to deliver * * This function sends the event @e, initialized with drm_event_reserve_init(), * to its associated userspace DRM file. This function acquires * &drm_device.event_lock, see drm_send_event_locked() for callers which already * hold this lock. * * Note that the core will take care of unlinking and disarming events when the * corresponding DRM file is closed. Drivers need not worry about whether the * DRM file for this event still exists and can call this function upon * completion of the asynchronous work unconditionally. */ void drm_send_event(struct drm_device *dev, struct drm_pending_event *e) { unsigned long irqflags; spin_lock_irqsave(&dev->event_lock, irqflags); drm_send_event_helper(dev, e, 0); spin_unlock_irqrestore(&dev->event_lock, irqflags); } EXPORT_SYMBOL(drm_send_event); static void print_size(struct drm_printer *p, const char *stat, const char *region, u64 sz) { const char *units[] = {"", " KiB", " MiB"}; unsigned u; for (u = 0; u < ARRAY_SIZE(units) - 1; u++) { if (sz == 0 || !IS_ALIGNED(sz, SZ_1K)) break; sz = div_u64(sz, SZ_1K); } drm_printf(p, "drm-%s-%s:\t%llu%s\n", stat, region, sz, units[u]); } /** * drm_print_memory_stats - A helper to print memory stats * @p: The printer to print output to * @stats: The collected memory stats * @supported_status: Bitmask of optional stats which are available * @region: The memory region * */ void drm_print_memory_stats(struct drm_printer *p, const struct drm_memory_stats *stats, enum drm_gem_object_status supported_status, const char *region) { print_size(p, "total", region, stats->private + stats->shared); print_size(p, "shared", region, stats->shared); print_size(p, "active", region, stats->active); if (supported_status & DRM_GEM_OBJECT_RESIDENT) print_size(p, "resident", region, stats->resident); if (supported_status & DRM_GEM_OBJECT_PURGEABLE) print_size(p, "purgeable", region, stats->purgeable); } EXPORT_SYMBOL(drm_print_memory_stats); /** * drm_show_memory_stats - Helper to collect and show standard fdinfo memory stats * @p: the printer to print output to * @file: the DRM file * * Helper to iterate over GEM objects with a handle allocated in the specified * file. 
*/ void drm_show_memory_stats(struct drm_printer *p, struct drm_file *file) { struct drm_gem_object *obj; struct drm_memory_stats status = {}; enum drm_gem_object_status supported_status = 0; int id; spin_lock(&file->table_lock); idr_for_each_entry (&file->object_idr, obj, id) { enum drm_gem_object_status s = 0; size_t add_size = (obj->funcs && obj->funcs->rss) ? obj->funcs->rss(obj) : obj->size; if (obj->funcs && obj->funcs->status) { s = obj->funcs->status(obj); supported_status = DRM_GEM_OBJECT_RESIDENT | DRM_GEM_OBJECT_PURGEABLE; } if (drm_gem_object_is_shared_for_memory_stats(obj)) { status.shared += obj->size; } else { status.private += obj->size; } if (s & DRM_GEM_OBJECT_RESIDENT) { status.resident += add_size; } else { /* If already purged or not yet backed by pages, don't * count it as purgeable: */ s &= ~DRM_GEM_OBJECT_PURGEABLE; } if (!dma_resv_test_signaled(obj->resv, dma_resv_usage_rw(true))) { status.active += add_size; /* If still active, don't count as purgeable: */ s &= ~DRM_GEM_OBJECT_PURGEABLE; } if (s & DRM_GEM_OBJECT_PURGEABLE) status.purgeable += add_size; } spin_unlock(&file->table_lock); drm_print_memory_stats(p, &status, supported_status, "memory"); } EXPORT_SYMBOL(drm_show_memory_stats); /** * drm_show_fdinfo - helper for drm file fops * @m: output stream * @f: the device file instance * * Helper to implement fdinfo, for userspace to query usage stats, etc, of a * process using the GPU. See also &drm_driver.show_fdinfo. * * For text output format description please see Documentation/gpu/drm-usage-stats.rst */ void drm_show_fdinfo(struct seq_file *m, struct file *f) { struct drm_file *file = f->private_data; struct drm_device *dev = file->minor->dev; struct drm_printer p = drm_seq_file_printer(m); drm_printf(&p, "drm-driver:\t%s\n", dev->driver->name); drm_printf(&p, "drm-client-id:\t%llu\n", file->client_id); if (dev_is_pci(dev->dev)) { struct pci_dev *pdev = to_pci_dev(dev->dev); drm_printf(&p, "drm-pdev:\t%04x:%02x:%02x.%d\n", pci_domain_nr(pdev->bus), pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); } if (dev->driver->show_fdinfo) dev->driver->show_fdinfo(&p, file); } EXPORT_SYMBOL(drm_show_fdinfo); /** * mock_drm_getfile - Create a new struct file for the drm device * @minor: drm minor to wrap (e.g. #drm_device.primary) * @flags: file creation mode (O_RDWR etc) * * This create a new struct file that wraps a DRM file context around a * DRM minor. This mimicks userspace opening e.g. /dev/dri/card0, but without * invoking userspace. The struct file may be operated on using its f_op * (the drm_device.driver.fops) to mimick userspace operations, or be supplied * to userspace facing functions as an internal/anonymous client. * * RETURNS: * Pointer to newly created struct file, ERR_PTR on failure. */ struct file *mock_drm_getfile(struct drm_minor *minor, unsigned int flags) { struct drm_device *dev = minor->dev; struct drm_file *priv; struct file *file; priv = drm_file_alloc(minor); if (IS_ERR(priv)) return ERR_CAST(priv); file = anon_inode_getfile("drm", dev->driver->fops, priv, flags); if (IS_ERR(file)) { drm_file_free(priv); return file; } /* Everyone shares a single global address space */ file->f_mapping = dev->anon_inode->i_mapping; drm_dev_get(dev); priv->filp = file; return file; } EXPORT_SYMBOL_FOR_TESTS_ONLY(mock_drm_getfile);
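/*
 * Illustration only: a sketch of how a GEM-based driver might wire up the
 * fdinfo helpers above. All foo_* names are hypothetical and the field lists
 * are abridged::
 *
 *     static void foo_show_fdinfo(struct drm_printer *p, struct drm_file *file)
 *     {
 *             // print the per-client memory stats in the standard fdinfo key format
 *             drm_show_memory_stats(p, file);
 *     }
 *
 *     static const struct file_operations foo_fops = {
 *             .owner = THIS_MODULE,
 *             .open = drm_open,
 *             .release = drm_release,
 *             .unlocked_ioctl = drm_ioctl,
 *             .compat_ioctl = drm_compat_ioctl,
 *             .poll = drm_poll,
 *             .read = drm_read,
 *             .mmap = drm_gem_mmap,
 *             .show_fdinfo = drm_show_fdinfo,
 *     };
 *
 *     static const struct drm_driver foo_driver = {
 *             .driver_features = DRIVER_GEM | DRIVER_MODESET,
 *             .show_fdinfo = foo_show_fdinfo,
 *             .fops = &foo_fops,
 *     };
 */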
// SPDX-License-Identifier: GPL-2.0 /* Driver for ETAS GmbH ES58X USB CAN(-FD) Bus Interfaces. * * File es58x_core.c: Core logic to manage the network devices and the * USB interface. * * Copyright (c) 2019 Robert Bosch Engineering and Business Solutions. All rights reserved. * Copyright (c) 2020 ETAS K.K.. All rights reserved.
* Copyright (c) 2020-2022 Vincent Mailhol <mailhol.vincent@wanadoo.fr> */ #include <linux/unaligned.h> #include <linux/crc16.h> #include <linux/ethtool.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/usb.h> #include <net/devlink.h> #include "es58x_core.h" MODULE_AUTHOR("Vincent Mailhol <mailhol.vincent@wanadoo.fr>"); MODULE_AUTHOR("Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>"); MODULE_DESCRIPTION("Socket CAN driver for ETAS ES58X USB adapters"); MODULE_LICENSE("GPL v2"); #define ES58X_VENDOR_ID 0x108C #define ES581_4_PRODUCT_ID 0x0159 #define ES582_1_PRODUCT_ID 0x0168 #define ES584_1_PRODUCT_ID 0x0169 /* ES58X FD has some interface protocols unsupported by this driver. */ #define ES58X_FD_INTERFACE_PROTOCOL 0 /* Table of devices which work with this driver. */ static const struct usb_device_id es58x_id_table[] = { { /* ETAS GmbH ES581.4 USB dual-channel CAN Bus Interface module. */ USB_DEVICE(ES58X_VENDOR_ID, ES581_4_PRODUCT_ID), .driver_info = ES58X_DUAL_CHANNEL }, { /* ETAS GmbH ES582.1 USB dual-channel CAN FD Bus Interface module. */ USB_DEVICE_INTERFACE_PROTOCOL(ES58X_VENDOR_ID, ES582_1_PRODUCT_ID, ES58X_FD_INTERFACE_PROTOCOL), .driver_info = ES58X_DUAL_CHANNEL | ES58X_FD_FAMILY }, { /* ETAS GmbH ES584.1 USB single-channel CAN FD Bus Interface module. */ USB_DEVICE_INTERFACE_PROTOCOL(ES58X_VENDOR_ID, ES584_1_PRODUCT_ID, ES58X_FD_INTERFACE_PROTOCOL), .driver_info = ES58X_FD_FAMILY }, { /* Terminating entry */ } }; MODULE_DEVICE_TABLE(usb, es58x_id_table); #define es58x_print_hex_dump(buf, len) \ print_hex_dump(KERN_DEBUG, \ KBUILD_MODNAME " " __stringify(buf) ": ", \ DUMP_PREFIX_NONE, 16, 1, buf, len, false) #define es58x_print_hex_dump_debug(buf, len) \ print_hex_dump_debug(KBUILD_MODNAME " " __stringify(buf) ": ",\ DUMP_PREFIX_NONE, 16, 1, buf, len, false) /* The last two bytes of an ES58X command is a CRC16. The first two * bytes (the start of frame) are skipped and the CRC calculation * starts on the third byte. */ #define ES58X_CRC_CALC_OFFSET sizeof_field(union es58x_urb_cmd, sof) /** * es58x_calculate_crc() - Compute the crc16 of a given URB. * @urb_cmd: The URB command for which we want to calculate the CRC. * @urb_len: Length of @urb_cmd. Must be at least bigger than 4 * (ES58X_CRC_CALC_OFFSET + sizeof(crc)) * * Return: crc16 value. */ static u16 es58x_calculate_crc(const union es58x_urb_cmd *urb_cmd, u16 urb_len) { u16 crc; ssize_t len = urb_len - ES58X_CRC_CALC_OFFSET - sizeof(crc); crc = crc16(0, &urb_cmd->raw_cmd[ES58X_CRC_CALC_OFFSET], len); return crc; } /** * es58x_get_crc() - Get the CRC value of a given URB. * @urb_cmd: The URB command for which we want to get the CRC. * @urb_len: Length of @urb_cmd. Must be at least bigger than 4 * (ES58X_CRC_CALC_OFFSET + sizeof(crc)) * * Return: crc16 value. */ static u16 es58x_get_crc(const union es58x_urb_cmd *urb_cmd, u16 urb_len) { u16 crc; const __le16 *crc_addr; crc_addr = (__le16 *)&urb_cmd->raw_cmd[urb_len - sizeof(crc)]; crc = get_unaligned_le16(crc_addr); return crc; } /** * es58x_set_crc() - Set the CRC value of a given URB. * @urb_cmd: The URB command for which we want to get the CRC. * @urb_len: Length of @urb_cmd. 
Must be at least bigger than 4 * (ES58X_CRC_CALC_OFFSET + sizeof(crc)) */ static void es58x_set_crc(union es58x_urb_cmd *urb_cmd, u16 urb_len) { u16 crc; __le16 *crc_addr; crc = es58x_calculate_crc(urb_cmd, urb_len); crc_addr = (__le16 *)&urb_cmd->raw_cmd[urb_len - sizeof(crc)]; put_unaligned_le16(crc, crc_addr); } /** * es58x_check_crc() - Validate the CRC value of a given URB. * @es58x_dev: ES58X device. * @urb_cmd: The URB command for which we want to check the CRC. * @urb_len: Length of @urb_cmd. Must be at least bigger than 4 * (ES58X_CRC_CALC_OFFSET + sizeof(crc)) * * Return: zero on success, -EBADMSG if the CRC check fails. */ static int es58x_check_crc(struct es58x_device *es58x_dev, const union es58x_urb_cmd *urb_cmd, u16 urb_len) { u16 calculated_crc = es58x_calculate_crc(urb_cmd, urb_len); u16 expected_crc = es58x_get_crc(urb_cmd, urb_len); if (expected_crc != calculated_crc) { dev_err_ratelimited(es58x_dev->dev, "%s: Bad CRC, urb_len: %d\n", __func__, urb_len); return -EBADMSG; } return 0; } /** * es58x_timestamp_to_ns() - Convert a timestamp value received from a * ES58X device to nanoseconds. * @timestamp: Timestamp received from a ES58X device. * * The timestamp received from ES58X is expressed in multiples of 0.5 * micro seconds. This function converts it in to nanoseconds. * * Return: Timestamp value in nanoseconds. */ static u64 es58x_timestamp_to_ns(u64 timestamp) { const u64 es58x_timestamp_ns_mult_coef = 500ULL; return es58x_timestamp_ns_mult_coef * timestamp; } /** * es58x_set_skb_timestamp() - Set the hardware timestamp of an skb. * @netdev: CAN network device. * @skb: socket buffer of a CAN message. * @timestamp: Timestamp received from an ES58X device. * * Used for both received and echo messages. */ static void es58x_set_skb_timestamp(struct net_device *netdev, struct sk_buff *skb, u64 timestamp) { struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev; struct skb_shared_hwtstamps *hwts; hwts = skb_hwtstamps(skb); /* Ignoring overflow (overflow on 64 bits timestamp with nano * second precision would occur after more than 500 years). */ hwts->hwtstamp = ns_to_ktime(es58x_timestamp_to_ns(timestamp) + es58x_dev->realtime_diff_ns); } /** * es58x_rx_timestamp() - Handle a received timestamp. * @es58x_dev: ES58X device. * @timestamp: Timestamp received from a ES58X device. * * Calculate the difference between the ES58X device and the kernel * internal clocks. This difference will be later used as an offset to * convert the timestamps of RX and echo messages to match the kernel * system time (e.g. convert to UNIX time). */ void es58x_rx_timestamp(struct es58x_device *es58x_dev, u64 timestamp) { u64 ktime_real_ns = ktime_get_real_ns(); u64 device_timestamp = es58x_timestamp_to_ns(timestamp); dev_dbg(es58x_dev->dev, "%s: request round-trip time: %llu ns\n", __func__, ktime_real_ns - es58x_dev->ktime_req_ns); es58x_dev->realtime_diff_ns = (es58x_dev->ktime_req_ns + ktime_real_ns) / 2 - device_timestamp; es58x_dev->ktime_req_ns = 0; dev_dbg(es58x_dev->dev, "%s: Device timestamp: %llu, diff with kernel: %llu\n", __func__, device_timestamp, es58x_dev->realtime_diff_ns); } /** * es58x_set_realtime_diff_ns() - Calculate difference between the * clocks of the ES58X device and the kernel * @es58x_dev: ES58X device. * * Request a timestamp from the ES58X device. Once the answer is * received, the timestamp difference will be set by the callback * function es58x_rx_timestamp(). * * Return: zero on success, errno when any error occurs. 
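 *
 * As a worked illustration of the offset computed by es58x_rx_timestamp()
 * (all numbers are made up): if the request is issued at kernel time
 * 1000000 ns, the answer arrives at 1000600 ns and carries a device
 * timestamp of 800 ticks (800 * 500 ns = 400000 ns), then the device clock
 * is assumed to have been sampled at the round-trip midpoint::
 *
 *     realtime_diff_ns = (1000000 + 1000600) / 2 - 400000 = 600300 ns
 *
 * Any later RX or echo timestamp t (in ticks) is then reported to the stack
 * as t * 500 + 600300 ns, cf. es58x_set_skb_timestamp().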
*/ static int es58x_set_realtime_diff_ns(struct es58x_device *es58x_dev) { if (es58x_dev->ktime_req_ns) { dev_warn(es58x_dev->dev, "%s: Previous request to set timestamp has not completed yet\n", __func__); return -EBUSY; } es58x_dev->ktime_req_ns = ktime_get_real_ns(); return es58x_dev->ops->get_timestamp(es58x_dev); } /** * es58x_is_can_state_active() - Is the network device in an active * CAN state? * @netdev: CAN network device. * * The device is considered active if it is able to send or receive * CAN frames, that is to say if it is in any of * CAN_STATE_ERROR_ACTIVE, CAN_STATE_ERROR_WARNING or * CAN_STATE_ERROR_PASSIVE states. * * Caution: when recovering from a bus-off, * net/core/dev.c#can_restart() will call * net/core/dev.c#can_flush_echo_skb() without using any kind of * locks. For this reason, it is critical to guarantee that no TX or * echo operations (i.e. any access to priv->echo_skb[]) can be done * while this function is returning false. * * Return: true if the device is active, else returns false. */ static bool es58x_is_can_state_active(struct net_device *netdev) { return es58x_priv(netdev)->can.state < CAN_STATE_BUS_OFF; } /** * es58x_is_echo_skb_threshold_reached() - Determine the limit of how * many skb slots can be taken before we should stop the network * queue. * @priv: ES58X private parameters related to the network device. * * We need to save enough free skb slots in order to be able to do * bulk send. This function can be used to determine when to wake or * stop the network queue in regard to the number of skb slots already * taken if the echo FIFO. * * Return: boolean. */ static bool es58x_is_echo_skb_threshold_reached(struct es58x_priv *priv) { u32 num_echo_skb = priv->tx_head - priv->tx_tail; u32 threshold = priv->can.echo_skb_max - priv->es58x_dev->param->tx_bulk_max + 1; return num_echo_skb >= threshold; } /** * es58x_can_free_echo_skb_tail() - Remove the oldest echo skb of the * echo FIFO. * @netdev: CAN network device. * * Naming convention: the tail is the beginning of the FIFO, i.e. the * first skb to have entered the FIFO. */ static void es58x_can_free_echo_skb_tail(struct net_device *netdev) { struct es58x_priv *priv = es58x_priv(netdev); u16 fifo_mask = priv->es58x_dev->param->fifo_mask; unsigned int frame_len = 0; can_free_echo_skb(netdev, priv->tx_tail & fifo_mask, &frame_len); netdev_completed_queue(netdev, 1, frame_len); priv->tx_tail++; netdev->stats.tx_dropped++; } /** * es58x_can_get_echo_skb_recovery() - Try to re-sync the echo FIFO. * @netdev: CAN network device. * @rcv_packet_idx: Index * * This function should not be called under normal circumstances. In * the unlikely case that one or several URB packages get dropped by * the device, the index will get out of sync. Try to recover by * dropping the echo skb packets with older indexes. * * Return: zero if recovery was successful, -EINVAL otherwise. */ static int es58x_can_get_echo_skb_recovery(struct net_device *netdev, u32 rcv_packet_idx) { struct es58x_priv *priv = es58x_priv(netdev); int ret = 0; netdev->stats.tx_errors++; if (net_ratelimit()) netdev_warn(netdev, "Bad echo packet index: %u. First index: %u, end index %u, num_echo_skb: %02u/%02u\n", rcv_packet_idx, priv->tx_tail, priv->tx_head, priv->tx_head - priv->tx_tail, priv->can.echo_skb_max); if ((s32)(rcv_packet_idx - priv->tx_tail) < 0) { if (net_ratelimit()) netdev_warn(netdev, "Received echo index is from the past. 
Ignoring it\n"); ret = -EINVAL; } else if ((s32)(rcv_packet_idx - priv->tx_head) >= 0) { if (net_ratelimit()) netdev_err(netdev, "Received echo index is from the future. Ignoring it\n"); ret = -EINVAL; } else { if (net_ratelimit()) netdev_warn(netdev, "Recovery: dropping %u echo skb from index %u to %u\n", rcv_packet_idx - priv->tx_tail, priv->tx_tail, rcv_packet_idx - 1); while (priv->tx_tail != rcv_packet_idx) { if (priv->tx_tail == priv->tx_head) return -EINVAL; es58x_can_free_echo_skb_tail(netdev); } } return ret; } /** * es58x_can_get_echo_skb() - Get the skb from the echo FIFO and loop * it back locally. * @netdev: CAN network device. * @rcv_packet_idx: Index of the first packet received from the device. * @tstamps: Array of hardware timestamps received from a ES58X device. * @pkts: Number of packets (and so, length of @tstamps). * * Callback function for when we receive a self reception * acknowledgment. Retrieves the skb from the echo FIFO, sets its * hardware timestamp (the actual time it was sent) and loops it back * locally. * * The device has to be active (i.e. network interface UP and not in * bus off state or restarting). * * Packet indexes must be consecutive (i.e. index of first packet is * @rcv_packet_idx, index of second packet is @rcv_packet_idx + 1 and * index of last packet is @rcv_packet_idx + @pkts - 1). * * Return: zero on success. */ int es58x_can_get_echo_skb(struct net_device *netdev, u32 rcv_packet_idx, u64 *tstamps, unsigned int pkts) { struct es58x_priv *priv = es58x_priv(netdev); unsigned int rx_total_frame_len = 0; unsigned int num_echo_skb = priv->tx_head - priv->tx_tail; int i; u16 fifo_mask = priv->es58x_dev->param->fifo_mask; if (!netif_running(netdev)) { if (net_ratelimit()) netdev_info(netdev, "%s: %s is down, dropping %d echo packets\n", __func__, netdev->name, pkts); netdev->stats.tx_dropped += pkts; return 0; } else if (!es58x_is_can_state_active(netdev)) { if (net_ratelimit()) netdev_dbg(netdev, "Bus is off or device is restarting. Ignoring %u echo packets from index %u\n", pkts, rcv_packet_idx); /* stats.tx_dropped will be (or was already) * incremented by * drivers/net/can/net/dev.c:can_flush_echo_skb(). */ return 0; } else if (num_echo_skb == 0) { if (net_ratelimit()) netdev_warn(netdev, "Received %u echo packets from index: %u but echo skb queue is empty.\n", pkts, rcv_packet_idx); netdev->stats.tx_dropped += pkts; return 0; } if (priv->tx_tail != rcv_packet_idx) { if (es58x_can_get_echo_skb_recovery(netdev, rcv_packet_idx) < 0) { if (net_ratelimit()) netdev_warn(netdev, "Could not find echo skb for echo packet index: %u\n", rcv_packet_idx); return 0; } } if (num_echo_skb < pkts) { int pkts_drop = pkts - num_echo_skb; if (net_ratelimit()) netdev_err(netdev, "Received %u echo packets but have only %d echo skb. Dropping %d echo skb\n", pkts, num_echo_skb, pkts_drop); netdev->stats.tx_dropped += pkts_drop; pkts -= pkts_drop; } for (i = 0; i < pkts; i++) { unsigned int skb_idx = priv->tx_tail & fifo_mask; struct sk_buff *skb = priv->can.echo_skb[skb_idx]; unsigned int frame_len = 0; if (skb) es58x_set_skb_timestamp(netdev, skb, tstamps[i]); netdev->stats.tx_bytes += can_get_echo_skb(netdev, skb_idx, &frame_len); rx_total_frame_len += frame_len; priv->tx_tail++; } netdev_completed_queue(netdev, pkts, rx_total_frame_len); netdev->stats.tx_packets += pkts; priv->err_passive_before_rtx_success = 0; if (!es58x_is_echo_skb_threshold_reached(priv)) netif_wake_queue(netdev); return 0; } /** * es58x_can_reset_echo_fifo() - Reset the echo FIFO. 
* @netdev: CAN network device. * * The echo_skb array of struct can_priv will be flushed by * drivers/net/can/dev.c:can_flush_echo_skb(). This function resets * the parameters of the struct es58x_priv of our device and reset the * queue (c.f. BQL). */ static void es58x_can_reset_echo_fifo(struct net_device *netdev) { struct es58x_priv *priv = es58x_priv(netdev); priv->tx_tail = 0; priv->tx_head = 0; priv->tx_urb = NULL; priv->err_passive_before_rtx_success = 0; netdev_reset_queue(netdev); } /** * es58x_flush_pending_tx_msg() - Reset the buffer for transmission messages. * @netdev: CAN network device. * * es58x_start_xmit() will queue up to tx_bulk_max messages in * &tx_urb buffer and do a bulk send of all messages in one single URB * (c.f. xmit_more flag). When the device recovers from a bus off * state or when the device stops, the tx_urb buffer might still have * pending messages in it and thus need to be flushed. */ static void es58x_flush_pending_tx_msg(struct net_device *netdev) { struct es58x_priv *priv = es58x_priv(netdev); struct es58x_device *es58x_dev = priv->es58x_dev; if (priv->tx_urb) { netdev_warn(netdev, "%s: dropping %d TX messages\n", __func__, priv->tx_can_msg_cnt); netdev->stats.tx_dropped += priv->tx_can_msg_cnt; while (priv->tx_can_msg_cnt > 0) { unsigned int frame_len = 0; u16 fifo_mask = priv->es58x_dev->param->fifo_mask; priv->tx_head--; priv->tx_can_msg_cnt--; can_free_echo_skb(netdev, priv->tx_head & fifo_mask, &frame_len); netdev_completed_queue(netdev, 1, frame_len); } usb_anchor_urb(priv->tx_urb, &priv->es58x_dev->tx_urbs_idle); atomic_inc(&es58x_dev->tx_urbs_idle_cnt); usb_free_urb(priv->tx_urb); } priv->tx_urb = NULL; } /** * es58x_tx_ack_msg() - Handle acknowledgment messages. * @netdev: CAN network device. * @tx_free_entries: Number of free entries in the device transmit FIFO. * @rx_cmd_ret_u32: error code as returned by the ES58X device. * * ES58X sends an acknowledgment message after a transmission request * is done. This is mandatory for the ES581.4 but is optional (and * deactivated in this driver) for the ES58X_FD family. * * Under normal circumstances, this function should never throw an * error message. * * Return: zero on success, errno when any error occurs. */ int es58x_tx_ack_msg(struct net_device *netdev, u16 tx_free_entries, enum es58x_ret_u32 rx_cmd_ret_u32) { struct es58x_priv *priv = es58x_priv(netdev); if (tx_free_entries <= priv->es58x_dev->param->tx_bulk_max) { if (net_ratelimit()) netdev_err(netdev, "Only %d entries left in device queue, num_echo_skb: %d/%d\n", tx_free_entries, priv->tx_head - priv->tx_tail, priv->can.echo_skb_max); netif_stop_queue(netdev); } return es58x_rx_cmd_ret_u32(netdev, ES58X_RET_TYPE_TX_MSG, rx_cmd_ret_u32); } /** * es58x_rx_can_msg() - Handle a received a CAN message. * @netdev: CAN network device. * @timestamp: Hardware time stamp (only relevant in rx branches). * @data: CAN payload. * @can_id: CAN ID. * @es58x_flags: Please refer to enum es58x_flag. * @dlc: Data Length Code (raw value). * * Fill up a CAN skb and post it. * * This function handles the case where the DLC of a classical CAN * frame is greater than CAN_MAX_DLEN (c.f. the len8_dlc field of * struct can_frame). * * Return: zero on success. 
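 *
 * As an illustration of the DLC handling (standard CAN DLC mappings): a raw
 * DLC of 0xA yields a 16 byte payload on a CAN FD frame (can_fd_dlc2len()),
 * whereas on a classical frame it is capped to CAN_MAX_DLEN (8 bytes) by
 * can_cc_dlc2len() and the raw value is only preserved in the len8_dlc field
 * by can_frame_set_cc_len() when the interface runs with
 * CAN_CTRLMODE_CC_LEN8_DLC.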
*/ int es58x_rx_can_msg(struct net_device *netdev, u64 timestamp, const u8 *data, canid_t can_id, enum es58x_flag es58x_flags, u8 dlc) { struct canfd_frame *cfd; struct can_frame *ccf; struct sk_buff *skb; u8 len; bool is_can_fd = !!(es58x_flags & ES58X_FLAG_FD_DATA); if (dlc > CAN_MAX_RAW_DLC) { netdev_err(netdev, "%s: DLC is %d but maximum should be %d\n", __func__, dlc, CAN_MAX_RAW_DLC); return -EMSGSIZE; } if (is_can_fd) { len = can_fd_dlc2len(dlc); skb = alloc_canfd_skb(netdev, &cfd); } else { len = can_cc_dlc2len(dlc); skb = alloc_can_skb(netdev, &ccf); cfd = (struct canfd_frame *)ccf; } if (!skb) { netdev->stats.rx_dropped++; return 0; } cfd->can_id = can_id; if (es58x_flags & ES58X_FLAG_EFF) cfd->can_id |= CAN_EFF_FLAG; if (is_can_fd) { cfd->len = len; if (es58x_flags & ES58X_FLAG_FD_BRS) cfd->flags |= CANFD_BRS; if (es58x_flags & ES58X_FLAG_FD_ESI) cfd->flags |= CANFD_ESI; } else { can_frame_set_cc_len(ccf, dlc, es58x_priv(netdev)->can.ctrlmode); if (es58x_flags & ES58X_FLAG_RTR) { ccf->can_id |= CAN_RTR_FLAG; len = 0; } } memcpy(cfd->data, data, len); netdev->stats.rx_packets++; netdev->stats.rx_bytes += len; es58x_set_skb_timestamp(netdev, skb, timestamp); netif_rx(skb); es58x_priv(netdev)->err_passive_before_rtx_success = 0; return 0; } /** * es58x_rx_err_msg() - Handle a received CAN event or error message. * @netdev: CAN network device. * @error: Error code. * @event: Event code. * @timestamp: Timestamp received from a ES58X device. * * Handle the errors and events received by the ES58X device, create * a CAN error skb and post it. * * In some rare cases the devices might get stuck alternating between * CAN_STATE_ERROR_PASSIVE and CAN_STATE_ERROR_WARNING. To prevent * this behavior, we force a bus off state if the device goes in * CAN_STATE_ERROR_WARNING for ES58X_MAX_CONSECUTIVE_WARN consecutive * times with no successful transmission or reception in between. * * Once the device is in bus off state, the only way to restart it is * through the drivers/net/can/dev.c:can_restart() function. The * device is technically capable to recover by itself under certain * circumstances, however, allowing self recovery would create * complex race conditions with drivers/net/can/dev.c:can_restart() * and thus was not implemented. To activate automatic restart, please * set the restart-ms parameter (e.g. ip link set can0 type can * restart-ms 100). * * If the bus is really instable, this function would try to send a * lot of log messages. Those are rate limited (i.e. you will see * messages such as "net_ratelimit: XXX callbacks suppressed" in * dmesg). * * Return: zero on success, errno when any error occurs. 
*/ int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error, enum es58x_event event, u64 timestamp) { struct es58x_priv *priv = es58x_priv(netdev); struct can_priv *can = netdev_priv(netdev); struct can_device_stats *can_stats = &can->can_stats; struct can_frame *cf = NULL; struct sk_buff *skb; int ret = 0; if (!netif_running(netdev)) { if (net_ratelimit()) netdev_info(netdev, "%s: %s is down, dropping packet\n", __func__, netdev->name); netdev->stats.rx_dropped++; return 0; } if (error == ES58X_ERR_OK && event == ES58X_EVENT_OK) { netdev_err(netdev, "%s: Both error and event are zero\n", __func__); return -EINVAL; } skb = alloc_can_err_skb(netdev, &cf); switch (error) { case ES58X_ERR_OK: /* 0: No error */ break; case ES58X_ERR_PROT_STUFF: if (net_ratelimit()) netdev_dbg(netdev, "Error BITSTUFF\n"); if (cf) cf->data[2] |= CAN_ERR_PROT_STUFF; break; case ES58X_ERR_PROT_FORM: if (net_ratelimit()) netdev_dbg(netdev, "Error FORMAT\n"); if (cf) cf->data[2] |= CAN_ERR_PROT_FORM; break; case ES58X_ERR_ACK: if (net_ratelimit()) netdev_dbg(netdev, "Error ACK\n"); if (cf) cf->can_id |= CAN_ERR_ACK; break; case ES58X_ERR_PROT_BIT: if (net_ratelimit()) netdev_dbg(netdev, "Error BIT\n"); if (cf) cf->data[2] |= CAN_ERR_PROT_BIT; break; case ES58X_ERR_PROT_CRC: if (net_ratelimit()) netdev_dbg(netdev, "Error CRC\n"); if (cf) cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ; break; case ES58X_ERR_PROT_BIT1: if (net_ratelimit()) netdev_dbg(netdev, "Error: expected a recessive bit but monitored a dominant one\n"); if (cf) cf->data[2] |= CAN_ERR_PROT_BIT1; break; case ES58X_ERR_PROT_BIT0: if (net_ratelimit()) netdev_dbg(netdev, "Error expected a dominant bit but monitored a recessive one\n"); if (cf) cf->data[2] |= CAN_ERR_PROT_BIT0; break; case ES58X_ERR_PROT_OVERLOAD: if (net_ratelimit()) netdev_dbg(netdev, "Error OVERLOAD\n"); if (cf) cf->data[2] |= CAN_ERR_PROT_OVERLOAD; break; case ES58X_ERR_PROT_UNSPEC: if (net_ratelimit()) netdev_dbg(netdev, "Unspecified error\n"); if (cf) cf->can_id |= CAN_ERR_PROT; break; default: if (net_ratelimit()) netdev_err(netdev, "%s: Unspecified error code 0x%04X\n", __func__, (int)error); if (cf) cf->can_id |= CAN_ERR_PROT; break; } switch (event) { case ES58X_EVENT_OK: /* 0: No event */ break; case ES58X_EVENT_CRTL_ACTIVE: if (can->state == CAN_STATE_BUS_OFF) { netdev_err(netdev, "%s: state transition: BUS OFF -> ACTIVE\n", __func__); } if (net_ratelimit()) netdev_dbg(netdev, "Event CAN BUS ACTIVE\n"); if (cf) cf->data[1] |= CAN_ERR_CRTL_ACTIVE; can->state = CAN_STATE_ERROR_ACTIVE; break; case ES58X_EVENT_CRTL_PASSIVE: if (net_ratelimit()) netdev_dbg(netdev, "Event CAN BUS PASSIVE\n"); /* Either TX or RX error count reached passive state * but we do not know which. Setting both flags by * default. */ if (cf) { cf->data[1] |= CAN_ERR_CRTL_RX_PASSIVE; cf->data[1] |= CAN_ERR_CRTL_TX_PASSIVE; } if (can->state < CAN_STATE_BUS_OFF) can->state = CAN_STATE_ERROR_PASSIVE; can_stats->error_passive++; if (priv->err_passive_before_rtx_success < U8_MAX) priv->err_passive_before_rtx_success++; break; case ES58X_EVENT_CRTL_WARNING: if (net_ratelimit()) netdev_dbg(netdev, "Event CAN BUS WARNING\n"); /* Either TX or RX error count reached warning state * but we do not know which. Setting both flags by * default. 
*/ if (cf) { cf->data[1] |= CAN_ERR_CRTL_RX_WARNING; cf->data[1] |= CAN_ERR_CRTL_TX_WARNING; } if (can->state < CAN_STATE_BUS_OFF) can->state = CAN_STATE_ERROR_WARNING; can_stats->error_warning++; break; case ES58X_EVENT_BUSOFF: if (net_ratelimit()) netdev_dbg(netdev, "Event CAN BUS OFF\n"); if (cf) cf->can_id |= CAN_ERR_BUSOFF; can_stats->bus_off++; netif_stop_queue(netdev); if (can->state != CAN_STATE_BUS_OFF) { can->state = CAN_STATE_BUS_OFF; can_bus_off(netdev); ret = can->do_set_mode(netdev, CAN_MODE_STOP); } break; case ES58X_EVENT_SINGLE_WIRE: if (net_ratelimit()) netdev_warn(netdev, "Lost connection on either CAN high or CAN low\n"); /* Lost connection on either CAN high or CAN * low. Setting both flags by default. */ if (cf) { cf->data[4] |= CAN_ERR_TRX_CANH_NO_WIRE; cf->data[4] |= CAN_ERR_TRX_CANL_NO_WIRE; } break; default: if (net_ratelimit()) netdev_err(netdev, "%s: Unspecified event code 0x%04X\n", __func__, (int)event); if (cf) cf->can_id |= CAN_ERR_CRTL; break; } if (cf) { if (cf->data[1]) cf->can_id |= CAN_ERR_CRTL; if (cf->data[2] || cf->data[3]) { cf->can_id |= CAN_ERR_PROT; can_stats->bus_error++; } if (cf->data[4]) cf->can_id |= CAN_ERR_TRX; es58x_set_skb_timestamp(netdev, skb, timestamp); netif_rx(skb); } if ((event & ES58X_EVENT_CRTL_PASSIVE) && priv->err_passive_before_rtx_success == ES58X_CONSECUTIVE_ERR_PASSIVE_MAX) { netdev_info(netdev, "Got %d consecutive warning events with no successful RX or TX. Forcing bus-off\n", priv->err_passive_before_rtx_success); return es58x_rx_err_msg(netdev, ES58X_ERR_OK, ES58X_EVENT_BUSOFF, timestamp); } return ret; } /** * es58x_cmd_ret_desc() - Convert a command type to a string. * @cmd_ret_type: Type of the command which triggered the return code. * * The final line (return "<unknown>") should not be reached. If this * is the case, there is an implementation bug. * * Return: a readable description of the @cmd_ret_type. */ static const char *es58x_cmd_ret_desc(enum es58x_ret_type cmd_ret_type) { switch (cmd_ret_type) { case ES58X_RET_TYPE_SET_BITTIMING: return "Set bittiming"; case ES58X_RET_TYPE_ENABLE_CHANNEL: return "Enable channel"; case ES58X_RET_TYPE_DISABLE_CHANNEL: return "Disable channel"; case ES58X_RET_TYPE_TX_MSG: return "Transmit message"; case ES58X_RET_TYPE_RESET_RX: return "Reset RX"; case ES58X_RET_TYPE_RESET_TX: return "Reset TX"; case ES58X_RET_TYPE_DEVICE_ERR: return "Device error"; } return "<unknown>"; }; /** * es58x_rx_cmd_ret_u8() - Handle the command's return code received * from the ES58X device. * @dev: Device, only used for the dev_XXX() print functions. * @cmd_ret_type: Type of the command which triggered the return code. * @rx_cmd_ret_u8: Command error code as returned by the ES58X device. * * Handles the 8 bits command return code. Those are specific to the * ES581.4 device. The return value will eventually be used by * es58x_handle_urb_cmd() function which will take proper actions in * case of critical issues such and memory errors or bad CRC values. * * In contrast with es58x_rx_cmd_ret_u32(), the network device is * unknown. * * Return: zero on success, return errno when any error occurs. 
*/ int es58x_rx_cmd_ret_u8(struct device *dev, enum es58x_ret_type cmd_ret_type, enum es58x_ret_u8 rx_cmd_ret_u8) { const char *ret_desc = es58x_cmd_ret_desc(cmd_ret_type); switch (rx_cmd_ret_u8) { case ES58X_RET_U8_OK: dev_dbg_ratelimited(dev, "%s: OK\n", ret_desc); return 0; case ES58X_RET_U8_ERR_UNSPECIFIED_FAILURE: dev_err(dev, "%s: unspecified failure\n", ret_desc); return -EBADMSG; case ES58X_RET_U8_ERR_NO_MEM: dev_err(dev, "%s: device ran out of memory\n", ret_desc); return -ENOMEM; case ES58X_RET_U8_ERR_BAD_CRC: dev_err(dev, "%s: CRC of previous command is incorrect\n", ret_desc); return -EIO; default: dev_err(dev, "%s: returned unknown value: 0x%02X\n", ret_desc, rx_cmd_ret_u8); return -EBADMSG; } } /** * es58x_rx_cmd_ret_u32() - Handle the command return code received * from the ES58X device. * @netdev: CAN network device. * @cmd_ret_type: Type of the command which triggered the return code. * @rx_cmd_ret_u32: error code as returned by the ES58X device. * * Handles the 32 bits command return code. The return value will * eventually be used by es58x_handle_urb_cmd() function which will * take proper actions in case of critical issues such and memory * errors or bad CRC values. * * Return: zero on success, errno when any error occurs. */ int es58x_rx_cmd_ret_u32(struct net_device *netdev, enum es58x_ret_type cmd_ret_type, enum es58x_ret_u32 rx_cmd_ret_u32) { struct es58x_priv *priv = es58x_priv(netdev); const struct es58x_operators *ops = priv->es58x_dev->ops; const char *ret_desc = es58x_cmd_ret_desc(cmd_ret_type); switch (rx_cmd_ret_u32) { case ES58X_RET_U32_OK: switch (cmd_ret_type) { case ES58X_RET_TYPE_ENABLE_CHANNEL: es58x_can_reset_echo_fifo(netdev); priv->can.state = CAN_STATE_ERROR_ACTIVE; netif_wake_queue(netdev); netdev_info(netdev, "%s: %s (Serial Number %s): CAN%d channel becomes ready\n", ret_desc, priv->es58x_dev->udev->product, priv->es58x_dev->udev->serial, priv->channel_idx + 1); break; case ES58X_RET_TYPE_TX_MSG: if (IS_ENABLED(CONFIG_VERBOSE_DEBUG) && net_ratelimit()) netdev_vdbg(netdev, "%s: OK\n", ret_desc); break; default: netdev_dbg(netdev, "%s: OK\n", ret_desc); break; } return 0; case ES58X_RET_U32_ERR_UNSPECIFIED_FAILURE: if (cmd_ret_type == ES58X_RET_TYPE_ENABLE_CHANNEL) { int ret; netdev_warn(netdev, "%s: channel is already opened, closing and re-opening it to reflect new configuration\n", ret_desc); ret = ops->disable_channel(es58x_priv(netdev)); if (ret) return ret; return ops->enable_channel(es58x_priv(netdev)); } if (cmd_ret_type == ES58X_RET_TYPE_DISABLE_CHANNEL) { netdev_info(netdev, "%s: channel is already closed\n", ret_desc); return 0; } netdev_err(netdev, "%s: unspecified failure\n", ret_desc); return -EBADMSG; case ES58X_RET_U32_ERR_NO_MEM: netdev_err(netdev, "%s: device ran out of memory\n", ret_desc); return -ENOMEM; case ES58X_RET_U32_WARN_PARAM_ADJUSTED: netdev_warn(netdev, "%s: some incompatible parameters have been adjusted\n", ret_desc); return 0; case ES58X_RET_U32_WARN_TX_MAYBE_REORDER: netdev_warn(netdev, "%s: TX messages might have been reordered\n", ret_desc); return 0; case ES58X_RET_U32_ERR_TIMEDOUT: netdev_err(netdev, "%s: command timed out\n", ret_desc); return -ETIMEDOUT; case ES58X_RET_U32_ERR_FIFO_FULL: netdev_warn(netdev, "%s: fifo is full\n", ret_desc); return 0; case ES58X_RET_U32_ERR_BAD_CONFIG: netdev_err(netdev, "%s: bad configuration\n", ret_desc); return -EINVAL; case ES58X_RET_U32_ERR_NO_RESOURCE: netdev_err(netdev, "%s: no resource available\n", ret_desc); return -EBUSY; default: netdev_err(netdev, "%s returned 
unknown value: 0x%08X\n", ret_desc, rx_cmd_ret_u32); return -EBADMSG; } } /** * es58x_increment_rx_errors() - Increment the network devices' error * count. * @es58x_dev: ES58X device. * * If an error occurs on the early stages on receiving an URB command, * we might not be able to figure out on which network device the * error occurred. In such case, we arbitrarily increment the error * count of all the network devices attached to our ES58X device. */ static void es58x_increment_rx_errors(struct es58x_device *es58x_dev) { int i; for (i = 0; i < es58x_dev->num_can_ch; i++) if (es58x_dev->netdev[i]) es58x_dev->netdev[i]->stats.rx_errors++; } /** * es58x_handle_urb_cmd() - Handle the URB command * @es58x_dev: ES58X device. * @urb_cmd: The URB command received from the ES58X device, might not * be aligned. * * Sends the URB command to the device specific function. Manages the * errors thrown back by those functions. */ static void es58x_handle_urb_cmd(struct es58x_device *es58x_dev, const union es58x_urb_cmd *urb_cmd) { const struct es58x_operators *ops = es58x_dev->ops; size_t cmd_len; int i, ret; ret = ops->handle_urb_cmd(es58x_dev, urb_cmd); switch (ret) { case 0: /* OK */ return; case -ENODEV: dev_err_ratelimited(es58x_dev->dev, "Device is not ready\n"); break; case -EINVAL: case -EMSGSIZE: case -EBADRQC: case -EBADMSG: case -ECHRNG: case -ETIMEDOUT: cmd_len = es58x_get_urb_cmd_len(es58x_dev, ops->get_msg_len(urb_cmd)); dev_err(es58x_dev->dev, "ops->handle_urb_cmd() returned error %pe", ERR_PTR(ret)); es58x_print_hex_dump(urb_cmd, cmd_len); break; case -EFAULT: case -ENOMEM: case -EIO: default: dev_crit(es58x_dev->dev, "ops->handle_urb_cmd() returned error %pe, detaching all network devices\n", ERR_PTR(ret)); for (i = 0; i < es58x_dev->num_can_ch; i++) if (es58x_dev->netdev[i]) netif_device_detach(es58x_dev->netdev[i]); if (es58x_dev->ops->reset_device) es58x_dev->ops->reset_device(es58x_dev); break; } /* Because the urb command could not fully be parsed, * channel_id is not confirmed. Incrementing rx_errors count * of all channels. */ es58x_increment_rx_errors(es58x_dev); } /** * es58x_check_rx_urb() - Check the length and format of the URB command. * @es58x_dev: ES58X device. * @urb_cmd: The URB command received from the ES58X device, might not * be aligned. * @urb_actual_len: The actual length of the URB command. * * Check if the first message of the received urb is valid, that is to * say that both the header and the length are coherent. * * Return: * the length of the first message of the URB on success. * * -ENODATA if the URB command is incomplete (in which case, the URB * command should be buffered and combined with the next URB to try to * reconstitute the URB command). * * -EOVERFLOW if the length is bigger than the maximum expected one. * * -EBADRQC if the start of frame does not match the expected value. 
*/ static signed int es58x_check_rx_urb(struct es58x_device *es58x_dev, const union es58x_urb_cmd *urb_cmd, u32 urb_actual_len) { const struct device *dev = es58x_dev->dev; const struct es58x_parameters *param = es58x_dev->param; u16 sof, msg_len; signed int urb_cmd_len, ret; if (urb_actual_len < param->urb_cmd_header_len) { dev_vdbg(dev, "%s: Received %d bytes [%*ph]: header incomplete\n", __func__, urb_actual_len, urb_actual_len, urb_cmd->raw_cmd); return -ENODATA; } sof = get_unaligned_le16(&urb_cmd->sof); if (sof != param->rx_start_of_frame) { dev_err_ratelimited(es58x_dev->dev, "%s: Expected sequence 0x%04X for start of frame but got 0x%04X.\n", __func__, param->rx_start_of_frame, sof); return -EBADRQC; } msg_len = es58x_dev->ops->get_msg_len(urb_cmd); urb_cmd_len = es58x_get_urb_cmd_len(es58x_dev, msg_len); if (urb_cmd_len > param->rx_urb_cmd_max_len) { dev_err_ratelimited(es58x_dev->dev, "%s: Biggest expected size for rx urb_cmd is %u but receive a command of size %d\n", __func__, param->rx_urb_cmd_max_len, urb_cmd_len); return -EOVERFLOW; } else if (urb_actual_len < urb_cmd_len) { dev_vdbg(dev, "%s: Received %02d/%02d bytes\n", __func__, urb_actual_len, urb_cmd_len); return -ENODATA; } ret = es58x_check_crc(es58x_dev, urb_cmd, urb_cmd_len); if (ret) return ret; return urb_cmd_len; } /** * es58x_copy_to_cmd_buf() - Copy an array to the URB command buffer. * @es58x_dev: ES58X device. * @raw_cmd: the buffer we want to copy. * @raw_cmd_len: length of @raw_cmd. * * Concatenates @raw_cmd_len bytes of @raw_cmd to the end of the URB * command buffer. * * Return: zero on success, -EMSGSIZE if not enough space is available * to do the copy. */ static int es58x_copy_to_cmd_buf(struct es58x_device *es58x_dev, u8 *raw_cmd, int raw_cmd_len) { if (es58x_dev->rx_cmd_buf_len + raw_cmd_len > es58x_dev->param->rx_urb_cmd_max_len) return -EMSGSIZE; memcpy(&es58x_dev->rx_cmd_buf.raw_cmd[es58x_dev->rx_cmd_buf_len], raw_cmd, raw_cmd_len); es58x_dev->rx_cmd_buf_len += raw_cmd_len; return 0; } /** * es58x_split_urb_try_recovery() - Try to recover bad URB sequences. * @es58x_dev: ES58X device. * @raw_cmd: pointer to the buffer we want to copy. * @raw_cmd_len: length of @raw_cmd. * * Under some rare conditions, we might get incorrect URBs from the * device. From our observations, one of the valid URB gets replaced * by one from the past. The full root cause is not identified. * * This function looks for the next start of frame in the urb buffer * in order to try to recover. * * Such behavior was not observed on the devices of the ES58X FD * family and only seems to impact the ES581.4. * * Return: the number of bytes dropped on success, -EBADMSG if recovery failed. */ static int es58x_split_urb_try_recovery(struct es58x_device *es58x_dev, u8 *raw_cmd, size_t raw_cmd_len) { union es58x_urb_cmd *urb_cmd; signed int urb_cmd_len; u16 sof; int dropped_bytes = 0; es58x_increment_rx_errors(es58x_dev); while (raw_cmd_len > sizeof(sof)) { urb_cmd = (union es58x_urb_cmd *)raw_cmd; sof = get_unaligned_le16(&urb_cmd->sof); if (sof == es58x_dev->param->rx_start_of_frame) { urb_cmd_len = es58x_check_rx_urb(es58x_dev, urb_cmd, raw_cmd_len); if ((urb_cmd_len == -ENODATA) || urb_cmd_len > 0) { dev_info_ratelimited(es58x_dev->dev, "Recovery successful! 
Dropped %d bytes (urb_cmd_len: %d)\n", dropped_bytes, urb_cmd_len); return dropped_bytes; } } raw_cmd++; raw_cmd_len--; dropped_bytes++; } dev_warn_ratelimited(es58x_dev->dev, "%s: Recovery failed\n", __func__); return -EBADMSG; } /** * es58x_handle_incomplete_cmd() - Reconstitute an URB command from * different URB pieces. * @es58x_dev: ES58X device. * @urb: last urb buffer received. * * The device might split the URB commands in an arbitrary amount of * pieces. This function concatenates those in an URB buffer until a * full URB command is reconstituted and consume it. * * Return: * number of bytes consumed from @urb if successful. * * -ENODATA if the URB command is still incomplete. * * -EBADMSG if the URB command is incorrect. */ static signed int es58x_handle_incomplete_cmd(struct es58x_device *es58x_dev, struct urb *urb) { size_t cpy_len; signed int urb_cmd_len, tmp_cmd_buf_len, ret; tmp_cmd_buf_len = es58x_dev->rx_cmd_buf_len; cpy_len = min_t(int, es58x_dev->param->rx_urb_cmd_max_len - es58x_dev->rx_cmd_buf_len, urb->actual_length); ret = es58x_copy_to_cmd_buf(es58x_dev, urb->transfer_buffer, cpy_len); if (ret < 0) return ret; urb_cmd_len = es58x_check_rx_urb(es58x_dev, &es58x_dev->rx_cmd_buf, es58x_dev->rx_cmd_buf_len); if (urb_cmd_len == -ENODATA) { return -ENODATA; } else if (urb_cmd_len < 0) { dev_err_ratelimited(es58x_dev->dev, "Could not reconstitute incomplete command from previous URB, dropping %d bytes\n", tmp_cmd_buf_len + urb->actual_length); dev_err_ratelimited(es58x_dev->dev, "Error code: %pe, es58x_dev->rx_cmd_buf_len: %d, urb->actual_length: %u\n", ERR_PTR(urb_cmd_len), tmp_cmd_buf_len, urb->actual_length); es58x_print_hex_dump(&es58x_dev->rx_cmd_buf, tmp_cmd_buf_len); es58x_print_hex_dump(urb->transfer_buffer, urb->actual_length); return urb->actual_length; } es58x_handle_urb_cmd(es58x_dev, &es58x_dev->rx_cmd_buf); return urb_cmd_len - tmp_cmd_buf_len; /* consumed length */ } /** * es58x_split_urb() - Cut the received URB in individual URB commands. * @es58x_dev: ES58X device. * @urb: last urb buffer received. * * The device might send urb in bulk format (i.e. several URB commands * concatenated together). This function will split all the commands * contained in the urb. * * Return: * number of bytes consumed from @urb if successful. * * -ENODATA if the URB command is incomplete. * * -EBADMSG if the URB command is incorrect. */ static signed int es58x_split_urb(struct es58x_device *es58x_dev, struct urb *urb) { union es58x_urb_cmd *urb_cmd; u8 *raw_cmd = urb->transfer_buffer; s32 raw_cmd_len = urb->actual_length; int ret; if (es58x_dev->rx_cmd_buf_len != 0) { ret = es58x_handle_incomplete_cmd(es58x_dev, urb); if (ret != -ENODATA) es58x_dev->rx_cmd_buf_len = 0; if (ret < 0) return ret; raw_cmd += ret; raw_cmd_len -= ret; } while (raw_cmd_len > 0) { if (raw_cmd[0] == ES58X_HEARTBEAT) { raw_cmd++; raw_cmd_len--; continue; } urb_cmd = (union es58x_urb_cmd *)raw_cmd; ret = es58x_check_rx_urb(es58x_dev, urb_cmd, raw_cmd_len); if (ret > 0) { es58x_handle_urb_cmd(es58x_dev, urb_cmd); } else if (ret == -ENODATA) { es58x_copy_to_cmd_buf(es58x_dev, raw_cmd, raw_cmd_len); return -ENODATA; } else if (ret < 0) { ret = es58x_split_urb_try_recovery(es58x_dev, raw_cmd, raw_cmd_len); if (ret < 0) return ret; } raw_cmd += ret; raw_cmd_len -= ret; } return 0; } /** * es58x_read_bulk_callback() - Callback for reading data from device. * @urb: last urb buffer received. * * This function gets eventually called each time an URB is received * from the ES58X device. 
* * Checks urb status, calls read function and resubmits urb read * operation. */ static void es58x_read_bulk_callback(struct urb *urb) { struct es58x_device *es58x_dev = urb->context; const struct device *dev = es58x_dev->dev; int i, ret; switch (urb->status) { case 0: /* success */ break; case -EOVERFLOW: dev_err_ratelimited(dev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); es58x_print_hex_dump_debug(urb->transfer_buffer, urb->transfer_buffer_length); goto resubmit_urb; case -EPROTO: dev_warn_ratelimited(dev, "%s: error %pe. Device unplugged?\n", __func__, ERR_PTR(urb->status)); goto free_urb; case -ENOENT: case -EPIPE: dev_err_ratelimited(dev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); goto free_urb; case -ESHUTDOWN: dev_dbg_ratelimited(dev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); goto free_urb; default: dev_err_ratelimited(dev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); goto resubmit_urb; } ret = es58x_split_urb(es58x_dev, urb); if ((ret != -ENODATA) && ret < 0) { dev_err(es58x_dev->dev, "es58x_split_urb() returned error %pe", ERR_PTR(ret)); es58x_print_hex_dump_debug(urb->transfer_buffer, urb->actual_length); /* Because the urb command could not be parsed, * channel_id is not confirmed. Incrementing rx_errors * count of all channels. */ es58x_increment_rx_errors(es58x_dev); } resubmit_urb: ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret == -ENODEV) { for (i = 0; i < es58x_dev->num_can_ch; i++) if (es58x_dev->netdev[i]) netif_device_detach(es58x_dev->netdev[i]); } else if (ret) dev_err_ratelimited(dev, "Failed resubmitting read bulk urb: %pe\n", ERR_PTR(ret)); return; free_urb: usb_free_coherent(urb->dev, urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); } /** * es58x_write_bulk_callback() - Callback after writing data to the device. * @urb: urb buffer which was previously submitted. * * This function gets eventually called each time an URB was sent to * the ES58X device. * * Puts the @urb back to the urbs idle anchor and tries to restart the * network queue. */ static void es58x_write_bulk_callback(struct urb *urb) { struct net_device *netdev = urb->context; struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev; switch (urb->status) { case 0: /* success */ break; case -EOVERFLOW: if (net_ratelimit()) netdev_err(netdev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); es58x_print_hex_dump(urb->transfer_buffer, urb->transfer_buffer_length); break; case -ENOENT: if (net_ratelimit()) netdev_dbg(netdev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); usb_free_coherent(urb->dev, es58x_dev->param->tx_urb_cmd_max_len, urb->transfer_buffer, urb->transfer_dma); return; default: if (net_ratelimit()) netdev_info(netdev, "%s: error %pe\n", __func__, ERR_PTR(urb->status)); break; } usb_anchor_urb(urb, &es58x_dev->tx_urbs_idle); atomic_inc(&es58x_dev->tx_urbs_idle_cnt); } /** * es58x_alloc_urb() - Allocate memory for an URB and its transfer * buffer. * @es58x_dev: ES58X device. * @urb: URB to be allocated. * @buf: used to return DMA address of buffer. * @buf_len: requested buffer size. * @mem_flags: affect whether allocation may block. * * Allocates an URB and its @transfer_buffer and set its @transfer_dma * address. * * This function is used at start-up to allocate all RX URBs at once * and during run time for TX URBs. * * Return: zero on success, -ENOMEM if no memory is available. 
*/ static int es58x_alloc_urb(struct es58x_device *es58x_dev, struct urb **urb, u8 **buf, size_t buf_len, gfp_t mem_flags) { *urb = usb_alloc_urb(0, mem_flags); if (!*urb) { dev_err(es58x_dev->dev, "No memory left for URBs\n"); return -ENOMEM; } *buf = usb_alloc_coherent(es58x_dev->udev, buf_len, mem_flags, &(*urb)->transfer_dma); if (!*buf) { dev_err(es58x_dev->dev, "No memory left for USB buffer\n"); usb_free_urb(*urb); return -ENOMEM; } (*urb)->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; return 0; } /** * es58x_get_tx_urb() - Get an URB for transmission. * @es58x_dev: ES58X device. * * Gets an URB from the idle urbs anchor or allocate a new one if the * anchor is empty. * * If there are more than ES58X_TX_URBS_MAX in the idle anchor, do * some garbage collection. The garbage collection is done here * instead of within es58x_write_bulk_callback() because * usb_free_coherent() should not be used in IRQ context: * c.f. WARN_ON(irqs_disabled()) in dma_free_attrs(). * * Return: a pointer to an URB on success, NULL if no memory is * available. */ static struct urb *es58x_get_tx_urb(struct es58x_device *es58x_dev) { atomic_t *idle_cnt = &es58x_dev->tx_urbs_idle_cnt; struct urb *urb = usb_get_from_anchor(&es58x_dev->tx_urbs_idle); if (!urb) { size_t tx_buf_len; u8 *buf; tx_buf_len = es58x_dev->param->tx_urb_cmd_max_len; if (es58x_alloc_urb(es58x_dev, &urb, &buf, tx_buf_len, GFP_ATOMIC)) return NULL; usb_fill_bulk_urb(urb, es58x_dev->udev, es58x_dev->tx_pipe, buf, tx_buf_len, es58x_write_bulk_callback, NULL); return urb; } while (atomic_dec_return(idle_cnt) > ES58X_TX_URBS_MAX) { /* Garbage collector */ struct urb *tmp = usb_get_from_anchor(&es58x_dev->tx_urbs_idle); if (!tmp) break; usb_free_coherent(tmp->dev, es58x_dev->param->tx_urb_cmd_max_len, tmp->transfer_buffer, tmp->transfer_dma); usb_free_urb(tmp); } return urb; } /** * es58x_submit_urb() - Send data to the device. * @es58x_dev: ES58X device. * @urb: URB to be sent. * @netdev: CAN network device. * * Return: zero on success, errno when any error occurs. */ static int es58x_submit_urb(struct es58x_device *es58x_dev, struct urb *urb, struct net_device *netdev) { int ret; es58x_set_crc(urb->transfer_buffer, urb->transfer_buffer_length); urb->context = netdev; usb_anchor_urb(urb, &es58x_dev->tx_urbs_busy); ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret) { netdev_err(netdev, "%s: USB send urb failure: %pe\n", __func__, ERR_PTR(ret)); usb_unanchor_urb(urb); usb_free_coherent(urb->dev, es58x_dev->param->tx_urb_cmd_max_len, urb->transfer_buffer, urb->transfer_dma); } usb_free_urb(urb); return ret; } /** * es58x_send_msg() - Prepare an URB and submit it. * @es58x_dev: ES58X device. * @cmd_type: Command type. * @cmd_id: Command ID. * @msg: ES58X message to be sent. * @msg_len: Length of @msg. * @channel_idx: Index of the network device. * * Creates an URB command from a given message, sets the header and the * CRC and then submits it. * * Return: zero on success, errno when any error occurs. 
*/ int es58x_send_msg(struct es58x_device *es58x_dev, u8 cmd_type, u8 cmd_id, const void *msg, u16 msg_len, int channel_idx) { struct net_device *netdev; union es58x_urb_cmd *urb_cmd; struct urb *urb; int urb_cmd_len; if (channel_idx == ES58X_CHANNEL_IDX_NA) netdev = es58x_dev->netdev[0]; /* Default to first channel */ else netdev = es58x_dev->netdev[channel_idx]; urb_cmd_len = es58x_get_urb_cmd_len(es58x_dev, msg_len); if (urb_cmd_len > es58x_dev->param->tx_urb_cmd_max_len) return -EOVERFLOW; urb = es58x_get_tx_urb(es58x_dev); if (!urb) return -ENOMEM; urb_cmd = urb->transfer_buffer; es58x_dev->ops->fill_urb_header(urb_cmd, cmd_type, cmd_id, channel_idx, msg_len); memcpy(&urb_cmd->raw_cmd[es58x_dev->param->urb_cmd_header_len], msg, msg_len); urb->transfer_buffer_length = urb_cmd_len; return es58x_submit_urb(es58x_dev, urb, netdev); } /** * es58x_alloc_rx_urbs() - Allocate RX URBs. * @es58x_dev: ES58X device. * * Allocate URBs for reception and anchor them. * * Return: zero on success, errno when any error occurs. */ static int es58x_alloc_rx_urbs(struct es58x_device *es58x_dev) { const struct device *dev = es58x_dev->dev; const struct es58x_parameters *param = es58x_dev->param; u16 rx_buf_len = usb_maxpacket(es58x_dev->udev, es58x_dev->rx_pipe); struct urb *urb; u8 *buf; int i; int ret = -EINVAL; for (i = 0; i < param->rx_urb_max; i++) { ret = es58x_alloc_urb(es58x_dev, &urb, &buf, rx_buf_len, GFP_KERNEL); if (ret) break; usb_fill_bulk_urb(urb, es58x_dev->udev, es58x_dev->rx_pipe, buf, rx_buf_len, es58x_read_bulk_callback, es58x_dev); usb_anchor_urb(urb, &es58x_dev->rx_urbs); ret = usb_submit_urb(urb, GFP_KERNEL); if (ret) { usb_unanchor_urb(urb); usb_free_coherent(es58x_dev->udev, rx_buf_len, buf, urb->transfer_dma); usb_free_urb(urb); break; } usb_free_urb(urb); } if (i == 0) { dev_err(dev, "%s: Could not setup any rx URBs\n", __func__); return ret; } dev_dbg(dev, "%s: Allocated %d rx URBs each of size %u\n", __func__, i, rx_buf_len); return ret; } /** * es58x_free_urbs() - Free all the TX and RX URBs. * @es58x_dev: ES58X device. */ static void es58x_free_urbs(struct es58x_device *es58x_dev) { struct urb *urb; if (!usb_wait_anchor_empty_timeout(&es58x_dev->tx_urbs_busy, 1000)) { dev_err(es58x_dev->dev, "%s: Timeout, some TX urbs still remain\n", __func__); usb_kill_anchored_urbs(&es58x_dev->tx_urbs_busy); } while ((urb = usb_get_from_anchor(&es58x_dev->tx_urbs_idle)) != NULL) { usb_free_coherent(urb->dev, es58x_dev->param->tx_urb_cmd_max_len, urb->transfer_buffer, urb->transfer_dma); usb_free_urb(urb); atomic_dec(&es58x_dev->tx_urbs_idle_cnt); } if (atomic_read(&es58x_dev->tx_urbs_idle_cnt)) dev_err(es58x_dev->dev, "All idle urbs were freed but tx_urb_idle_cnt is %d\n", atomic_read(&es58x_dev->tx_urbs_idle_cnt)); usb_kill_anchored_urbs(&es58x_dev->rx_urbs); } /** * es58x_open() - Enable the network device. * @netdev: CAN network device. * * Called when the network transitions to the up state. Allocate the * URB resources if needed and open the channel. * * Return: zero on success, errno when any error occurs. 
*/ static int es58x_open(struct net_device *netdev) { struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev; int ret; if (!es58x_dev->opened_channel_cnt) { ret = es58x_alloc_rx_urbs(es58x_dev); if (ret) return ret; ret = es58x_set_realtime_diff_ns(es58x_dev); if (ret) goto free_urbs; } ret = open_candev(netdev); if (ret) goto free_urbs; ret = es58x_dev->ops->enable_channel(es58x_priv(netdev)); if (ret) goto free_urbs; es58x_dev->opened_channel_cnt++; netif_start_queue(netdev); return ret; free_urbs: if (!es58x_dev->opened_channel_cnt) es58x_free_urbs(es58x_dev); netdev_err(netdev, "%s: Could not open the network device: %pe\n", __func__, ERR_PTR(ret)); return ret; } /** * es58x_stop() - Disable the network device. * @netdev: CAN network device. * * Called when the network transitions to the down state. If all the * channels of the device are closed, free the URB resources which are * not needed anymore. * * Return: zero on success, errno when any error occurs. */ static int es58x_stop(struct net_device *netdev) { struct es58x_priv *priv = es58x_priv(netdev); struct es58x_device *es58x_dev = priv->es58x_dev; int ret; netif_stop_queue(netdev); ret = es58x_dev->ops->disable_channel(priv); if (ret) return ret; priv->can.state = CAN_STATE_STOPPED; es58x_can_reset_echo_fifo(netdev); close_candev(netdev); es58x_flush_pending_tx_msg(netdev); es58x_dev->opened_channel_cnt--; if (!es58x_dev->opened_channel_cnt) es58x_free_urbs(es58x_dev); return 0; } /** * es58x_xmit_commit() - Send the bulk urb. * @netdev: CAN network device. * * Do the bulk send. This function should be called only once by bulk * transmission. * * Return: zero on success, errno when any error occurs. */ static int es58x_xmit_commit(struct net_device *netdev) { struct es58x_priv *priv = es58x_priv(netdev); int ret; if (!es58x_is_can_state_active(netdev)) return -ENETDOWN; if (es58x_is_echo_skb_threshold_reached(priv)) netif_stop_queue(netdev); ret = es58x_submit_urb(priv->es58x_dev, priv->tx_urb, netdev); if (ret == 0) priv->tx_urb = NULL; return ret; } /** * es58x_xmit_more() - Can we put more packets? * @priv: ES58X private parameters related to the network device. * * Return: true if we can put more, false if it is time to send. */ static bool es58x_xmit_more(struct es58x_priv *priv) { unsigned int free_slots = priv->can.echo_skb_max - (priv->tx_head - priv->tx_tail); return netdev_xmit_more() && free_slots > 0 && priv->tx_can_msg_cnt < priv->es58x_dev->param->tx_bulk_max; } /** * es58x_start_xmit() - Transmit an skb. * @skb: socket buffer of a CAN message. * @netdev: CAN network device. * * Called when a packet needs to be transmitted. * * This function relies on Byte Queue Limits (BQL). The main benefit * is to increase the throughput by allowing bulk transfers * (c.f. xmit_more flag). * * Queues up to tx_bulk_max messages in &tx_urb buffer and does * a bulk send of all messages in one single URB. * * Return: NETDEV_TX_OK regardless of if we could transmit the @skb or * had to drop it. */ static netdev_tx_t es58x_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct es58x_priv *priv = es58x_priv(netdev); struct es58x_device *es58x_dev = priv->es58x_dev; unsigned int frame_len; int ret; if (can_dev_dropped_skb(netdev, skb)) { if (priv->tx_urb) goto xmit_commit; return NETDEV_TX_OK; } if (priv->tx_urb && priv->tx_can_msg_is_fd != can_is_canfd_skb(skb)) { /* Can not do bulk send with mixed CAN and CAN FD frames. 
*/ ret = es58x_xmit_commit(netdev); if (ret) goto drop_skb; } if (!priv->tx_urb) { priv->tx_urb = es58x_get_tx_urb(es58x_dev); if (!priv->tx_urb) { ret = -ENOMEM; goto drop_skb; } priv->tx_can_msg_cnt = 0; priv->tx_can_msg_is_fd = can_is_canfd_skb(skb); } ret = es58x_dev->ops->tx_can_msg(priv, skb); if (ret) goto drop_skb; frame_len = can_skb_get_frame_len(skb); ret = can_put_echo_skb(skb, netdev, priv->tx_head & es58x_dev->param->fifo_mask, frame_len); if (ret) goto xmit_failure; netdev_sent_queue(netdev, frame_len); priv->tx_head++; priv->tx_can_msg_cnt++; xmit_commit: if (!es58x_xmit_more(priv)) { ret = es58x_xmit_commit(netdev); if (ret) goto xmit_failure; } return NETDEV_TX_OK; drop_skb: dev_kfree_skb(skb); netdev->stats.tx_dropped++; xmit_failure: netdev_warn(netdev, "%s: send message failure: %pe\n", __func__, ERR_PTR(ret)); netdev->stats.tx_errors++; es58x_flush_pending_tx_msg(netdev); return NETDEV_TX_OK; } static const struct net_device_ops es58x_netdev_ops = { .ndo_open = es58x_open, .ndo_stop = es58x_stop, .ndo_start_xmit = es58x_start_xmit, .ndo_eth_ioctl = can_eth_ioctl_hwts, }; static const struct ethtool_ops es58x_ethtool_ops = { .get_ts_info = can_ethtool_op_get_ts_info_hwts, }; /** * es58x_set_mode() - Change network device mode. * @netdev: CAN network device. * @mode: either %CAN_MODE_START, %CAN_MODE_STOP or %CAN_MODE_SLEEP * * Currently, this function is only used to stop and restart the * channel during a bus off event (c.f. es58x_rx_err_msg() and * drivers/net/can/dev.c:can_restart() which are the two only * callers). * * Return: zero on success, errno when any error occurs. */ static int es58x_set_mode(struct net_device *netdev, enum can_mode mode) { struct es58x_priv *priv = es58x_priv(netdev); switch (mode) { case CAN_MODE_START: switch (priv->can.state) { case CAN_STATE_BUS_OFF: return priv->es58x_dev->ops->enable_channel(priv); case CAN_STATE_STOPPED: return es58x_open(netdev); case CAN_STATE_ERROR_ACTIVE: case CAN_STATE_ERROR_WARNING: case CAN_STATE_ERROR_PASSIVE: default: return 0; } case CAN_MODE_STOP: switch (priv->can.state) { case CAN_STATE_STOPPED: return 0; case CAN_STATE_ERROR_ACTIVE: case CAN_STATE_ERROR_WARNING: case CAN_STATE_ERROR_PASSIVE: case CAN_STATE_BUS_OFF: default: return priv->es58x_dev->ops->disable_channel(priv); } case CAN_MODE_SLEEP: default: return -EOPNOTSUPP; } } /** * es58x_init_priv() - Initialize private parameters. * @es58x_dev: ES58X device. * @priv: ES58X private parameters related to the network device. * @channel_idx: Index of the network device. * * Return: zero on success, errno if devlink port could not be * properly registered. 
*/ static int es58x_init_priv(struct es58x_device *es58x_dev, struct es58x_priv *priv, int channel_idx) { struct devlink_port_attrs attrs = { .flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL, }; const struct es58x_parameters *param = es58x_dev->param; struct can_priv *can = &priv->can; priv->es58x_dev = es58x_dev; priv->channel_idx = channel_idx; priv->tx_urb = NULL; priv->tx_can_msg_cnt = 0; can->bittiming_const = param->bittiming_const; if (param->ctrlmode_supported & CAN_CTRLMODE_FD) { can->data_bittiming_const = param->data_bittiming_const; can->tdc_const = param->tdc_const; } can->bitrate_max = param->bitrate_max; can->clock = param->clock; can->state = CAN_STATE_STOPPED; can->ctrlmode_supported = param->ctrlmode_supported; can->do_set_mode = es58x_set_mode; devlink_port_attrs_set(&priv->devlink_port, &attrs); return devlink_port_register(priv_to_devlink(es58x_dev), &priv->devlink_port, channel_idx); } /** * es58x_init_netdev() - Initialize the network device. * @es58x_dev: ES58X device. * @channel_idx: Index of the network device. * * Return: zero on success, errno when any error occurs. */ static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx) { struct net_device *netdev; struct device *dev = es58x_dev->dev; int ret; netdev = alloc_candev(sizeof(struct es58x_priv), es58x_dev->param->fifo_mask + 1); if (!netdev) { dev_err(dev, "Could not allocate candev\n"); return -ENOMEM; } SET_NETDEV_DEV(netdev, dev); es58x_dev->netdev[channel_idx] = netdev; ret = es58x_init_priv(es58x_dev, es58x_priv(netdev), channel_idx); if (ret) goto free_candev; SET_NETDEV_DEVLINK_PORT(netdev, &es58x_priv(netdev)->devlink_port); netdev->netdev_ops = &es58x_netdev_ops; netdev->ethtool_ops = &es58x_ethtool_ops; netdev->flags |= IFF_ECHO; /* We support local echo */ netdev->dev_port = channel_idx; ret = register_candev(netdev); if (ret) goto devlink_port_unregister; netdev_queue_set_dql_min_limit(netdev_get_tx_queue(netdev, 0), es58x_dev->param->dql_min_limit); return ret; devlink_port_unregister: devlink_port_unregister(&es58x_priv(netdev)->devlink_port); free_candev: es58x_dev->netdev[channel_idx] = NULL; free_candev(netdev); return ret; } /** * es58x_free_netdevs() - Release all network resources of the device. * @es58x_dev: ES58X device. */ static void es58x_free_netdevs(struct es58x_device *es58x_dev) { int i; for (i = 0; i < es58x_dev->num_can_ch; i++) { struct net_device *netdev = es58x_dev->netdev[i]; if (!netdev) continue; unregister_candev(netdev); devlink_port_unregister(&es58x_priv(netdev)->devlink_port); es58x_dev->netdev[i] = NULL; free_candev(netdev); } } /** * es58x_init_es58x_dev() - Initialize the ES58X device. * @intf: USB interface. * @driver_info: Quirks of the device. * * Return: pointer to an ES58X device on success, error pointer when * any error occurs. 
*/ static struct es58x_device *es58x_init_es58x_dev(struct usb_interface *intf, kernel_ulong_t driver_info) { struct device *dev = &intf->dev; struct es58x_device *es58x_dev; struct devlink *devlink; const struct es58x_parameters *param; const struct es58x_operators *ops; struct usb_device *udev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *ep_in, *ep_out; int ret; dev_info(dev, "Starting %s %s (Serial Number %s)\n", udev->manufacturer, udev->product, udev->serial); ret = usb_find_common_endpoints(intf->cur_altsetting, &ep_in, &ep_out, NULL, NULL); if (ret) return ERR_PTR(ret); if (driver_info & ES58X_FD_FAMILY) { param = &es58x_fd_param; ops = &es58x_fd_ops; } else { param = &es581_4_param; ops = &es581_4_ops; } devlink = devlink_alloc(&es58x_dl_ops, es58x_sizeof_es58x_device(param), dev); if (!devlink) return ERR_PTR(-ENOMEM); es58x_dev = devlink_priv(devlink); es58x_dev->param = param; es58x_dev->ops = ops; es58x_dev->dev = dev; es58x_dev->udev = udev; if (driver_info & ES58X_DUAL_CHANNEL) es58x_dev->num_can_ch = 2; else es58x_dev->num_can_ch = 1; init_usb_anchor(&es58x_dev->rx_urbs); init_usb_anchor(&es58x_dev->tx_urbs_idle); init_usb_anchor(&es58x_dev->tx_urbs_busy); atomic_set(&es58x_dev->tx_urbs_idle_cnt, 0); usb_set_intfdata(intf, es58x_dev); es58x_dev->rx_pipe = usb_rcvbulkpipe(es58x_dev->udev, ep_in->bEndpointAddress); es58x_dev->tx_pipe = usb_sndbulkpipe(es58x_dev->udev, ep_out->bEndpointAddress); return es58x_dev; } /** * es58x_probe() - Initialize the USB device. * @intf: USB interface. * @id: USB device ID. * * Return: zero on success, -ENODEV if the interface is not supported * or errno when any other error occurs. */ static int es58x_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct es58x_device *es58x_dev; int ch_idx; es58x_dev = es58x_init_es58x_dev(intf, id->driver_info); if (IS_ERR(es58x_dev)) return PTR_ERR(es58x_dev); es58x_parse_product_info(es58x_dev); devlink_register(priv_to_devlink(es58x_dev)); for (ch_idx = 0; ch_idx < es58x_dev->num_can_ch; ch_idx++) { int ret = es58x_init_netdev(es58x_dev, ch_idx); if (ret) { es58x_free_netdevs(es58x_dev); return ret; } } return 0; } /** * es58x_disconnect() - Disconnect the USB device. * @intf: USB interface * * Called by the usb core when driver is unloaded or device is * removed. */ static void es58x_disconnect(struct usb_interface *intf) { struct es58x_device *es58x_dev = usb_get_intfdata(intf); dev_info(&intf->dev, "Disconnecting %s %s\n", es58x_dev->udev->manufacturer, es58x_dev->udev->product); devlink_unregister(priv_to_devlink(es58x_dev)); es58x_free_netdevs(es58x_dev); es58x_free_urbs(es58x_dev); devlink_free(priv_to_devlink(es58x_dev)); usb_set_intfdata(intf, NULL); } static struct usb_driver es58x_driver = { .name = KBUILD_MODNAME, .probe = es58x_probe, .disconnect = es58x_disconnect, .id_table = es58x_id_table }; module_usb_driver(es58x_driver);
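/*
 * Editor's note: illustrative sketch only, not part of es58x_core.c. It shows
 * how the netdev paths documented above (es58x_open() on interface up,
 * es58x_start_xmit() on transmit) are typically exercised from userspace
 * through the standard SocketCAN raw socket API. The interface name "can0",
 * the function name and the frame contents are assumptions for the example.
 */
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/can.h>
#include <linux/can/raw.h>

int es58x_example_send_one_frame(void)
{
	struct sockaddr_can addr = { 0 };
	struct can_frame frame = { 0 };
	struct ifreq ifr = { 0 };
	int s;

	/* A raw CAN socket; the kernel routes written frames to the CAN netdev */
	s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
	if (s < 0)
		return -1;

	/* Resolve the interface index of the channel (assumed to be "can0") */
	strncpy(ifr.ifr_name, "can0", sizeof(ifr.ifr_name) - 1);
	if (ioctl(s, SIOCGIFINDEX, &ifr) < 0)
		goto err;

	addr.can_family = AF_CAN;
	addr.can_ifindex = ifr.ifr_ifindex;
	if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		goto err;

	/* One classic CAN frame; the driver queues it in es58x_start_xmit() */
	frame.can_id = 0x123;
	frame.can_dlc = 2;
	frame.data[0] = 0xde;
	frame.data[1] = 0xad;
	if (write(s, &frame, sizeof(frame)) != sizeof(frame))
		goto err;

	close(s);
	return 0;

err:
	close(s);
	return -1;
}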
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
 *
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>

struct nft_limit {
	spinlock_t lock;
	u64 last;
	u64 tokens;
};

struct nft_limit_priv {
	struct nft_limit *limit;
	u64 tokens_max;
	u64 rate;
	u64 nsecs;
	u32 burst;
	bool invert;
};

static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost)
{
	u64 now, tokens;
	s64 delta;

	spin_lock_bh(&priv->limit->lock);
	now = ktime_get_ns();
	tokens = priv->limit->tokens + now - priv->limit->last;
	if (tokens > priv->tokens_max)
		tokens = priv->tokens_max;

	priv->limit->last = now;
	delta = tokens - cost;
	if (delta >= 0) {
		priv->limit->tokens = delta;
		spin_unlock_bh(&priv->limit->lock);
		return priv->invert;
	}
	priv->limit->tokens = tokens;
	spin_unlock_bh(&priv->limit->lock);
	return !priv->invert;
}

/* Use same default as in iptables.
*/ #define NFT_LIMIT_PKT_BURST_DEFAULT 5 static int nft_limit_init(struct nft_limit_priv *priv, const struct nlattr * const tb[], bool pkts) { u64 unit, tokens, rate_with_burst; bool invert = false; if (tb[NFTA_LIMIT_RATE] == NULL || tb[NFTA_LIMIT_UNIT] == NULL) return -EINVAL; priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); if (priv->rate == 0) return -EINVAL; unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); if (check_mul_overflow(unit, NSEC_PER_SEC, &priv->nsecs)) return -EOVERFLOW; if (tb[NFTA_LIMIT_BURST]) priv->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST])); if (pkts && priv->burst == 0) priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT; if (check_add_overflow(priv->rate, priv->burst, &rate_with_burst)) return -EOVERFLOW; if (pkts) { u64 tmp = div64_u64(priv->nsecs, priv->rate); if (check_mul_overflow(tmp, priv->burst, &tokens)) return -EOVERFLOW; } else { u64 tmp; /* The token bucket size limits the number of tokens can be * accumulated. tokens_max specifies the bucket size. * tokens_max = unit * (rate + burst) / rate. */ if (check_mul_overflow(priv->nsecs, rate_with_burst, &tmp)) return -EOVERFLOW; tokens = div64_u64(tmp, priv->rate); } if (tb[NFTA_LIMIT_FLAGS]) { u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS])); if (flags & ~NFT_LIMIT_F_INV) return -EOPNOTSUPP; if (flags & NFT_LIMIT_F_INV) invert = true; } priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT); if (!priv->limit) return -ENOMEM; priv->limit->tokens = tokens; priv->tokens_max = priv->limit->tokens; priv->invert = invert; priv->limit->last = ktime_get_ns(); spin_lock_init(&priv->limit->lock); return 0; } static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit_priv *priv, enum nft_limit_type type) { u32 flags = priv->invert ? NFT_LIMIT_F_INV : 0; u64 secs = div_u64(priv->nsecs, NSEC_PER_SEC); if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate), NFTA_LIMIT_PAD) || nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs), NFTA_LIMIT_PAD) || nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(priv->burst)) || nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)) || nla_put_be32(skb, NFTA_LIMIT_FLAGS, htonl(flags))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static void nft_limit_destroy(const struct nft_ctx *ctx, const struct nft_limit_priv *priv) { kfree(priv->limit); } static int nft_limit_clone(struct nft_limit_priv *priv_dst, const struct nft_limit_priv *priv_src, gfp_t gfp) { priv_dst->tokens_max = priv_src->tokens_max; priv_dst->rate = priv_src->rate; priv_dst->nsecs = priv_src->nsecs; priv_dst->burst = priv_src->burst; priv_dst->invert = priv_src->invert; priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), gfp); if (!priv_dst->limit) return -ENOMEM; spin_lock_init(&priv_dst->limit->lock); priv_dst->limit->tokens = priv_src->tokens_max; priv_dst->limit->last = ktime_get_ns(); return 0; } struct nft_limit_priv_pkts { struct nft_limit_priv limit; u64 cost; }; static void nft_limit_pkts_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_limit_priv_pkts *priv = nft_expr_priv(expr); if (nft_limit_eval(&priv->limit, priv->cost)) regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { [NFTA_LIMIT_RATE] = { .type = NLA_U64 }, [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, [NFTA_LIMIT_BURST] = { .type = NLA_U32 }, [NFTA_LIMIT_TYPE] = { .type = NLA_U32 }, [NFTA_LIMIT_FLAGS] = { .type = NLA_U32 }, }; static int nft_limit_pkts_init(const struct nft_ctx *ctx, const struct nft_expr 
*expr, const struct nlattr * const tb[]) { struct nft_limit_priv_pkts *priv = nft_expr_priv(expr); int err; err = nft_limit_init(&priv->limit, tb, true); if (err < 0) return err; priv->cost = div64_u64(priv->limit.nsecs, priv->limit.rate); return 0; } static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_limit_priv_pkts *priv = nft_expr_priv(expr); return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS); } static void nft_limit_pkts_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_limit_priv_pkts *priv = nft_expr_priv(expr); nft_limit_destroy(ctx, &priv->limit); } static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) { struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst); struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src); priv_dst->cost = priv_src->cost; return nft_limit_clone(&priv_dst->limit, &priv_src->limit, gfp); } static struct nft_expr_type nft_limit_type; static const struct nft_expr_ops nft_limit_pkts_ops = { .type = &nft_limit_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)), .eval = nft_limit_pkts_eval, .init = nft_limit_pkts_init, .destroy = nft_limit_pkts_destroy, .clone = nft_limit_pkts_clone, .dump = nft_limit_pkts_dump, .reduce = NFT_REDUCE_READONLY, }; static void nft_limit_bytes_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_limit_priv *priv = nft_expr_priv(expr); u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate); if (nft_limit_eval(priv, cost)) regs->verdict.code = NFT_BREAK; } static int nft_limit_bytes_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_limit_priv *priv = nft_expr_priv(expr); return nft_limit_init(priv, tb, false); } static int nft_limit_bytes_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_limit_priv *priv = nft_expr_priv(expr); return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES); } static void nft_limit_bytes_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_limit_priv *priv = nft_expr_priv(expr); nft_limit_destroy(ctx, priv); } static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) { struct nft_limit_priv *priv_dst = nft_expr_priv(dst); struct nft_limit_priv *priv_src = nft_expr_priv(src); return nft_limit_clone(priv_dst, priv_src, gfp); } static const struct nft_expr_ops nft_limit_bytes_ops = { .type = &nft_limit_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv)), .eval = nft_limit_bytes_eval, .init = nft_limit_bytes_init, .dump = nft_limit_bytes_dump, .clone = nft_limit_bytes_clone, .destroy = nft_limit_bytes_destroy, .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops * nft_limit_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { if (tb[NFTA_LIMIT_TYPE] == NULL) return &nft_limit_pkts_ops; switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) { case NFT_LIMIT_PKTS: return &nft_limit_pkts_ops; case NFT_LIMIT_PKT_BYTES: return &nft_limit_bytes_ops; } return ERR_PTR(-EOPNOTSUPP); } static struct nft_expr_type nft_limit_type __read_mostly = { .name = "limit", .select_ops = nft_limit_select_ops, .policy = nft_limit_policy, .maxattr = NFTA_LIMIT_MAX, .flags = NFT_EXPR_STATEFUL, .owner = THIS_MODULE, }; static void nft_limit_obj_pkts_eval(struct nft_object *obj, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct 
nft_limit_priv_pkts *priv = nft_obj_data(obj); if (nft_limit_eval(&priv->limit, priv->cost)) regs->verdict.code = NFT_BREAK; } static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx, const struct nlattr * const tb[], struct nft_object *obj) { struct nft_limit_priv_pkts *priv = nft_obj_data(obj); int err; err = nft_limit_init(&priv->limit, tb, true); if (err < 0) return err; priv->cost = div64_u64(priv->limit.nsecs, priv->limit.rate); return 0; } static int nft_limit_obj_pkts_dump(struct sk_buff *skb, struct nft_object *obj, bool reset) { const struct nft_limit_priv_pkts *priv = nft_obj_data(obj); return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS); } static void nft_limit_obj_pkts_destroy(const struct nft_ctx *ctx, struct nft_object *obj) { struct nft_limit_priv_pkts *priv = nft_obj_data(obj); nft_limit_destroy(ctx, &priv->limit); } static struct nft_object_type nft_limit_obj_type; static const struct nft_object_ops nft_limit_obj_pkts_ops = { .type = &nft_limit_obj_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)), .init = nft_limit_obj_pkts_init, .destroy = nft_limit_obj_pkts_destroy, .eval = nft_limit_obj_pkts_eval, .dump = nft_limit_obj_pkts_dump, }; static void nft_limit_obj_bytes_eval(struct nft_object *obj, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_limit_priv *priv = nft_obj_data(obj); u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate); if (nft_limit_eval(priv, cost)) regs->verdict.code = NFT_BREAK; } static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx, const struct nlattr * const tb[], struct nft_object *obj) { struct nft_limit_priv *priv = nft_obj_data(obj); return nft_limit_init(priv, tb, false); } static int nft_limit_obj_bytes_dump(struct sk_buff *skb, struct nft_object *obj, bool reset) { const struct nft_limit_priv *priv = nft_obj_data(obj); return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES); } static void nft_limit_obj_bytes_destroy(const struct nft_ctx *ctx, struct nft_object *obj) { struct nft_limit_priv *priv = nft_obj_data(obj); nft_limit_destroy(ctx, priv); } static struct nft_object_type nft_limit_obj_type; static const struct nft_object_ops nft_limit_obj_bytes_ops = { .type = &nft_limit_obj_type, .size = sizeof(struct nft_limit_priv), .init = nft_limit_obj_bytes_init, .destroy = nft_limit_obj_bytes_destroy, .eval = nft_limit_obj_bytes_eval, .dump = nft_limit_obj_bytes_dump, }; static const struct nft_object_ops * nft_limit_obj_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { if (!tb[NFTA_LIMIT_TYPE]) return &nft_limit_obj_pkts_ops; switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) { case NFT_LIMIT_PKTS: return &nft_limit_obj_pkts_ops; case NFT_LIMIT_PKT_BYTES: return &nft_limit_obj_bytes_ops; } return ERR_PTR(-EOPNOTSUPP); } static struct nft_object_type nft_limit_obj_type __read_mostly = { .select_ops = nft_limit_obj_select_ops, .type = NFT_OBJECT_LIMIT, .maxattr = NFTA_LIMIT_MAX, .policy = nft_limit_policy, .owner = THIS_MODULE, }; static int __init nft_limit_module_init(void) { int err; err = nft_register_obj(&nft_limit_obj_type); if (err < 0) return err; err = nft_register_expr(&nft_limit_type); if (err < 0) goto err1; return 0; err1: nft_unregister_obj(&nft_limit_obj_type); return err; } static void __exit nft_limit_module_exit(void) { nft_unregister_expr(&nft_limit_type); nft_unregister_obj(&nft_limit_obj_type); } module_init(nft_limit_module_init); module_exit(nft_limit_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 
MODULE_ALIAS_NFT_EXPR("limit");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_LIMIT);
MODULE_DESCRIPTION("nftables limit expression support");
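/*
 * Editor's note: an illustrative userspace model of the token bucket used by
 * nft_limit_eval() above, under the same convention that tokens are
 * nanoseconds of budget. The locking and the invert flag are omitted, and the
 * names (toy_limit, now_ns, ...) are made up for the sketch; only the
 * arithmetic mirrors the expression implementation.
 */
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

#define TOY_NSEC_PER_SEC 1000000000ULL

struct toy_limit {
	uint64_t last;       /* timestamp of previous evaluation (ns) */
	uint64_t tokens;     /* remaining budget (ns) */
	uint64_t tokens_max; /* bucket size (ns) */
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * TOY_NSEC_PER_SEC + ts.tv_nsec;
}

/* Returns true when the packet exceeds the limit (the NFT_BREAK case). */
static bool toy_limit_exceeded(struct toy_limit *l, uint64_t cost)
{
	uint64_t now = now_ns();
	uint64_t tokens = l->tokens + (now - l->last);

	/* Budget accrues with wall time but is capped at the bucket size */
	if (tokens > l->tokens_max)
		tokens = l->tokens_max;
	l->last = now;

	if (tokens >= cost) {
		l->tokens = tokens - cost;
		return false;	/* within the limit */
	}
	l->tokens = tokens;
	return true;		/* rate exceeded */
}

/*
 * Worked example matching a "limit rate 10/second burst 5 packets" rule:
 * nsecs = 1 * NSEC_PER_SEC, cost per packet = nsecs / rate = 100 ms of
 * budget, and the bucket size = (nsecs / rate) * burst = 500 ms of budget,
 * so at most 5 packets can pass back-to-back before the limit kicks in.
 */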
/*
 * net/tipc/monitor.c
 *
 * Copyright (c) 2016, Ericsson AB
 * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <net/genetlink.h> #include "core.h" #include "addr.h" #include "monitor.h" #include "bearer.h" #define MAX_MON_DOMAIN 64 #define MON_TIMEOUT 120000 #define MAX_PEER_DOWN_EVENTS 4 /* struct tipc_mon_domain: domain record to be transferred between peers * @len: actual size of domain record * @gen: current generation of sender's domain * @ack_gen: most recent generation of self's domain acked by peer * @member_cnt: number of domain member nodes described in this record * @up_map: bit map indicating which of the members the sender considers up * @members: identity of the domain members */ struct tipc_mon_domain { u16 len; u16 gen; u16 ack_gen; u16 member_cnt; u64 up_map; u32 members[MAX_MON_DOMAIN]; }; /* struct tipc_peer: state of a peer node and its domain * @addr: tipc node identity of peer * @head_map: shows which other nodes currently consider peer 'up' * @domain: most recent domain record from peer * @hash: position in hashed lookup list * @list: position in linked list, in circular ascending order by 'addr' * @applied: number of reported domain members applied on this monitor list * @is_up: peer is up as seen from this node * @is_head: peer is assigned domain head as seen from this node * @is_local: peer is in local domain and should be continuously monitored * @down_cnt: - numbers of other peers which have reported this on lost */ struct tipc_peer { u32 addr; struct tipc_mon_domain *domain; struct hlist_node hash; struct list_head list; u8 applied; u8 down_cnt; bool is_up; bool is_head; bool is_local; }; struct tipc_monitor { struct hlist_head peers[NODE_HTABLE_SIZE]; int peer_cnt; struct tipc_peer *self; rwlock_t lock; struct tipc_mon_domain cache; u16 list_gen; u16 dom_gen; struct net *net; struct timer_list timer; unsigned long timer_intv; }; static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id) { return tipc_net(net)->monitors[bearer_id]; } 
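/*
 * Editor's note: illustrative only, not part of the kernel file. A concrete
 * instance of the domain record described above, as one node might advertise
 * it: four members, with bit i of up_map telling whether members[i] is
 * currently seen as up. The addresses and generation numbers are made-up
 * example values; .len matches dom_rec_len() for four members.
 */
static const struct tipc_mon_domain example_dom = {
	.len        = 32,	/* 16-byte header + 4 * sizeof(u32) members */
	.gen        = 7,	/* sender's current domain generation */
	.ack_gen    = 6,	/* last generation acked by the receiver */
	.member_cnt = 4,
	.up_map     = 0x0b,	/* binary 1011: members 0, 1 and 3 up, 2 down */
	.members    = { 1001, 1002, 1003, 1004 },
};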
const int tipc_max_domain_size = sizeof(struct tipc_mon_domain); static inline u16 mon_cpu_to_le16(u16 val) { return (__force __u16)htons(val); } static inline u32 mon_cpu_to_le32(u32 val) { return (__force __u32)htonl(val); } static inline u64 mon_cpu_to_le64(u64 val) { return (__force __u64)cpu_to_be64(val); } static inline u16 mon_le16_to_cpu(u16 val) { return ntohs((__force __be16)val); } static inline u32 mon_le32_to_cpu(u32 val) { return ntohl((__force __be32)val); } static inline u64 mon_le64_to_cpu(u64 val) { return be64_to_cpu((__force __be64)val); } /* dom_rec_len(): actual length of domain record for transport */ static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt) { return (offsetof(struct tipc_mon_domain, members)) + (mcnt * sizeof(u32)); } /* dom_size() : calculate size of own domain based on number of peers */ static int dom_size(int peers) { int i = 0; while ((i * i) < peers) i++; return min(i, MAX_MON_DOMAIN); } static void map_set(u64 *up_map, int i, unsigned int v) { *up_map &= ~(1ULL << i); *up_map |= ((u64)v << i); } static int map_get(u64 up_map, int i) { return (up_map & (1ULL << i)) >> i; } static struct tipc_peer *peer_prev(struct tipc_peer *peer) { return list_last_entry(&peer->list, struct tipc_peer, list); } static struct tipc_peer *peer_nxt(struct tipc_peer *peer) { return list_first_entry(&peer->list, struct tipc_peer, list); } static struct tipc_peer *peer_head(struct tipc_peer *peer) { while (!peer->is_head) peer = peer_prev(peer); return peer; } static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr) { struct tipc_peer *peer; unsigned int thash = tipc_hashfn(addr); hlist_for_each_entry(peer, &mon->peers[thash], hash) { if (peer->addr == addr) return peer; } return NULL; } static struct tipc_peer *get_self(struct net *net, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); return mon->self; } static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon) { struct tipc_net *tn = tipc_net(net); return mon->peer_cnt > tn->mon_threshold; } /* mon_identify_lost_members() : - identify amd mark potentially lost members */ static void mon_identify_lost_members(struct tipc_peer *peer, struct tipc_mon_domain *dom_bef, int applied_bef) { struct tipc_peer *member = peer; struct tipc_mon_domain *dom_aft = peer->domain; int applied_aft = peer->applied; int i; for (i = 0; i < applied_bef; i++) { member = peer_nxt(member); /* Do nothing if self or peer already see member as down */ if (!member->is_up || !map_get(dom_bef->up_map, i)) continue; /* Loss of local node must be detected by active probing */ if (member->is_local) continue; /* Start probing if member was removed from applied domain */ if (!applied_aft || (applied_aft < i)) { member->down_cnt = 1; continue; } /* Member loss is confirmed if it is still in applied domain */ if (!map_get(dom_aft->up_map, i)) member->down_cnt++; } } /* mon_apply_domain() : match a peer's domain record against monitor list */ static void mon_apply_domain(struct tipc_monitor *mon, struct tipc_peer *peer) { struct tipc_mon_domain *dom = peer->domain; struct tipc_peer *member; u32 addr; int i; if (!dom || !peer->is_up) return; /* Scan across domain members and match against monitor list */ peer->applied = 0; member = peer_nxt(peer); for (i = 0; i < dom->member_cnt; i++) { addr = dom->members[i]; if (addr != member->addr) return; peer->applied++; member = peer_nxt(member); } } /* mon_update_local_domain() : update after peer addition/removal/up/down */ static void 
mon_update_local_domain(struct tipc_monitor *mon) { struct tipc_peer *self = mon->self; struct tipc_mon_domain *cache = &mon->cache; struct tipc_mon_domain *dom = self->domain; struct tipc_peer *peer = self; u64 prev_up_map = dom->up_map; u16 member_cnt, i; bool diff; /* Update local domain size based on current size of cluster */ member_cnt = dom_size(mon->peer_cnt) - 1; self->applied = member_cnt; /* Update native and cached outgoing local domain records */ dom->len = dom_rec_len(dom, member_cnt); diff = dom->member_cnt != member_cnt; dom->member_cnt = member_cnt; for (i = 0; i < member_cnt; i++) { peer = peer_nxt(peer); diff |= dom->members[i] != peer->addr; dom->members[i] = peer->addr; map_set(&dom->up_map, i, peer->is_up); cache->members[i] = mon_cpu_to_le32(peer->addr); } diff |= dom->up_map != prev_up_map; if (!diff) return; dom->gen = ++mon->dom_gen; cache->len = mon_cpu_to_le16(dom->len); cache->gen = mon_cpu_to_le16(dom->gen); cache->member_cnt = mon_cpu_to_le16(member_cnt); cache->up_map = mon_cpu_to_le64(dom->up_map); mon_apply_domain(mon, self); } /* mon_update_neighbors() : update preceding neighbors of added/removed peer */ static void mon_update_neighbors(struct tipc_monitor *mon, struct tipc_peer *peer) { int dz, i; dz = dom_size(mon->peer_cnt); for (i = 0; i < dz; i++) { mon_apply_domain(mon, peer); peer = peer_prev(peer); } } /* mon_assign_roles() : reassign peer roles after a network change * The monitor list is consistent at this stage; i.e., each peer is monitoring * a set of domain members as matched between domain record and the monitor list */ static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head) { struct tipc_peer *peer = peer_nxt(head); struct tipc_peer *self = mon->self; int i = 0; for (; peer != self; peer = peer_nxt(peer)) { peer->is_local = false; /* Update domain member */ if (i++ < head->applied) { peer->is_head = false; if (head == self) peer->is_local = true; continue; } /* Assign next domain head */ if (!peer->is_up) continue; if (peer->is_head) break; head = peer; head->is_head = true; i = 0; } mon->list_gen++; } void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *self; struct tipc_peer *peer, *prev, *head; if (!mon) return; self = get_self(net, bearer_id); write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer) goto exit; prev = peer_prev(peer); list_del(&peer->list); hlist_del(&peer->hash); kfree(peer->domain); kfree(peer); mon->peer_cnt--; head = peer_head(prev); if (head == self) mon_update_local_domain(mon); mon_update_neighbors(mon, prev); /* Revert to full-mesh monitoring if we reach threshold */ if (!tipc_mon_is_active(net, mon)) { list_for_each_entry(peer, &self->list, list) { kfree(peer->domain); peer->domain = NULL; peer->applied = 0; } } mon_assign_roles(mon, head); exit: write_unlock_bh(&mon->lock); } static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr, struct tipc_peer **peer) { struct tipc_peer *self = mon->self; struct tipc_peer *cur, *prev, *p; p = kzalloc(sizeof(*p), GFP_ATOMIC); *peer = p; if (!p) return false; p->addr = addr; /* Add new peer to lookup list */ INIT_LIST_HEAD(&p->list); hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]); /* Sort new peer into iterator list, in ascending circular order */ prev = self; list_for_each_entry(cur, &self->list, list) { if ((addr > prev->addr) && (addr < cur->addr)) break; if (((addr < cur->addr) || (addr > prev->addr)) && (prev->addr > cur->addr)) 
break; prev = cur; } list_add_tail(&p->list, &cur->list); mon->peer_cnt++; mon_update_neighbors(mon, p); return true; } void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *self = get_self(net, bearer_id); struct tipc_peer *peer, *head; write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer && !tipc_mon_add_peer(mon, addr, &peer)) goto exit; peer->is_up = true; head = peer_head(peer); if (head == self) mon_update_local_domain(mon); mon_assign_roles(mon, head); exit: write_unlock_bh(&mon->lock); } void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *self; struct tipc_peer *peer, *head; struct tipc_mon_domain *dom; int applied; if (!mon) return; self = get_self(net, bearer_id); write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer) { pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id); goto exit; } applied = peer->applied; peer->applied = 0; dom = peer->domain; peer->domain = NULL; if (peer->is_head) mon_identify_lost_members(peer, dom, applied); kfree(dom); peer->is_up = false; peer->is_head = false; peer->is_local = false; peer->down_cnt = 0; head = peer_head(peer); if (head == self) mon_update_local_domain(mon); mon_assign_roles(mon, head); exit: write_unlock_bh(&mon->lock); } /* tipc_mon_rcv - process monitor domain event message */ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, struct tipc_mon_state *state, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_mon_domain *arrv_dom = data; struct tipc_mon_domain dom_bef; struct tipc_mon_domain *dom; struct tipc_peer *peer; u16 new_member_cnt = mon_le16_to_cpu(arrv_dom->member_cnt); int new_dlen = dom_rec_len(arrv_dom, new_member_cnt); u16 new_gen = mon_le16_to_cpu(arrv_dom->gen); u16 acked_gen = mon_le16_to_cpu(arrv_dom->ack_gen); u16 arrv_dlen = mon_le16_to_cpu(arrv_dom->len); bool probing = state->probing; int i, applied_bef; state->probing = false; /* Sanity check received domain record */ if (new_member_cnt > MAX_MON_DOMAIN) return; if (dlen < dom_rec_len(arrv_dom, 0)) return; if (dlen != dom_rec_len(arrv_dom, new_member_cnt)) return; if (dlen < new_dlen || arrv_dlen != new_dlen) return; /* Synch generation numbers with peer if link just came up */ if (!state->synched) { state->peer_gen = new_gen - 1; state->acked_gen = acked_gen; state->synched = true; } if (more(acked_gen, state->acked_gen)) state->acked_gen = acked_gen; /* Drop duplicate unless we are waiting for a probe response */ if (!more(new_gen, state->peer_gen) && !probing) return; write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer || !peer->is_up) goto exit; /* Peer is confirmed, stop any ongoing probing */ peer->down_cnt = 0; /* Task is done for duplicate record */ if (!more(new_gen, state->peer_gen)) goto exit; state->peer_gen = new_gen; /* Cache current domain record for later use */ dom_bef.member_cnt = 0; dom = peer->domain; if (dom) memcpy(&dom_bef, dom, dom->len); /* Transform and store received domain record */ if (!dom || (dom->len < new_dlen)) { kfree(dom); dom = kmalloc(new_dlen, GFP_ATOMIC); peer->domain = dom; if (!dom) goto exit; } dom->len = new_dlen; dom->gen = new_gen; dom->member_cnt = new_member_cnt; dom->up_map = mon_le64_to_cpu(arrv_dom->up_map); for (i = 0; i < new_member_cnt; i++) dom->members[i] = mon_le32_to_cpu(arrv_dom->members[i]); /* Update peers affected by this domain record 
*/ applied_bef = peer->applied; mon_apply_domain(mon, peer); mon_identify_lost_members(peer, &dom_bef, applied_bef); mon_assign_roles(mon, peer_head(peer)); exit: write_unlock_bh(&mon->lock); } void tipc_mon_prep(struct net *net, void *data, int *dlen, struct tipc_mon_state *state, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_mon_domain *dom = data; u16 gen = mon->dom_gen; u16 len; /* Send invalid record if not active */ if (!tipc_mon_is_active(net, mon)) { dom->len = 0; return; } /* Send only a dummy record with ack if peer has acked our last sent */ if (likely(state->acked_gen == gen)) { len = dom_rec_len(dom, 0); *dlen = len; dom->len = mon_cpu_to_le16(len); dom->gen = mon_cpu_to_le16(gen); dom->ack_gen = mon_cpu_to_le16(state->peer_gen); dom->member_cnt = 0; return; } /* Send the full record */ read_lock_bh(&mon->lock); len = mon_le16_to_cpu(mon->cache.len); *dlen = len; memcpy(data, &mon->cache, len); read_unlock_bh(&mon->lock); dom->ack_gen = mon_cpu_to_le16(state->peer_gen); } void tipc_mon_get_state(struct net *net, u32 addr, struct tipc_mon_state *state, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *peer; if (!tipc_mon_is_active(net, mon)) { state->probing = false; state->monitoring = true; return; } /* Use cached state if table has not changed */ if (!state->probing && (state->list_gen == mon->list_gen) && (state->acked_gen == mon->dom_gen)) return; read_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (peer) { state->probing = state->acked_gen != mon->dom_gen; state->probing |= peer->down_cnt; state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS; state->monitoring = peer->is_local; state->monitoring |= peer->is_head; state->list_gen = mon->list_gen; } read_unlock_bh(&mon->lock); } static void mon_timeout(struct timer_list *t) { struct tipc_monitor *mon = from_timer(mon, t, timer); struct tipc_peer *self; int best_member_cnt = dom_size(mon->peer_cnt) - 1; write_lock_bh(&mon->lock); self = mon->self; if (self && (best_member_cnt != self->applied)) { mon_update_local_domain(mon); mon_assign_roles(mon, self); } write_unlock_bh(&mon->lock); mod_timer(&mon->timer, jiffies + mon->timer_intv); } int tipc_mon_create(struct net *net, int bearer_id) { struct tipc_net *tn = tipc_net(net); struct tipc_monitor *mon; struct tipc_peer *self; struct tipc_mon_domain *dom; if (tn->monitors[bearer_id]) return 0; mon = kzalloc(sizeof(*mon), GFP_ATOMIC); self = kzalloc(sizeof(*self), GFP_ATOMIC); dom = kzalloc(sizeof(*dom), GFP_ATOMIC); if (!mon || !self || !dom) { kfree(mon); kfree(self); kfree(dom); return -ENOMEM; } tn->monitors[bearer_id] = mon; rwlock_init(&mon->lock); mon->net = net; mon->peer_cnt = 1; mon->self = self; self->domain = dom; self->addr = tipc_own_addr(net); self->is_up = true; self->is_head = true; INIT_LIST_HEAD(&self->list); timer_setup(&mon->timer, mon_timeout, 0); mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff)); mod_timer(&mon->timer, jiffies + mon->timer_intv); return 0; } void tipc_mon_delete(struct net *net, int bearer_id) { struct tipc_net *tn = tipc_net(net); struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *self; struct tipc_peer *peer, *tmp; if (!mon) return; self = get_self(net, bearer_id); write_lock_bh(&mon->lock); tn->monitors[bearer_id] = NULL; list_for_each_entry_safe(peer, tmp, &self->list, list) { list_del(&peer->list); hlist_del(&peer->hash); kfree(peer->domain); kfree(peer); } mon->self = NULL;
write_unlock_bh(&mon->lock); timer_shutdown_sync(&mon->timer); kfree(self->domain); kfree(self); kfree(mon); } void tipc_mon_reinit_self(struct net *net) { struct tipc_monitor *mon; int bearer_id; for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { mon = tipc_monitor(net, bearer_id); if (!mon) continue; write_lock_bh(&mon->lock); mon->self->addr = tipc_own_addr(net); write_unlock_bh(&mon->lock); } } int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) { struct tipc_net *tn = tipc_net(net); if (cluster_size > TIPC_CLUSTER_SIZE) return -EINVAL; tn->mon_threshold = cluster_size; return 0; } int tipc_nl_monitor_get_threshold(struct net *net) { struct tipc_net *tn = tipc_net(net); return tn->mon_threshold; } static int __tipc_nl_add_monitor_peer(struct tipc_peer *peer, struct tipc_nl_msg *msg) { struct tipc_mon_domain *dom = peer->domain; struct nlattr *attrs; void *hdr; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_MON_PEER_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON_PEER); if (!attrs) goto msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_ADDR, peer->addr)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_APPLIED, peer->applied)) goto attr_msg_full; if (peer->is_up) if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_UP)) goto attr_msg_full; if (peer->is_local) if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_LOCAL)) goto attr_msg_full; if (peer->is_head) if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_HEAD)) goto attr_msg_full; if (dom) { if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_DOMGEN, dom->gen)) goto attr_msg_full; if (nla_put_u64_64bit(msg->skb, TIPC_NLA_MON_PEER_UPMAP, dom->up_map, TIPC_NLA_MON_PEER_PAD)) goto attr_msg_full; if (nla_put(msg->skb, TIPC_NLA_MON_PEER_MEMBERS, dom->member_cnt * sizeof(u32), &dom->members)) goto attr_msg_full; } nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, u32 bearer_id, u32 *prev_node) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *peer; if (!mon) return -EINVAL; read_lock_bh(&mon->lock); peer = mon->self; do { if (*prev_node) { if (peer->addr == *prev_node) *prev_node = 0; else continue; } if (__tipc_nl_add_monitor_peer(peer, msg)) { *prev_node = peer->addr; read_unlock_bh(&mon->lock); return -EMSGSIZE; } } while ((peer = peer_nxt(peer)) != mon->self); read_unlock_bh(&mon->lock); return 0; } int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, u32 bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); char bearer_name[TIPC_MAX_BEARER_NAME]; struct nlattr *attrs; void *hdr; int ret; ret = tipc_bearer_get_name(net, bearer_name, bearer_id); if (ret || !mon) return 0; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_MON_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON); if (!attrs) goto msg_full; read_lock_bh(&mon->lock); if (nla_put_u32(msg->skb, TIPC_NLA_MON_REF, bearer_id)) goto attr_msg_full; if (tipc_mon_is_active(net, mon)) if (nla_put_flag(msg->skb, TIPC_NLA_MON_ACTIVE)) goto attr_msg_full; if (nla_put_string(msg->skb, TIPC_NLA_MON_BEARER_NAME, bearer_name)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEERCNT, mon->peer_cnt)) goto attr_msg_full; if (nla_put_u32(msg->skb, 
TIPC_NLA_MON_LISTGEN, mon->list_gen)) goto attr_msg_full; read_unlock_bh(&mon->lock); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; attr_msg_full: read_unlock_bh(&mon->lock); nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; }
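The helpers dom_size() and map_set()/map_get() in the monitor code above decide how large each node's actively monitored domain is (roughly the square root of the cluster size, capped at MAX_MON_DOMAIN) and how member up/down state is packed into the 64-bit up_map. The short standalone sketch below mirrors their logic from the listing so the scaling can be checked in isolation; it is illustrative only, uses plain C types instead of the kernel's, and replaces the kernel min() macro with an explicit comparison.

#include <stdio.h>
#include <stdint.h>

#define MAX_MON_DOMAIN 64

/* Mirrors dom_size(): smallest i with i * i >= peers, capped at 64 */
static int dom_size(int peers)
{
	int i = 0;

	while ((i * i) < peers)
		i++;
	return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
}

/* Mirrors map_set()/map_get(): one bit of up_map per domain member */
static void map_set(uint64_t *up_map, int i, unsigned int v)
{
	*up_map &= ~(1ULL << i);
	*up_map |= ((uint64_t)v << i);
}

static int map_get(uint64_t up_map, int i)
{
	return (up_map & (1ULL << i)) >> i;
}

int main(void)
{
	uint64_t up_map = 0;
	int peers;

	/* Domain size grows roughly as sqrt(cluster size): 16 -> 4, 256 -> 16 */
	for (peers = 1; peers <= 1024; peers *= 4)
		printf("%4d peers -> domain of %d nodes\n",
		       peers, dom_size(peers));

	/* Member 3 comes up, then goes down again */
	map_set(&up_map, 3, 1);
	printf("member 3 up? %d\n", map_get(up_map, 3));
	map_set(&up_map, 3, 0);
	printf("member 3 up? %d\n", map_get(up_map, 3));
	return 0;
}

In mon_update_local_domain() the node itself is part of that count, so the outgoing record carries dom_size(peer_cnt) - 1 member identities.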
// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/fs_context.c * * Copyright (C) 1992 Rick Sladkey * Conversion to new mount api Copyright (C) David Howells * * NFS mount handling. * * Split from fs/nfs/super.c by David Howells <dhowells@redhat.com> */ #include <linux/compat.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> #include <net/handshake.h> #include "nfs.h" #include "internal.h" #include "nfstrace.h" #define NFSDBG_FACILITY NFSDBG_MOUNT #if IS_ENABLED(CONFIG_NFS_V3) #define NFS_DEFAULT_VERSION 3 #else #define NFS_DEFAULT_VERSION 2 #endif #define NFS_MAX_CONNECTIONS 16 enum nfs_param { Opt_ac, Opt_acdirmax, Opt_acdirmin, Opt_acl, Opt_acregmax, Opt_acregmin, Opt_actimeo, Opt_addr, Opt_bg, Opt_bsize, Opt_clientaddr, Opt_cto, Opt_alignwrite, Opt_fg, Opt_fscache, Opt_fscache_flag, Opt_hard, Opt_intr, Opt_local_lock, Opt_lock, Opt_lookupcache, Opt_migration, Opt_minorversion, Opt_mountaddr, Opt_mounthost, Opt_mountport, Opt_mountproto, Opt_mountvers, Opt_namelen, Opt_nconnect, Opt_max_connect, Opt_port, Opt_posix, Opt_proto, Opt_rdirplus, Opt_rdma, Opt_resvport, Opt_retrans, Opt_retry, Opt_rsize, Opt_sec, Opt_sharecache, Opt_sloppy, Opt_soft, Opt_softerr, Opt_softreval, Opt_source, Opt_tcp, Opt_timeo, Opt_trunkdiscovery, Opt_udp, Opt_v, Opt_vers, Opt_wsize, Opt_write, Opt_xprtsec, }; enum { Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_none, Opt_local_lock_posix, }; static const struct constant_table nfs_param_enums_local_lock[] = { { "all", Opt_local_lock_all }, { "flock", Opt_local_lock_flock }, { "posix", Opt_local_lock_posix }, { "none", Opt_local_lock_none }, {} }; enum { Opt_lookupcache_all, Opt_lookupcache_none, Opt_lookupcache_positive, }; static const struct constant_table nfs_param_enums_lookupcache[] = { { "all", Opt_lookupcache_all }, { "none", Opt_lookupcache_none }, { "pos", Opt_lookupcache_positive }, { "positive", Opt_lookupcache_positive }, {} }; enum { Opt_write_lazy, Opt_write_eager, Opt_write_wait, }; static const struct constant_table nfs_param_enums_write[] = { { "lazy", Opt_write_lazy }, { "eager", Opt_write_eager }, { "wait", Opt_write_wait }, {} }; static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_flag_no("ac", Opt_ac), fsparam_u32 ("acdirmax", Opt_acdirmax), fsparam_u32 ("acdirmin", Opt_acdirmin), fsparam_flag_no("acl", Opt_acl), fsparam_u32 ("acregmax", Opt_acregmax), fsparam_u32 ("acregmin", Opt_acregmin), fsparam_u32 ("actimeo", Opt_actimeo), fsparam_string("addr", Opt_addr), fsparam_flag ("bg", Opt_bg), fsparam_u32 ("bsize", Opt_bsize), fsparam_string("clientaddr", Opt_clientaddr), fsparam_flag_no("cto", Opt_cto), fsparam_flag_no("alignwrite", Opt_alignwrite), fsparam_flag ("fg", Opt_fg), fsparam_flag_no("fsc", Opt_fscache_flag), fsparam_string("fsc", Opt_fscache), fsparam_flag ("hard", Opt_hard), __fsparam(NULL, "intr", Opt_intr, fs_param_neg_with_no|fs_param_deprecated, NULL), fsparam_enum ("local_lock", Opt_local_lock, nfs_param_enums_local_lock), fsparam_flag_no("lock", Opt_lock), fsparam_enum ("lookupcache", Opt_lookupcache, nfs_param_enums_lookupcache), fsparam_flag_no("migration", Opt_migration), fsparam_u32 ("minorversion",
Opt_minorversion), fsparam_string("mountaddr", Opt_mountaddr), fsparam_string("mounthost", Opt_mounthost), fsparam_u32 ("mountport", Opt_mountport), fsparam_string("mountproto", Opt_mountproto), fsparam_u32 ("mountvers", Opt_mountvers), fsparam_u32 ("namlen", Opt_namelen), fsparam_u32 ("nconnect", Opt_nconnect), fsparam_u32 ("max_connect", Opt_max_connect), fsparam_string("nfsvers", Opt_vers), fsparam_u32 ("port", Opt_port), fsparam_flag_no("posix", Opt_posix), fsparam_string("proto", Opt_proto), fsparam_flag_no("rdirplus", Opt_rdirplus), fsparam_flag ("rdma", Opt_rdma), fsparam_flag_no("resvport", Opt_resvport), fsparam_u32 ("retrans", Opt_retrans), fsparam_string("retry", Opt_retry), fsparam_u32 ("rsize", Opt_rsize), fsparam_string("sec", Opt_sec), fsparam_flag_no("sharecache", Opt_sharecache), fsparam_flag ("sloppy", Opt_sloppy), fsparam_flag ("soft", Opt_soft), fsparam_flag ("softerr", Opt_softerr), fsparam_flag ("softreval", Opt_softreval), fsparam_string("source", Opt_source), fsparam_flag ("tcp", Opt_tcp), fsparam_u32 ("timeo", Opt_timeo), fsparam_flag_no("trunkdiscovery", Opt_trunkdiscovery), fsparam_flag ("udp", Opt_udp), fsparam_flag ("v2", Opt_v), fsparam_flag ("v3", Opt_v), fsparam_flag ("v4", Opt_v), fsparam_flag ("v4.0", Opt_v), fsparam_flag ("v4.1", Opt_v), fsparam_flag ("v4.2", Opt_v), fsparam_string("vers", Opt_vers), fsparam_enum ("write", Opt_write, nfs_param_enums_write), fsparam_u32 ("wsize", Opt_wsize), fsparam_string("xprtsec", Opt_xprtsec), {} }; enum { Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, Opt_vers_4_1, Opt_vers_4_2, }; static const struct constant_table nfs_vers_tokens[] = { { "2", Opt_vers_2 }, { "3", Opt_vers_3 }, { "4", Opt_vers_4 }, { "4.0", Opt_vers_4_0 }, { "4.1", Opt_vers_4_1 }, { "4.2", Opt_vers_4_2 }, {} }; enum { Opt_xprt_rdma, Opt_xprt_rdma6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_udp, Opt_xprt_udp6, nr__Opt_xprt }; static const struct constant_table nfs_xprt_protocol_tokens[] = { { "rdma", Opt_xprt_rdma }, { "rdma6", Opt_xprt_rdma6 }, { "tcp", Opt_xprt_tcp }, { "tcp6", Opt_xprt_tcp6 }, { "udp", Opt_xprt_udp }, { "udp6", Opt_xprt_udp6 }, {} }; enum { Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp, Opt_sec_none, Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp, Opt_sec_sys, nr__Opt_sec }; static const struct constant_table nfs_secflavor_tokens[] = { { "krb5", Opt_sec_krb5 }, { "krb5i", Opt_sec_krb5i }, { "krb5p", Opt_sec_krb5p }, { "lkey", Opt_sec_lkey }, { "lkeyi", Opt_sec_lkeyi }, { "lkeyp", Opt_sec_lkeyp }, { "none", Opt_sec_none }, { "null", Opt_sec_none }, { "spkm3", Opt_sec_spkm }, { "spkm3i", Opt_sec_spkmi }, { "spkm3p", Opt_sec_spkmp }, { "sys", Opt_sec_sys }, {} }; enum { Opt_xprtsec_none, Opt_xprtsec_tls, Opt_xprtsec_mtls, nr__Opt_xprtsec }; static const struct constant_table nfs_xprtsec_policies[] = { { "none", Opt_xprtsec_none }, { "tls", Opt_xprtsec_tls }, { "mtls", Opt_xprtsec_mtls }, {} }; /* * Sanity-check a server address provided by the mount command. * * Address family must be initialized, and address must not be * the ANY address for that family. 
*/ static int nfs_verify_server_address(struct sockaddr_storage *addr) { switch (addr->ss_family) { case AF_INET: { struct sockaddr_in *sa = (struct sockaddr_in *)addr; return sa->sin_addr.s_addr != htonl(INADDR_ANY); } case AF_INET6: { struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; return !ipv6_addr_any(sa); } } return 0; } #ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx) { return true; } #else static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx) { if (ctx->version == 4) return true; return false; } #endif /* * Sanity check the NFS transport protocol. */ static int nfs_validate_transport_protocol(struct fs_context *fc, struct nfs_fs_context *ctx) { switch (ctx->nfs_server.protocol) { case XPRT_TRANSPORT_UDP: if (nfs_server_transport_udp_invalid(ctx)) goto out_invalid_transport_udp; break; case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_RDMA: break; default: ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; } if (ctx->xprtsec.policy != RPC_XPRTSEC_NONE) switch (ctx->nfs_server.protocol) { case XPRT_TRANSPORT_TCP: ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP_TLS; break; default: goto out_invalid_xprtsec_policy; } return 0; out_invalid_transport_udp: return nfs_invalf(fc, "NFS: Unsupported transport protocol udp"); out_invalid_xprtsec_policy: return nfs_invalf(fc, "NFS: Transport does not support xprtsec"); } /* * For text based NFSv2/v3 mounts, the mount protocol transport default * settings should depend upon the specified NFS transport. */ static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx) { if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP || ctx->mount_server.protocol == XPRT_TRANSPORT_TCP) return; switch (ctx->nfs_server.protocol) { case XPRT_TRANSPORT_UDP: ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_RDMA: ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; } } /* * Add 'flavor' to 'auth_info' if not already present. * Returns true if 'flavor' ends up in the list, false otherwise */ static int nfs_auth_info_add(struct fs_context *fc, struct nfs_auth_info *auth_info, rpc_authflavor_t flavor) { unsigned int i; unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors); /* make sure this flavor isn't already in the list */ for (i = 0; i < auth_info->flavor_len; i++) { if (flavor == auth_info->flavors[i]) return 0; } if (auth_info->flavor_len + 1 >= max_flavor_len) return nfs_invalf(fc, "NFS: too many sec= flavors"); auth_info->flavors[auth_info->flavor_len++] = flavor; return 0; } /* * Parse the value of the 'sec=' option. 
*/ static int nfs_parse_security_flavors(struct fs_context *fc, struct fs_parameter *param) { struct nfs_fs_context *ctx = nfs_fc2context(fc); rpc_authflavor_t pseudoflavor; char *string = param->string, *p; int ret; trace_nfs_mount_assign(param->key, string); while ((p = strsep(&string, ":")) != NULL) { if (!*p) continue; switch (lookup_constant(nfs_secflavor_tokens, p, -1)) { case Opt_sec_none: pseudoflavor = RPC_AUTH_NULL; break; case Opt_sec_sys: pseudoflavor = RPC_AUTH_UNIX; break; case Opt_sec_krb5: pseudoflavor = RPC_AUTH_GSS_KRB5; break; case Opt_sec_krb5i: pseudoflavor = RPC_AUTH_GSS_KRB5I; break; case Opt_sec_krb5p: pseudoflavor = RPC_AUTH_GSS_KRB5P; break; case Opt_sec_lkey: pseudoflavor = RPC_AUTH_GSS_LKEY; break; case Opt_sec_lkeyi: pseudoflavor = RPC_AUTH_GSS_LKEYI; break; case Opt_sec_lkeyp: pseudoflavor = RPC_AUTH_GSS_LKEYP; break; case Opt_sec_spkm: pseudoflavor = RPC_AUTH_GSS_SPKM; break; case Opt_sec_spkmi: pseudoflavor = RPC_AUTH_GSS_SPKMI; break; case Opt_sec_spkmp: pseudoflavor = RPC_AUTH_GSS_SPKMP; break; default: return nfs_invalf(fc, "NFS: sec=%s option not recognized", p); } ret = nfs_auth_info_add(fc, &ctx->auth_info, pseudoflavor); if (ret < 0) return ret; } return 0; } static int nfs_parse_xprtsec_policy(struct fs_context *fc, struct fs_parameter *param) { struct nfs_fs_context *ctx = nfs_fc2context(fc); trace_nfs_mount_assign(param->key, param->string); switch (lookup_constant(nfs_xprtsec_policies, param->string, -1)) { case Opt_xprtsec_none: ctx->xprtsec.policy = RPC_XPRTSEC_NONE; break; case Opt_xprtsec_tls: ctx->xprtsec.policy = RPC_XPRTSEC_TLS_ANON; break; case Opt_xprtsec_mtls: ctx->xprtsec.policy = RPC_XPRTSEC_TLS_X509; break; default: return nfs_invalf(fc, "NFS: Unrecognized transport security policy"); } return 0; } static int nfs_parse_version_string(struct fs_context *fc, const char *string) { struct nfs_fs_context *ctx = nfs_fc2context(fc); ctx->flags &= ~NFS_MOUNT_VER3; switch (lookup_constant(nfs_vers_tokens, string, -1)) { case Opt_vers_2: ctx->version = 2; break; case Opt_vers_3: ctx->flags |= NFS_MOUNT_VER3; ctx->version = 3; break; case Opt_vers_4: /* Backward compatibility option. In future, * the mount program should always supply * a NFSv4 minor version number. */ ctx->version = 4; break; case Opt_vers_4_0: ctx->version = 4; ctx->minorversion = 0; break; case Opt_vers_4_1: ctx->version = 4; ctx->minorversion = 1; break; case Opt_vers_4_2: ctx->version = 4; ctx->minorversion = 2; break; default: return nfs_invalf(fc, "NFS: Unsupported NFS version"); } return 0; } /* * Parse a single mount parameter. */ static int nfs_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct fs_parse_result result; struct nfs_fs_context *ctx = nfs_fc2context(fc); unsigned short protofamily, mountfamily; unsigned int len; int ret, opt; trace_nfs_mount_option(param); opt = fs_parse(fc, nfs_fs_parameters, param, &result); if (opt < 0) return (opt == -ENOPARAM && ctx->sloppy) ? 
1 : opt; if (fc->security) ctx->has_sec_mnt_opts = 1; switch (opt) { case Opt_source: if (fc->source) return nfs_invalf(fc, "NFS: Multiple sources not supported"); fc->source = param->string; param->string = NULL; break; /* * boolean options: foo/nofoo */ case Opt_soft: ctx->flags |= NFS_MOUNT_SOFT; ctx->flags &= ~NFS_MOUNT_SOFTERR; break; case Opt_softerr: ctx->flags |= NFS_MOUNT_SOFTERR | NFS_MOUNT_SOFTREVAL; ctx->flags &= ~NFS_MOUNT_SOFT; break; case Opt_hard: ctx->flags &= ~(NFS_MOUNT_SOFT | NFS_MOUNT_SOFTERR | NFS_MOUNT_SOFTREVAL); break; case Opt_softreval: if (result.negated) ctx->flags &= ~NFS_MOUNT_SOFTREVAL; else ctx->flags |= NFS_MOUNT_SOFTREVAL; break; case Opt_posix: if (result.negated) ctx->flags &= ~NFS_MOUNT_POSIX; else ctx->flags |= NFS_MOUNT_POSIX; break; case Opt_cto: if (result.negated) ctx->flags |= NFS_MOUNT_NOCTO; else ctx->flags &= ~NFS_MOUNT_NOCTO; break; case Opt_trunkdiscovery: if (result.negated) ctx->flags &= ~NFS_MOUNT_TRUNK_DISCOVERY; else ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY; break; case Opt_alignwrite: if (result.negated) ctx->flags |= NFS_MOUNT_NO_ALIGNWRITE; else ctx->flags &= ~NFS_MOUNT_NO_ALIGNWRITE; break; case Opt_ac: if (result.negated) ctx->flags |= NFS_MOUNT_NOAC; else ctx->flags &= ~NFS_MOUNT_NOAC; break; case Opt_lock: if (result.negated) { ctx->lock_status = NFS_LOCK_NOLOCK; ctx->flags |= NFS_MOUNT_NONLM; ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } else { ctx->lock_status = NFS_LOCK_LOCK; ctx->flags &= ~NFS_MOUNT_NONLM; ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } break; case Opt_udp: ctx->flags &= ~NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_tcp: case Opt_rdma: ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */ ret = xprt_find_transport_ident(param->key); if (ret < 0) goto out_bad_transport; ctx->nfs_server.protocol = ret; break; case Opt_acl: if (result.negated) ctx->flags |= NFS_MOUNT_NOACL; else ctx->flags &= ~NFS_MOUNT_NOACL; break; case Opt_rdirplus: if (result.negated) ctx->flags |= NFS_MOUNT_NORDIRPLUS; else ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; break; case Opt_sharecache: if (result.negated) ctx->flags |= NFS_MOUNT_UNSHARED; else ctx->flags &= ~NFS_MOUNT_UNSHARED; break; case Opt_resvport: if (result.negated) ctx->flags |= NFS_MOUNT_NORESVPORT; else ctx->flags &= ~NFS_MOUNT_NORESVPORT; break; case Opt_fscache_flag: if (result.negated) ctx->options &= ~NFS_OPTION_FSCACHE; else ctx->options |= NFS_OPTION_FSCACHE; kfree(ctx->fscache_uniq); ctx->fscache_uniq = NULL; break; case Opt_fscache: trace_nfs_mount_assign(param->key, param->string); ctx->options |= NFS_OPTION_FSCACHE; kfree(ctx->fscache_uniq); ctx->fscache_uniq = param->string; param->string = NULL; break; case Opt_migration: if (result.negated) ctx->options &= ~NFS_OPTION_MIGRATION; else ctx->options |= NFS_OPTION_MIGRATION; break; /* * options that take numeric values */ case Opt_port: if (result.uint_32 > USHRT_MAX) goto out_of_bounds; ctx->nfs_server.port = result.uint_32; break; case Opt_rsize: ctx->rsize = result.uint_32; break; case Opt_wsize: ctx->wsize = result.uint_32; break; case Opt_bsize: ctx->bsize = result.uint_32; break; case Opt_timeo: if (result.uint_32 < 1 || result.uint_32 > INT_MAX) goto out_of_bounds; ctx->timeo = result.uint_32; break; case Opt_retrans: if (result.uint_32 > INT_MAX) goto out_of_bounds; ctx->retrans = result.uint_32; break; case Opt_acregmin: ctx->acregmin = result.uint_32; break; case Opt_acregmax: ctx->acregmax = result.uint_32; break; case Opt_acdirmin: 
ctx->acdirmin = result.uint_32; break; case Opt_acdirmax: ctx->acdirmax = result.uint_32; break; case Opt_actimeo: ctx->acregmin = result.uint_32; ctx->acregmax = result.uint_32; ctx->acdirmin = result.uint_32; ctx->acdirmax = result.uint_32; break; case Opt_namelen: ctx->namlen = result.uint_32; break; case Opt_mountport: if (result.uint_32 > USHRT_MAX) goto out_of_bounds; ctx->mount_server.port = result.uint_32; break; case Opt_mountvers: if (result.uint_32 < NFS_MNT_VERSION || result.uint_32 > NFS_MNT3_VERSION) goto out_of_bounds; ctx->mount_server.version = result.uint_32; break; case Opt_minorversion: if (result.uint_32 > NFS4_MAX_MINOR_VERSION) goto out_of_bounds; ctx->minorversion = result.uint_32; break; /* * options that take text values */ case Opt_v: ret = nfs_parse_version_string(fc, param->key + 1); if (ret < 0) return ret; break; case Opt_vers: if (!param->string) goto out_invalid_value; trace_nfs_mount_assign(param->key, param->string); ret = nfs_parse_version_string(fc, param->string); if (ret < 0) return ret; break; case Opt_sec: ret = nfs_parse_security_flavors(fc, param); if (ret < 0) return ret; break; case Opt_xprtsec: ret = nfs_parse_xprtsec_policy(fc, param); if (ret < 0) return ret; break; case Opt_proto: if (!param->string) goto out_invalid_value; trace_nfs_mount_assign(param->key, param->string); protofamily = AF_INET; switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { case Opt_xprt_udp6: protofamily = AF_INET6; fallthrough; case Opt_xprt_udp: ctx->flags &= ~NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: protofamily = AF_INET6; fallthrough; case Opt_xprt_tcp: ctx->flags |= NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_xprt_rdma6: protofamily = AF_INET6; fallthrough; case Opt_xprt_rdma: /* vector side protocols to TCP */ ctx->flags |= NFS_MOUNT_TCP; ret = xprt_find_transport_ident(param->string); if (ret < 0) goto out_bad_transport; ctx->nfs_server.protocol = ret; break; default: goto out_bad_transport; } ctx->protofamily = protofamily; break; case Opt_mountproto: if (!param->string) goto out_invalid_value; trace_nfs_mount_assign(param->key, param->string); mountfamily = AF_INET; switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { case Opt_xprt_udp6: mountfamily = AF_INET6; fallthrough; case Opt_xprt_udp: ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: mountfamily = AF_INET6; fallthrough; case Opt_xprt_tcp: ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_xprt_rdma: /* not used for side protocols */ default: goto out_bad_transport; } ctx->mountfamily = mountfamily; break; case Opt_addr: trace_nfs_mount_assign(param->key, param->string); len = rpc_pton(fc->net_ns, param->string, param->size, &ctx->nfs_server.address, sizeof(ctx->nfs_server._address)); if (len == 0) goto out_invalid_address; ctx->nfs_server.addrlen = len; break; case Opt_clientaddr: trace_nfs_mount_assign(param->key, param->string); kfree(ctx->client_address); ctx->client_address = param->string; param->string = NULL; break; case Opt_mounthost: trace_nfs_mount_assign(param->key, param->string); kfree(ctx->mount_server.hostname); ctx->mount_server.hostname = param->string; param->string = NULL; break; case Opt_mountaddr: trace_nfs_mount_assign(param->key, param->string); len = rpc_pton(fc->net_ns, param->string, param->size, &ctx->mount_server.address, sizeof(ctx->mount_server._address)); if (len == 0) goto out_invalid_address; 
ctx->mount_server.addrlen = len; break; case Opt_nconnect: trace_nfs_mount_assign(param->key, param->string); if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_CONNECTIONS) goto out_of_bounds; ctx->nfs_server.nconnect = result.uint_32; break; case Opt_max_connect: trace_nfs_mount_assign(param->key, param->string); if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_TRANSPORTS) goto out_of_bounds; ctx->nfs_server.max_connect = result.uint_32; break; case Opt_lookupcache: trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_lookupcache_all: ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); break; case Opt_lookupcache_positive: ctx->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; break; case Opt_lookupcache_none: ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; break; default: goto out_invalid_value; } break; case Opt_local_lock: trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_local_lock_all: ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; case Opt_local_lock_flock: ctx->flags |= NFS_MOUNT_LOCAL_FLOCK; break; case Opt_local_lock_posix: ctx->flags |= NFS_MOUNT_LOCAL_FCNTL; break; case Opt_local_lock_none: ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; default: goto out_invalid_value; } break; case Opt_write: trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_write_lazy: ctx->flags &= ~(NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT); break; case Opt_write_eager: ctx->flags |= NFS_MOUNT_WRITE_EAGER; ctx->flags &= ~NFS_MOUNT_WRITE_WAIT; break; case Opt_write_wait: ctx->flags |= NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT; break; default: goto out_invalid_value; } break; /* * Special options */ case Opt_sloppy: ctx->sloppy = true; break; } return 0; out_invalid_value: return nfs_invalf(fc, "NFS: Bad mount option value specified"); out_invalid_address: return nfs_invalf(fc, "NFS: Bad IP address specified"); out_of_bounds: return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key); out_bad_transport: return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); } /* * Split fc->source into "hostname:export_path". * * The leftmost colon demarks the split between the server's hostname * and the export path. If the hostname starts with a left square * bracket, then it may contain colons. * * Note: caller frees hostname and export path, even on error. */ static int nfs_parse_source(struct fs_context *fc, size_t maxnamlen, size_t maxpathlen) { struct nfs_fs_context *ctx = nfs_fc2context(fc); const char *dev_name = fc->source; size_t len; const char *end; if (unlikely(!dev_name || !*dev_name)) return -EINVAL; /* Is the host name protected with square brackets? */ if (*dev_name == '[') { end = strchr(++dev_name, ']'); if (end == NULL || end[1] != ':') goto out_bad_devname; len = end - dev_name; end++; } else { const char *comma; end = strchr(dev_name, ':'); if (end == NULL) goto out_bad_devname; len = end - dev_name; /* kill possible hostname list: not supported */ comma = memchr(dev_name, ',', len); if (comma) len = comma - dev_name; } if (len > maxnamlen) goto out_hostname; kfree(ctx->nfs_server.hostname); /* N.B.
caller will free nfs_server.hostname in all cases */ ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL); if (!ctx->nfs_server.hostname) goto out_nomem; len = strlen(++end); if (len > maxpathlen) goto out_path; ctx->nfs_server.export_path = kmemdup_nul(end, len, GFP_KERNEL); if (!ctx->nfs_server.export_path) goto out_nomem; trace_nfs_mount_path(ctx->nfs_server.export_path); return 0; out_bad_devname: return nfs_invalf(fc, "NFS: device name not in host:path format"); out_nomem: nfs_errorf(fc, "NFS: not enough memory to parse device name"); return -ENOMEM; out_hostname: nfs_errorf(fc, "NFS: server hostname too long"); return -ENAMETOOLONG; out_path: nfs_errorf(fc, "NFS: export pathname too long"); return -ENAMETOOLONG; } static inline bool is_remount_fc(struct fs_context *fc) { return fc->root != NULL; } /* * Parse monolithic NFS2/NFS3 mount data * - fills in the mount root filehandle * * For option strings, user space handles the following behaviors: * * + DNS: mapping server host name to IP address ("addr=" option) * * + failure mode: how to behave if a mount request can't be handled * immediately ("fg/bg" option) * * + retry: how often to retry a mount request ("retry=" option) * * + breaking back: trying proto=udp after proto=tcp, v2 after v3, * mountproto=tcp after mountproto=udp, and so on */ static int nfs23_parse_monolithic(struct fs_context *fc, struct nfs_mount_data *data) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_fh *mntfh = ctx->mntfh; struct sockaddr_storage *sap = &ctx->nfs_server._address; int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; int ret; if (data == NULL) goto out_no_data; ctx->version = NFS_DEFAULT_VERSION; switch (data->version) { case 1: data->namlen = 0; fallthrough; case 2: data->bsize = 0; fallthrough; case 3: if (data->flags & NFS_MOUNT_VER3) goto out_no_v3; data->root.size = NFS2_FHSIZE; memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); /* Turn off security negotiation */ extra_flags |= NFS_MOUNT_SECFLAVOUR; fallthrough; case 4: if (data->flags & NFS_MOUNT_SECFLAVOUR) goto out_no_sec; fallthrough; case 5: memset(data->context, 0, sizeof(data->context)); fallthrough; case 6: if (data->flags & NFS_MOUNT_VER3) { if (data->root.size > NFS3_FHSIZE || data->root.size == 0) goto out_invalid_fh; mntfh->size = data->root.size; ctx->version = 3; } else { mntfh->size = NFS2_FHSIZE; ctx->version = 2; } memcpy(mntfh->data, data->root.data, mntfh->size); if (mntfh->size < sizeof(mntfh->data)) memset(mntfh->data + mntfh->size, 0, sizeof(mntfh->data) - mntfh->size); /* * for proto == XPRT_TRANSPORT_UDP, which is what uses * to_exponential, implying shift: limit the shift value * to BITS_PER_LONG (majortimeo is unsigned long) */ if (!(data->flags & NFS_MOUNT_TCP)) /* this will be UDP */ if (data->retrans >= 64) /* shift value is too large */ goto out_invalid_data; /* * Translate to nfs_fs_context, which nfs_fill_super * can deal with. 
*/ ctx->flags = data->flags & NFS_MOUNT_FLAGMASK; ctx->flags |= extra_flags; ctx->rsize = data->rsize; ctx->wsize = data->wsize; ctx->timeo = data->timeo; ctx->retrans = data->retrans; ctx->acregmin = data->acregmin; ctx->acregmax = data->acregmax; ctx->acdirmin = data->acdirmin; ctx->acdirmax = data->acdirmax; ctx->need_mount = false; if (!is_remount_fc(fc)) { memcpy(sap, &data->addr, sizeof(data->addr)); ctx->nfs_server.addrlen = sizeof(data->addr); ctx->nfs_server.port = ntohs(data->addr.sin_port); } if (sap->ss_family != AF_INET || !nfs_verify_server_address(sap)) goto out_no_address; if (!(data->flags & NFS_MOUNT_TCP)) ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; /* N.B. caller will free nfs_server.hostname in all cases */ ctx->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); if (!ctx->nfs_server.hostname) goto out_nomem; ctx->namlen = data->namlen; ctx->bsize = data->bsize; if (data->flags & NFS_MOUNT_SECFLAVOUR) ctx->selected_flavor = data->pseudoflavor; else ctx->selected_flavor = RPC_AUTH_UNIX; if (!(data->flags & NFS_MOUNT_NONLM)) ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| NFS_MOUNT_LOCAL_FCNTL); else ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK| NFS_MOUNT_LOCAL_FCNTL); /* * The legacy version 6 binary mount data from userspace has a * field used only to transport selinux information into the * kernel. To continue to support that functionality we * have a touch of selinux knowledge here in the NFS code. The * userspace code converted context=blah to just blah so we are * converting back to the full string selinux understands. */ if (data->context[0]){ #ifdef CONFIG_SECURITY_SELINUX int ret; data->context[NFS_MAX_CONTEXT_LEN] = '\0'; ret = vfs_parse_fs_string(fc, "context", data->context, strlen(data->context)); if (ret < 0) return ret; #else return -EINVAL; #endif } break; default: goto generic; } ret = nfs_validate_transport_protocol(fc, ctx); if (ret) return ret; ctx->skip_reconfig_option_check = true; return 0; generic: return generic_parse_monolithic(fc, data); out_no_data: if (is_remount_fc(fc)) { ctx->skip_reconfig_option_check = true; return 0; } return nfs_invalf(fc, "NFS: mount program didn't pass any mount data"); out_no_v3: return nfs_invalf(fc, "NFS: nfs_mount_data version does not support v3"); out_no_sec: return nfs_invalf(fc, "NFS: nfs_mount_data version supports only AUTH_SYS"); out_nomem: return -ENOMEM; out_no_address: return nfs_invalf(fc, "NFS: mount program didn't pass remote address"); out_invalid_fh: return nfs_invalf(fc, "NFS: invalid root filehandle"); out_invalid_data: return nfs_invalf(fc, "NFS: invalid binary mount data"); } #if IS_ENABLED(CONFIG_NFS_V4) struct compat_nfs_string { compat_uint_t len; compat_uptr_t data; }; static inline void compat_nfs_string(struct nfs_string *dst, struct compat_nfs_string *src) { dst->data = compat_ptr(src->data); dst->len = src->len; } struct compat_nfs4_mount_data_v1 { compat_int_t version; compat_int_t flags; compat_int_t rsize; compat_int_t wsize; compat_int_t timeo; compat_int_t retrans; compat_int_t acregmin; compat_int_t acregmax; compat_int_t acdirmin; compat_int_t acdirmax; struct compat_nfs_string client_addr; struct compat_nfs_string mnt_path; struct compat_nfs_string hostname; compat_uint_t host_addrlen; compat_uptr_t host_addr; compat_int_t proto; compat_int_t auth_flavourlen; compat_uptr_t auth_flavours; }; static void nfs4_compat_mount_data_conv(struct nfs4_mount_data *data) { struct compat_nfs4_mount_data_v1 *compat = (struct compat_nfs4_mount_data_v1 *)data; /* copy the fields backwards */ 
data->auth_flavours = compat_ptr(compat->auth_flavours); data->auth_flavourlen = compat->auth_flavourlen; data->proto = compat->proto; data->host_addr = compat_ptr(compat->host_addr); data->host_addrlen = compat->host_addrlen; compat_nfs_string(&data->hostname, &compat->hostname); compat_nfs_string(&data->mnt_path, &compat->mnt_path); compat_nfs_string(&data->client_addr, &compat->client_addr); data->acdirmax = compat->acdirmax; data->acdirmin = compat->acdirmin; data->acregmax = compat->acregmax; data->acregmin = compat->acregmin; data->retrans = compat->retrans; data->timeo = compat->timeo; data->wsize = compat->wsize; data->rsize = compat->rsize; data->flags = compat->flags; data->version = compat->version; } /* * Validate NFSv4 mount options */ static int nfs4_parse_monolithic(struct fs_context *fc, struct nfs4_mount_data *data) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct sockaddr_storage *sap = &ctx->nfs_server._address; int ret; char *c; if (!data) { if (is_remount_fc(fc)) goto done; return nfs_invalf(fc, "NFS4: mount program didn't pass any mount data"); } ctx->version = 4; if (data->version != 1) return generic_parse_monolithic(fc, data); if (in_compat_syscall()) nfs4_compat_mount_data_conv(data); if (data->host_addrlen > sizeof(ctx->nfs_server.address)) goto out_no_address; if (data->host_addrlen == 0) goto out_no_address; ctx->nfs_server.addrlen = data->host_addrlen; if (copy_from_user(sap, data->host_addr, data->host_addrlen)) return -EFAULT; if (!nfs_verify_server_address(sap)) goto out_no_address; ctx->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); if (data->auth_flavourlen) { rpc_authflavor_t pseudoflavor; if (data->auth_flavourlen > 1) goto out_inval_auth; if (copy_from_user(&pseudoflavor, data->auth_flavours, sizeof(pseudoflavor))) return -EFAULT; ctx->selected_flavor = pseudoflavor; } else { ctx->selected_flavor = RPC_AUTH_UNIX; } c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); if (IS_ERR(c)) return PTR_ERR(c); ctx->nfs_server.hostname = c; c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); if (IS_ERR(c)) return PTR_ERR(c); ctx->nfs_server.export_path = c; trace_nfs_mount_path(c); c = strndup_user(data->client_addr.data, 16); if (IS_ERR(c)) return PTR_ERR(c); ctx->client_address = c; /* * Translate to nfs_fs_context, which nfs_fill_super * can deal with. */ ctx->flags = data->flags & NFS4_MOUNT_FLAGMASK; ctx->rsize = data->rsize; ctx->wsize = data->wsize; ctx->timeo = data->timeo; ctx->retrans = data->retrans; ctx->acregmin = data->acregmin; ctx->acregmax = data->acregmax; ctx->acdirmin = data->acdirmin; ctx->acdirmax = data->acdirmax; ctx->nfs_server.protocol = data->proto; ret = nfs_validate_transport_protocol(fc, ctx); if (ret) return ret; done: ctx->skip_reconfig_option_check = true; return 0; out_inval_auth: return nfs_invalf(fc, "NFS4: Invalid number of RPC auth flavours %d", data->auth_flavourlen); out_no_address: return nfs_invalf(fc, "NFS4: mount program didn't pass remote address"); } #endif /* * Parse a monolithic block of data from sys_mount(). */ static int nfs_fs_context_parse_monolithic(struct fs_context *fc, void *data) { if (fc->fs_type == &nfs_fs_type) return nfs23_parse_monolithic(fc, data); #if IS_ENABLED(CONFIG_NFS_V4) if (fc->fs_type == &nfs4_fs_type) return nfs4_parse_monolithic(fc, data); #endif return nfs_invalf(fc, "NFS: Unsupported monolithic data version"); } /* * Validate the preparsed information in the config. 
*/ static int nfs_fs_context_validate(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_subversion *nfs_mod; struct sockaddr_storage *sap = &ctx->nfs_server._address; int max_namelen = PAGE_SIZE; int max_pathlen = NFS_MAXPATHLEN; int port = 0; int ret; if (!fc->source) goto out_no_device_name; /* Check for sanity first. */ if (ctx->minorversion && ctx->version != 4) goto out_minorversion_mismatch; if (ctx->options & NFS_OPTION_MIGRATION && (ctx->version != 4 || ctx->minorversion != 0)) goto out_migration_misuse; /* Verify that any proto=/mountproto= options match the address * families in the addr=/mountaddr= options. */ if (ctx->protofamily != AF_UNSPEC && ctx->protofamily != ctx->nfs_server.address.sa_family) goto out_proto_mismatch; if (ctx->mountfamily != AF_UNSPEC) { if (ctx->mount_server.addrlen) { if (ctx->mountfamily != ctx->mount_server.address.sa_family) goto out_mountproto_mismatch; } else { if (ctx->mountfamily != ctx->nfs_server.address.sa_family) goto out_mountproto_mismatch; } } if (!nfs_verify_server_address(sap)) goto out_no_address; ret = nfs_validate_transport_protocol(fc, ctx); if (ret) return ret; if (ctx->version == 4) { if (IS_ENABLED(CONFIG_NFS_V4)) { if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) port = NFS_RDMA_PORT; else port = NFS_PORT; max_namelen = NFS4_MAXNAMLEN; max_pathlen = NFS4_MAXPATHLEN; ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL | NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } else { goto out_v4_not_compiled; } } else { nfs_set_mount_transport_protocol(ctx); if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) port = NFS_RDMA_PORT; } nfs_set_port(sap, &ctx->nfs_server.port, port); ret = nfs_parse_source(fc, max_namelen, max_pathlen); if (ret < 0) return ret; /* Load the NFS protocol module if we haven't done so yet */ if (!ctx->nfs_mod) { nfs_mod = get_nfs_version(ctx->version); if (IS_ERR(nfs_mod)) { ret = PTR_ERR(nfs_mod); goto out_version_unavailable; } ctx->nfs_mod = nfs_mod; } /* Ensure the filesystem context has the correct fs_type */ if (fc->fs_type != ctx->nfs_mod->nfs_fs) { module_put(fc->fs_type->owner); __module_get(ctx->nfs_mod->nfs_fs->owner); fc->fs_type = ctx->nfs_mod->nfs_fs; } return 0; out_no_device_name: return nfs_invalf(fc, "NFS: Device name not specified"); out_v4_not_compiled: nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel"); return -EPROTONOSUPPORT; out_no_address: return nfs_invalf(fc, "NFS: mount program didn't pass remote address"); out_mountproto_mismatch: return nfs_invalf(fc, "NFS: Mount server address does not match mountproto= option"); out_proto_mismatch: return nfs_invalf(fc, "NFS: Server address does not match proto= option"); out_minorversion_mismatch: return nfs_invalf(fc, "NFS: Mount option vers=%u does not support minorversion=%u", ctx->version, ctx->minorversion); out_migration_misuse: return nfs_invalf(fc, "NFS: 'Migration' not supported for this NFS version"); out_version_unavailable: nfs_errorf(fc, "NFS: Version unavailable"); return ret; } /* * Create an NFS superblock by the appropriate method. */ static int nfs_get_tree(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); int err = nfs_fs_context_validate(fc); if (err) return err; if (!ctx->internal) return ctx->nfs_mod->rpc_ops->try_get_tree(fc); else return nfs_get_tree_common(fc); } /* * Handle duplication of a configuration. The caller copied *src into *sc, but * it can't deal with resource pointers in the filesystem context, so we have * to do that. 
We need to clear pointers, copy data or get extra refs as * appropriate. */ static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) { struct nfs_fs_context *src = nfs_fc2context(src_fc), *ctx; ctx = kmemdup(src, sizeof(struct nfs_fs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->mntfh = nfs_alloc_fhandle(); if (!ctx->mntfh) { kfree(ctx); return -ENOMEM; } nfs_copy_fh(ctx->mntfh, src->mntfh); __module_get(ctx->nfs_mod->owner); ctx->client_address = NULL; ctx->mount_server.hostname = NULL; ctx->nfs_server.export_path = NULL; ctx->nfs_server.hostname = NULL; ctx->fscache_uniq = NULL; ctx->clone_data.fattr = NULL; fc->fs_private = ctx; return 0; } static void nfs_fs_context_free(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); if (ctx) { if (ctx->server) nfs_free_server(ctx->server); if (ctx->nfs_mod) put_nfs_version(ctx->nfs_mod); kfree(ctx->client_address); kfree(ctx->mount_server.hostname); kfree(ctx->nfs_server.export_path); kfree(ctx->nfs_server.hostname); kfree(ctx->fscache_uniq); nfs_free_fhandle(ctx->mntfh); nfs_free_fattr(ctx->clone_data.fattr); kfree(ctx); } } static const struct fs_context_operations nfs_fs_context_ops = { .free = nfs_fs_context_free, .dup = nfs_fs_context_dup, .parse_param = nfs_fs_context_parse_param, .parse_monolithic = nfs_fs_context_parse_monolithic, .get_tree = nfs_get_tree, .reconfigure = nfs_reconfigure, }; /* * Prepare superblock configuration. We use the namespaces attached to the * context. This may be the current process's namespaces, or it may be a * container's namespaces. */ static int nfs_init_fs_context(struct fs_context *fc) { struct nfs_fs_context *ctx; ctx = kzalloc(sizeof(struct nfs_fs_context), GFP_KERNEL); if (unlikely(!ctx)) return -ENOMEM; ctx->mntfh = nfs_alloc_fhandle(); if (unlikely(!ctx->mntfh)) { kfree(ctx); return -ENOMEM; } ctx->protofamily = AF_UNSPEC; ctx->mountfamily = AF_UNSPEC; ctx->mount_server.port = NFS_UNSPEC_PORT; if (fc->root) { /* reconfigure, start with the current config */ struct nfs_server *nfss = fc->root->d_sb->s_fs_info; struct net *net = nfss->nfs_client->cl_net; ctx->flags = nfss->flags; ctx->rsize = nfss->rsize; ctx->wsize = nfss->wsize; ctx->retrans = nfss->client->cl_timeout->to_retries; ctx->selected_flavor = nfss->client->cl_auth->au_flavor; ctx->acregmin = nfss->acregmin / HZ; ctx->acregmax = nfss->acregmax / HZ; ctx->acdirmin = nfss->acdirmin / HZ; ctx->acdirmax = nfss->acdirmax / HZ; ctx->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; ctx->nfs_server.port = nfss->port; ctx->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; ctx->version = nfss->nfs_client->rpc_ops->version; ctx->minorversion = nfss->nfs_client->cl_minorversion; memcpy(&ctx->nfs_server._address, &nfss->nfs_client->cl_addr, ctx->nfs_server.addrlen); if (fc->net_ns != net) { put_net(fc->net_ns); fc->net_ns = get_net(net); } ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod; __module_get(ctx->nfs_mod->owner); } else { /* defaults */ ctx->timeo = NFS_UNSPEC_TIMEO; ctx->retrans = NFS_UNSPEC_RETRANS; ctx->acregmin = NFS_DEF_ACREGMIN; ctx->acregmax = NFS_DEF_ACREGMAX; ctx->acdirmin = NFS_DEF_ACDIRMIN; ctx->acdirmax = NFS_DEF_ACDIRMAX; ctx->nfs_server.port = NFS_UNSPEC_PORT; ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; ctx->selected_flavor = RPC_AUTH_MAXFLAVOR; ctx->minorversion = 0; ctx->need_mount = true; ctx->xprtsec.policy = RPC_XPRTSEC_NONE; ctx->xprtsec.cert_serial = TLS_NO_CERT; ctx->xprtsec.privkey_serial = TLS_NO_PRIVKEY; fc->s_iflags |= SB_I_STABLE_WRITES; } fc->fs_private = 
ctx; fc->ops = &nfs_fs_context_ops; return 0; } struct file_system_type nfs_fs_type = { .owner = THIS_MODULE, .name = "nfs", .init_fs_context = nfs_init_fs_context, .parameters = nfs_fs_parameters, .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; MODULE_ALIAS_FS("nfs"); EXPORT_SYMBOL_GPL(nfs_fs_type); #if IS_ENABLED(CONFIG_NFS_V4) struct file_system_type nfs4_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .init_fs_context = nfs_init_fs_context, .parameters = nfs_fs_parameters, .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; MODULE_ALIAS_FS("nfs4"); MODULE_ALIAS("nfs4"); EXPORT_SYMBOL_GPL(nfs4_fs_type); #endif /* CONFIG_NFS_V4 */
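/*
 * Illustrative userspace sketch, not part of the kernel source above: how a
 * mount performed through the new mount API reaches the handlers registered
 * in nfs_fs_context_ops (nfs_init_fs_context, nfs_fs_context_parse_param,
 * nfs_get_tree).  Assumes a libc that wraps fsopen()/fsconfig()/fsmount()/
 * move_mount() (glibc >= 2.36, <sys/mount.h>); the server, export path and
 * option values are placeholders.
 */
#include <sys/mount.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* fsopen("nfs4") -> nfs_init_fs_context() allocates the nfs_fs_context */
	int fsfd = fsopen("nfs4", FSOPEN_CLOEXEC);
	if (fsfd < 0) {
		perror("fsopen");
		return 1;
	}

	/* String options are routed through nfs_fs_context_parse_param() */
	fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "srv.example.com:/export", 0);
	fsconfig(fsfd, FSCONFIG_SET_STRING, "vers", "4.2", 0);

	/* FSCONFIG_CMD_CREATE runs nfs_fs_context_validate() and nfs_get_tree() */
	if (fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
		perror("fsconfig(create)");
		return 1;
	}

	int mfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
	if (mfd < 0) {
		perror("fsmount");
		return 1;
	}

	/* Attach the new mount at /mnt */
	if (move_mount(mfd, "", AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH) < 0)
		perror("move_mount");

	close(mfd);
	close(fsfd);
	return 0;
}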
835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 // SPDX-License-Identifier: GPL-2.0-or-later /* * drivers/net/bond/bond_netlink.c - Netlink interface for bonding * Copyright (c) 2013 Jiri Pirko <jiri@resnulli.us> * Copyright (c) 2013 Scott Feldman <sfeldma@cumulusnetworks.com> */ #include <linux/module.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/if_link.h> #include <linux/if_ether.h> #include <net/netlink.h> #include <net/rtnetlink.h> #include <net/bonding.h> #include <net/ipv6.h> static size_t bond_get_slave_size(const struct net_device *bond_dev, const struct net_device *slave_dev) { return nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_STATE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_MII_STATUS */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_SLAVE_LINK_FAILURE_COUNT */ nla_total_size(MAX_ADDR_LEN) + /* IFLA_BOND_SLAVE_PERM_HWADDR */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_QUEUE_ID */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_AGGREGATOR_ID */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */ nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */ 0; } static int bond_fill_slave_info(struct sk_buff *skb, const struct net_device *bond_dev, const struct net_device *slave_dev) { struct slave *slave = bond_slave_get_rtnl(slave_dev); if (nla_put_u8(skb, IFLA_BOND_SLAVE_STATE, bond_slave_state(slave))) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_SLAVE_MII_STATUS, slave->link)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, slave->link_failure_count)) goto nla_put_failure; if (nla_put(skb, IFLA_BOND_SLAVE_PERM_HWADDR, slave_dev->addr_len, slave->perm_hwaddr)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_SLAVE_QUEUE_ID, READ_ONCE(slave->queue_id))) goto nla_put_failure; if (nla_put_s32(skb, IFLA_BOND_SLAVE_PRIO, slave->prio)) goto nla_put_failure; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { const struct aggregator *agg; const struct port *ad_port; ad_port = &SLAVE_AD_INFO(slave)->port; agg = SLAVE_AD_INFO(slave)->port.aggregator; if (agg) { if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, agg->aggregator_identifier)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, ad_port->actor_oper_port_state)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, ad_port->partner_oper.port_state)) goto nla_put_failure; } } return 0; nla_put_failure: return -EMSGSIZE; } /* Limit the max delay range to 300s */ static const struct netlink_range_validation delay_range = { .max = 300000, }; static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_MODE] = { .type = NLA_U8 }, [IFLA_BOND_ACTIVE_SLAVE] = { .type = NLA_U32 }, [IFLA_BOND_MIIMON] = { .type = NLA_U32 }, [IFLA_BOND_UPDELAY] = { .type = NLA_U32 }, [IFLA_BOND_DOWNDELAY] = { .type = NLA_U32 }, [IFLA_BOND_USE_CARRIER] = { .type = NLA_U8 }, [IFLA_BOND_ARP_INTERVAL] = { .type = NLA_U32 }, [IFLA_BOND_ARP_IP_TARGET] = { .type = NLA_NESTED }, 
[IFLA_BOND_ARP_VALIDATE] = { .type = NLA_U32 }, [IFLA_BOND_ARP_ALL_TARGETS] = { .type = NLA_U32 }, [IFLA_BOND_PRIMARY] = { .type = NLA_U32 }, [IFLA_BOND_PRIMARY_RESELECT] = { .type = NLA_U8 }, [IFLA_BOND_FAIL_OVER_MAC] = { .type = NLA_U8 }, [IFLA_BOND_XMIT_HASH_POLICY] = { .type = NLA_U8 }, [IFLA_BOND_RESEND_IGMP] = { .type = NLA_U32 }, [IFLA_BOND_NUM_PEER_NOTIF] = { .type = NLA_U8 }, [IFLA_BOND_ALL_SLAVES_ACTIVE] = { .type = NLA_U8 }, [IFLA_BOND_MIN_LINKS] = { .type = NLA_U32 }, [IFLA_BOND_LP_INTERVAL] = { .type = NLA_U32 }, [IFLA_BOND_PACKETS_PER_SLAVE] = { .type = NLA_U32 }, [IFLA_BOND_AD_LACP_ACTIVE] = { .type = NLA_U8 }, [IFLA_BOND_AD_LACP_RATE] = { .type = NLA_U8 }, [IFLA_BOND_AD_SELECT] = { .type = NLA_U8 }, [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED }, [IFLA_BOND_AD_ACTOR_SYS_PRIO] = { .type = NLA_U16 }, [IFLA_BOND_AD_USER_PORT_KEY] = { .type = NLA_U16 }, [IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NLA_BINARY, .len = ETH_ALEN }, [IFLA_BOND_TLB_DYNAMIC_LB] = { .type = NLA_U8 }, [IFLA_BOND_PEER_NOTIF_DELAY] = NLA_POLICY_FULL_RANGE(NLA_U32, &delay_range), [IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 }, [IFLA_BOND_NS_IP6_TARGET] = { .type = NLA_NESTED }, [IFLA_BOND_COUPLED_CONTROL] = { .type = NLA_U8 }, }; static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { [IFLA_BOND_SLAVE_QUEUE_ID] = { .type = NLA_U16 }, [IFLA_BOND_SLAVE_PRIO] = { .type = NLA_S32 }, }; static int bond_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) return -EINVAL; if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) return -EADDRNOTAVAIL; } return 0; } static int bond_slave_changelink(struct net_device *bond_dev, struct net_device *slave_dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct bonding *bond = netdev_priv(bond_dev); struct bond_opt_value newval; int err; if (!data) return 0; if (data[IFLA_BOND_SLAVE_QUEUE_ID]) { u16 queue_id = nla_get_u16(data[IFLA_BOND_SLAVE_QUEUE_ID]); char queue_id_str[IFNAMSIZ + 7]; /* queue_id option setting expects slave_name:queue_id */ snprintf(queue_id_str, sizeof(queue_id_str), "%s:%u\n", slave_dev->name, queue_id); bond_opt_initstr(&newval, queue_id_str); err = __bond_opt_set(bond, BOND_OPT_QUEUE_ID, &newval, data[IFLA_BOND_SLAVE_QUEUE_ID], extack); if (err) return err; } if (data[IFLA_BOND_SLAVE_PRIO]) { int prio = nla_get_s32(data[IFLA_BOND_SLAVE_PRIO]); bond_opt_slave_initval(&newval, &slave_dev, prio); err = __bond_opt_set(bond, BOND_OPT_PRIO, &newval, data[IFLA_BOND_SLAVE_PRIO], extack); if (err) return err; } return 0; } static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct bonding *bond = netdev_priv(bond_dev); struct bond_opt_value newval; int miimon = 0; int err; if (!data) return 0; if (data[IFLA_BOND_MODE]) { int mode = nla_get_u8(data[IFLA_BOND_MODE]); bond_opt_initval(&newval, mode); err = __bond_opt_set(bond, BOND_OPT_MODE, &newval, data[IFLA_BOND_MODE], extack); if (err) return err; } if (data[IFLA_BOND_ACTIVE_SLAVE]) { int ifindex = nla_get_u32(data[IFLA_BOND_ACTIVE_SLAVE]); struct net_device *slave_dev; char *active_slave = ""; if (ifindex != 0) { slave_dev = __dev_get_by_index(dev_net(bond_dev), ifindex); if (!slave_dev) return -ENODEV; active_slave = slave_dev->name; } bond_opt_initstr(&newval, active_slave); err = __bond_opt_set(bond, BOND_OPT_ACTIVE_SLAVE, &newval, data[IFLA_BOND_ACTIVE_SLAVE], extack); if (err) 
return err; } if (data[IFLA_BOND_MIIMON]) { miimon = nla_get_u32(data[IFLA_BOND_MIIMON]); bond_opt_initval(&newval, miimon); err = __bond_opt_set(bond, BOND_OPT_MIIMON, &newval, data[IFLA_BOND_MIIMON], extack); if (err) return err; } if (data[IFLA_BOND_UPDELAY]) { int updelay = nla_get_u32(data[IFLA_BOND_UPDELAY]); bond_opt_initval(&newval, updelay); err = __bond_opt_set(bond, BOND_OPT_UPDELAY, &newval, data[IFLA_BOND_UPDELAY], extack); if (err) return err; } if (data[IFLA_BOND_DOWNDELAY]) { int downdelay = nla_get_u32(data[IFLA_BOND_DOWNDELAY]); bond_opt_initval(&newval, downdelay); err = __bond_opt_set(bond, BOND_OPT_DOWNDELAY, &newval, data[IFLA_BOND_DOWNDELAY], extack); if (err) return err; } if (data[IFLA_BOND_PEER_NOTIF_DELAY]) { int delay = nla_get_u32(data[IFLA_BOND_PEER_NOTIF_DELAY]); bond_opt_initval(&newval, delay); err = __bond_opt_set(bond, BOND_OPT_PEER_NOTIF_DELAY, &newval, data[IFLA_BOND_PEER_NOTIF_DELAY], extack); if (err) return err; } if (data[IFLA_BOND_USE_CARRIER]) { int use_carrier = nla_get_u8(data[IFLA_BOND_USE_CARRIER]); bond_opt_initval(&newval, use_carrier); err = __bond_opt_set(bond, BOND_OPT_USE_CARRIER, &newval, data[IFLA_BOND_USE_CARRIER], extack); if (err) return err; } if (data[IFLA_BOND_ARP_INTERVAL]) { int arp_interval = nla_get_u32(data[IFLA_BOND_ARP_INTERVAL]); if (arp_interval && miimon) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_BOND_ARP_INTERVAL], "ARP monitoring cannot be used with MII monitoring"); return -EINVAL; } bond_opt_initval(&newval, arp_interval); err = __bond_opt_set(bond, BOND_OPT_ARP_INTERVAL, &newval, data[IFLA_BOND_ARP_INTERVAL], extack); if (err) return err; } if (data[IFLA_BOND_ARP_IP_TARGET]) { struct nlattr *attr; int i = 0, rem; bond_option_arp_ip_targets_clear(bond); nla_for_each_nested(attr, data[IFLA_BOND_ARP_IP_TARGET], rem) { __be32 target; if (nla_len(attr) < sizeof(target)) return -EINVAL; target = nla_get_be32(attr); bond_opt_initval(&newval, (__force u64)target); err = __bond_opt_set(bond, BOND_OPT_ARP_TARGETS, &newval, data[IFLA_BOND_ARP_IP_TARGET], extack); if (err) break; i++; } if (i == 0 && bond->params.arp_interval) netdev_warn(bond->dev, "Removing last arp target with arp_interval on\n"); if (err) return err; } #if IS_ENABLED(CONFIG_IPV6) if (data[IFLA_BOND_NS_IP6_TARGET]) { struct nlattr *attr; int i = 0, rem; bond_option_ns_ip6_targets_clear(bond); nla_for_each_nested(attr, data[IFLA_BOND_NS_IP6_TARGET], rem) { struct in6_addr addr6; if (nla_len(attr) < sizeof(addr6)) { NL_SET_ERR_MSG(extack, "Invalid IPv6 address"); return -EINVAL; } addr6 = nla_get_in6_addr(attr); bond_opt_initextra(&newval, &addr6, sizeof(addr6)); err = __bond_opt_set(bond, BOND_OPT_NS_TARGETS, &newval, data[IFLA_BOND_NS_IP6_TARGET], extack); if (err) break; i++; } if (i == 0 && bond->params.arp_interval) netdev_warn(bond->dev, "Removing last ns target with arp_interval on\n"); if (err) return err; } #endif if (data[IFLA_BOND_ARP_VALIDATE]) { int arp_validate = nla_get_u32(data[IFLA_BOND_ARP_VALIDATE]); if (arp_validate && miimon) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_BOND_ARP_INTERVAL], "ARP validating cannot be used with MII monitoring"); return -EINVAL; } bond_opt_initval(&newval, arp_validate); err = __bond_opt_set(bond, BOND_OPT_ARP_VALIDATE, &newval, data[IFLA_BOND_ARP_VALIDATE], extack); if (err) return err; } if (data[IFLA_BOND_ARP_ALL_TARGETS]) { int arp_all_targets = nla_get_u32(data[IFLA_BOND_ARP_ALL_TARGETS]); bond_opt_initval(&newval, arp_all_targets); err = __bond_opt_set(bond, BOND_OPT_ARP_ALL_TARGETS, &newval, 
data[IFLA_BOND_ARP_ALL_TARGETS], extack); if (err) return err; } if (data[IFLA_BOND_PRIMARY]) { int ifindex = nla_get_u32(data[IFLA_BOND_PRIMARY]); struct net_device *dev; char *primary = ""; dev = __dev_get_by_index(dev_net(bond_dev), ifindex); if (dev) primary = dev->name; bond_opt_initstr(&newval, primary); err = __bond_opt_set(bond, BOND_OPT_PRIMARY, &newval, data[IFLA_BOND_PRIMARY], extack); if (err) return err; } if (data[IFLA_BOND_PRIMARY_RESELECT]) { int primary_reselect = nla_get_u8(data[IFLA_BOND_PRIMARY_RESELECT]); bond_opt_initval(&newval, primary_reselect); err = __bond_opt_set(bond, BOND_OPT_PRIMARY_RESELECT, &newval, data[IFLA_BOND_PRIMARY_RESELECT], extack); if (err) return err; } if (data[IFLA_BOND_FAIL_OVER_MAC]) { int fail_over_mac = nla_get_u8(data[IFLA_BOND_FAIL_OVER_MAC]); bond_opt_initval(&newval, fail_over_mac); err = __bond_opt_set(bond, BOND_OPT_FAIL_OVER_MAC, &newval, data[IFLA_BOND_FAIL_OVER_MAC], extack); if (err) return err; } if (data[IFLA_BOND_XMIT_HASH_POLICY]) { int xmit_hash_policy = nla_get_u8(data[IFLA_BOND_XMIT_HASH_POLICY]); bond_opt_initval(&newval, xmit_hash_policy); err = __bond_opt_set(bond, BOND_OPT_XMIT_HASH, &newval, data[IFLA_BOND_XMIT_HASH_POLICY], extack); if (err) return err; } if (data[IFLA_BOND_RESEND_IGMP]) { int resend_igmp = nla_get_u32(data[IFLA_BOND_RESEND_IGMP]); bond_opt_initval(&newval, resend_igmp); err = __bond_opt_set(bond, BOND_OPT_RESEND_IGMP, &newval, data[IFLA_BOND_RESEND_IGMP], extack); if (err) return err; } if (data[IFLA_BOND_NUM_PEER_NOTIF]) { int num_peer_notif = nla_get_u8(data[IFLA_BOND_NUM_PEER_NOTIF]); bond_opt_initval(&newval, num_peer_notif); err = __bond_opt_set(bond, BOND_OPT_NUM_PEER_NOTIF, &newval, data[IFLA_BOND_NUM_PEER_NOTIF], extack); if (err) return err; } if (data[IFLA_BOND_ALL_SLAVES_ACTIVE]) { int all_slaves_active = nla_get_u8(data[IFLA_BOND_ALL_SLAVES_ACTIVE]); bond_opt_initval(&newval, all_slaves_active); err = __bond_opt_set(bond, BOND_OPT_ALL_SLAVES_ACTIVE, &newval, data[IFLA_BOND_ALL_SLAVES_ACTIVE], extack); if (err) return err; } if (data[IFLA_BOND_MIN_LINKS]) { int min_links = nla_get_u32(data[IFLA_BOND_MIN_LINKS]); bond_opt_initval(&newval, min_links); err = __bond_opt_set(bond, BOND_OPT_MINLINKS, &newval, data[IFLA_BOND_MIN_LINKS], extack); if (err) return err; } if (data[IFLA_BOND_LP_INTERVAL]) { int lp_interval = nla_get_u32(data[IFLA_BOND_LP_INTERVAL]); bond_opt_initval(&newval, lp_interval); err = __bond_opt_set(bond, BOND_OPT_LP_INTERVAL, &newval, data[IFLA_BOND_LP_INTERVAL], extack); if (err) return err; } if (data[IFLA_BOND_PACKETS_PER_SLAVE]) { int packets_per_slave = nla_get_u32(data[IFLA_BOND_PACKETS_PER_SLAVE]); bond_opt_initval(&newval, packets_per_slave); err = __bond_opt_set(bond, BOND_OPT_PACKETS_PER_SLAVE, &newval, data[IFLA_BOND_PACKETS_PER_SLAVE], extack); if (err) return err; } if (data[IFLA_BOND_AD_LACP_ACTIVE]) { int lacp_active = nla_get_u8(data[IFLA_BOND_AD_LACP_ACTIVE]); bond_opt_initval(&newval, lacp_active); err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval, data[IFLA_BOND_AD_LACP_ACTIVE], extack); if (err) return err; } if (data[IFLA_BOND_AD_LACP_RATE]) { int lacp_rate = nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]); bond_opt_initval(&newval, lacp_rate); err = __bond_opt_set(bond, BOND_OPT_LACP_RATE, &newval, data[IFLA_BOND_AD_LACP_RATE], extack); if (err) return err; } if (data[IFLA_BOND_AD_SELECT]) { int ad_select = nla_get_u8(data[IFLA_BOND_AD_SELECT]); bond_opt_initval(&newval, ad_select); err = __bond_opt_set(bond, BOND_OPT_AD_SELECT, &newval, 
data[IFLA_BOND_AD_SELECT], extack); if (err) return err; } if (data[IFLA_BOND_AD_ACTOR_SYS_PRIO]) { int actor_sys_prio = nla_get_u16(data[IFLA_BOND_AD_ACTOR_SYS_PRIO]); bond_opt_initval(&newval, actor_sys_prio); err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYS_PRIO, &newval, data[IFLA_BOND_AD_ACTOR_SYS_PRIO], extack); if (err) return err; } if (data[IFLA_BOND_AD_USER_PORT_KEY]) { int port_key = nla_get_u16(data[IFLA_BOND_AD_USER_PORT_KEY]); bond_opt_initval(&newval, port_key); err = __bond_opt_set(bond, BOND_OPT_AD_USER_PORT_KEY, &newval, data[IFLA_BOND_AD_USER_PORT_KEY], extack); if (err) return err; } if (data[IFLA_BOND_AD_ACTOR_SYSTEM]) { if (nla_len(data[IFLA_BOND_AD_ACTOR_SYSTEM]) != ETH_ALEN) return -EINVAL; bond_opt_initval(&newval, nla_get_u64(data[IFLA_BOND_AD_ACTOR_SYSTEM])); err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYSTEM, &newval, data[IFLA_BOND_AD_ACTOR_SYSTEM], extack); if (err) return err; } if (data[IFLA_BOND_TLB_DYNAMIC_LB]) { int dynamic_lb = nla_get_u8(data[IFLA_BOND_TLB_DYNAMIC_LB]); bond_opt_initval(&newval, dynamic_lb); err = __bond_opt_set(bond, BOND_OPT_TLB_DYNAMIC_LB, &newval, data[IFLA_BOND_TLB_DYNAMIC_LB], extack); if (err) return err; } if (data[IFLA_BOND_MISSED_MAX]) { int missed_max = nla_get_u8(data[IFLA_BOND_MISSED_MAX]); bond_opt_initval(&newval, missed_max); err = __bond_opt_set(bond, BOND_OPT_MISSED_MAX, &newval, data[IFLA_BOND_MISSED_MAX], extack); if (err) return err; } if (data[IFLA_BOND_COUPLED_CONTROL]) { int coupled_control = nla_get_u8(data[IFLA_BOND_COUPLED_CONTROL]); bond_opt_initval(&newval, coupled_control); err = __bond_opt_set(bond, BOND_OPT_COUPLED_CONTROL, &newval, data[IFLA_BOND_COUPLED_CONTROL], extack); if (err) return err; } return 0; } static int bond_newlink(struct net *src_net, struct net_device *bond_dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { int err; err = bond_changelink(bond_dev, tb, data, extack); if (err < 0) return err; err = register_netdevice(bond_dev); if (!err) { struct bonding *bond = netdev_priv(bond_dev); netif_carrier_off(bond_dev); bond_work_init_all(bond); } return err; } static size_t bond_get_size(const struct net_device *bond_dev) { return nla_total_size(sizeof(u8)) + /* IFLA_BOND_MODE */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_ACTIVE_SLAVE */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_MIIMON */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_UPDELAY */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_DOWNDELAY */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_USE_CARRIER */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_ARP_INTERVAL */ /* IFLA_BOND_ARP_IP_TARGET */ nla_total_size(sizeof(struct nlattr)) + nla_total_size(sizeof(u32)) * BOND_MAX_ARP_TARGETS + nla_total_size(sizeof(u32)) + /* IFLA_BOND_ARP_VALIDATE */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_ARP_ALL_TARGETS */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PRIMARY */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_PRIMARY_RESELECT */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_FAIL_OVER_MAC */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_XMIT_HASH_POLICY */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_RESEND_IGMP */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_NUM_PEER_NOTIF */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_ALL_SLAVES_ACTIVE */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_MIN_LINKS */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_LP_INTERVAL */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PACKETS_PER_SLAVE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_ACTIVE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_RATE */ 
nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_SELECT */ nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_AGGREGATOR */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_NUM_PORTS */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_ACTOR_KEY */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_PARTNER_KEY*/ nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_INFO_PARTNER_MAC*/ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_ACTOR_SYS_PRIO */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_USER_PORT_KEY */ nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_ACTOR_SYSTEM */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_TLB_DYNAMIC_LB */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PEER_NOTIF_DELAY */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_MISSED_MAX */ /* IFLA_BOND_NS_IP6_TARGET */ nla_total_size(sizeof(struct nlattr)) + nla_total_size(sizeof(struct in6_addr)) * BOND_MAX_NS_TARGETS + nla_total_size(sizeof(u8)) + /* IFLA_BOND_COUPLED_CONTROL */ 0; } static int bond_option_active_slave_get_ifindex(struct bonding *bond) { const struct net_device *slave; int ifindex; rcu_read_lock(); slave = bond_option_active_slave_get_rcu(bond); ifindex = slave ? slave->ifindex : 0; rcu_read_unlock(); return ifindex; } static int bond_fill_info(struct sk_buff *skb, const struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); unsigned int packets_per_slave; int ifindex, i, targets_added; struct nlattr *targets; struct slave *primary; if (nla_put_u8(skb, IFLA_BOND_MODE, BOND_MODE(bond))) goto nla_put_failure; ifindex = bond_option_active_slave_get_ifindex(bond); if (ifindex && nla_put_u32(skb, IFLA_BOND_ACTIVE_SLAVE, ifindex)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_MIIMON, bond->params.miimon)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_UPDELAY, bond->params.updelay * bond->params.miimon)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_DOWNDELAY, bond->params.downdelay * bond->params.miimon)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_PEER_NOTIF_DELAY, bond->params.peer_notif_delay * bond->params.miimon)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_USE_CARRIER, bond->params.use_carrier)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_ARP_INTERVAL, bond->params.arp_interval)) goto nla_put_failure; targets = nla_nest_start_noflag(skb, IFLA_BOND_ARP_IP_TARGET); if (!targets) goto nla_put_failure; targets_added = 0; for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) { if (bond->params.arp_targets[i]) { if (nla_put_be32(skb, i, bond->params.arp_targets[i])) goto nla_put_failure; targets_added = 1; } } if (targets_added) nla_nest_end(skb, targets); else nla_nest_cancel(skb, targets); if (nla_put_u32(skb, IFLA_BOND_ARP_VALIDATE, bond->params.arp_validate)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_ARP_ALL_TARGETS, bond->params.arp_all_targets)) goto nla_put_failure; #if IS_ENABLED(CONFIG_IPV6) targets = nla_nest_start(skb, IFLA_BOND_NS_IP6_TARGET); if (!targets) goto nla_put_failure; targets_added = 0; for (i = 0; i < BOND_MAX_NS_TARGETS; i++) { if (!ipv6_addr_any(&bond->params.ns_targets[i])) { if (nla_put_in6_addr(skb, i, &bond->params.ns_targets[i])) goto nla_put_failure; targets_added = 1; } } if (targets_added) nla_nest_end(skb, targets); else nla_nest_cancel(skb, targets); #endif primary = rtnl_dereference(bond->primary_slave); if (primary && nla_put_u32(skb, IFLA_BOND_PRIMARY, primary->dev->ifindex)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_PRIMARY_RESELECT, 
bond->params.primary_reselect)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_FAIL_OVER_MAC, bond->params.fail_over_mac)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_XMIT_HASH_POLICY, bond->params.xmit_policy)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_RESEND_IGMP, bond->params.resend_igmp)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_NUM_PEER_NOTIF, bond->params.num_peer_notif)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_ALL_SLAVES_ACTIVE, bond->params.all_slaves_active)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_MIN_LINKS, bond->params.min_links)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_LP_INTERVAL, bond->params.lp_interval)) goto nla_put_failure; packets_per_slave = bond->params.packets_per_slave; if (nla_put_u32(skb, IFLA_BOND_PACKETS_PER_SLAVE, packets_per_slave)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_AD_LACP_ACTIVE, bond->params.lacp_active)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE, bond->params.lacp_fast)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_AD_SELECT, bond->params.ad_select)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_TLB_DYNAMIC_LB, bond->params.tlb_dynamic_lb)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_MISSED_MAX, bond->params.missed_max)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_COUPLED_CONTROL, bond->params.coupled_control)) goto nla_put_failure; if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info info; if (capable(CAP_NET_ADMIN)) { if (nla_put_u16(skb, IFLA_BOND_AD_ACTOR_SYS_PRIO, bond->params.ad_actor_sys_prio)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_USER_PORT_KEY, bond->params.ad_user_port_key)) goto nla_put_failure; if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM, ETH_ALEN, &bond->params.ad_actor_system)) goto nla_put_failure; } if (!bond_3ad_get_active_agg_info(bond, &info)) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, IFLA_BOND_AD_INFO); if (!nest) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_INFO_AGGREGATOR, info.aggregator_id)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_INFO_NUM_PORTS, info.ports)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_INFO_ACTOR_KEY, info.actor_key)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_INFO_PARTNER_KEY, info.partner_key)) goto nla_put_failure; if (nla_put(skb, IFLA_BOND_AD_INFO_PARTNER_MAC, sizeof(info.partner_system), &info.partner_system)) goto nla_put_failure; nla_nest_end(skb, nest); } } return 0; nla_put_failure: return -EMSGSIZE; } static size_t bond_get_linkxstats_size(const struct net_device *dev, int attr) { switch (attr) { case IFLA_STATS_LINK_XSTATS: case IFLA_STATS_LINK_XSTATS_SLAVE: break; default: return 0; } return bond_3ad_stats_size() + nla_total_size(0); } static int bond_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, int *prividx, int attr) { struct nlattr *nla __maybe_unused; struct slave *slave = NULL; struct nlattr *nest, *nest2; struct bonding *bond; switch (attr) { case IFLA_STATS_LINK_XSTATS: bond = netdev_priv(dev); break; case IFLA_STATS_LINK_XSTATS_SLAVE: slave = bond_slave_get_rtnl(dev); if (!slave) return 0; bond = slave->bond; break; default: return -EINVAL; } nest = nla_nest_start_noflag(skb, LINK_XSTATS_TYPE_BOND); if (!nest) return -EMSGSIZE; if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct bond_3ad_stats *stats; if (slave) stats = &SLAVE_AD_INFO(slave)->stats; else stats = &BOND_AD_INFO(bond).stats; nest2 = nla_nest_start_noflag(skb, BOND_XSTATS_3AD); if 
(!nest2) { nla_nest_end(skb, nest); return -EMSGSIZE; } if (bond_3ad_stats_fill(skb, stats)) { nla_nest_cancel(skb, nest2); nla_nest_end(skb, nest); return -EMSGSIZE; } nla_nest_end(skb, nest2); } nla_nest_end(skb, nest); return 0; } struct rtnl_link_ops bond_link_ops __read_mostly = { .kind = "bond", .priv_size = sizeof(struct bonding), .setup = bond_setup, .maxtype = IFLA_BOND_MAX, .policy = bond_policy, .validate = bond_validate, .newlink = bond_newlink, .changelink = bond_changelink, .get_size = bond_get_size, .fill_info = bond_fill_info, .get_num_tx_queues = bond_get_num_tx_queues, .get_num_rx_queues = bond_get_num_tx_queues, /* Use the same number as for TX queues */ .fill_linkxstats = bond_fill_linkxstats, .get_linkxstats_size = bond_get_linkxstats_size, .slave_maxtype = IFLA_BOND_SLAVE_MAX, .slave_policy = bond_slave_policy, .slave_changelink = bond_slave_changelink, .get_slave_size = bond_get_slave_size, .fill_slave_info = bond_fill_slave_info, }; int __init bond_netlink_init(void) { return rtnl_link_register(&bond_link_ops); } void bond_netlink_fini(void) { rtnl_link_unregister(&bond_link_ops); } MODULE_ALIAS_RTNL_LINK("bond");
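/*
 * Illustrative userspace sketch, not part of the driver above: the RTM_NEWLINK
 * request serviced by bond_newlink()/bond_changelink().  The IFLA_BOND_*
 * attributes sit inside IFLA_LINKINFO/IFLA_INFO_DATA with IFLA_INFO_KIND set
 * to "bond".  Raw rtnetlink is used only to keep the sketch self-contained;
 * the interface name, mode and miimon values are placeholders.
 */
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/if_link.h>

static struct rtattr *nest_start(struct nlmsghdr *nlh, unsigned short type)
{
	struct rtattr *nest = (struct rtattr *)((char *)nlh + NLMSG_ALIGN(nlh->nlmsg_len));

	nest->rta_type = type;
	nest->rta_len = RTA_LENGTH(0);
	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + RTA_ALIGN(nest->rta_len);
	return nest;
}

static void put_attr(struct nlmsghdr *nlh, unsigned short type, const void *data, int len)
{
	struct rtattr *rta = (struct rtattr *)((char *)nlh + NLMSG_ALIGN(nlh->nlmsg_len));

	rta->rta_type = type;
	rta->rta_len = RTA_LENGTH(len);
	memcpy(RTA_DATA(rta), data, len);
	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
}

int main(void)
{
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr  n;
		struct ifinfomsg i;
		char             buf[256];
	} req;
	struct rtattr *linkinfo, *infodata;
	unsigned char mode = 4;		/* 802.3ad, consumed via IFLA_BOND_MODE */
	unsigned int miimon = 100;	/* consumed via IFLA_BOND_MIIMON */
	int fd;

	memset(&req, 0, sizeof(req));
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
	req.n.nlmsg_type = RTM_NEWLINK;
	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;

	put_attr(&req.n, IFLA_IFNAME, "bond0", 6);
	linkinfo = nest_start(&req.n, IFLA_LINKINFO);
	put_attr(&req.n, IFLA_INFO_KIND, "bond", 5);
	infodata = nest_start(&req.n, IFLA_INFO_DATA);
	put_attr(&req.n, IFLA_BOND_MODE, &mode, sizeof(mode));
	put_attr(&req.n, IFLA_BOND_MIIMON, &miimon, sizeof(miimon));
	infodata->rta_len = (char *)&req.n + req.n.nlmsg_len - (char *)infodata;
	linkinfo->rta_len = (char *)&req.n + req.n.nlmsg_len - (char *)linkinfo;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0 || sendto(fd, &req, req.n.nlmsg_len, 0,
			     (struct sockaddr *)&kernel, sizeof(kernel)) < 0)
		perror("rtnetlink");
	close(fd);
	return 0;
}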
25 25 4 21 152 152 152 5 2 4 15 15 15 15 11 102 102 24 63 63 33 1 3 8 23 31 42 31 2 36 35 4 33 33 40 243 243 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 // SPDX-License-Identifier: GPL-2.0-only /* * irqchip.c: Common API for in kernel interrupt controllers * Copyright (c) 2007, Intel Corporation. * Copyright 2010 Red Hat, Inc. and/or its affiliates. * Copyright (c) 2013, Alexander Graf <agraf@suse.de> * * This file is derived from virt/kvm/irq_comm.c. * * Authors: * Yaozu (Eddie) Dong <Eddie.dong@intel.com> * Alexander Graf <agraf@suse.de> */ #include <linux/kvm_host.h> #include <linux/slab.h> #include <linux/srcu.h> #include <linux/export.h> #include <trace/events/kvm.h> int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *entries, int gsi) { struct kvm_irq_routing_table *irq_rt; struct kvm_kernel_irq_routing_entry *e; int n = 0; irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, lockdep_is_held(&kvm->irq_lock)); if (irq_rt && gsi < irq_rt->nr_rt_entries) { hlist_for_each_entry(e, &irq_rt->map[gsi], link) { entries[n] = *e; ++n; } } return n; } int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_routing_table *irq_rt; irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); return irq_rt->chip[irqchip][pin]; } int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) { struct kvm_kernel_irq_routing_entry route; if (!kvm_arch_irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID)) return -EINVAL; route.msi.address_lo = msi->address_lo; route.msi.address_hi = msi->address_hi; route.msi.data = msi->data; route.msi.flags = msi->flags; route.msi.devid = msi->devid; return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); } /* * Return value: * < 0 Interrupt was ignored (masked or not delivered for other reasons) * = 0 Interrupt was coalesced (previous irq is still pending) * > 0 Number of CPUs interrupt was delivered to */ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; int ret = -1, i, idx; trace_kvm_set_irq(irq, level, irq_source_id); /* Not possible to detect if the guest uses the PIC or the * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ idx = srcu_read_lock(&kvm->irq_srcu); i = kvm_irq_map_gsi(kvm, irq_set, irq); srcu_read_unlock(&kvm->irq_srcu, idx); while (i--) { int r; r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level, line_status); if (r < 0) continue; ret = r + ((ret < 0) ? 
0 : ret); } return ret; } static void free_irq_routing_table(struct kvm_irq_routing_table *rt) { int i; if (!rt) return; for (i = 0; i < rt->nr_rt_entries; ++i) { struct kvm_kernel_irq_routing_entry *e; struct hlist_node *n; hlist_for_each_entry_safe(e, n, &rt->map[i], link) { hlist_del(&e->link); kfree(e); } } kfree(rt); } void kvm_free_irq_routing(struct kvm *kvm) { /* Called only during vm destruction. Nobody can use the pointer at this stage */ struct kvm_irq_routing_table *rt = rcu_access_pointer(kvm->irq_routing); free_irq_routing_table(rt); } static int setup_routing_entry(struct kvm *kvm, struct kvm_irq_routing_table *rt, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { struct kvm_kernel_irq_routing_entry *ei; int r; u32 gsi = array_index_nospec(ue->gsi, KVM_MAX_IRQ_ROUTES); /* * Do not allow GSI to be mapped to the same irqchip more than once. * Allow only one to one mapping between GSI and non-irqchip routing. */ hlist_for_each_entry(ei, &rt->map[gsi], link) if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || ue->type != KVM_IRQ_ROUTING_IRQCHIP || ue->u.irqchip.irqchip == ei->irqchip.irqchip) return -EINVAL; e->gsi = gsi; e->type = ue->type; r = kvm_set_routing_entry(kvm, e, ue); if (r) return r; if (e->type == KVM_IRQ_ROUTING_IRQCHIP) rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi; hlist_add_head(&e->link, &rt->map[e->gsi]); return 0; } void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm) { } bool __weak kvm_arch_can_set_irq_routing(struct kvm *kvm) { return true; } int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *ue, unsigned nr, unsigned flags) { struct kvm_irq_routing_table *new, *old; struct kvm_kernel_irq_routing_entry *e; u32 i, j, nr_rt_entries = 0; int r; for (i = 0; i < nr; ++i) { if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) return -EINVAL; nr_rt_entries = max(nr_rt_entries, ue[i].gsi); } nr_rt_entries += 1; new = kzalloc(struct_size(new, map, nr_rt_entries), GFP_KERNEL_ACCOUNT); if (!new) return -ENOMEM; new->nr_rt_entries = nr_rt_entries; for (i = 0; i < KVM_NR_IRQCHIPS; i++) for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++) new->chip[i][j] = -1; for (i = 0; i < nr; ++i) { r = -ENOMEM; e = kzalloc(sizeof(*e), GFP_KERNEL_ACCOUNT); if (!e) goto out; r = -EINVAL; switch (ue->type) { case KVM_IRQ_ROUTING_MSI: if (ue->flags & ~KVM_MSI_VALID_DEVID) goto free_entry; break; default: if (ue->flags) goto free_entry; break; } r = setup_routing_entry(kvm, new, e, ue); if (r) goto free_entry; ++ue; } mutex_lock(&kvm->irq_lock); old = rcu_dereference_protected(kvm->irq_routing, 1); rcu_assign_pointer(kvm->irq_routing, new); kvm_irq_routing_update(kvm); kvm_arch_irq_routing_update(kvm); mutex_unlock(&kvm->irq_lock); kvm_arch_post_irq_routing_update(kvm); synchronize_srcu_expedited(&kvm->irq_srcu); new = old; r = 0; goto out; free_entry: kfree(e); out: free_irq_routing_table(new); return r; } /* * Allocate empty IRQ routing by default so that additional setup isn't needed * when userspace-driven IRQ routing is activated, and so that kvm->irq_routing * is guaranteed to be non-NULL. */ int kvm_init_irq_routing(struct kvm *kvm) { struct kvm_irq_routing_table *new; int chip_size; new = kzalloc(struct_size(new, map, 1), GFP_KERNEL_ACCOUNT); if (!new) return -ENOMEM; new->nr_rt_entries = 1; chip_size = sizeof(int) * KVM_NR_IRQCHIPS * KVM_IRQCHIP_NUM_PINS; memset(new->chip, -1, chip_size); RCU_INIT_POINTER(kvm->irq_routing, new); return 0; }
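/*
 * Illustrative userspace sketch, not part of the KVM code above: installing a
 * GSI routing table with the KVM_SET_GSI_ROUTING ioctl, which is serviced by
 * kvm_set_irq_routing() above.  Assumes an x86 host with an in-kernel irqchip;
 * the GSI number and the MSI address/data values are placeholders.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = kvm < 0 ? -1 : ioctl(kvm, KVM_CREATE_VM, 0);

	if (vm < 0) {
		perror("kvm");
		return 1;
	}

	/* Userspace-defined routing needs an in-kernel irqchip on x86 */
	if (ioctl(vm, KVM_CREATE_IRQCHIP, 0) < 0) {
		perror("KVM_CREATE_IRQCHIP");
		return 1;
	}

	/* One routing entry: GSI 30 delivered as an MSI */
	struct {
		struct kvm_irq_routing       hdr;
		struct kvm_irq_routing_entry entries[1];
	} table;

	memset(&table, 0, sizeof(table));
	table.hdr.nr = 1;
	table.entries[0].gsi = 30;
	table.entries[0].type = KVM_IRQ_ROUTING_MSI;
	table.entries[0].u.msi.address_lo = 0xfee00000;	/* placeholder address */
	table.entries[0].u.msi.data = 0x40;		/* placeholder vector */

	/* Reaches kvm_set_irq_routing(), which builds and publishes the table */
	if (ioctl(vm, KVM_SET_GSI_ROUTING, &table) < 0)
		perror("KVM_SET_GSI_ROUTING");

	/* KVM_IRQ_LINE or an irqfd can now raise GSI 30 through kvm_set_irq() */
	close(vm);
	close(kvm);
	return 0;
}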
3383 3394 3389 3389 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2019 Facebook * Copyright 2020 Google LLC. */ #include <linux/rculist.h> #include <linux/list.h> #include <linux/hash.h> #include <linux/types.h> #include <linux/spinlock.h> #include <linux/bpf.h> #include <linux/bpf_local_storage.h> #include <net/sock.h> #include <uapi/linux/sock_diag.h> #include <uapi/linux/btf.h> #include <linux/bpf_lsm.h> #include <linux/btf_ids.h> #include <linux/fdtable.h> #include <linux/rcupdate_trace.h> DEFINE_BPF_STORAGE_CACHE(inode_cache); static struct bpf_local_storage __rcu ** inode_storage_ptr(void *owner) { struct inode *inode = owner; struct bpf_storage_blob *bsb; bsb = bpf_inode(inode); if (!bsb) return NULL; return &bsb->storage; } static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode, struct bpf_map *map, bool cacheit_lockit) { struct bpf_local_storage *inode_storage; struct bpf_local_storage_map *smap; struct bpf_storage_blob *bsb; bsb = bpf_inode(inode); if (!bsb) return NULL; inode_storage = rcu_dereference_check(bsb->storage, bpf_rcu_lock_held()); if (!inode_storage) return NULL; smap = (struct bpf_local_storage_map *)map; return bpf_local_storage_lookup(inode_storage, smap, cacheit_lockit); } void bpf_inode_storage_free(struct inode *inode) { struct bpf_local_storage *local_storage; struct bpf_storage_blob *bsb; bsb = bpf_inode(inode); if (!bsb) return; rcu_read_lock(); local_storage = rcu_dereference(bsb->storage); if (!local_storage) { rcu_read_unlock(); return; } bpf_local_storage_destroy(local_storage); rcu_read_unlock(); } static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key) { struct bpf_local_storage_data *sdata; CLASS(fd_raw, f)(*(int *)key); if (fd_empty(f)) return ERR_PTR(-EBADF); sdata = inode_storage_lookup(file_inode(fd_file(f)), map, true); return sdata ? 
sdata->data : NULL; } static long bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; CLASS(fd_raw, f)(*(int *)key); if (fd_empty(f)) return -EBADF; if (!inode_storage_ptr(file_inode(fd_file(f)))) return -EBADF; sdata = bpf_local_storage_update(file_inode(fd_file(f)), (struct bpf_local_storage_map *)map, value, map_flags, GFP_ATOMIC); return PTR_ERR_OR_ZERO(sdata); } static int inode_storage_delete(struct inode *inode, struct bpf_map *map) { struct bpf_local_storage_data *sdata; sdata = inode_storage_lookup(inode, map, false); if (!sdata) return -ENOENT; bpf_selem_unlink(SELEM(sdata), false); return 0; } static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key) { CLASS(fd_raw, f)(*(int *)key); if (fd_empty(f)) return -EBADF; return inode_storage_delete(file_inode(fd_file(f)), map); } /* *gfp_flags* is a hidden argument provided by the verifier */ BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, void *, value, u64, flags, gfp_t, gfp_flags) { struct bpf_local_storage_data *sdata; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) return (unsigned long)NULL; /* explicitly check that the inode_storage_ptr is not * NULL as inode_storage_lookup returns NULL in this case and * bpf_local_storage_update expects the owner to have a * valid storage pointer. */ if (!inode || !inode_storage_ptr(inode)) return (unsigned long)NULL; sdata = inode_storage_lookup(inode, map, true); if (sdata) return (unsigned long)sdata->data; /* This helper must only called from where the inode is guaranteed * to have a refcount and cannot be freed. */ if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { sdata = bpf_local_storage_update( inode, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST, gfp_flags); return IS_ERR(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data; } return (unsigned long)NULL; } BPF_CALL_2(bpf_inode_storage_delete, struct bpf_map *, map, struct inode *, inode) { WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!inode) return -EINVAL; /* This helper must only called from where the inode is guaranteed * to have a refcount and cannot be freed. 
*/ return inode_storage_delete(inode, map); } static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) { return -ENOTSUPP; } static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr) { return bpf_local_storage_map_alloc(attr, &inode_cache, false); } static void inode_storage_map_free(struct bpf_map *map) { bpf_local_storage_map_free(map, &inode_cache, NULL); } const struct bpf_map_ops inode_storage_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = bpf_local_storage_map_alloc_check, .map_alloc = inode_storage_map_alloc, .map_free = inode_storage_map_free, .map_get_next_key = notsupp_get_next_key, .map_lookup_elem = bpf_fd_inode_storage_lookup_elem, .map_update_elem = bpf_fd_inode_storage_update_elem, .map_delete_elem = bpf_fd_inode_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, .map_mem_usage = bpf_local_storage_map_mem_usage, .map_btf_id = &bpf_local_storage_map_btf_id[0], .map_owner_storage_ptr = inode_storage_ptr, }; BTF_ID_LIST_SINGLE(bpf_inode_storage_btf_ids, struct, inode) const struct bpf_func_proto bpf_inode_storage_get_proto = { .func = bpf_inode_storage_get, .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &bpf_inode_storage_btf_ids[0], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, }; const struct bpf_func_proto bpf_inode_storage_delete_proto = { .func = bpf_inode_storage_delete, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &bpf_inode_storage_btf_ids[0], };
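/*
 * Illustrative BPF-program sketch, not part of the kernel code above: an LSM
 * program using the bpf_inode_storage_get() helper whose kernel side is shown
 * above.  Assumes a libbpf build environment (vmlinux.h, bpf_helpers.h) and a
 * kernel with CONFIG_BPF_LSM enabled; the map name, value layout and the
 * file_open hook are placeholder choices.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

struct inode_meta {
	__u64 opens;
};

/* BPF_MAP_TYPE_INODE_STORAGE maps use BPF_F_NO_PREALLOC and an int key */
struct {
	__uint(type, BPF_MAP_TYPE_INODE_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct inode_meta);
} inode_meta_map SEC(".maps");

SEC("lsm/file_open")
int BPF_PROG(count_opens, struct file *file)
{
	struct inode_meta *meta;

	/* F_CREATE allocates the per-inode slot on first use, as handled by
	 * bpf_inode_storage_get() above */
	meta = bpf_inode_storage_get(&inode_meta_map, file->f_inode, 0,
				     BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (!meta)
		return 0;

	__sync_fetch_and_add(&meta->opens, 1);
	return 0;
}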
5 5 4 4 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 // SPDX-License-Identifier: GPL-2.0-or-later /* Instantiate a public key crypto key from an X.509 Certificate * * Copyright (C) 2012, 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) "ASYM: "fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/err.h> #include <crypto/public_key.h> #include "asymmetric_keys.h" static bool use_builtin_keys; static struct asymmetric_key_id *ca_keyid; #ifndef MODULE static struct { struct asymmetric_key_id id; unsigned char data[10]; } cakey; static int __init ca_keys_setup(char *str) { if (!str) /* default system keyring */ return 1; if (strncmp(str, "id:", 3) == 0) { struct asymmetric_key_id *p = &cakey.id; size_t hexlen = (strlen(str) - 3) / 2; int ret; if (hexlen == 0 || hexlen > sizeof(cakey.data)) { pr_err("Missing or invalid ca_keys id\n"); return 1; } ret = __asymmetric_key_hex_to_key_id(str + 3, p, hexlen); if (ret < 0) pr_err("Unparsable ca_keys id hex string\n"); else ca_keyid = p; /* owner key 'id:xxxxxx' */ } else if (strcmp(str, "builtin") == 0) { use_builtin_keys = true; } return 1; } __setup("ca_keys=", ca_keys_setup); #endif /** * restrict_link_by_signature - Restrict additions to a ring of public keys * @dest_keyring: Keyring being linked to. * @type: The type of key being added. * @payload: The payload of the new key. * @trust_keyring: A ring of keys that can be used to vouch for the new cert. * * Check the new certificate against the ones in the trust keyring. If one of * those is the signing key and validates the new certificate, then mark the * new certificate as being trusted. * * Returns 0 if the new certificate was accepted, -ENOKEY if we couldn't find a * matching parent certificate in the trusted list, -EKEYREJECTED if the * signature check fails or the key is blacklisted, -ENOPKG if the signature * uses unsupported crypto, or some other error if there is a matching * certificate but the signature check cannot be performed. 
*/ int restrict_link_by_signature(struct key *dest_keyring, const struct key_type *type, const union key_payload *payload, struct key *trust_keyring) { const struct public_key_signature *sig; struct key *key; int ret; pr_devel("==>%s()\n", __func__); if (!trust_keyring) return -ENOKEY; if (type != &key_type_asymmetric) return -EOPNOTSUPP; sig = payload->data[asym_auth]; if (!sig) return -ENOPKG; if (!sig->auth_ids[0] && !sig->auth_ids[1] && !sig->auth_ids[2]) return -ENOKEY; if (ca_keyid && !asymmetric_key_id_partial(sig->auth_ids[1], ca_keyid)) return -EPERM; /* See if we have a key that signed this one. */ key = find_asymmetric_key(trust_keyring, sig->auth_ids[0], sig->auth_ids[1], sig->auth_ids[2], false); if (IS_ERR(key)) return -ENOKEY; if (use_builtin_keys && !test_bit(KEY_FLAG_BUILTIN, &key->flags)) ret = -ENOKEY; else if (IS_BUILTIN(CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN) && !strcmp(dest_keyring->description, ".secondary_trusted_keys") && !test_bit(KEY_FLAG_BUILTIN, &key->flags)) ret = -ENOKEY; else ret = verify_signature(key, sig); key_put(key); return ret; } /** * restrict_link_by_ca - Restrict additions to a ring of CA keys * @dest_keyring: Keyring being linked to. * @type: The type of key being added. * @payload: The payload of the new key. * @trust_keyring: Unused. * * Check if the new certificate is a CA. If it is a CA, then mark the new * certificate as being ok to link. * * Returns 0 if the new certificate was accepted, -ENOKEY if the * certificate is not a CA. -ENOPKG if the signature uses unsupported * crypto, or some other error if there is a matching certificate but * the signature check cannot be performed. */ int restrict_link_by_ca(struct key *dest_keyring, const struct key_type *type, const union key_payload *payload, struct key *trust_keyring) { const struct public_key *pkey; if (type != &key_type_asymmetric) return -EOPNOTSUPP; pkey = payload->data[asym_crypto]; if (!pkey) return -ENOPKG; if (!test_bit(KEY_EFLAG_CA, &pkey->key_eflags)) return -ENOKEY; if (!test_bit(KEY_EFLAG_KEYCERTSIGN, &pkey->key_eflags)) return -ENOKEY; if (!IS_ENABLED(CONFIG_INTEGRITY_CA_MACHINE_KEYRING_MAX)) return 0; if (test_bit(KEY_EFLAG_DIGITALSIG, &pkey->key_eflags)) return -ENOKEY; return 0; } /** * restrict_link_by_digsig - Restrict additions to a ring of digsig keys * @dest_keyring: Keyring being linked to. * @type: The type of key being added. * @payload: The payload of the new key. * @trust_keyring: A ring of keys that can be used to vouch for the new cert. * * Check if the new certificate has digitalSignature usage set. If it is, * then mark the new certificate as being ok to link. Afterwards verify * the new certificate against the ones in the trust_keyring. * * Returns 0 if the new certificate was accepted, -ENOKEY if the * certificate is not a digsig. -ENOPKG if the signature uses unsupported * crypto, or some other error if there is a matching certificate but * the signature check cannot be performed. 
*/ int restrict_link_by_digsig(struct key *dest_keyring, const struct key_type *type, const union key_payload *payload, struct key *trust_keyring) { const struct public_key *pkey; if (type != &key_type_asymmetric) return -EOPNOTSUPP; pkey = payload->data[asym_crypto]; if (!pkey) return -ENOPKG; if (!test_bit(KEY_EFLAG_DIGITALSIG, &pkey->key_eflags)) return -ENOKEY; if (test_bit(KEY_EFLAG_CA, &pkey->key_eflags)) return -ENOKEY; if (test_bit(KEY_EFLAG_KEYCERTSIGN, &pkey->key_eflags)) return -ENOKEY; return restrict_link_by_signature(dest_keyring, type, payload, trust_keyring); } static bool match_either_id(const struct asymmetric_key_id **pair, const struct asymmetric_key_id *single) { return (asymmetric_key_id_same(pair[0], single) || asymmetric_key_id_same(pair[1], single)); } static int key_or_keyring_common(struct key *dest_keyring, const struct key_type *type, const union key_payload *payload, struct key *trusted, bool check_dest) { const struct public_key_signature *sig; struct key *key = NULL; int ret; pr_devel("==>%s()\n", __func__); if (!dest_keyring) return -ENOKEY; else if (dest_keyring->type != &key_type_keyring) return -EOPNOTSUPP; if (!trusted && !check_dest) return -ENOKEY; if (type != &key_type_asymmetric) return -EOPNOTSUPP; sig = payload->data[asym_auth]; if (!sig) return -ENOPKG; if (!sig->auth_ids[0] && !sig->auth_ids[1] && !sig->auth_ids[2]) return -ENOKEY; if (trusted) { if (trusted->type == &key_type_keyring) { /* See if we have a key that signed this one. */ key = find_asymmetric_key(trusted, sig->auth_ids[0], sig->auth_ids[1], sig->auth_ids[2], false); if (IS_ERR(key)) key = NULL; } else if (trusted->type == &key_type_asymmetric) { const struct asymmetric_key_id **signer_ids; signer_ids = (const struct asymmetric_key_id **) asymmetric_key_ids(trusted)->id; /* * The auth_ids come from the candidate key (the * one that is being considered for addition to * dest_keyring) and identify the key that was * used to sign. * * The signer_ids are identifiers for the * signing key specified for dest_keyring. * * The first auth_id is the preferred id, 2nd and * 3rd are the fallbacks. If exactly one of * auth_ids[0] and auth_ids[1] is present, it may * match either signer_ids[0] or signed_ids[1]. * If both are present the first one may match * either signed_id but the second one must match * the second signer_id. If neither of them is * available, auth_ids[2] is matched against * signer_ids[2] as a fallback. */ if (!sig->auth_ids[0] && !sig->auth_ids[1]) { if (asymmetric_key_id_same(signer_ids[2], sig->auth_ids[2])) key = __key_get(trusted); } else if (!sig->auth_ids[0] || !sig->auth_ids[1]) { const struct asymmetric_key_id *auth_id; auth_id = sig->auth_ids[0] ?: sig->auth_ids[1]; if (match_either_id(signer_ids, auth_id)) key = __key_get(trusted); } else if (asymmetric_key_id_same(signer_ids[1], sig->auth_ids[1]) && match_either_id(signer_ids, sig->auth_ids[0])) { key = __key_get(trusted); } } else { return -EOPNOTSUPP; } } if (check_dest && !key) { /* See if the destination has a key that signed this one. */ key = find_asymmetric_key(dest_keyring, sig->auth_ids[0], sig->auth_ids[1], sig->auth_ids[2], false); if (IS_ERR(key)) key = NULL; } if (!key) return -ENOKEY; ret = key_validate(key); if (ret == 0) ret = verify_signature(key, sig); key_put(key); return ret; } /** * restrict_link_by_key_or_keyring - Restrict additions to a ring of public * keys using the restrict_key information stored in the ring. * @dest_keyring: Keyring being linked to. * @type: The type of key being added. 
* @payload: The payload of the new key. * @trusted: A key or ring of keys that can be used to vouch for the new cert. * * Check the new certificate only against the key or keys passed in the data * parameter. If one of those is the signing key and validates the new * certificate, then mark the new certificate as being ok to link. * * Returns 0 if the new certificate was accepted, -ENOKEY if we * couldn't find a matching parent certificate in the trusted list, * -EKEYREJECTED if the signature check fails, -ENOPKG if the signature uses * unsupported crypto, or some other error if there is a matching certificate * but the signature check cannot be performed. */ int restrict_link_by_key_or_keyring(struct key *dest_keyring, const struct key_type *type, const union key_payload *payload, struct key *trusted) { return key_or_keyring_common(dest_keyring, type, payload, trusted, false); } /** * restrict_link_by_key_or_keyring_chain - Restrict additions to a ring of * public keys using the restrict_key information stored in the ring. * @dest_keyring: Keyring being linked to. * @type: The type of key being added. * @payload: The payload of the new key. * @trusted: A key or ring of keys that can be used to vouch for the new cert. * * Check the new certificate against the key or keys passed in the data * parameter and against the keys already linked to the destination keyring. If * one of those is the signing key and validates the new certificate, then mark * the new certificate as being ok to link. * * Returns 0 if the new certificate was accepted, -ENOKEY if we * couldn't find a matching parent certificate in the trusted list, * -EKEYREJECTED if the signature check fails, -ENOPKG if the signature uses * unsupported crypto, or some other error if there is a matching certificate * but the signature check cannot be performed. */ int restrict_link_by_key_or_keyring_chain(struct key *dest_keyring, const struct key_type *type, const union key_payload *payload, struct key *trusted) { return key_or_keyring_common(dest_keyring, type, payload, trusted, true); }
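/*
 * Illustrative sketch (not part of this file): one plausible way a caller can
 * hook restrict_link_by_key_or_keyring() up to a new keyring through a
 * struct key_restriction, so that only certificates vouched for by @trusted
 * can be linked in.  The helper name is hypothetical and error handling is
 * kept minimal; the caller is assumed to hold a reference on @trusted for the
 * lifetime of the keyring.
 */
#include <linux/cred.h>
#include <linux/err.h>
#include <linux/key.h>
#include <linux/slab.h>

static struct key *example_restricted_keyring(struct key *trusted)
{
	struct key_restriction *restriction;
	struct key *keyring;

	restriction = kzalloc(sizeof(*restriction), GFP_KERNEL);
	if (!restriction)
		return ERR_PTR(-ENOMEM);

	/* Every key added later must carry a signature checkable by @trusted. */
	restriction->check = restrict_link_by_key_or_keyring;
	restriction->key = trusted;

	keyring = keyring_alloc(".example-restricted",
				GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
				(KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW,
				KEY_ALLOC_NOT_IN_QUOTA, restriction, NULL);
	if (IS_ERR(keyring))
		kfree(restriction);

	return keyring;
}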
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_UNWIND_H #define _ASM_X86_UNWIND_H #include <linux/sched.h> #include <linux/ftrace.h> #include <linux/rethook.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> #define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip)) #define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET) struct unwind_state { struct stack_info stack_info; unsigned long stack_mask; struct task_struct *task; int graph_idx; #if defined(CONFIG_RETHOOK) struct llist_node *kr_cur; #endif bool error; #if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; unsigned long sp, bp, ip; struct pt_regs *regs, *prev_regs; #elif defined(CONFIG_UNWINDER_FRAME_POINTER) bool got_irq; unsigned long *bp, *orig_sp, ip; /* * If non-NULL: The current frame is incomplete and doesn't contain a * valid BP. When looking for the next frame, use this instead of the * non-existent saved BP. */ unsigned long *next_bp; struct pt_regs *regs; #else unsigned long *sp; #endif }; void __unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame); bool unwind_next_frame(struct unwind_state *state); unsigned long unwind_get_return_address(struct unwind_state *state); unsigned long *unwind_get_return_address_ptr(struct unwind_state *state); static inline bool unwind_done(struct unwind_state *state) { return state->stack_info.type == STACK_TYPE_UNKNOWN; } static inline bool unwind_error(struct unwind_state *state) { return state->error; } static inline void unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame) { first_frame = first_frame ? : get_stack_pointer(task, regs); __unwind_start(state, task, regs, first_frame); } #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) /* * If 'partial' returns true, only the iret frame registers are valid. */ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, bool *partial) { if (unwind_done(state)) return NULL; if (partial) { #ifdef CONFIG_UNWINDER_ORC *partial = !state->full_regs; #else *partial = false; #endif } return state->regs; } #else static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, bool *partial) { return NULL; } #endif #ifdef CONFIG_UNWINDER_ORC void unwind_init(void); void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size); #else static inline void unwind_init(void) {} static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} #endif static inline unsigned long unwind_recover_rethook(struct unwind_state *state, unsigned long addr, unsigned long *addr_p) { #ifdef CONFIG_RETHOOK if (is_rethook_trampoline(addr)) return rethook_find_ret_addr(state->task, (unsigned long)addr_p, &state->kr_cur); #endif return addr; } /* Recover the return address modified by rethook and ftrace_graph.
*/ static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state, unsigned long addr, unsigned long *addr_p) { unsigned long ret; ret = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr, addr_p); return unwind_recover_rethook(state, ret, addr_p); } /* * This disables KASAN checking when reading a value from another task's stack, * since the other task could be running on another CPU and could have poisoned * the stack in the meantime. */ #define READ_ONCE_TASK_STACK(task, x) \ ({ \ unsigned long val; \ if (task == current) \ val = READ_ONCE(x); \ else \ val = READ_ONCE_NOCHECK(x); \ val; \ }) static inline bool task_on_another_cpu(struct task_struct *task) { #ifdef CONFIG_SMP return task != current && task->on_cpu; #else return false; #endif } #endif /* _ASM_X86_UNWIND_H */
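/*
 * Illustrative sketch (not part of this header): the typical consumer-side
 * loop over these primitives, in the style of the x86 stack tracers.  The
 * function name is hypothetical; @task must not be running on another CPU
 * while it is being unwound.
 */
static void example_dump_stack(struct task_struct *task)
{
	struct unwind_state state;
	unsigned long addr;

	for (unwind_start(&state, task, NULL, NULL);
	     !unwind_done(&state);
	     unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
		if (!addr)
			break;
		pr_info("  %pS\n", (void *)addr);
	}
}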
#ifndef __DRM_VMA_MANAGER_H__ #define __DRM_VMA_MANAGER_H__ /* * Copyright (c) 2013 David Herrmann <dh.herrmann@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <drm/drm_mm.h> #include <linux/mm.h> #include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/types.h> /* We make up offsets for buffer objects so we can recognize them at * mmap time.
pgoff in mmap is an unsigned long, so we need to make sure * that the faked up offset will fit */ #if BITS_PER_LONG == 64 #define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFUL >> PAGE_SHIFT) + 1) #define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 256) #else #define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFUL >> PAGE_SHIFT) + 1) #define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFUL >> PAGE_SHIFT) * 16) #endif struct drm_file; struct drm_vma_offset_file { struct rb_node vm_rb; struct drm_file *vm_tag; unsigned long vm_count; }; struct drm_vma_offset_node { rwlock_t vm_lock; struct drm_mm_node vm_node; struct rb_root vm_files; void *driver_private; }; struct drm_vma_offset_manager { rwlock_t vm_lock; struct drm_mm vm_addr_space_mm; }; void drm_vma_offset_manager_init(struct drm_vma_offset_manager *mgr, unsigned long page_offset, unsigned long size); void drm_vma_offset_manager_destroy(struct drm_vma_offset_manager *mgr); struct drm_vma_offset_node *drm_vma_offset_lookup_locked(struct drm_vma_offset_manager *mgr, unsigned long start, unsigned long pages); int drm_vma_offset_add(struct drm_vma_offset_manager *mgr, struct drm_vma_offset_node *node, unsigned long pages); void drm_vma_offset_remove(struct drm_vma_offset_manager *mgr, struct drm_vma_offset_node *node); int drm_vma_node_allow(struct drm_vma_offset_node *node, struct drm_file *tag); int drm_vma_node_allow_once(struct drm_vma_offset_node *node, struct drm_file *tag); void drm_vma_node_revoke(struct drm_vma_offset_node *node, struct drm_file *tag); bool drm_vma_node_is_allowed(struct drm_vma_offset_node *node, struct drm_file *tag); /** * drm_vma_offset_exact_lookup_locked() - Look up node by exact address * @mgr: Manager object * @start: Start address (page-based, not byte-based) * @pages: Size of object (page-based) * * Same as drm_vma_offset_lookup_locked() but does not allow any offset into the node. * It only returns the exact object with the given start address. * * RETURNS: * Node at exact start address @start. */ static inline struct drm_vma_offset_node * drm_vma_offset_exact_lookup_locked(struct drm_vma_offset_manager *mgr, unsigned long start, unsigned long pages) { struct drm_vma_offset_node *node; node = drm_vma_offset_lookup_locked(mgr, start, pages); return (node && node->vm_node.start == start) ? node : NULL; } /** * drm_vma_offset_lock_lookup() - Lock lookup for extended private use * @mgr: Manager object * * Lock VMA manager for extended lookups. Only locked VMA function calls * are allowed while holding this lock. All other contexts are blocked from VMA * until the lock is released via drm_vma_offset_unlock_lookup(). * * Use this if you need to take a reference to the objects returned by * drm_vma_offset_lookup_locked() before releasing this lock again. * * This lock must not be used for anything else than extended lookups. You must * not call any other VMA helpers while holding this lock. * * Note: You're in atomic-context while holding this lock! */ static inline void drm_vma_offset_lock_lookup(struct drm_vma_offset_manager *mgr) { read_lock(&mgr->vm_lock); } /** * drm_vma_offset_unlock_lookup() - Unlock lookup for extended private use * @mgr: Manager object * * Release lookup-lock. See drm_vma_offset_lock_lookup() for more information. */ static inline void drm_vma_offset_unlock_lookup(struct drm_vma_offset_manager *mgr) { read_unlock(&mgr->vm_lock); } /** * drm_vma_node_reset() - Initialize or reset node object * @node: Node to initialize or reset * * Reset a node to its initial state. 
This must be called before using it with * any VMA offset manager. * * This must not be called on an already allocated node, or you will leak * memory. */ static inline void drm_vma_node_reset(struct drm_vma_offset_node *node) { memset(node, 0, sizeof(*node)); node->vm_files = RB_ROOT; rwlock_init(&node->vm_lock); } /** * drm_vma_node_start() - Return start address for page-based addressing * @node: Node to inspect * * Return the start address of the given node. This can be used as offset into * the linear VM space that is provided by the VMA offset manager. Note that * this can only be used for page-based addressing. If you need a proper offset * for user-space mappings, you must apply "<< PAGE_SHIFT" or use the * drm_vma_node_offset_addr() helper instead. * * RETURNS: * Start address of @node for page-based addressing. 0 if the node does not * have an offset allocated. */ static inline unsigned long drm_vma_node_start(const struct drm_vma_offset_node *node) { return node->vm_node.start; } /** * drm_vma_node_size() - Return size (page-based) * @node: Node to inspect * * Return the size as number of pages for the given node. This is the same size * that was passed to drm_vma_offset_add(). If no offset is allocated for the * node, this is 0. * * RETURNS: * Size of @node as number of pages. 0 if the node does not have an offset * allocated. */ static inline unsigned long drm_vma_node_size(struct drm_vma_offset_node *node) { return node->vm_node.size; } /** * drm_vma_node_offset_addr() - Return sanitized offset for user-space mmaps * @node: Linked offset node * * Same as drm_vma_node_start() but returns the address as a valid offset that * can be used for user-space mappings during mmap(). * This must not be called on unlinked nodes. * * RETURNS: * Offset of @node for byte-based addressing. 0 if the node does not have an * object allocated. */ static inline __u64 drm_vma_node_offset_addr(struct drm_vma_offset_node *node) { return ((__u64)node->vm_node.start) << PAGE_SHIFT; } /** * drm_vma_node_unmap() - Unmap offset node * @node: Offset node * @file_mapping: Address space to unmap @node from * * Unmap all userspace mappings for a given offset node. The mappings must be * associated with the @file_mapping address-space. If no offset exists * nothing is done. * * This call is unlocked. The caller must guarantee that drm_vma_offset_remove() * is not called on this node concurrently. */ static inline void drm_vma_node_unmap(struct drm_vma_offset_node *node, struct address_space *file_mapping) { if (drm_mm_node_allocated(&node->vm_node)) unmap_mapping_range(file_mapping, drm_vma_node_offset_addr(node), drm_vma_node_size(node) << PAGE_SHIFT, 1); } /** * drm_vma_node_verify_access() - Access verification helper for TTM * @node: Offset node * @tag: Tag of file to check * * This checks whether @tag is granted access to @node. It is the same as * drm_vma_node_is_allowed() but suitable as drop-in helper for TTM * verify_access() callbacks. * * RETURNS: * 0 if access is granted, -EACCES otherwise. */ static inline int drm_vma_node_verify_access(struct drm_vma_offset_node *node, struct drm_file *tag) { return drm_vma_node_is_allowed(node, tag) ? 0 : -EACCES; } #endif /* __DRM_VMA_MANAGER_H__ */
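/*
 * Illustrative sketch (not part of this header): an extended lookup under the
 * lookup lock, in the style of drm_gem_mmap().  The my_bo type and
 * my_bo_lookup() helper are hypothetical; they only exist to show how a
 * reference is taken while the manager is still locked.
 */
#include <linux/container_of.h>
#include <linux/kref.h>

struct my_bo {
	struct kref refcount;
	struct drm_vma_offset_node vma_node;
};

static struct my_bo *my_bo_lookup(struct drm_vma_offset_manager *mgr,
				  unsigned long pgoff, unsigned long pages)
{
	struct drm_vma_offset_node *node;
	struct my_bo *bo = NULL;

	drm_vma_offset_lock_lookup(mgr);
	node = drm_vma_offset_exact_lookup_locked(mgr, pgoff, pages);
	if (node) {
		bo = container_of(node, struct my_bo, vma_node);
		/* Grab a reference before dropping the lookup lock. */
		if (!kref_get_unless_zero(&bo->refcount))
			bo = NULL;
	}
	drm_vma_offset_unlock_lookup(mgr);

	return bo;
}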
// SPDX-License-Identifier: GPL-2.0-only /* Kernel module to match ESP parameters. */ /* (C) 1999-2000 Yon Uriarte <yon@astaro.de> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/netfilter/xt_esp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>"); MODULE_DESCRIPTION("Xtables: IPsec-ESP packet match"); MODULE_ALIAS("ipt_esp"); MODULE_ALIAS("ip6t_esp"); /* Returns 1 if the spi is matched by the range, 0 otherwise */ static inline bool spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { bool r; pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", invert ? '!' : ' ', min, spi, max); r = (spi >= min && spi <= max) ^ invert; pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } static bool esp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ip_esp_hdr *eh; struct ip_esp_hdr _esp; const struct xt_esp *espinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; eh = skb_header_pointer(skb, par->thoff, sizeof(_esp), &_esp); if (eh == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ pr_debug("Dropping evil ESP tinygram.\n"); par->hotdrop = true; return false; } return spi_match(espinfo->spis[0], espinfo->spis[1], ntohl(eh->spi), !!(espinfo->invflags & XT_ESP_INV_SPI)); } static int esp_mt_check(const struct xt_mtchk_param *par) { const struct xt_esp *espinfo = par->matchinfo; if (espinfo->invflags & ~XT_ESP_INV_MASK) { pr_debug("unknown flags %X\n", espinfo->invflags); return -EINVAL; } return 0; } static struct xt_match esp_mt_reg[] __read_mostly = { { .name = "esp", .family = NFPROTO_IPV4, .checkentry = esp_mt_check, .match = esp_mt, .matchsize = sizeof(struct xt_esp), .proto = IPPROTO_ESP, .me = THIS_MODULE, }, { .name = "esp", .family = NFPROTO_IPV6, .checkentry = esp_mt_check, .match = esp_mt, .matchsize = sizeof(struct xt_esp), .proto = IPPROTO_ESP, .me = THIS_MODULE, }, }; static int __init esp_mt_init(void) { return xt_register_matches(esp_mt_reg, ARRAY_SIZE(esp_mt_reg)); } static void __exit esp_mt_exit(void) { xt_unregister_matches(esp_mt_reg, ARRAY_SIZE(esp_mt_reg)); } module_init(esp_mt_init); module_exit(esp_mt_exit);
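/*
 * Illustrative usage note (not part of this file): this match backs rules of
 * the following shape, where --espspi takes a single SPI or an inclusive
 * range and may be negated with '!':
 *
 *   iptables  -A INPUT -p esp -m esp --espspi 0x100:0x1ff -j ACCEPT
 *   ip6tables -A INPUT -p esp -m esp ! --espspi 0x0 -j DROP
 */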
// SPDX-License-Identifier: GPL-2.0 /* * class.c - basic device class management * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2003-2004 Greg Kroah-Hartman * Copyright (c) 2003-2004 IBM Corp.
*/ #include <linux/device/class.h> #include <linux/device.h> #include <linux/module.h> #include <linux/init.h> #include <linux/string.h> #include <linux/kdev_t.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/mutex.h> #include "base.h" /* /sys/class */ static struct kset *class_kset; #define to_class_attr(_attr) container_of(_attr, struct class_attribute, attr) /** * class_to_subsys - Turn a struct class into a struct subsys_private * * @class: pointer to the struct bus_type to look up * * The driver core internals need to work on the subsys_private structure, not * the external struct class pointer. This function walks the list of * registered classes in the system and finds the matching one and returns the * internal struct subsys_private that relates to that class. * * Note, the reference count of the return value is INCREMENTED if it is not * NULL. A call to subsys_put() must be done when finished with the pointer in * order for it to be properly freed. */ struct subsys_private *class_to_subsys(const struct class *class) { struct subsys_private *sp = NULL; struct kobject *kobj; if (!class || !class_kset) return NULL; spin_lock(&class_kset->list_lock); if (list_empty(&class_kset->list)) goto done; list_for_each_entry(kobj, &class_kset->list, entry) { struct kset *kset = container_of(kobj, struct kset, kobj); sp = container_of_const(kset, struct subsys_private, subsys); if (sp->class == class) goto done; } sp = NULL; done: sp = subsys_get(sp); spin_unlock(&class_kset->list_lock); return sp; } static ssize_t class_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct class_attribute *class_attr = to_class_attr(attr); struct subsys_private *cp = to_subsys_private(kobj); ssize_t ret = -EIO; if (class_attr->show) ret = class_attr->show(cp->class, class_attr, buf); return ret; } static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct class_attribute *class_attr = to_class_attr(attr); struct subsys_private *cp = to_subsys_private(kobj); ssize_t ret = -EIO; if (class_attr->store) ret = class_attr->store(cp->class, class_attr, buf, count); return ret; } static void class_release(struct kobject *kobj) { struct subsys_private *cp = to_subsys_private(kobj); const struct class *class = cp->class; pr_debug("class '%s': release.\n", class->name); if (class->class_release) class->class_release(class); else pr_debug("class '%s' does not have a release() function, " "be careful\n", class->name); lockdep_unregister_key(&cp->lock_key); kfree(cp); } static const struct kobj_ns_type_operations *class_child_ns_type(const struct kobject *kobj) { const struct subsys_private *cp = to_subsys_private(kobj); const struct class *class = cp->class; return class->ns_type; } static const struct sysfs_ops class_sysfs_ops = { .show = class_attr_show, .store = class_attr_store, }; static const struct kobj_type class_ktype = { .sysfs_ops = &class_sysfs_ops, .release = class_release, .child_ns_type = class_child_ns_type, }; int class_create_file_ns(const struct class *cls, const struct class_attribute *attr, const void *ns) { struct subsys_private *sp = class_to_subsys(cls); int error; if (!sp) return -EINVAL; error = sysfs_create_file_ns(&sp->subsys.kobj, &attr->attr, ns); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(class_create_file_ns); void class_remove_file_ns(const struct class *cls, const struct class_attribute *attr, const void *ns) { struct subsys_private *sp = class_to_subsys(cls); 
if (!sp) return; sysfs_remove_file_ns(&sp->subsys.kobj, &attr->attr, ns); subsys_put(sp); } EXPORT_SYMBOL_GPL(class_remove_file_ns); static struct device *klist_class_to_dev(struct klist_node *n) { struct device_private *p = to_device_private_class(n); return p->device; } static void klist_class_dev_get(struct klist_node *n) { struct device *dev = klist_class_to_dev(n); get_device(dev); } static void klist_class_dev_put(struct klist_node *n) { struct device *dev = klist_class_to_dev(n); put_device(dev); } int class_register(const struct class *cls) { struct subsys_private *cp; struct lock_class_key *key; int error; pr_debug("device class '%s': registering\n", cls->name); if (cls->ns_type && !cls->namespace) { pr_err("%s: class '%s' does not have namespace\n", __func__, cls->name); return -EINVAL; } if (!cls->ns_type && cls->namespace) { pr_err("%s: class '%s' does not have ns_type\n", __func__, cls->name); return -EINVAL; } cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; klist_init(&cp->klist_devices, klist_class_dev_get, klist_class_dev_put); INIT_LIST_HEAD(&cp->interfaces); kset_init(&cp->glue_dirs); key = &cp->lock_key; lockdep_register_key(key); __mutex_init(&cp->mutex, "subsys mutex", key); error = kobject_set_name(&cp->subsys.kobj, "%s", cls->name); if (error) goto err_out; cp->subsys.kobj.kset = class_kset; cp->subsys.kobj.ktype = &class_ktype; cp->class = cls; error = kset_register(&cp->subsys); if (error) goto err_out; error = sysfs_create_groups(&cp->subsys.kobj, cls->class_groups); if (error) { kobject_del(&cp->subsys.kobj); kfree_const(cp->subsys.kobj.name); goto err_out; } return 0; err_out: lockdep_unregister_key(key); kfree(cp); return error; } EXPORT_SYMBOL_GPL(class_register); void class_unregister(const struct class *cls) { struct subsys_private *sp = class_to_subsys(cls); if (!sp) return; pr_debug("device class '%s': unregistering\n", cls->name); sysfs_remove_groups(&sp->subsys.kobj, cls->class_groups); kset_unregister(&sp->subsys); subsys_put(sp); } EXPORT_SYMBOL_GPL(class_unregister); static void class_create_release(const struct class *cls) { pr_debug("%s called for %s\n", __func__, cls->name); kfree(cls); } /** * class_create - create a struct class structure * @name: pointer to a string for the name of this class. * * This is used to create a struct class pointer that can then be used * in calls to device_create(). * * Returns &struct class pointer on success, or ERR_PTR() on error. * * Note, the pointer created here is to be destroyed when finished by * making a call to class_destroy(). */ struct class *class_create(const char *name) { struct class *cls; int retval; cls = kzalloc(sizeof(*cls), GFP_KERNEL); if (!cls) { retval = -ENOMEM; goto error; } cls->name = name; cls->class_release = class_create_release; retval = class_register(cls); if (retval) goto error; return cls; error: kfree(cls); return ERR_PTR(retval); } EXPORT_SYMBOL_GPL(class_create); /** * class_destroy - destroys a struct class structure * @cls: pointer to the struct class that is to be destroyed * * Note, the pointer to be destroyed must have been created with a call * to class_create(). 
*/ void class_destroy(const struct class *cls) { if (IS_ERR_OR_NULL(cls)) return; class_unregister(cls); } EXPORT_SYMBOL_GPL(class_destroy); /** * class_dev_iter_init - initialize class device iterator * @iter: class iterator to initialize * @class: the class we wanna iterate over * @start: the device to start iterating from, if any * @type: device_type of the devices to iterate over, NULL for all * * Initialize class iterator @iter such that it iterates over devices * of @class. If @start is set, the list iteration will start there, * otherwise if it is NULL, the iteration starts at the beginning of * the list. */ void class_dev_iter_init(struct class_dev_iter *iter, const struct class *class, const struct device *start, const struct device_type *type) { struct subsys_private *sp = class_to_subsys(class); struct klist_node *start_knode = NULL; if (!sp) return; if (start) start_knode = &start->p->knode_class; klist_iter_init_node(&sp->klist_devices, &iter->ki, start_knode); iter->type = type; iter->sp = sp; } EXPORT_SYMBOL_GPL(class_dev_iter_init); /** * class_dev_iter_next - iterate to the next device * @iter: class iterator to proceed * * Proceed @iter to the next device and return it. Returns NULL if * iteration is complete. * * The returned device is referenced and won't be released till * iterator is proceed to the next device or exited. The caller is * free to do whatever it wants to do with the device including * calling back into class code. */ struct device *class_dev_iter_next(struct class_dev_iter *iter) { struct klist_node *knode; struct device *dev; while (1) { knode = klist_next(&iter->ki); if (!knode) return NULL; dev = klist_class_to_dev(knode); if (!iter->type || iter->type == dev->type) return dev; } } EXPORT_SYMBOL_GPL(class_dev_iter_next); /** * class_dev_iter_exit - finish iteration * @iter: class iterator to finish * * Finish an iteration. Always call this function after iteration is * complete whether the iteration ran till the end or not. */ void class_dev_iter_exit(struct class_dev_iter *iter) { klist_iter_exit(&iter->ki); subsys_put(iter->sp); } EXPORT_SYMBOL_GPL(class_dev_iter_exit); /** * class_for_each_device - device iterator * @class: the class we're iterating * @start: the device to start with in the list, if any. * @data: data for the callback * @fn: function to be called for each device * * Iterate over @class's list of devices, and call @fn for each, * passing it @data. If @start is set, the list iteration will start * there, otherwise if it is NULL, the iteration starts at the * beginning of the list. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. * * @fn is allowed to do anything including calling back into class * code. There's no locking restriction. 
*/ int class_for_each_device(const struct class *class, const struct device *start, void *data, int (*fn)(struct device *, void *)) { struct subsys_private *sp = class_to_subsys(class); struct class_dev_iter iter; struct device *dev; int error = 0; if (!class) return -EINVAL; if (!sp) { WARN(1, "%s called for class '%s' before it was initialized", __func__, class->name); return -EINVAL; } class_dev_iter_init(&iter, class, start, NULL); while ((dev = class_dev_iter_next(&iter))) { error = fn(dev, data); if (error) break; } class_dev_iter_exit(&iter); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(class_for_each_device); /** * class_find_device - device iterator for locating a particular device * @class: the class we're iterating * @start: Device to begin with * @data: data for the match function * @match: function to check device * * This is similar to the class_for_each_dev() function above, but it * returns a reference to a device that is 'found' for later use, as * determined by the @match callback. * * The callback should return 0 if the device doesn't match and non-zero * if it does. If the callback returns non-zero, this function will * return to the caller and not iterate over any more devices. * * Note, you will need to drop the reference with put_device() after use. * * @match is allowed to do anything including calling back into class * code. There's no locking restriction. */ struct device *class_find_device(const struct class *class, const struct device *start, const void *data, device_match_t match) { struct subsys_private *sp = class_to_subsys(class); struct class_dev_iter iter; struct device *dev; if (!class) return NULL; if (!sp) { WARN(1, "%s called for class '%s' before it was initialized", __func__, class->name); return NULL; } class_dev_iter_init(&iter, class, start, NULL); while ((dev = class_dev_iter_next(&iter))) { if (match(dev, data)) { get_device(dev); break; } } class_dev_iter_exit(&iter); subsys_put(sp); return dev; } EXPORT_SYMBOL_GPL(class_find_device); int class_interface_register(struct class_interface *class_intf) { struct subsys_private *sp; const struct class *parent; struct class_dev_iter iter; struct device *dev; if (!class_intf || !class_intf->class) return -ENODEV; parent = class_intf->class; sp = class_to_subsys(parent); if (!sp) return -EINVAL; /* * Reference in sp is now incremented and will be dropped when * the interface is removed in the call to class_interface_unregister() */ mutex_lock(&sp->mutex); list_add_tail(&class_intf->node, &sp->interfaces); if (class_intf->add_dev) { class_dev_iter_init(&iter, parent, NULL, NULL); while ((dev = class_dev_iter_next(&iter))) class_intf->add_dev(dev); class_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); return 0; } EXPORT_SYMBOL_GPL(class_interface_register); void class_interface_unregister(struct class_interface *class_intf) { struct subsys_private *sp; const struct class *parent = class_intf->class; struct class_dev_iter iter; struct device *dev; if (!parent) return; sp = class_to_subsys(parent); if (!sp) return; mutex_lock(&sp->mutex); list_del_init(&class_intf->node); if (class_intf->remove_dev) { class_dev_iter_init(&iter, parent, NULL, NULL); while ((dev = class_dev_iter_next(&iter))) class_intf->remove_dev(dev); class_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); /* * Decrement the reference count twice, once for the class_to_subsys() * call in the start of this function, and the second one from the * reference increment in class_interface_register() */ subsys_put(sp); subsys_put(sp); } 
EXPORT_SYMBOL_GPL(class_interface_unregister); ssize_t show_class_attr_string(const struct class *class, const struct class_attribute *attr, char *buf) { struct class_attribute_string *cs; cs = container_of(attr, struct class_attribute_string, attr); return sysfs_emit(buf, "%s\n", cs->str); } EXPORT_SYMBOL_GPL(show_class_attr_string); struct class_compat { struct kobject *kobj; }; /** * class_compat_register - register a compatibility class * @name: the name of the class * * Compatibility class are meant as a temporary user-space compatibility * workaround when converting a family of class devices to a bus devices. */ struct class_compat *class_compat_register(const char *name) { struct class_compat *cls; cls = kmalloc(sizeof(struct class_compat), GFP_KERNEL); if (!cls) return NULL; cls->kobj = kobject_create_and_add(name, &class_kset->kobj); if (!cls->kobj) { kfree(cls); return NULL; } return cls; } EXPORT_SYMBOL_GPL(class_compat_register); /** * class_compat_unregister - unregister a compatibility class * @cls: the class to unregister */ void class_compat_unregister(struct class_compat *cls) { kobject_put(cls->kobj); kfree(cls); } EXPORT_SYMBOL_GPL(class_compat_unregister); /** * class_compat_create_link - create a compatibility class device link to * a bus device * @cls: the compatibility class * @dev: the target bus device * @device_link: an optional device to which a "device" link should be created */ int class_compat_create_link(struct class_compat *cls, struct device *dev, struct device *device_link) { int error; error = sysfs_create_link(cls->kobj, &dev->kobj, dev_name(dev)); if (error) return error; /* * Optionally add a "device" link (typically to the parent), as a * class device would have one and we want to provide as much * backwards compatibility as possible. */ if (device_link) { error = sysfs_create_link(&dev->kobj, &device_link->kobj, "device"); if (error) sysfs_remove_link(cls->kobj, dev_name(dev)); } return error; } EXPORT_SYMBOL_GPL(class_compat_create_link); /** * class_compat_remove_link - remove a compatibility class device link to * a bus device * @cls: the compatibility class * @dev: the target bus device * @device_link: an optional device to which a "device" link was previously * created */ void class_compat_remove_link(struct class_compat *cls, struct device *dev, struct device *device_link) { if (device_link) sysfs_remove_link(&dev->kobj, "device"); sysfs_remove_link(cls->kobj, dev_name(dev)); } EXPORT_SYMBOL_GPL(class_compat_remove_link); /** * class_is_registered - determine if at this moment in time, a class is * registered in the driver core or not. * @class: the class to check * * Returns a boolean to state if the class is registered in the driver core * or not. Note that the value could switch right after this call is made, * so only use this in places where you "know" it is safe to do so (usually * to determine if the specific class has been registered yet or not). * * Be careful in using this. */ bool class_is_registered(const struct class *class) { struct subsys_private *sp = class_to_subsys(class); bool is_initialized = false; if (sp) { is_initialized = true; subsys_put(sp); } return is_initialized; } EXPORT_SYMBOL_GPL(class_is_registered); int __init classes_init(void) { class_kset = kset_create_and_add("class", NULL, NULL); if (!class_kset) return -ENOMEM; return 0; }
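/*
 * Illustrative sketch (not part of this file): the usual lifecycle of a class
 * created with class_create(), paired with device_create()/device_destroy()
 * as mentioned in the kernel-doc above.  The example_* names and the major
 * number are hypothetical.
 */
static struct class *example_class;
static unsigned int example_major = 240;	/* hypothetical, e.g. from register_chrdev() */

static int __init example_init(void)
{
	struct device *dev;

	example_class = class_create("example");
	if (IS_ERR(example_class))
		return PTR_ERR(example_class);

	/* Creates /sys/class/example/example0 and emits the matching uevent. */
	dev = device_create(example_class, NULL, MKDEV(example_major, 0),
			    NULL, "example0");
	if (IS_ERR(dev)) {
		class_destroy(example_class);
		return PTR_ERR(dev);
	}

	return 0;
}

static void __exit example_exit(void)
{
	device_destroy(example_class, MKDEV(example_major, 0));
	class_destroy(example_class);
}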
// SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for Linux input subsystem * * Copyright (c) 2006 Anssi Hannula <anssi.hannula@gmail.com> * Copyright (c) 2006 Dmitry Torokhov <dtor@mail.ru> */ /* #define DEBUG */ #include <linux/input.h> #include <linux/limits.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/overflow.h> #include <linux/sched.h> #include <linux/slab.h> /* * Check that the effect_id is a valid effect and whether the user * is the owner */ static int check_effect_access(struct ff_device *ff, int effect_id, struct file *file) { if (effect_id < 0 || effect_id >= ff->max_effects || !ff->effect_owners[effect_id]) return -EINVAL; if (file && ff->effect_owners[effect_id] != file) return -EACCES; return 0; } /* * Checks whether 2 effects can be combined together */ static inline int check_effects_compatible(struct ff_effect *e1, struct ff_effect *e2) { return e1->type == e2->type && (e1->type != FF_PERIODIC || e1->u.periodic.waveform == e2->u.periodic.waveform); } /* * Convert an effect into compatible one */ static int compat_effect(struct ff_device *ff, struct ff_effect *effect) { int magnitude; switch (effect->type) { case FF_RUMBLE: if (!test_bit(FF_PERIODIC, ff->ffbit)) return -EINVAL; /* * calculate magnitude of sine wave as average of rumble's * 2/3 of strong magnitude and 1/3 of weak magnitude */ magnitude = effect->u.rumble.strong_magnitude / 3 + effect->u.rumble.weak_magnitude / 6; effect->type = FF_PERIODIC; effect->u.periodic.waveform = FF_SINE; effect->u.periodic.period = 50; effect->u.periodic.magnitude = magnitude; effect->u.periodic.offset = 0; effect->u.periodic.phase = 0; effect->u.periodic.envelope.attack_length = 0; effect->u.periodic.envelope.attack_level = 0; effect->u.periodic.envelope.fade_length = 0; effect->u.periodic.envelope.fade_level = 0; return 0; default: /* Let driver handle conversion */ return 0; } } /** * input_ff_upload() - upload effect into force-feedback device * @dev: input device * @effect: effect to be uploaded * @file: owner of the effect */
int input_ff_upload(struct input_dev *dev, struct ff_effect *effect, struct file *file) { struct ff_device *ff = dev->ff; struct ff_effect *old; int ret = 0; int id; if (!test_bit(EV_FF, dev->evbit)) return -ENOSYS; if (effect->type < FF_EFFECT_MIN || effect->type > FF_EFFECT_MAX || !test_bit(effect->type, dev->ffbit)) { dev_dbg(&dev->dev, "invalid or not supported effect type in upload\n"); return -EINVAL; } if (effect->type == FF_PERIODIC && (effect->u.periodic.waveform < FF_WAVEFORM_MIN || effect->u.periodic.waveform > FF_WAVEFORM_MAX || !test_bit(effect->u.periodic.waveform, dev->ffbit))) { dev_dbg(&dev->dev, "invalid or not supported wave form in upload\n"); return -EINVAL; } if (!test_bit(effect->type, ff->ffbit)) { ret = compat_effect(ff, effect); if (ret) return ret; } mutex_lock(&ff->mutex); if (effect->id == -1) { for (id = 0; id < ff->max_effects; id++) if (!ff->effect_owners[id]) break; if (id >= ff->max_effects) { ret = -ENOSPC; goto out; } effect->id = id; old = NULL; } else { id = effect->id; ret = check_effect_access(ff, id, file); if (ret) goto out; old = &ff->effects[id]; if (!check_effects_compatible(effect, old)) { ret = -EINVAL; goto out; } } ret = ff->upload(dev, effect, old); if (ret) goto out; spin_lock_irq(&dev->event_lock); ff->effects[id] = *effect; ff->effect_owners[id] = file; spin_unlock_irq(&dev->event_lock); out: mutex_unlock(&ff->mutex); return ret; } EXPORT_SYMBOL_GPL(input_ff_upload); /* * Erases the effect if the requester is also the effect owner. The mutex * should already be locked before calling this function. */ static int erase_effect(struct input_dev *dev, int effect_id, struct file *file) { struct ff_device *ff = dev->ff; int error; error = check_effect_access(ff, effect_id, file); if (error) return error; spin_lock_irq(&dev->event_lock); ff->playback(dev, effect_id, 0); ff->effect_owners[effect_id] = NULL; spin_unlock_irq(&dev->event_lock); if (ff->erase) { error = ff->erase(dev, effect_id); if (error) { spin_lock_irq(&dev->event_lock); ff->effect_owners[effect_id] = file; spin_unlock_irq(&dev->event_lock); return error; } } return 0; } /** * input_ff_erase - erase a force-feedback effect from device * @dev: input device to erase effect from * @effect_id: id of the effect to be erased * @file: purported owner of the request * * This function erases a force-feedback effect from specified device. * The effect will only be erased if it was uploaded through the same * file handle that is requesting erase. */ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file) { struct ff_device *ff = dev->ff; int ret; if (!test_bit(EV_FF, dev->evbit)) return -ENOSYS; mutex_lock(&ff->mutex); ret = erase_effect(dev, effect_id, file); mutex_unlock(&ff->mutex); return ret; } EXPORT_SYMBOL_GPL(input_ff_erase); /* * input_ff_flush - erase all effects owned by a file handle * @dev: input device to erase effect from * @file: purported owner of the effects * * This function erases all force-feedback effects associated with * the given owner from specified device. Note that @file may be %NULL, * in which case all effects will be erased. 
*/ int input_ff_flush(struct input_dev *dev, struct file *file) { struct ff_device *ff = dev->ff; int i; dev_dbg(&dev->dev, "flushing now\n"); mutex_lock(&ff->mutex); for (i = 0; i < ff->max_effects; i++) erase_effect(dev, i, file); mutex_unlock(&ff->mutex); return 0; } EXPORT_SYMBOL_GPL(input_ff_flush); /** * input_ff_event() - generic handler for force-feedback events * @dev: input device to send the effect to * @type: event type (anything but EV_FF is ignored) * @code: event code * @value: event value */ int input_ff_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { struct ff_device *ff = dev->ff; if (type != EV_FF) return 0; switch (code) { case FF_GAIN: if (!test_bit(FF_GAIN, dev->ffbit) || value > 0xffffU) break; ff->set_gain(dev, value); break; case FF_AUTOCENTER: if (!test_bit(FF_AUTOCENTER, dev->ffbit) || value > 0xffffU) break; ff->set_autocenter(dev, value); break; default: if (check_effect_access(ff, code, NULL) == 0) ff->playback(dev, code, value); break; } return 0; } EXPORT_SYMBOL_GPL(input_ff_event); /** * input_ff_create() - create force-feedback device * @dev: input device supporting force-feedback * @max_effects: maximum number of effects supported by the device * * This function allocates all necessary memory for a force feedback * portion of an input device and installs all default handlers. * @dev->ffbit should be already set up before calling this function. * Once ff device is created you need to setup its upload, erase, * playback and other handlers before registering input device */ int input_ff_create(struct input_dev *dev, unsigned int max_effects) { struct ff_device *ff; size_t ff_dev_size; int i; if (!max_effects) { dev_err(&dev->dev, "cannot allocate device without any effects\n"); return -EINVAL; } if (max_effects > FF_MAX_EFFECTS) { dev_err(&dev->dev, "cannot allocate more than FF_MAX_EFFECTS effects\n"); return -EINVAL; } ff_dev_size = struct_size(ff, effect_owners, max_effects); if (ff_dev_size == SIZE_MAX) /* overflow */ return -EINVAL; ff = kzalloc(ff_dev_size, GFP_KERNEL); if (!ff) return -ENOMEM; ff->effects = kcalloc(max_effects, sizeof(struct ff_effect), GFP_KERNEL); if (!ff->effects) { kfree(ff); return -ENOMEM; } ff->max_effects = max_effects; mutex_init(&ff->mutex); dev->ff = ff; dev->flush = input_ff_flush; dev->event = input_ff_event; __set_bit(EV_FF, dev->evbit); /* Copy "true" bits into ff device bitmap */ for_each_set_bit(i, dev->ffbit, FF_CNT) __set_bit(i, ff->ffbit); /* we can emulate RUMBLE with periodic effects */ if (test_bit(FF_PERIODIC, ff->ffbit)) __set_bit(FF_RUMBLE, dev->ffbit); return 0; } EXPORT_SYMBOL_GPL(input_ff_create); /** * input_ff_destroy() - frees force feedback portion of input device * @dev: input device supporting force feedback * * This function is only needed in error path as input core will * automatically free force feedback structures when device is * destroyed. */ void input_ff_destroy(struct input_dev *dev) { struct ff_device *ff = dev->ff; __clear_bit(EV_FF, dev->evbit); if (ff) { if (ff->destroy) ff->destroy(ff); kfree(ff->private); kfree(ff->effects); kfree(ff); dev->ff = NULL; } } EXPORT_SYMBOL_GPL(input_ff_destroy);
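/*
 * Illustrative sketch (not part of this file): the setup sequence described in
 * the input_ff_create() comment above, as a driver would perform it.  The
 * my_* names are hypothetical stand-ins for real driver callbacks.
 */
static int my_upload(struct input_dev *dev, struct ff_effect *effect,
		     struct ff_effect *old)
{
	return 0;	/* hypothetical: push the effect to the hardware */
}

static int my_play(struct input_dev *dev, int effect_id, int value)
{
	return 0;	/* hypothetical: start/stop the effect on the hardware */
}

static int my_ff_setup(struct input_dev *dev)
{
	int error;

	/* Advertise which effect types the device can render. */
	input_set_capability(dev, EV_FF, FF_RUMBLE);

	error = input_ff_create(dev, 16);	/* room for 16 concurrent effects */
	if (error)
		return error;

	/* ->erase is optional; ->upload and ->playback are what ff-core calls. */
	dev->ff->upload = my_upload;
	dev->ff->playback = my_play;

	return input_register_device(dev);
}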
891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 // SPDX-License-Identifier: GPL-2.0-or-later /* * Connexant Cx11646 library * Copyright (C) 2004 Michel Xhaard mxhaard@magic.fr * * V4L2 by Jean-Francois Moine <http://moinejf.free.fr> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "conex" #include "gspca.h" #define CONEX_CAM 1 /* special JPEG header */ #include "jpeg.h" MODULE_AUTHOR("Michel Xhaard <mxhaard@users.sourceforge.net>"); MODULE_DESCRIPTION("GSPCA USB Conexant Camera Driver"); MODULE_LICENSE("GPL"); #define QUALITY 50 /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ struct v4l2_ctrl *brightness; struct v4l2_ctrl *contrast; struct v4l2_ctrl *sat; u8 jpeg_hdr[JPEG_HDR_SZ]; }; static const struct v4l2_pix_format vga_mode[] = { {176, 144, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 3}, {320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 2}, {352, 288, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 352 * 288 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 1}, {640, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 0}, }; /* the read bytes are found in gspca_dev->usb_buf */ static void reg_r(struct gspca_dev *gspca_dev, __u16 index, __u16 len) { struct usb_device *dev = gspca_dev->dev; if (len > USB_BUF_SZ) { gspca_err(gspca_dev, "reg_r: buffer overflow\n"); return; } usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), 0, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, index, gspca_dev->usb_buf, len, 500); gspca_dbg(gspca_dev, D_USBI, "reg read [%02x] -> %02x ..\n", index, gspca_dev->usb_buf[0]); } /* the bytes to write are in gspca_dev->usb_buf */ static void reg_w_val(struct gspca_dev *gspca_dev, __u16 index, __u8 val) { struct usb_device *dev = gspca_dev->dev; gspca_dev->usb_buf[0] = val; usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, index, gspca_dev->usb_buf, 1, 500); } static void reg_w(struct gspca_dev *gspca_dev, __u16 index, const __u8 *buffer, __u16 len) { struct usb_device *dev = gspca_dev->dev; if (len > USB_BUF_SZ) { gspca_err(gspca_dev, "reg_w: buffer overflow\n"); return; } gspca_dbg(gspca_dev, D_USBO, "reg write [%02x] = %02x..\n", index, *buffer); memcpy(gspca_dev->usb_buf, buffer, len); usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, index, gspca_dev->usb_buf, len, 500); } static const __u8 cx_sensor_init[][4] = { {0x88, 0x11, 0x01, 0x01}, {0x88, 0x12, 0x70, 0x01}, {0x88, 0x0f, 0x00, 0x01}, {0x88, 0x05, 0x01, 0x01}, {} }; static const __u8 cx11646_fw1[][3] = { {0x00, 0x02, 0x00}, {0x01, 0x43, 0x00}, {0x02, 0xA7, 0x00}, {0x03, 0x8B, 0x01}, {0x04, 0xE9, 0x02}, {0x05, 0x08, 0x04}, {0x06, 0x08, 0x05}, {0x07, 0x07, 0x06}, {0x08, 0xE7, 0x06}, {0x09, 0xC6, 0x07}, {0x0A, 0x86, 0x08}, {0x0B, 0x46, 0x09}, {0x0C, 0x05, 0x0A}, {0x0D, 0xA5, 0x0A}, {0x0E, 0x45, 0x0B}, {0x0F, 0xE5, 0x0B}, {0x10, 0x85, 0x0C}, {0x11, 0x25, 0x0D}, {0x12, 0xC4, 0x0D}, {0x13, 0x45, 0x0E}, {0x14, 0xE4, 0x0E}, 
{0x15, 0x64, 0x0F}, {0x16, 0xE4, 0x0F}, {0x17, 0x64, 0x10}, {0x18, 0xE4, 0x10}, {0x19, 0x64, 0x11}, {0x1A, 0xE4, 0x11}, {0x1B, 0x64, 0x12}, {0x1C, 0xE3, 0x12}, {0x1D, 0x44, 0x13}, {0x1E, 0xC3, 0x13}, {0x1F, 0x24, 0x14}, {0x20, 0xA3, 0x14}, {0x21, 0x04, 0x15}, {0x22, 0x83, 0x15}, {0x23, 0xE3, 0x15}, {0x24, 0x43, 0x16}, {0x25, 0xA4, 0x16}, {0x26, 0x23, 0x17}, {0x27, 0x83, 0x17}, {0x28, 0xE3, 0x17}, {0x29, 0x43, 0x18}, {0x2A, 0xA3, 0x18}, {0x2B, 0x03, 0x19}, {0x2C, 0x63, 0x19}, {0x2D, 0xC3, 0x19}, {0x2E, 0x22, 0x1A}, {0x2F, 0x63, 0x1A}, {0x30, 0xC3, 0x1A}, {0x31, 0x23, 0x1B}, {0x32, 0x83, 0x1B}, {0x33, 0xE2, 0x1B}, {0x34, 0x23, 0x1C}, {0x35, 0x83, 0x1C}, {0x36, 0xE2, 0x1C}, {0x37, 0x23, 0x1D}, {0x38, 0x83, 0x1D}, {0x39, 0xE2, 0x1D}, {0x3A, 0x23, 0x1E}, {0x3B, 0x82, 0x1E}, {0x3C, 0xC3, 0x1E}, {0x3D, 0x22, 0x1F}, {0x3E, 0x63, 0x1F}, {0x3F, 0xC1, 0x1F}, {} }; static void cx11646_fw(struct gspca_dev*gspca_dev) { int i = 0; reg_w_val(gspca_dev, 0x006a, 0x02); while (cx11646_fw1[i][1]) { reg_w(gspca_dev, 0x006b, cx11646_fw1[i], 3); i++; } reg_w_val(gspca_dev, 0x006a, 0x00); } static const __u8 cxsensor[] = { 0x88, 0x12, 0x70, 0x01, 0x88, 0x0d, 0x02, 0x01, 0x88, 0x0f, 0x00, 0x01, 0x88, 0x03, 0x71, 0x01, 0x88, 0x04, 0x00, 0x01, /* 3 */ 0x88, 0x02, 0x10, 0x01, 0x88, 0x00, 0xD4, 0x01, 0x88, 0x01, 0x01, 0x01, /* 5 */ 0x88, 0x0B, 0x00, 0x01, 0x88, 0x0A, 0x0A, 0x01, 0x88, 0x00, 0x08, 0x01, 0x88, 0x01, 0x00, 0x01, /* 8 */ 0x88, 0x05, 0x01, 0x01, 0xA1, 0x18, 0x00, 0x01, 0x00 }; static const __u8 reg20[] = { 0x10, 0x42, 0x81, 0x19, 0xd3, 0xff, 0xa7, 0xff }; static const __u8 reg28[] = { 0x87, 0x00, 0x87, 0x00, 0x8f, 0xff, 0xea, 0xff }; static const __u8 reg10[] = { 0xb1, 0xb1 }; static const __u8 reg71a[] = { 0x08, 0x18, 0x0a, 0x1e }; /* 640 */ static const __u8 reg71b[] = { 0x04, 0x0c, 0x05, 0x0f }; /* 352{0x04,0x0a,0x06,0x12}; //352{0x05,0x0e,0x06,0x11}; //352 */ static const __u8 reg71c[] = { 0x02, 0x07, 0x03, 0x09 }; /* 320{0x04,0x0c,0x05,0x0f}; //320 */ static const __u8 reg71d[] = { 0x02, 0x07, 0x03, 0x09 }; /* 176 */ static const __u8 reg7b[] = { 0x00, 0xff, 0x00, 0xff, 0x00, 0xff }; static void cx_sensor(struct gspca_dev*gspca_dev) { int i = 0; int length; const __u8 *ptsensor = cxsensor; reg_w(gspca_dev, 0x0020, reg20, 8); reg_w(gspca_dev, 0x0028, reg28, 8); reg_w(gspca_dev, 0x0010, reg10, 2); reg_w_val(gspca_dev, 0x0092, 0x03); switch (gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv) { case 0: reg_w(gspca_dev, 0x0071, reg71a, 4); break; case 1: reg_w(gspca_dev, 0x0071, reg71b, 4); break; default: /* case 2: */ reg_w(gspca_dev, 0x0071, reg71c, 4); break; case 3: reg_w(gspca_dev, 0x0071, reg71d, 4); break; } reg_w(gspca_dev, 0x007b, reg7b, 6); reg_w_val(gspca_dev, 0x00f8, 0x00); reg_w(gspca_dev, 0x0010, reg10, 2); reg_w_val(gspca_dev, 0x0098, 0x41); for (i = 0; i < 11; i++) { if (i == 3 || i == 5 || i == 8) length = 8; else length = 4; reg_w(gspca_dev, 0x00e5, ptsensor, length); if (length == 4) reg_r(gspca_dev, 0x00e8, 1); else reg_r(gspca_dev, 0x00e8, length); ptsensor += length; } reg_r(gspca_dev, 0x00e7, 8); } static const __u8 cx_inits_176[] = { 0x33, 0x81, 0xB0, 0x00, 0x90, 0x00, 0x0A, 0x03, /* 176x144 */ 0x00, 0x03, 0x03, 0x03, 0x1B, 0x05, 0x30, 0x03, 0x65, 0x15, 0x18, 0x25, 0x03, 0x25, 0x08, 0x30, 0x3B, 0x25, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0xDC, 0xFF, 0xEE, 0xFF, 0xC5, 0xFF, 0xBF, 0xFF, 0xF7, 0xFF, 0x88, 0xFF, 0x66, 0x02, 0x28, 0x02, 0x1E, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const __u8 cx_inits_320[] = { 0x7f, 0x7f, 0x40, 0x01, 0xf0, 0x00, 0x02, 0x01, 0x00, 0x01, 
0x01, 0x01, 0x10, 0x00, 0x02, 0x01, 0x65, 0x45, 0xfa, 0x4c, 0x2c, 0xdf, 0xb9, 0x81, 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xe2, 0xff, 0xf1, 0xff, 0xc2, 0xff, 0xbc, 0xff, 0xf5, 0xff, 0x6d, 0xff, 0xf6, 0x01, 0x43, 0x02, 0xd3, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const __u8 cx_inits_352[] = { 0x2e, 0x7c, 0x60, 0x01, 0x20, 0x01, 0x05, 0x03, 0x00, 0x06, 0x03, 0x06, 0x1b, 0x10, 0x05, 0x3b, 0x30, 0x25, 0x18, 0x25, 0x08, 0x30, 0x03, 0x25, 0x3b, 0x30, 0x25, 0x1b, 0x10, 0x05, 0x00, 0x00, 0xe3, 0xff, 0xf1, 0xff, 0xc2, 0xff, 0xbc, 0xff, 0xf5, 0xff, 0x6b, 0xff, 0xee, 0x01, 0x43, 0x02, 0xe4, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const __u8 cx_inits_640[] = { 0x7e, 0x7e, 0x80, 0x02, 0xe0, 0x01, 0x01, 0x01, 0x00, 0x02, 0x01, 0x02, 0x10, 0x30, 0x01, 0x01, 0x65, 0x45, 0xf7, 0x52, 0x2c, 0xdf, 0xb9, 0x81, 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xe2, 0xff, 0xf1, 0xff, 0xc2, 0xff, 0xbc, 0xff, 0xf6, 0xff, 0x7b, 0xff, 0x01, 0x02, 0x43, 0x02, 0x77, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static void cx11646_initsize(struct gspca_dev *gspca_dev) { const __u8 *cxinit; static const __u8 reg12[] = { 0x08, 0x05, 0x07, 0x04, 0x24 }; static const __u8 reg17[] = { 0x0a, 0x00, 0xf2, 0x01, 0x0f, 0x00, 0x97, 0x02 }; switch (gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv) { case 0: cxinit = cx_inits_640; break; case 1: cxinit = cx_inits_352; break; default: /* case 2: */ cxinit = cx_inits_320; break; case 3: cxinit = cx_inits_176; break; } reg_w_val(gspca_dev, 0x009a, 0x01); reg_w_val(gspca_dev, 0x0010, 0x10); reg_w(gspca_dev, 0x0012, reg12, 5); reg_w(gspca_dev, 0x0017, reg17, 8); reg_w_val(gspca_dev, 0x00c0, 0x00); reg_w_val(gspca_dev, 0x00c1, 0x04); reg_w_val(gspca_dev, 0x00c2, 0x04); reg_w(gspca_dev, 0x0061, cxinit, 8); cxinit += 8; reg_w(gspca_dev, 0x00ca, cxinit, 8); cxinit += 8; reg_w(gspca_dev, 0x00d2, cxinit, 8); cxinit += 8; reg_w(gspca_dev, 0x00da, cxinit, 6); cxinit += 8; reg_w(gspca_dev, 0x0041, cxinit, 8); cxinit += 8; reg_w(gspca_dev, 0x0049, cxinit, 8); cxinit += 8; reg_w(gspca_dev, 0x0051, cxinit, 2); reg_r(gspca_dev, 0x0010, 1); } static const __u8 cx_jpeg_init[][8] = { {0xff, 0xd8, 0xff, 0xdb, 0x00, 0x84, 0x00, 0x15}, /* 1 */ {0x0f, 0x10, 0x12, 0x10, 0x0d, 0x15, 0x12, 0x11}, {0x12, 0x18, 0x16, 0x15, 0x19, 0x20, 0x35, 0x22}, {0x20, 0x1d, 0x1d, 0x20, 0x41, 0x2e, 0x31, 0x26}, {0x35, 0x4d, 0x43, 0x51, 0x4f, 0x4b, 0x43, 0x4a}, {0x49, 0x55, 0x5F, 0x79, 0x67, 0x55, 0x5A, 0x73}, {0x5B, 0x49, 0x4A, 0x6A, 0x90, 0x6B, 0x73, 0x7D}, {0x81, 0x88, 0x89, 0x88, 0x52, 0x66, 0x95, 0xA0}, {0x94, 0x84, 0x9E, 0x79, 0x85, 0x88, 0x83, 0x01}, {0x15, 0x0F, 0x10, 0x12, 0x10, 0x0D, 0x15, 0x12}, {0x11, 0x12, 0x18, 0x16, 0x15, 0x19, 0x20, 0x35}, {0x22, 0x20, 0x1D, 0x1D, 0x20, 0x41, 0x2E, 0x31}, {0x26, 0x35, 0x4D, 0x43, 0x51, 0x4F, 0x4B, 0x43}, {0x4A, 0x49, 0x55, 0x5F, 0x79, 0x67, 0x55, 0x5A}, {0x73, 0x5B, 0x49, 0x4A, 0x6A, 0x90, 0x6B, 0x73}, {0x7D, 0x81, 0x88, 0x89, 0x88, 0x52, 0x66, 0x95}, {0xA0, 0x94, 0x84, 0x9E, 0x79, 0x85, 0x88, 0x83}, {0xFF, 0xC4, 0x01, 0xA2, 0x00, 0x00, 0x01, 0x05}, {0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00}, {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02}, {0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A}, {0x0B, 0x01, 0x00, 0x03, 0x01, 0x01, 0x01, 0x01}, {0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05}, {0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x00}, {0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03, 0x05}, {0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7D, 0x01}, {0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21}, 
{0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, 0x22}, {0x71, 0x14, 0x32, 0x81, 0x91, 0xA1, 0x08, 0x23}, {0x42, 0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0, 0x24}, {0x33, 0x62, 0x72, 0x82, 0x09, 0x0A, 0x16, 0x17}, {0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28, 0x29}, {0x2A, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A}, {0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A}, {0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A}, {0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A}, {0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A}, {0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A}, {0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99}, {0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8}, {0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7}, {0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6}, {0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5}, {0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1, 0xE2, 0xE3}, {0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF1}, {0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9}, {0xFA, 0x11, 0x00, 0x02, 0x01, 0x02, 0x04, 0x04}, {0x03, 0x04, 0x07, 0x05, 0x04, 0x04, 0x00, 0x01}, {0x02, 0x77, 0x00, 0x01, 0x02, 0x03, 0x11, 0x04}, {0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 0x51, 0x07}, {0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14}, {0x42, 0x91, 0xA1, 0xB1, 0xC1, 0x09, 0x23, 0x33}, {0x52, 0xF0, 0x15, 0x62, 0x72, 0xD1, 0x0A, 0x16}, {0x24, 0x34, 0xE1, 0x25, 0xF1, 0x17, 0x18, 0x19}, {0x1A, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x35, 0x36}, {0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46}, {0x47, 0x48, 0x49, 0x4A, 0x53, 0x54, 0x55, 0x56}, {0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66}, {0x67, 0x68, 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76}, {0x77, 0x78, 0x79, 0x7A, 0x82, 0x83, 0x84, 0x85}, {0x86, 0x87, 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94}, {0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3}, {0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2}, {0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA}, {0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9}, {0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8}, {0xD9, 0xDA, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7}, {0xE8, 0xE9, 0xEA, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6}, {0xF7, 0xF8, 0xF9, 0xFA, 0xFF, 0x20, 0x00, 0x1F}, {0x02, 0x0C, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x11, 0x00, 0x11, 0x22, 0x00, 0x22}, {0x22, 0x11, 0x22, 0x22, 0x11, 0x33, 0x33, 0x11}, {0x44, 0x66, 0x22, 0x55, 0x66, 0xFF, 0xDD, 0x00}, {0x04, 0x00, 0x14, 0xFF, 0xC0, 0x00, 0x11, 0x08}, {0x00, 0xF0, 0x01, 0x40, 0x03, 0x00, 0x21, 0x00}, {0x01, 0x11, 0x01, 0x02, 0x11, 0x01, 0xFF, 0xDA}, {0x00, 0x0C, 0x03, 0x00, 0x00, 0x01, 0x11, 0x02}, {0x11, 0x00, 0x3F, 0x00, 0xFF, 0xD9, 0x00, 0x00} /* 79 */ }; static const __u8 cxjpeg_640[][8] = { {0xff, 0xd8, 0xff, 0xdb, 0x00, 0x84, 0x00, 0x10}, /* 1 */ {0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d}, {0x0e, 0x12, 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a}, {0x18, 0x16, 0x16, 0x18, 0x31, 0x23, 0x25, 0x1d}, {0x28, 0x3a, 0x33, 0x3D, 0x3C, 0x39, 0x33, 0x38}, {0x37, 0x40, 0x48, 0x5C, 0x4E, 0x40, 0x44, 0x57}, {0x45, 0x37, 0x38, 0x50, 0x6D, 0x51, 0x57, 0x5F}, {0x62, 0x67, 0x68, 0x67, 0x3E, 0x4D, 0x71, 0x79}, {0x70, 0x64, 0x78, 0x5C, 0x65, 0x67, 0x63, 0x01}, {0x10, 0x0B, 0x0C, 0x0E, 0x0C, 0x0A, 0x10, 0x0E}, {0x0D, 0x0E, 0x12, 0x11, 0x10, 0x13, 0x18, 0x28}, {0x1A, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, 0x25}, {0x1D, 0x28, 0x3A, 0x33, 0x3D, 0x3C, 0x39, 0x33}, {0x38, 0x37, 0x40, 0x48, 0x5C, 0x4E, 0x40, 0x44}, {0x57, 0x45, 0x37, 0x38, 0x50, 0x6D, 0x51, 0x57}, {0x5F, 0x62, 0x67, 0x68, 0x67, 0x3E, 0x4D, 0x71}, {0x79, 0x70, 0x64, 0x78, 0x5C, 0x65, 0x67, 0x63}, {0xFF, 0x20, 0x00, 0x1F, 0x00, 0x83, 0x00, 0x00}, {0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 
0x00}, {0x11, 0x22, 0x00, 0x22, 0x22, 0x11, 0x22, 0x22}, {0x11, 0x33, 0x33, 0x11, 0x44, 0x66, 0x22, 0x55}, {0x66, 0xFF, 0xDD, 0x00, 0x04, 0x00, 0x28, 0xFF}, {0xC0, 0x00, 0x11, 0x08, 0x01, 0xE0, 0x02, 0x80}, {0x03, 0x00, 0x21, 0x00, 0x01, 0x11, 0x01, 0x02}, {0x11, 0x01, 0xFF, 0xDA, 0x00, 0x0C, 0x03, 0x00}, {0x00, 0x01, 0x11, 0x02, 0x11, 0x00, 0x3F, 0x00}, {0xFF, 0xD9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} /* 27 */ }; static const __u8 cxjpeg_352[][8] = { {0xff, 0xd8, 0xff, 0xdb, 0x00, 0x84, 0x00, 0x0d}, {0x09, 0x09, 0x0b, 0x09, 0x08, 0x0D, 0x0b, 0x0a}, {0x0b, 0x0e, 0x0d, 0x0d, 0x0f, 0x13, 0x1f, 0x14}, {0x13, 0x11, 0x11, 0x13, 0x26, 0x1b, 0x1d, 0x17}, {0x1F, 0x2D, 0x28, 0x30, 0x2F, 0x2D, 0x28, 0x2C}, {0x2B, 0x32, 0x38, 0x48, 0x3D, 0x32, 0x35, 0x44}, {0x36, 0x2B, 0x2C, 0x3F, 0x55, 0x3F, 0x44, 0x4A}, {0x4D, 0x50, 0x51, 0x50, 0x30, 0x3C, 0x58, 0x5F}, {0x58, 0x4E, 0x5E, 0x48, 0x4F, 0x50, 0x4D, 0x01}, {0x0D, 0x09, 0x09, 0x0B, 0x09, 0x08, 0x0D, 0x0B}, {0x0A, 0x0B, 0x0E, 0x0D, 0x0D, 0x0F, 0x13, 0x1F}, {0x14, 0x13, 0x11, 0x11, 0x13, 0x26, 0x1B, 0x1D}, {0x17, 0x1F, 0x2D, 0x28, 0x30, 0x2F, 0x2D, 0x28}, {0x2C, 0x2B, 0x32, 0x38, 0x48, 0x3D, 0x32, 0x35}, {0x44, 0x36, 0x2B, 0x2C, 0x3F, 0x55, 0x3F, 0x44}, {0x4A, 0x4D, 0x50, 0x51, 0x50, 0x30, 0x3C, 0x58}, {0x5F, 0x58, 0x4E, 0x5E, 0x48, 0x4F, 0x50, 0x4D}, {0xFF, 0x20, 0x00, 0x1F, 0x01, 0x83, 0x00, 0x00}, {0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00}, {0x11, 0x22, 0x00, 0x22, 0x22, 0x11, 0x22, 0x22}, {0x11, 0x33, 0x33, 0x11, 0x44, 0x66, 0x22, 0x55}, {0x66, 0xFF, 0xDD, 0x00, 0x04, 0x00, 0x16, 0xFF}, {0xC0, 0x00, 0x11, 0x08, 0x01, 0x20, 0x01, 0x60}, {0x03, 0x00, 0x21, 0x00, 0x01, 0x11, 0x01, 0x02}, {0x11, 0x01, 0xFF, 0xDA, 0x00, 0x0C, 0x03, 0x00}, {0x00, 0x01, 0x11, 0x02, 0x11, 0x00, 0x3F, 0x00}, {0xFF, 0xD9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }; static const __u8 cxjpeg_320[][8] = { {0xff, 0xd8, 0xff, 0xdb, 0x00, 0x84, 0x00, 0x05}, {0x03, 0x04, 0x04, 0x04, 0x03, 0x05, 0x04, 0x04}, {0x04, 0x05, 0x05, 0x05, 0x06, 0x07, 0x0c, 0x08}, {0x07, 0x07, 0x07, 0x07, 0x0f, 0x0b, 0x0b, 0x09}, {0x0C, 0x11, 0x0F, 0x12, 0x12, 0x11, 0x0f, 0x11}, {0x11, 0x13, 0x16, 0x1C, 0x17, 0x13, 0x14, 0x1A}, {0x15, 0x11, 0x11, 0x18, 0x21, 0x18, 0x1A, 0x1D}, {0x1D, 0x1F, 0x1F, 0x1F, 0x13, 0x17, 0x22, 0x24}, {0x22, 0x1E, 0x24, 0x1C, 0x1E, 0x1F, 0x1E, 0x01}, {0x05, 0x03, 0x04, 0x04, 0x04, 0x03, 0x05, 0x04}, {0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x07, 0x0C}, {0x08, 0x07, 0x07, 0x07, 0x07, 0x0F, 0x0B, 0x0B}, {0x09, 0x0C, 0x11, 0x0F, 0x12, 0x12, 0x11, 0x0F}, {0x11, 0x11, 0x13, 0x16, 0x1C, 0x17, 0x13, 0x14}, {0x1A, 0x15, 0x11, 0x11, 0x18, 0x21, 0x18, 0x1A}, {0x1D, 0x1D, 0x1F, 0x1F, 0x1F, 0x13, 0x17, 0x22}, {0x24, 0x22, 0x1E, 0x24, 0x1C, 0x1E, 0x1F, 0x1E}, {0xFF, 0x20, 0x00, 0x1F, 0x02, 0x0C, 0x00, 0x00}, {0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00}, {0x11, 0x22, 0x00, 0x22, 0x22, 0x11, 0x22, 0x22}, {0x11, 0x33, 0x33, 0x11, 0x44, 0x66, 0x22, 0x55}, {0x66, 0xFF, 0xDD, 0x00, 0x04, 0x00, 0x14, 0xFF}, {0xC0, 0x00, 0x11, 0x08, 0x00, 0xF0, 0x01, 0x40}, {0x03, 0x00, 0x21, 0x00, 0x01, 0x11, 0x01, 0x02}, {0x11, 0x01, 0xFF, 0xDA, 0x00, 0x0C, 0x03, 0x00}, {0x00, 0x01, 0x11, 0x02, 0x11, 0x00, 0x3F, 0x00}, {0xFF, 0xD9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} /* 27 */ }; static const __u8 cxjpeg_176[][8] = { {0xff, 0xd8, 0xff, 0xdb, 0x00, 0x84, 0x00, 0x0d}, {0x09, 0x09, 0x0B, 0x09, 0x08, 0x0D, 0x0B, 0x0A}, {0x0B, 0x0E, 0x0D, 0x0D, 0x0F, 0x13, 0x1F, 0x14}, {0x13, 0x11, 0x11, 0x13, 0x26, 0x1B, 0x1D, 0x17}, {0x1F, 0x2D, 0x28, 0x30, 0x2F, 0x2D, 0x28, 0x2C}, {0x2B, 0x32, 0x38, 0x48, 0x3D, 0x32, 0x35, 0x44}, {0x36, 
0x2B, 0x2C, 0x3F, 0x55, 0x3F, 0x44, 0x4A}, {0x4D, 0x50, 0x51, 0x50, 0x30, 0x3C, 0x58, 0x5F}, {0x58, 0x4E, 0x5E, 0x48, 0x4F, 0x50, 0x4D, 0x01}, {0x0D, 0x09, 0x09, 0x0B, 0x09, 0x08, 0x0D, 0x0B}, {0x0A, 0x0B, 0x0E, 0x0D, 0x0D, 0x0F, 0x13, 0x1F}, {0x14, 0x13, 0x11, 0x11, 0x13, 0x26, 0x1B, 0x1D}, {0x17, 0x1F, 0x2D, 0x28, 0x30, 0x2F, 0x2D, 0x28}, {0x2C, 0x2B, 0x32, 0x38, 0x48, 0x3D, 0x32, 0x35}, {0x44, 0x36, 0x2B, 0x2C, 0x3F, 0x55, 0x3F, 0x44}, {0x4A, 0x4D, 0x50, 0x51, 0x50, 0x30, 0x3C, 0x58}, {0x5F, 0x58, 0x4E, 0x5E, 0x48, 0x4F, 0x50, 0x4D}, {0xFF, 0x20, 0x00, 0x1F, 0x03, 0xA1, 0x00, 0x00}, {0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00}, {0x11, 0x22, 0x00, 0x22, 0x22, 0x11, 0x22, 0x22}, {0x11, 0x33, 0x33, 0x11, 0x44, 0x66, 0x22, 0x55}, {0x66, 0xFF, 0xDD, 0x00, 0x04, 0x00, 0x0B, 0xFF}, {0xC0, 0x00, 0x11, 0x08, 0x00, 0x90, 0x00, 0xB0}, {0x03, 0x00, 0x21, 0x00, 0x01, 0x11, 0x01, 0x02}, {0x11, 0x01, 0xFF, 0xDA, 0x00, 0x0C, 0x03, 0x00}, {0x00, 0x01, 0x11, 0x02, 0x11, 0x00, 0x3F, 0x00}, {0xFF, 0xD9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }; /* 640 take with the zcx30x part */ static const __u8 cxjpeg_qtable[][8] = { {0xff, 0xd8, 0xff, 0xdb, 0x00, 0x84, 0x00, 0x08}, {0x06, 0x06, 0x07, 0x06, 0x05, 0x08, 0x07, 0x07}, {0x07, 0x09, 0x09, 0x08, 0x0a, 0x0c, 0x14, 0x0a}, {0x0c, 0x0b, 0x0b, 0x0c, 0x19, 0x12, 0x13, 0x0f}, {0x14, 0x1d, 0x1a, 0x1f, 0x1e, 0x1d, 0x1a, 0x1c}, {0x1c, 0x20, 0x24, 0x2e, 0x27, 0x20, 0x22, 0x2c}, {0x23, 0x1c, 0x1c, 0x28, 0x37, 0x29, 0x2c, 0x30}, {0x31, 0x34, 0x34, 0x34, 0x1f, 0x27, 0x39, 0x3d}, {0x38, 0x32, 0x3c, 0x2e, 0x33, 0x34, 0x32, 0x01}, {0x09, 0x09, 0x09, 0x0c, 0x0b, 0x0c, 0x18, 0x0a}, {0x0a, 0x18, 0x32, 0x21, 0x1c, 0x21, 0x32, 0x32}, {0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32}, {0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32}, {0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32}, {0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32}, {0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32}, {0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32}, {0xFF, 0xD9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} /* 18 */ }; static void cx11646_jpegInit(struct gspca_dev*gspca_dev) { int i; int length; reg_w_val(gspca_dev, 0x00c0, 0x01); reg_w_val(gspca_dev, 0x00c3, 0x00); reg_w_val(gspca_dev, 0x00c0, 0x00); reg_r(gspca_dev, 0x0001, 1); length = 8; for (i = 0; i < 79; i++) { if (i == 78) length = 6; reg_w(gspca_dev, 0x0008, cx_jpeg_init[i], length); } reg_r(gspca_dev, 0x0002, 1); reg_w_val(gspca_dev, 0x0055, 0x14); } static const __u8 reg12[] = { 0x0a, 0x05, 0x07, 0x04, 0x19 }; static const __u8 regE5_8[] = { 0x88, 0x00, 0xd4, 0x01, 0x88, 0x01, 0x01, 0x01 }; static const __u8 regE5a[] = { 0x88, 0x0a, 0x0c, 0x01 }; static const __u8 regE5b[] = { 0x88, 0x0b, 0x12, 0x01 }; static const __u8 regE5c[] = { 0x88, 0x05, 0x01, 0x01 }; static const __u8 reg51[] = { 0x77, 0x03 }; #define reg70 0x03 static void cx11646_jpeg(struct gspca_dev*gspca_dev) { int i; int length; __u8 Reg55; int retry; reg_w_val(gspca_dev, 0x00c0, 0x01); reg_w_val(gspca_dev, 0x00c3, 0x00); reg_w_val(gspca_dev, 0x00c0, 0x00); reg_r(gspca_dev, 0x0001, 1); length = 8; switch (gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv) { case 0: for (i = 0; i < 27; i++) { if (i == 26) length = 2; reg_w(gspca_dev, 0x0008, cxjpeg_640[i], length); } Reg55 = 0x28; break; case 1: for (i = 0; i < 27; i++) { if (i == 26) length = 2; reg_w(gspca_dev, 0x0008, cxjpeg_352[i], length); } Reg55 = 0x16; break; default: /* case 2: */ for (i = 0; i < 27; i++) { if (i == 26) length = 2; reg_w(gspca_dev, 0x0008, cxjpeg_320[i], length); } Reg55 = 0x14; break; case 3: for (i = 0; i < 
27; i++) { if (i == 26) length = 2; reg_w(gspca_dev, 0x0008, cxjpeg_176[i], length); } Reg55 = 0x0B; break; } reg_r(gspca_dev, 0x0002, 1); reg_w_val(gspca_dev, 0x0055, Reg55); reg_r(gspca_dev, 0x0002, 1); reg_w(gspca_dev, 0x0010, reg10, 2); reg_w_val(gspca_dev, 0x0054, 0x02); reg_w_val(gspca_dev, 0x0054, 0x01); reg_w_val(gspca_dev, 0x0000, 0x94); reg_w_val(gspca_dev, 0x0053, 0xc0); reg_w_val(gspca_dev, 0x00fc, 0xe1); reg_w_val(gspca_dev, 0x0000, 0x00); /* wait for completion */ retry = 50; do { reg_r(gspca_dev, 0x0002, 1); /* 0x07 until 0x00 */ if (gspca_dev->usb_buf[0] == 0x00) break; reg_w_val(gspca_dev, 0x0053, 0x00); } while (--retry); if (retry == 0) gspca_err(gspca_dev, "Damned Errors sending jpeg Table\n"); /* send the qtable now */ reg_r(gspca_dev, 0x0001, 1); /* -> 0x18 */ length = 8; for (i = 0; i < 18; i++) { if (i == 17) length = 2; reg_w(gspca_dev, 0x0008, cxjpeg_qtable[i], length); } reg_r(gspca_dev, 0x0002, 1); /* 0x00 */ reg_r(gspca_dev, 0x0053, 1); /* 0x00 */ reg_w_val(gspca_dev, 0x0054, 0x02); reg_w_val(gspca_dev, 0x0054, 0x01); reg_w_val(gspca_dev, 0x0000, 0x94); reg_w_val(gspca_dev, 0x0053, 0xc0); reg_r(gspca_dev, 0x0038, 1); /* 0x40 */ reg_r(gspca_dev, 0x0038, 1); /* 0x40 */ reg_r(gspca_dev, 0x001f, 1); /* 0x38 */ reg_w(gspca_dev, 0x0012, reg12, 5); reg_w(gspca_dev, 0x00e5, regE5_8, 8); reg_r(gspca_dev, 0x00e8, 8); reg_w(gspca_dev, 0x00e5, regE5a, 4); reg_r(gspca_dev, 0x00e8, 1); /* 0x00 */ reg_w_val(gspca_dev, 0x009a, 0x01); reg_w(gspca_dev, 0x00e5, regE5b, 4); reg_r(gspca_dev, 0x00e8, 1); /* 0x00 */ reg_w(gspca_dev, 0x00e5, regE5c, 4); reg_r(gspca_dev, 0x00e8, 1); /* 0x00 */ reg_w(gspca_dev, 0x0051, reg51, 2); reg_w(gspca_dev, 0x0010, reg10, 2); reg_w_val(gspca_dev, 0x0070, reg70); } static void cx11646_init1(struct gspca_dev *gspca_dev) { int i = 0; reg_w_val(gspca_dev, 0x0010, 0x00); reg_w_val(gspca_dev, 0x0053, 0x00); reg_w_val(gspca_dev, 0x0052, 0x00); reg_w_val(gspca_dev, 0x009b, 0x2f); reg_w_val(gspca_dev, 0x009c, 0x10); reg_r(gspca_dev, 0x0098, 1); reg_w_val(gspca_dev, 0x0098, 0x40); reg_r(gspca_dev, 0x0099, 1); reg_w_val(gspca_dev, 0x0099, 0x07); reg_w_val(gspca_dev, 0x0039, 0x40); reg_w_val(gspca_dev, 0x003c, 0xff); reg_w_val(gspca_dev, 0x003f, 0x1f); reg_w_val(gspca_dev, 0x003d, 0x40); /* reg_w_val(gspca_dev, 0x003d, 0x60); */ reg_r(gspca_dev, 0x0099, 1); /* ->0x07 */ while (cx_sensor_init[i][0]) { reg_w_val(gspca_dev, 0x00e5, cx_sensor_init[i][0]); reg_r(gspca_dev, 0x00e8, 1); /* -> 0x00 */ if (i == 1) { reg_w_val(gspca_dev, 0x00ed, 0x01); reg_r(gspca_dev, 0x00ed, 1); /* -> 0x01 */ } i++; } reg_w_val(gspca_dev, 0x00c3, 0x00); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct cam *cam; cam = &gspca_dev->cam; cam->cam_mode = vga_mode; cam->nmodes = ARRAY_SIZE(vga_mode); return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { cx11646_init1(gspca_dev); cx11646_initsize(gspca_dev); cx11646_fw(gspca_dev); cx_sensor(gspca_dev); cx11646_jpegInit(gspca_dev); return 0; } static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; /* create the JPEG header */ jpeg_define(sd->jpeg_hdr, gspca_dev->pixfmt.height, gspca_dev->pixfmt.width, 0x22); /* JPEG 411 */ jpeg_set_qual(sd->jpeg_hdr, QUALITY); cx11646_initsize(gspca_dev); cx11646_fw(gspca_dev); cx_sensor(gspca_dev); cx11646_jpeg(gspca_dev); return 0; } /* called on streamoff with alt 0 and on disconnect */ static void sd_stop0(struct 
gspca_dev *gspca_dev) { int retry = 50; if (!gspca_dev->present) return; reg_w_val(gspca_dev, 0x0000, 0x00); reg_r(gspca_dev, 0x0002, 1); reg_w_val(gspca_dev, 0x0053, 0x00); while (retry--) { /* reg_r(gspca_dev, 0x0002, 1);*/ reg_r(gspca_dev, 0x0053, 1); if (gspca_dev->usb_buf[0] == 0) break; } reg_w_val(gspca_dev, 0x0000, 0x00); reg_r(gspca_dev, 0x0002, 1); reg_w_val(gspca_dev, 0x0010, 0x00); reg_r(gspca_dev, 0x0033, 1); reg_w_val(gspca_dev, 0x00fc, 0xe0); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; if (data[0] == 0xff && data[1] == 0xd8) { /* start of frame */ gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); /* put the JPEG header in the new frame */ gspca_frame_add(gspca_dev, FIRST_PACKET, sd->jpeg_hdr, JPEG_HDR_SZ); data += 2; len -= 2; } gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } static void setbrightness(struct gspca_dev *gspca_dev, s32 val, s32 sat) { __u8 regE5cbx[] = { 0x88, 0x00, 0xd4, 0x01, 0x88, 0x01, 0x01, 0x01 }; __u8 reg51c[2]; regE5cbx[2] = val; reg_w(gspca_dev, 0x00e5, regE5cbx, 8); reg_r(gspca_dev, 0x00e8, 8); reg_w(gspca_dev, 0x00e5, regE5c, 4); reg_r(gspca_dev, 0x00e8, 1); /* 0x00 */ reg51c[0] = 0x77; reg51c[1] = sat; reg_w(gspca_dev, 0x0051, reg51c, 2); reg_w(gspca_dev, 0x0010, reg10, 2); reg_w_val(gspca_dev, 0x0070, reg70); } static void setcontrast(struct gspca_dev *gspca_dev, s32 val, s32 sat) { __u8 regE5acx[] = { 0x88, 0x0a, 0x0c, 0x01 }; /* seem MSB */ /* __u8 regE5bcx[] = { 0x88, 0x0b, 0x12, 0x01}; * LSB */ __u8 reg51c[2]; regE5acx[2] = val; reg_w(gspca_dev, 0x00e5, regE5acx, 4); reg_r(gspca_dev, 0x00e8, 1); /* 0x00 */ reg51c[0] = 0x77; reg51c[1] = sat; reg_w(gspca_dev, 0x0051, reg51c, 2); reg_w(gspca_dev, 0x0010, reg10, 2); reg_w_val(gspca_dev, 0x0070, reg70); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *)gspca_dev; gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val, sd->sat->cur.val); break; case V4L2_CID_CONTRAST: setcontrast(gspca_dev, ctrl->val, sd->sat->cur.val); break; case V4L2_CID_SATURATION: setbrightness(gspca_dev, sd->brightness->cur.val, ctrl->val); setcontrast(gspca_dev, sd->contrast->cur.val, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *)gspca_dev; struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 3); sd->brightness = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 0xd4); sd->contrast = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0x0a, 0x1f, 1, 0x0c); sd->sat = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SATURATION, 0, 7, 1, 3); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stop0 = sd_stop0, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x0572, 0x0041)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int 
sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
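/*
 * Illustrative sketch, not part of the driver above: sd_pkt_scan() splits the
 * isochronous stream on the JPEG SOI marker (0xff 0xd8) and prepends the
 * pre-built JPEG header to each new frame.  The stand-alone user-space code
 * below mimics only that frame-splitting decision on an in-memory packet; the
 * function and buffer names are hypothetical and exist for illustration only.
 */
#include <stddef.h>
#include <stdio.h>

/* Return 1 when the packet starts a new JPEG frame, 0 when it continues one. */
static int packet_starts_frame(const unsigned char *data, size_t len)
{
	return len >= 2 && data[0] == 0xff && data[1] == 0xd8;
}

static void scan_packet(const unsigned char *data, size_t len)
{
	if (packet_starts_frame(data, len)) {
		/* the driver closes the previous frame and emits its JPEG header here */
		data += 2;	/* the SOI marker itself is skipped, as in sd_pkt_scan() */
		len -= 2;
		printf("new frame: %zu payload bytes after the SOI marker\n", len);
	} else {
		printf("frame continuation: %zu bytes\n", len);
	}
	(void)data;	/* the driver would now append data/len to the current frame */
}

int main(void)
{
	static const unsigned char soi_pkt[] = { 0xff, 0xd8, 0xff, 0xe0, 0x00 };
	static const unsigned char mid_pkt[] = { 0x12, 0x34, 0x56 };

	scan_packet(soi_pkt, sizeof(soi_pkt));
	scan_packet(mid_pkt, sizeof(mid_pkt));
	return 0;
}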
// SPDX-License-Identifier: GPL-2.0
/*
 * Greybus "AP" USB driver for "ES2" controller chips
 *
 * Copyright 2014-2015 Google Inc.
 * Copyright 2014-2015 Linaro Ltd.
 */

#include <linux/kthread.h>
#include <linux/sizes.h>
#include <linux/usb.h>
#include <linux/kfifo.h>
#include <linux/debugfs.h>
#include <linux/list.h>
#include <linux/greybus.h>
#include <linux/unaligned.h>

#include "arpc.h"
#include "greybus_trace.h"

/* Default timeout for USB vendor requests.
*/ #define ES2_USB_CTRL_TIMEOUT 500 /* Default timeout for ARPC CPort requests */ #define ES2_ARPC_CPORT_TIMEOUT 500 /* Fixed CPort numbers */ #define ES2_CPORT_CDSI0 16 #define ES2_CPORT_CDSI1 17 /* Memory sizes for the buffers sent to/from the ES2 controller */ #define ES2_GBUF_MSG_SIZE_MAX 2048 /* Memory sizes for the ARPC buffers */ #define ARPC_OUT_SIZE_MAX U16_MAX #define ARPC_IN_SIZE_MAX 128 static const struct usb_device_id id_table[] = { { USB_DEVICE(0x18d1, 0x1eaf) }, { }, }; MODULE_DEVICE_TABLE(usb, id_table); #define APB1_LOG_SIZE SZ_16K /* * Number of CPort IN urbs in flight at any point in time. * Adjust if we are having stalls in the USB buffer due to not enough urbs in * flight. */ #define NUM_CPORT_IN_URB 4 /* Number of CPort OUT urbs in flight at any point in time. * Adjust if we get messages saying we are out of urbs in the system log. */ #define NUM_CPORT_OUT_URB 8 /* * Number of ARPC in urbs in flight at any point in time. */ #define NUM_ARPC_IN_URB 2 /* * @endpoint: bulk in endpoint for CPort data * @urb: array of urbs for the CPort in messages * @buffer: array of buffers for the @cport_in_urb urbs */ struct es2_cport_in { __u8 endpoint; struct urb *urb[NUM_CPORT_IN_URB]; u8 *buffer[NUM_CPORT_IN_URB]; }; /** * struct es2_ap_dev - ES2 USB Bridge to AP structure * @usb_dev: pointer to the USB device we are. * @usb_intf: pointer to the USB interface we are bound to. * @hd: pointer to our gb_host_device structure * * @cport_in: endpoint, urbs and buffer for cport in messages * @cport_out_endpoint: endpoint for cport out messages * @cport_out_urb: array of urbs for the CPort out messages * @cport_out_urb_busy: array of flags to see if the @cport_out_urb is busy or * not. * @cport_out_urb_cancelled: array of flags indicating whether the * corresponding @cport_out_urb is being cancelled * @cport_out_urb_lock: locks the @cport_out_urb_busy "list" * @cdsi1_in_use: true if cport CDSI1 is in use * @apb_log_task: task pointer for logging thread * @apb_log_dentry: file system entry for the log file interface * @apb_log_enable_dentry: file system entry for enabling logging * @apb_log_fifo: kernel FIFO to carry logged data * @arpc_urb: array of urbs for the ARPC in messages * @arpc_buffer: array of buffers for the @arpc_urb urbs * @arpc_endpoint_in: bulk in endpoint for APBridgeA RPC * @arpc_id_cycle: gives an unique id to ARPC * @arpc_lock: locks ARPC list * @arpcs: list of in progress ARPCs */ struct es2_ap_dev { struct usb_device *usb_dev; struct usb_interface *usb_intf; struct gb_host_device *hd; struct es2_cport_in cport_in; __u8 cport_out_endpoint; struct urb *cport_out_urb[NUM_CPORT_OUT_URB]; bool cport_out_urb_busy[NUM_CPORT_OUT_URB]; bool cport_out_urb_cancelled[NUM_CPORT_OUT_URB]; spinlock_t cport_out_urb_lock; bool cdsi1_in_use; struct task_struct *apb_log_task; struct dentry *apb_log_dentry; struct dentry *apb_log_enable_dentry; DECLARE_KFIFO(apb_log_fifo, char, APB1_LOG_SIZE); __u8 arpc_endpoint_in; struct urb *arpc_urb[NUM_ARPC_IN_URB]; u8 *arpc_buffer[NUM_ARPC_IN_URB]; int arpc_id_cycle; spinlock_t arpc_lock; struct list_head arpcs; }; struct arpc { struct list_head list; struct arpc_request_message *req; struct arpc_response_message *resp; struct completion response_received; bool active; }; static inline struct es2_ap_dev *hd_to_es2(struct gb_host_device *hd) { return (struct es2_ap_dev *)&hd->hd_priv; } static void cport_out_callback(struct urb *urb); static void usb_log_enable(struct es2_ap_dev *es2); static void usb_log_disable(struct es2_ap_dev *es2); static int 
arpc_sync(struct es2_ap_dev *es2, u8 type, void *payload, size_t size, int *result, unsigned int timeout); static int output_sync(struct es2_ap_dev *es2, void *req, u16 size, u8 cmd) { struct usb_device *udev = es2->usb_dev; u8 *data; int retval; data = kmemdup(req, size, GFP_KERNEL); if (!data) return -ENOMEM; retval = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), cmd, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 0, 0, data, size, ES2_USB_CTRL_TIMEOUT); if (retval < 0) dev_err(&udev->dev, "%s: return error %d\n", __func__, retval); else retval = 0; kfree(data); return retval; } static void ap_urb_complete(struct urb *urb) { struct usb_ctrlrequest *dr = urb->context; kfree(dr); usb_free_urb(urb); } static int output_async(struct es2_ap_dev *es2, void *req, u16 size, u8 cmd) { struct usb_device *udev = es2->usb_dev; struct urb *urb; struct usb_ctrlrequest *dr; u8 *buf; int retval; urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) return -ENOMEM; dr = kmalloc(sizeof(*dr) + size, GFP_ATOMIC); if (!dr) { usb_free_urb(urb); return -ENOMEM; } buf = (u8 *)dr + sizeof(*dr); memcpy(buf, req, size); dr->bRequest = cmd; dr->bRequestType = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE; dr->wValue = 0; dr->wIndex = 0; dr->wLength = cpu_to_le16(size); usb_fill_control_urb(urb, udev, usb_sndctrlpipe(udev, 0), (unsigned char *)dr, buf, size, ap_urb_complete, dr); retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) { usb_free_urb(urb); kfree(dr); } return retval; } static int output(struct gb_host_device *hd, void *req, u16 size, u8 cmd, bool async) { struct es2_ap_dev *es2 = hd_to_es2(hd); if (async) return output_async(es2, req, size, cmd); return output_sync(es2, req, size, cmd); } static int es2_cport_in_enable(struct es2_ap_dev *es2, struct es2_cport_in *cport_in) { struct urb *urb; int ret; int i; for (i = 0; i < NUM_CPORT_IN_URB; ++i) { urb = cport_in->urb[i]; ret = usb_submit_urb(urb, GFP_KERNEL); if (ret) { dev_err(&es2->usb_dev->dev, "failed to submit in-urb: %d\n", ret); goto err_kill_urbs; } } return 0; err_kill_urbs: for (--i; i >= 0; --i) { urb = cport_in->urb[i]; usb_kill_urb(urb); } return ret; } static void es2_cport_in_disable(struct es2_ap_dev *es2, struct es2_cport_in *cport_in) { struct urb *urb; int i; for (i = 0; i < NUM_CPORT_IN_URB; ++i) { urb = cport_in->urb[i]; usb_kill_urb(urb); } } static int es2_arpc_in_enable(struct es2_ap_dev *es2) { struct urb *urb; int ret; int i; for (i = 0; i < NUM_ARPC_IN_URB; ++i) { urb = es2->arpc_urb[i]; ret = usb_submit_urb(urb, GFP_KERNEL); if (ret) { dev_err(&es2->usb_dev->dev, "failed to submit arpc in-urb: %d\n", ret); goto err_kill_urbs; } } return 0; err_kill_urbs: for (--i; i >= 0; --i) { urb = es2->arpc_urb[i]; usb_kill_urb(urb); } return ret; } static void es2_arpc_in_disable(struct es2_ap_dev *es2) { struct urb *urb; int i; for (i = 0; i < NUM_ARPC_IN_URB; ++i) { urb = es2->arpc_urb[i]; usb_kill_urb(urb); } } static struct urb *next_free_urb(struct es2_ap_dev *es2, gfp_t gfp_mask) { struct urb *urb = NULL; unsigned long flags; int i; spin_lock_irqsave(&es2->cport_out_urb_lock, flags); /* Look in our pool of allocated urbs first, as that's the "fastest" */ for (i = 0; i < NUM_CPORT_OUT_URB; ++i) { if (!es2->cport_out_urb_busy[i] && !es2->cport_out_urb_cancelled[i]) { es2->cport_out_urb_busy[i] = true; urb = es2->cport_out_urb[i]; break; } } spin_unlock_irqrestore(&es2->cport_out_urb_lock, flags); if (urb) return urb; /* * Crap, pool is empty, complain to the syslog and go allocate one * dynamically as we have to succeed. 
*/ dev_dbg(&es2->usb_dev->dev, "No free CPort OUT urbs, having to dynamically allocate one!\n"); return usb_alloc_urb(0, gfp_mask); } static void free_urb(struct es2_ap_dev *es2, struct urb *urb) { unsigned long flags; int i; /* * See if this was an urb in our pool, if so mark it "free", otherwise * we need to free it ourselves. */ spin_lock_irqsave(&es2->cport_out_urb_lock, flags); for (i = 0; i < NUM_CPORT_OUT_URB; ++i) { if (urb == es2->cport_out_urb[i]) { es2->cport_out_urb_busy[i] = false; urb = NULL; break; } } spin_unlock_irqrestore(&es2->cport_out_urb_lock, flags); /* If urb is not NULL, then we need to free this urb */ usb_free_urb(urb); } /* * We (ab)use the operation-message header pad bytes to transfer the * cport id in order to minimise overhead. */ static void gb_message_cport_pack(struct gb_operation_msg_hdr *header, u16 cport_id) { header->pad[0] = cport_id; } /* Clear the pad bytes used for the CPort id */ static void gb_message_cport_clear(struct gb_operation_msg_hdr *header) { header->pad[0] = 0; } /* Extract the CPort id packed into the header, and clear it */ static u16 gb_message_cport_unpack(struct gb_operation_msg_hdr *header) { u16 cport_id = header->pad[0]; gb_message_cport_clear(header); return cport_id; } /* * Returns zero if the message was successfully queued, or a negative errno * otherwise. */ static int message_send(struct gb_host_device *hd, u16 cport_id, struct gb_message *message, gfp_t gfp_mask) { struct es2_ap_dev *es2 = hd_to_es2(hd); struct usb_device *udev = es2->usb_dev; size_t buffer_size; int retval; struct urb *urb; unsigned long flags; /* * The data actually transferred will include an indication * of where the data should be sent. Do one last check of * the target CPort id before filling it in. */ if (!cport_id_valid(hd, cport_id)) { dev_err(&udev->dev, "invalid cport %u\n", cport_id); return -EINVAL; } /* Find a free urb */ urb = next_free_urb(es2, gfp_mask); if (!urb) return -ENOMEM; spin_lock_irqsave(&es2->cport_out_urb_lock, flags); message->hcpriv = urb; spin_unlock_irqrestore(&es2->cport_out_urb_lock, flags); /* Pack the cport id into the message header */ gb_message_cport_pack(message->header, cport_id); buffer_size = sizeof(*message->header) + message->payload_size; usb_fill_bulk_urb(urb, udev, usb_sndbulkpipe(udev, es2->cport_out_endpoint), message->buffer, buffer_size, cport_out_callback, message); urb->transfer_flags |= URB_ZERO_PACKET; trace_gb_message_submit(message); retval = usb_submit_urb(urb, gfp_mask); if (retval) { dev_err(&udev->dev, "failed to submit out-urb: %d\n", retval); spin_lock_irqsave(&es2->cport_out_urb_lock, flags); message->hcpriv = NULL; spin_unlock_irqrestore(&es2->cport_out_urb_lock, flags); free_urb(es2, urb); gb_message_cport_clear(message->header); return retval; } return 0; } /* * Can not be called in atomic context. */ static void message_cancel(struct gb_message *message) { struct gb_host_device *hd = message->operation->connection->hd; struct es2_ap_dev *es2 = hd_to_es2(hd); struct urb *urb; int i; might_sleep(); spin_lock_irq(&es2->cport_out_urb_lock); urb = message->hcpriv; /* Prevent dynamically allocated urb from being deallocated. */ usb_get_urb(urb); /* Prevent pre-allocated urb from being reused. 
*/ for (i = 0; i < NUM_CPORT_OUT_URB; ++i) { if (urb == es2->cport_out_urb[i]) { es2->cport_out_urb_cancelled[i] = true; break; } } spin_unlock_irq(&es2->cport_out_urb_lock); usb_kill_urb(urb); if (i < NUM_CPORT_OUT_URB) { spin_lock_irq(&es2->cport_out_urb_lock); es2->cport_out_urb_cancelled[i] = false; spin_unlock_irq(&es2->cport_out_urb_lock); } usb_free_urb(urb); } static int es2_cport_allocate(struct gb_host_device *hd, int cport_id, unsigned long flags) { struct es2_ap_dev *es2 = hd_to_es2(hd); struct ida *id_map = &hd->cport_id_map; int ida_start, ida_end; switch (cport_id) { case ES2_CPORT_CDSI0: case ES2_CPORT_CDSI1: dev_err(&hd->dev, "cport %d not available\n", cport_id); return -EBUSY; } if (flags & GB_CONNECTION_FLAG_OFFLOADED && flags & GB_CONNECTION_FLAG_CDSI1) { if (es2->cdsi1_in_use) { dev_err(&hd->dev, "CDSI1 already in use\n"); return -EBUSY; } es2->cdsi1_in_use = true; return ES2_CPORT_CDSI1; } if (cport_id < 0) { ida_start = 0; ida_end = hd->num_cports - 1; } else if (cport_id < hd->num_cports) { ida_start = cport_id; ida_end = cport_id; } else { dev_err(&hd->dev, "cport %d not available\n", cport_id); return -EINVAL; } return ida_alloc_range(id_map, ida_start, ida_end, GFP_KERNEL); } static void es2_cport_release(struct gb_host_device *hd, u16 cport_id) { struct es2_ap_dev *es2 = hd_to_es2(hd); switch (cport_id) { case ES2_CPORT_CDSI1: es2->cdsi1_in_use = false; return; } ida_free(&hd->cport_id_map, cport_id); } static int cport_enable(struct gb_host_device *hd, u16 cport_id, unsigned long flags) { struct es2_ap_dev *es2 = hd_to_es2(hd); struct usb_device *udev = es2->usb_dev; struct gb_apb_request_cport_flags *req; u32 connection_flags; int ret; req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; connection_flags = 0; if (flags & GB_CONNECTION_FLAG_CONTROL) connection_flags |= GB_APB_CPORT_FLAG_CONTROL; if (flags & GB_CONNECTION_FLAG_HIGH_PRIO) connection_flags |= GB_APB_CPORT_FLAG_HIGH_PRIO; req->flags = cpu_to_le32(connection_flags); dev_dbg(&hd->dev, "%s - cport = %u, flags = %02x\n", __func__, cport_id, connection_flags); ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), GB_APB_REQUEST_CPORT_FLAGS, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, cport_id, 0, req, sizeof(*req), ES2_USB_CTRL_TIMEOUT); if (ret < 0) { dev_err(&udev->dev, "failed to set cport flags for port %d\n", cport_id); goto out; } ret = 0; out: kfree(req); return ret; } static int es2_cport_connected(struct gb_host_device *hd, u16 cport_id) { struct es2_ap_dev *es2 = hd_to_es2(hd); struct device *dev = &es2->usb_dev->dev; struct arpc_cport_connected_req req; int ret; req.cport_id = cpu_to_le16(cport_id); ret = arpc_sync(es2, ARPC_TYPE_CPORT_CONNECTED, &req, sizeof(req), NULL, ES2_ARPC_CPORT_TIMEOUT); if (ret) { dev_err(dev, "failed to set connected state for cport %u: %d\n", cport_id, ret); return ret; } return 0; } static int es2_cport_flush(struct gb_host_device *hd, u16 cport_id) { struct es2_ap_dev *es2 = hd_to_es2(hd); struct device *dev = &es2->usb_dev->dev; struct arpc_cport_flush_req req; int ret; req.cport_id = cpu_to_le16(cport_id); ret = arpc_sync(es2, ARPC_TYPE_CPORT_FLUSH, &req, sizeof(req), NULL, ES2_ARPC_CPORT_TIMEOUT); if (ret) { dev_err(dev, "failed to flush cport %u: %d\n", cport_id, ret); return ret; } return 0; } static int es2_cport_shutdown(struct gb_host_device *hd, u16 cport_id, u8 phase, unsigned int timeout) { struct es2_ap_dev *es2 = hd_to_es2(hd); struct device *dev = &es2->usb_dev->dev; struct arpc_cport_shutdown_req req; int result; int ret; if 
(timeout > U16_MAX) return -EINVAL; req.cport_id = cpu_to_le16(cport_id); req.timeout = cpu_to_le16(timeout); req.phase = phase; ret = arpc_sync(es2, ARPC_TYPE_CPORT_SHUTDOWN, &req, sizeof(req), &result, ES2_ARPC_CPORT_TIMEOUT + timeout); if (ret) { dev_err(dev, "failed to send shutdown over cport %u: %d (%d)\n", cport_id, ret, result); return ret; } return 0; } static int es2_cport_quiesce(struct gb_host_device *hd, u16 cport_id, size_t peer_space, unsigned int timeout) { struct es2_ap_dev *es2 = hd_to_es2(hd); struct device *dev = &es2->usb_dev->dev; struct arpc_cport_quiesce_req req; int result; int ret; if (peer_space > U16_MAX) return -EINVAL; if (timeout > U16_MAX) return -EINVAL; req.cport_id = cpu_to_le16(cport_id); req.peer_space = cpu_to_le16(peer_space); req.timeout = cpu_to_le16(timeout); ret = arpc_sync(es2, ARPC_TYPE_CPORT_QUIESCE, &req, sizeof(req), &result, ES2_ARPC_CPORT_TIMEOUT + timeout); if (ret) { dev_err(dev, "failed to quiesce cport %u: %d (%d)\n", cport_id, ret, result); return ret; } return 0; } static int es2_cport_clear(struct gb_host_device *hd, u16 cport_id) { struct es2_ap_dev *es2 = hd_to_es2(hd); struct device *dev = &es2->usb_dev->dev; struct arpc_cport_clear_req req; int ret; req.cport_id = cpu_to_le16(cport_id); ret = arpc_sync(es2, ARPC_TYPE_CPORT_CLEAR, &req, sizeof(req), NULL, ES2_ARPC_CPORT_TIMEOUT); if (ret) { dev_err(dev, "failed to clear cport %u: %d\n", cport_id, ret); return ret; } return 0; } static int latency_tag_enable(struct gb_host_device *hd, u16 cport_id) { int retval; struct es2_ap_dev *es2 = hd_to_es2(hd); struct usb_device *udev = es2->usb_dev; retval = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), GB_APB_REQUEST_LATENCY_TAG_EN, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, cport_id, 0, NULL, 0, ES2_USB_CTRL_TIMEOUT); if (retval < 0) dev_err(&udev->dev, "Cannot enable latency tag for cport %d\n", cport_id); return retval; } static int latency_tag_disable(struct gb_host_device *hd, u16 cport_id) { int retval; struct es2_ap_dev *es2 = hd_to_es2(hd); struct usb_device *udev = es2->usb_dev; retval = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), GB_APB_REQUEST_LATENCY_TAG_DIS, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, cport_id, 0, NULL, 0, ES2_USB_CTRL_TIMEOUT); if (retval < 0) dev_err(&udev->dev, "Cannot disable latency tag for cport %d\n", cport_id); return retval; } static struct gb_hd_driver es2_driver = { .hd_priv_size = sizeof(struct es2_ap_dev), .message_send = message_send, .message_cancel = message_cancel, .cport_allocate = es2_cport_allocate, .cport_release = es2_cport_release, .cport_enable = cport_enable, .cport_connected = es2_cport_connected, .cport_flush = es2_cport_flush, .cport_shutdown = es2_cport_shutdown, .cport_quiesce = es2_cport_quiesce, .cport_clear = es2_cport_clear, .latency_tag_enable = latency_tag_enable, .latency_tag_disable = latency_tag_disable, .output = output, }; /* Common function to report consistent warnings based on URB status */ static int check_urb_status(struct urb *urb) { struct device *dev = &urb->dev->dev; int status = urb->status; switch (status) { case 0: return 0; case -EOVERFLOW: dev_err(dev, "%s: overflow actual length is %d\n", __func__, urb->actual_length); fallthrough; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -EILSEQ: case -EPROTO: /* device is gone, stop sending */ return status; } dev_err(dev, "%s: unknown status %d\n", __func__, status); return -EAGAIN; } static void es2_destroy(struct es2_ap_dev *es2) { struct usb_device *udev; struct urb *urb; int i; 
debugfs_remove(es2->apb_log_enable_dentry); usb_log_disable(es2); /* Tear down everything! */ for (i = 0; i < NUM_CPORT_OUT_URB; ++i) { urb = es2->cport_out_urb[i]; usb_kill_urb(urb); usb_free_urb(urb); es2->cport_out_urb[i] = NULL; es2->cport_out_urb_busy[i] = false; /* just to be anal */ } for (i = 0; i < NUM_ARPC_IN_URB; ++i) { usb_free_urb(es2->arpc_urb[i]); kfree(es2->arpc_buffer[i]); es2->arpc_buffer[i] = NULL; } for (i = 0; i < NUM_CPORT_IN_URB; ++i) { usb_free_urb(es2->cport_in.urb[i]); kfree(es2->cport_in.buffer[i]); es2->cport_in.buffer[i] = NULL; } /* release reserved CDSI0 and CDSI1 cports */ gb_hd_cport_release_reserved(es2->hd, ES2_CPORT_CDSI1); gb_hd_cport_release_reserved(es2->hd, ES2_CPORT_CDSI0); udev = es2->usb_dev; gb_hd_put(es2->hd); usb_put_dev(udev); } static void cport_in_callback(struct urb *urb) { struct gb_host_device *hd = urb->context; struct device *dev = &urb->dev->dev; struct gb_operation_msg_hdr *header; int status = check_urb_status(urb); int retval; u16 cport_id; if (status) { if ((status == -EAGAIN) || (status == -EPROTO)) goto exit; /* The urb is being unlinked */ if (status == -ENOENT || status == -ESHUTDOWN) return; dev_err(dev, "urb cport in error %d (dropped)\n", status); return; } if (urb->actual_length < sizeof(*header)) { dev_err(dev, "short message received\n"); goto exit; } /* Extract the CPort id, which is packed in the message header */ header = urb->transfer_buffer; cport_id = gb_message_cport_unpack(header); if (cport_id_valid(hd, cport_id)) { greybus_data_rcvd(hd, cport_id, urb->transfer_buffer, urb->actual_length); } else { dev_err(dev, "invalid cport id %u received\n", cport_id); } exit: /* put our urb back in the request pool */ retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(dev, "failed to resubmit in-urb: %d\n", retval); } static void cport_out_callback(struct urb *urb) { struct gb_message *message = urb->context; struct gb_host_device *hd = message->operation->connection->hd; struct es2_ap_dev *es2 = hd_to_es2(hd); int status = check_urb_status(urb); unsigned long flags; gb_message_cport_clear(message->header); spin_lock_irqsave(&es2->cport_out_urb_lock, flags); message->hcpriv = NULL; spin_unlock_irqrestore(&es2->cport_out_urb_lock, flags); /* * Tell the submitter that the message send (attempt) is * complete, and report the status. 
*/ greybus_message_sent(hd, message, status); free_urb(es2, urb); } static struct arpc *arpc_alloc(void *payload, u16 size, u8 type) { struct arpc *rpc; if (size + sizeof(*rpc->req) > ARPC_OUT_SIZE_MAX) return NULL; rpc = kzalloc(sizeof(*rpc), GFP_KERNEL); if (!rpc) return NULL; INIT_LIST_HEAD(&rpc->list); rpc->req = kzalloc(sizeof(*rpc->req) + size, GFP_KERNEL); if (!rpc->req) goto err_free_rpc; rpc->resp = kzalloc(sizeof(*rpc->resp), GFP_KERNEL); if (!rpc->resp) goto err_free_req; rpc->req->type = type; rpc->req->size = cpu_to_le16(sizeof(*rpc->req) + size); memcpy(rpc->req->data, payload, size); init_completion(&rpc->response_received); return rpc; err_free_req: kfree(rpc->req); err_free_rpc: kfree(rpc); return NULL; } static void arpc_free(struct arpc *rpc) { kfree(rpc->req); kfree(rpc->resp); kfree(rpc); } static struct arpc *arpc_find(struct es2_ap_dev *es2, __le16 id) { struct arpc *rpc; list_for_each_entry(rpc, &es2->arpcs, list) { if (rpc->req->id == id) return rpc; } return NULL; } static void arpc_add(struct es2_ap_dev *es2, struct arpc *rpc) { rpc->active = true; rpc->req->id = cpu_to_le16(es2->arpc_id_cycle++); list_add_tail(&rpc->list, &es2->arpcs); } static void arpc_del(struct es2_ap_dev *es2, struct arpc *rpc) { if (rpc->active) { rpc->active = false; list_del(&rpc->list); } } static int arpc_send(struct es2_ap_dev *es2, struct arpc *rpc, int timeout) { struct usb_device *udev = es2->usb_dev; int retval; retval = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), GB_APB_REQUEST_ARPC_RUN, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 0, 0, rpc->req, le16_to_cpu(rpc->req->size), ES2_USB_CTRL_TIMEOUT); if (retval < 0) { dev_err(&udev->dev, "failed to send ARPC request %d: %d\n", rpc->req->type, retval); return retval; } return 0; } static int arpc_sync(struct es2_ap_dev *es2, u8 type, void *payload, size_t size, int *result, unsigned int timeout) { struct arpc *rpc; unsigned long flags; int retval; if (result) *result = 0; rpc = arpc_alloc(payload, size, type); if (!rpc) return -ENOMEM; spin_lock_irqsave(&es2->arpc_lock, flags); arpc_add(es2, rpc); spin_unlock_irqrestore(&es2->arpc_lock, flags); retval = arpc_send(es2, rpc, timeout); if (retval) goto out_arpc_del; retval = wait_for_completion_interruptible_timeout( &rpc->response_received, msecs_to_jiffies(timeout)); if (retval <= 0) { if (!retval) retval = -ETIMEDOUT; goto out_arpc_del; } if (rpc->resp->result) { retval = -EREMOTEIO; if (result) *result = rpc->resp->result; } else { retval = 0; } out_arpc_del: spin_lock_irqsave(&es2->arpc_lock, flags); arpc_del(es2, rpc); spin_unlock_irqrestore(&es2->arpc_lock, flags); arpc_free(rpc); if (retval < 0 && retval != -EREMOTEIO) { dev_err(&es2->usb_dev->dev, "failed to execute ARPC: %d\n", retval); } return retval; } static void arpc_in_callback(struct urb *urb) { struct es2_ap_dev *es2 = urb->context; struct device *dev = &urb->dev->dev; int status = check_urb_status(urb); struct arpc *rpc; struct arpc_response_message *resp; unsigned long flags; int retval; if (status) { if ((status == -EAGAIN) || (status == -EPROTO)) goto exit; /* The urb is being unlinked */ if (status == -ENOENT || status == -ESHUTDOWN) return; dev_err(dev, "arpc in-urb error %d (dropped)\n", status); return; } if (urb->actual_length < sizeof(*resp)) { dev_err(dev, "short aprc response received\n"); goto exit; } resp = urb->transfer_buffer; spin_lock_irqsave(&es2->arpc_lock, flags); rpc = arpc_find(es2, resp->id); if (!rpc) { dev_err(dev, "invalid arpc response id received: %u\n", le16_to_cpu(resp->id)); 
spin_unlock_irqrestore(&es2->arpc_lock, flags); goto exit; } arpc_del(es2, rpc); memcpy(rpc->resp, resp, sizeof(*resp)); complete(&rpc->response_received); spin_unlock_irqrestore(&es2->arpc_lock, flags); exit: /* put our urb back in the request pool */ retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(dev, "failed to resubmit arpc in-urb: %d\n", retval); } #define APB1_LOG_MSG_SIZE 64 static void apb_log_get(struct es2_ap_dev *es2, char *buf) { int retval; do { retval = usb_control_msg(es2->usb_dev, usb_rcvctrlpipe(es2->usb_dev, 0), GB_APB_REQUEST_LOG, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 0x00, 0x00, buf, APB1_LOG_MSG_SIZE, ES2_USB_CTRL_TIMEOUT); if (retval > 0) kfifo_in(&es2->apb_log_fifo, buf, retval); } while (retval > 0); } static int apb_log_poll(void *data) { struct es2_ap_dev *es2 = data; char *buf; buf = kmalloc(APB1_LOG_MSG_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; while (!kthread_should_stop()) { msleep(1000); apb_log_get(es2, buf); } kfree(buf); return 0; } static ssize_t apb_log_read(struct file *f, char __user *buf, size_t count, loff_t *ppos) { struct es2_ap_dev *es2 = file_inode(f)->i_private; ssize_t ret; size_t copied; char *tmp_buf; if (count > APB1_LOG_SIZE) count = APB1_LOG_SIZE; tmp_buf = kmalloc(count, GFP_KERNEL); if (!tmp_buf) return -ENOMEM; copied = kfifo_out(&es2->apb_log_fifo, tmp_buf, count); ret = simple_read_from_buffer(buf, count, ppos, tmp_buf, copied); kfree(tmp_buf); return ret; } static const struct file_operations apb_log_fops = { .read = apb_log_read, }; static void usb_log_enable(struct es2_ap_dev *es2) { if (!IS_ERR_OR_NULL(es2->apb_log_task)) return; /* get log from APB1 */ es2->apb_log_task = kthread_run(apb_log_poll, es2, "apb_log"); if (IS_ERR(es2->apb_log_task)) return; /* XXX We will need to rename this per APB */ es2->apb_log_dentry = debugfs_create_file("apb_log", 0444, gb_debugfs_get(), es2, &apb_log_fops); } static void usb_log_disable(struct es2_ap_dev *es2) { if (IS_ERR_OR_NULL(es2->apb_log_task)) return; debugfs_remove(es2->apb_log_dentry); es2->apb_log_dentry = NULL; kthread_stop(es2->apb_log_task); es2->apb_log_task = NULL; } static ssize_t apb_log_enable_read(struct file *f, char __user *buf, size_t count, loff_t *ppos) { struct es2_ap_dev *es2 = file_inode(f)->i_private; int enable = !IS_ERR_OR_NULL(es2->apb_log_task); char tmp_buf[3]; sprintf(tmp_buf, "%d\n", enable); return simple_read_from_buffer(buf, count, ppos, tmp_buf, 2); } static ssize_t apb_log_enable_write(struct file *f, const char __user *buf, size_t count, loff_t *ppos) { int enable; ssize_t retval; struct es2_ap_dev *es2 = file_inode(f)->i_private; retval = kstrtoint_from_user(buf, count, 10, &enable); if (retval) return retval; if (enable) usb_log_enable(es2); else usb_log_disable(es2); return count; } static const struct file_operations apb_log_enable_fops = { .read = apb_log_enable_read, .write = apb_log_enable_write, }; static int apb_get_cport_count(struct usb_device *udev) { int retval; __le16 *cport_count; cport_count = kzalloc(sizeof(*cport_count), GFP_KERNEL); if (!cport_count) return -ENOMEM; retval = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), GB_APB_REQUEST_CPORT_COUNT, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 0, 0, cport_count, sizeof(*cport_count), ES2_USB_CTRL_TIMEOUT); if (retval != sizeof(*cport_count)) { dev_err(&udev->dev, "Cannot retrieve CPort count: %d\n", retval); if (retval >= 0) retval = -EIO; goto out; } retval = le16_to_cpu(*cport_count); /* We need to fit a CPort ID in one byte of a message header 
*/ if (retval > U8_MAX) { retval = U8_MAX; dev_warn(&udev->dev, "Limiting number of CPorts to U8_MAX\n"); } out: kfree(cport_count); return retval; } /* * The ES2 USB Bridge device has 15 endpoints * 1 Control - usual USB stuff + AP -> APBridgeA messages * 7 Bulk IN - CPort data in * 7 Bulk OUT - CPort data out */ static int ap_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct es2_ap_dev *es2; struct gb_host_device *hd; struct usb_device *udev; struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; __u8 ep_addr; int retval; int i; int num_cports; bool bulk_out_found = false; bool bulk_in_found = false; bool arpc_in_found = false; udev = usb_get_dev(interface_to_usbdev(interface)); num_cports = apb_get_cport_count(udev); if (num_cports < 0) { usb_put_dev(udev); dev_err(&udev->dev, "Cannot retrieve CPort count: %d\n", num_cports); return num_cports; } hd = gb_hd_create(&es2_driver, &udev->dev, ES2_GBUF_MSG_SIZE_MAX, num_cports); if (IS_ERR(hd)) { usb_put_dev(udev); return PTR_ERR(hd); } es2 = hd_to_es2(hd); es2->hd = hd; es2->usb_intf = interface; es2->usb_dev = udev; spin_lock_init(&es2->cport_out_urb_lock); INIT_KFIFO(es2->apb_log_fifo); usb_set_intfdata(interface, es2); /* * Reserve the CDSI0 and CDSI1 CPorts so they won't be allocated * dynamically. */ retval = gb_hd_cport_reserve(hd, ES2_CPORT_CDSI0); if (retval) goto error; retval = gb_hd_cport_reserve(hd, ES2_CPORT_CDSI1); if (retval) goto error; /* find all bulk endpoints */ iface_desc = interface->cur_altsetting; for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; ep_addr = endpoint->bEndpointAddress; if (usb_endpoint_is_bulk_in(endpoint)) { if (!bulk_in_found) { es2->cport_in.endpoint = ep_addr; bulk_in_found = true; } else if (!arpc_in_found) { es2->arpc_endpoint_in = ep_addr; arpc_in_found = true; } else { dev_warn(&udev->dev, "Unused bulk IN endpoint found: 0x%02x\n", ep_addr); } continue; } if (usb_endpoint_is_bulk_out(endpoint)) { if (!bulk_out_found) { es2->cport_out_endpoint = ep_addr; bulk_out_found = true; } else { dev_warn(&udev->dev, "Unused bulk OUT endpoint found: 0x%02x\n", ep_addr); } continue; } dev_warn(&udev->dev, "Unknown endpoint type found, address 0x%02x\n", ep_addr); } if (!bulk_in_found || !arpc_in_found || !bulk_out_found) { dev_err(&udev->dev, "Not enough endpoints found in device, aborting!\n"); retval = -ENODEV; goto error; } /* Allocate buffers for our cport in messages */ for (i = 0; i < NUM_CPORT_IN_URB; ++i) { struct urb *urb; u8 *buffer; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { retval = -ENOMEM; goto error; } es2->cport_in.urb[i] = urb; buffer = kmalloc(ES2_GBUF_MSG_SIZE_MAX, GFP_KERNEL); if (!buffer) { retval = -ENOMEM; goto error; } usb_fill_bulk_urb(urb, udev, usb_rcvbulkpipe(udev, es2->cport_in.endpoint), buffer, ES2_GBUF_MSG_SIZE_MAX, cport_in_callback, hd); es2->cport_in.buffer[i] = buffer; } /* Allocate buffers for ARPC in messages */ for (i = 0; i < NUM_ARPC_IN_URB; ++i) { struct urb *urb; u8 *buffer; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { retval = -ENOMEM; goto error; } es2->arpc_urb[i] = urb; buffer = kmalloc(ARPC_IN_SIZE_MAX, GFP_KERNEL); if (!buffer) { retval = -ENOMEM; goto error; } usb_fill_bulk_urb(urb, udev, usb_rcvbulkpipe(udev, es2->arpc_endpoint_in), buffer, ARPC_IN_SIZE_MAX, arpc_in_callback, es2); es2->arpc_buffer[i] = buffer; } /* Allocate urbs for our CPort OUT messages */ for (i = 0; i < NUM_CPORT_OUT_URB; ++i) { struct urb *urb; urb = usb_alloc_urb(0, 
GFP_KERNEL); if (!urb) { retval = -ENOMEM; goto error; } es2->cport_out_urb[i] = urb; es2->cport_out_urb_busy[i] = false; /* just to be anal */ } /* XXX We will need to rename this per APB */ es2->apb_log_enable_dentry = debugfs_create_file("apb_log_enable", 0644, gb_debugfs_get(), es2, &apb_log_enable_fops); INIT_LIST_HEAD(&es2->arpcs); spin_lock_init(&es2->arpc_lock); retval = es2_arpc_in_enable(es2); if (retval) goto error; retval = gb_hd_add(hd); if (retval) goto err_disable_arpc_in; retval = es2_cport_in_enable(es2, &es2->cport_in); if (retval) goto err_hd_del; return 0; err_hd_del: gb_hd_del(hd); err_disable_arpc_in: es2_arpc_in_disable(es2); error: es2_destroy(es2); return retval; } static void ap_disconnect(struct usb_interface *interface) { struct es2_ap_dev *es2 = usb_get_intfdata(interface); gb_hd_del(es2->hd); es2_cport_in_disable(es2, &es2->cport_in); es2_arpc_in_disable(es2); es2_destroy(es2); } static struct usb_driver es2_ap_driver = { .name = "es2_ap_driver", .probe = ap_probe, .disconnect = ap_disconnect, .id_table = id_table, .soft_unbind = 1, }; module_usb_driver(es2_ap_driver); MODULE_DESCRIPTION("Greybus AP USB driver for ES2 controller chips"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Greg Kroah-Hartman <gregkh@linuxfoundation.org>");
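The probe path above fills es2->cport_out_urb[] and clears the matching cport_out_urb_busy[] flags, but no consumer of that pool appears in this excerpt. The following is a hypothetical sketch (the helper name and exact shape are assumptions, not the driver code shown here) of how a free URB would typically be claimed from the pool under cport_out_urb_lock.

/* Hypothetical helper, sketched from the fields initialised in ap_probe()
 * above: claim the first idle URB from the cport_out pool, or NULL if all
 * NUM_CPORT_OUT_URB entries are currently in flight.
 */
static struct urb *sample_next_free_urb(struct es2_ap_dev *es2)
{
	struct urb *urb = NULL;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&es2->cport_out_urb_lock, flags);
	for (i = 0; i < NUM_CPORT_OUT_URB; ++i) {
		if (!es2->cport_out_urb_busy[i]) {
			es2->cport_out_urb_busy[i] = true;
			urb = es2->cport_out_urb[i];
			break;
		}
	}
	spin_unlock_irqrestore(&es2->cport_out_urb_lock, flags);

	return urb;
}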
1 1 75 76 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 // SPDX-License-Identifier: GPL-2.0-only /* * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org> */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/slab.h> #include <net/ip.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("iptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_FILTER, }; static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ static bool forward __read_mostly = true; module_param(forward, bool, 0000); static int iptable_filter_table_init(struct net *net) { struct ipt_replace *repl; int err; repl = ipt_alloc_initial_table(&packet_filter); if (repl == NULL) return -ENOMEM; /* Entry 1 is the FORWARD hook */ ((struct ipt_standard *)repl->entries)[1].target.verdict = forward ? -NF_ACCEPT - 1 : NF_DROP - 1; err = ipt_register_table(net, &packet_filter, repl, filter_ops); kfree(repl); return err; } static int __net_init iptable_filter_net_init(struct net *net) { if (!forward) return iptable_filter_table_init(net); return 0; } static void __net_exit iptable_filter_net_pre_exit(struct net *net) { ipt_unregister_table_pre_exit(net, "filter"); } static void __net_exit iptable_filter_net_exit(struct net *net) { ipt_unregister_table_exit(net, "filter"); } static struct pernet_operations iptable_filter_net_ops = { .init = iptable_filter_net_init, .pre_exit = iptable_filter_net_pre_exit, .exit = iptable_filter_net_exit, }; static int __init iptable_filter_init(void) { int ret = xt_register_template(&packet_filter, iptable_filter_table_init); if (ret < 0) return ret; filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table); if (IS_ERR(filter_ops)) { xt_unregister_template(&packet_filter); return PTR_ERR(filter_ops); } ret = register_pernet_subsys(&iptable_filter_net_ops); if (ret < 0) { xt_unregister_template(&packet_filter); kfree(filter_ops); return ret; } return 0; } static void __exit iptable_filter_fini(void) { unregister_pernet_subsys(&iptable_filter_net_ops); xt_unregister_template(&packet_filter); kfree(filter_ops); } module_init(iptable_filter_init); module_exit(iptable_filter_fini);
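iptable_filter_table_init() above sets the FORWARD policy with "forward ? -NF_ACCEPT - 1 : NF_DROP - 1", relying on the xtables convention that a standard target stores a built-in verdict v as -v - 1 (non-negative values are jump offsets). A minimal sketch of that encoding, with hypothetical helper names:

/* Sketch only: the standard-target verdict encoding used above.
 * NF_DROP is 0, so NF_DROP - 1 and -NF_DROP - 1 are both -1.
 */
static int sample_encode_builtin(int nf_verdict)
{
	return -nf_verdict - 1;		/* NF_ACCEPT -> -2, NF_DROP -> -1 */
}

static int sample_decode_builtin(int stored)
{
	return -stored - 1;		/* -2 -> NF_ACCEPT, -1 -> NF_DROP */
}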
9 8 1 3 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 // SPDX-License-Identifier: GPL-2.0-only /* * This module is used to copy security markings from packets * to connections, and restore security markings from connections * back to packets. This would normally be performed in conjunction * with the SECMARK target and state match. * * Based somewhat on CONNMARK: * Copyright (C) 2002,2004 MARA Systems AB <https://www.marasystems.com> * by Henrik Nordstrom <hno@marasystems.com> * * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_CONNSECMARK.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_ecache.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Morris <jmorris@redhat.com>"); MODULE_DESCRIPTION("Xtables: target for copying between connection and security mark"); MODULE_ALIAS("ipt_CONNSECMARK"); MODULE_ALIAS("ip6t_CONNSECMARK"); /* * If the packet has a security mark and the connection does not, copy * the security mark from the packet to the connection. */ static void secmark_save(const struct sk_buff *skb) { if (skb->secmark) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; ct = nf_ct_get(skb, &ctinfo); if (ct && !ct->secmark) { ct->secmark = skb->secmark; nf_conntrack_event_cache(IPCT_SECMARK, ct); } } } /* * If packet has no security mark, and the connection does, restore the * security mark from the connection to the packet. 
*/ static void secmark_restore(struct sk_buff *skb) { if (!skb->secmark) { const struct nf_conn *ct; enum ip_conntrack_info ctinfo; ct = nf_ct_get(skb, &ctinfo); if (ct && ct->secmark) skb->secmark = ct->secmark; } } static unsigned int connsecmark_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connsecmark_target_info *info = par->targinfo; switch (info->mode) { case CONNSECMARK_SAVE: secmark_save(skb); break; case CONNSECMARK_RESTORE: secmark_restore(skb); break; default: BUG(); } return XT_CONTINUE; } static int connsecmark_tg_check(const struct xt_tgchk_param *par) { const struct xt_connsecmark_target_info *info = par->targinfo; int ret; if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "security") != 0) { pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n", par->table); return -EINVAL; } switch (info->mode) { case CONNSECMARK_SAVE: case CONNSECMARK_RESTORE: break; default: pr_info_ratelimited("invalid mode: %hu\n", info->mode); return -EINVAL; } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info_ratelimited("cannot load conntrack support for proto=%u\n", par->family); return ret; } static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par) { nf_ct_netns_put(par->net, par->family); } static struct xt_target connsecmark_tg_reg __read_mostly = { .name = "CONNSECMARK", .revision = 0, .family = NFPROTO_UNSPEC, .checkentry = connsecmark_tg_check, .destroy = connsecmark_tg_destroy, .target = connsecmark_tg, .targetsize = sizeof(struct xt_connsecmark_target_info), .me = THIS_MODULE, }; static int __init connsecmark_tg_init(void) { return xt_register_target(&connsecmark_tg_reg); } static void __exit connsecmark_tg_exit(void) { xt_unregister_target(&connsecmark_tg_reg); } module_init(connsecmark_tg_init); module_exit(connsecmark_tg_exit);
10 4 2 4 1 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 // SPDX-License-Identifier: GPL-2.0-or-later /* * IP6 tables REJECT target module * Linux INET6 implementation * * Copyright (C)2003 USAGI/WIDE Project * * Authors: * Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp> * * Copyright (c) 2005-2007 Patrick McHardy <kaber@trash.net> * * Based on net/ipv4/netfilter/ipt_REJECT.c */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/gfp.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/icmpv6.h> #include <linux/netdevice.h> #include <net/icmp.h> #include <net/flow.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_REJECT.h> #include <net/netfilter/ipv6/nf_reject.h> MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>"); MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_LICENSE("GPL"); static unsigned int reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_reject_info *reject = par->targinfo; struct net *net = xt_net(par); switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: nf_send_unreach6(net, skb, ICMPV6_NOROUTE, xt_hooknum(par)); break; case IP6T_ICMP6_ADM_PROHIBITED: nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, xt_hooknum(par)); break; case IP6T_ICMP6_NOT_NEIGHBOUR: nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, xt_hooknum(par)); break; case IP6T_ICMP6_ADDR_UNREACH: nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, xt_hooknum(par)); break; case IP6T_ICMP6_PORT_UNREACH: nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, xt_hooknum(par)); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: nf_send_reset6(net, par->state->sk, skb, xt_hooknum(par)); break; case IP6T_ICMP6_POLICY_FAIL: nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, xt_hooknum(par)); break; case IP6T_ICMP6_REJECT_ROUTE: nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, xt_hooknum(par)); break; } return NF_DROP; } static int reject_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_reject_info *rejinfo = par->targinfo; const struct ip6t_entry *e = par->entryinfo; if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { pr_info_ratelimited("ECHOREPLY is not supported\n"); return -EINVAL; } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ if (!(e->ipv6.flags & IP6T_F_PROTO) || e->ipv6.proto != IPPROTO_TCP || (e->ipv6.invflags & XT_INV_PROTO)) { pr_info_ratelimited("TCP_RESET illegal for non-tcp\n"); return -EINVAL; } } return 0; } static struct xt_target reject_tg6_reg __read_mostly = { .name = "REJECT", .family = NFPROTO_IPV6, .target = reject_tg6, .targetsize = sizeof(struct ip6t_reject_info), .table = "filter", .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT), .checkentry = reject_tg6_check, .me = THIS_MODULE }; static int __init reject_tg6_init(void) { return xt_register_target(&reject_tg6_reg); } static void __exit reject_tg6_exit(void) { xt_unregister_target(&reject_tg6_reg); } module_init(reject_tg6_init); module_exit(reject_tg6_exit);
242 2477 3841 1922 1918 139 121 3 3845 3848 2452 1867 1994 357 2577 236 238 236 238 238 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NETFILTER_H #define __LINUX_NETFILTER_H #include <linux/init.h> #include <linux/skbuff.h> #include <linux/net.h> #include <linux/if.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/wait.h> #include <linux/list.h> #include <linux/static_key.h> #include <linux/module.h> #include <linux/netfilter_defs.h> #include <linux/netdevice.h> #include <linux/sockptr.h> #include <net/net_namespace.h> static inline int NF_DROP_GETERR(int verdict) { return -(verdict >> NF_VERDICT_QBITS); } static __always_inline int NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err) { BUILD_BUG_ON(err > 0xffff); kfree_skb_reason(skb, reason); return ((err << 16) | NF_STOLEN); } static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *a2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ul1 = (const unsigned long *)a1; const unsigned long *ul2 = (const unsigned long *)a2; return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL; #else return a1->all[0] == a2->all[0] && a1->all[1] == a2->all[1] && a1->all[2] == a2->all[2] && a1->all[3] == a2->all[3]; #endif } static inline void nf_inet_addr_mask(const union nf_inet_addr *a1, union nf_inet_addr *result, const union nf_inet_addr *mask) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ua = (const unsigned long *)a1; unsigned long *ur = (unsigned long 
*)result; const unsigned long *um = (const unsigned long *)mask; ur[0] = ua[0] & um[0]; ur[1] = ua[1] & um[1]; #else result->all[0] = a1->all[0] & mask->all[0]; result->all[1] = a1->all[1] & mask->all[1]; result->all[2] = a1->all[2] & mask->all[2]; result->all[3] = a1->all[3] & mask->all[3]; #endif } int netfilter_init(void); struct sk_buff; struct nf_hook_ops; struct sock; struct nf_hook_state { u8 hook; u8 pf; struct net_device *in; struct net_device *out; struct sock *sk; struct net *net; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; typedef unsigned int nf_hookfn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); enum nf_hook_ops_type { NF_HOOK_OP_UNDEFINED, NF_HOOK_OP_NF_TABLES, NF_HOOK_OP_BPF, }; struct nf_hook_ops { /* User fills in from here down. */ nf_hookfn *hook; struct net_device *dev; void *priv; u8 pf; enum nf_hook_ops_type hook_ops_type:8; unsigned int hooknum; /* Hooks are ordered in ascending priority. */ int priority; }; struct nf_hook_entry { nf_hookfn *hook; void *priv; }; struct nf_hook_entries_rcu_head { struct rcu_head head; void *allocation; }; struct nf_hook_entries { u16 num_hook_entries; /* padding */ struct nf_hook_entry hooks[]; /* trailer: pointers to original orig_ops of each hook, * followed by rcu_head and scratch space used for freeing * the structure via call_rcu. * * This is not part of struct nf_hook_entry since its only * needed in slow path (hook register/unregister): * const struct nf_hook_ops *orig_ops[] * * For the same reason, we store this at end -- its * only needed when a hook is deleted, not during * packet path processing: * struct nf_hook_entries_rcu_head head */ }; #ifdef CONFIG_NETFILTER static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e) { unsigned int n = e->num_hook_entries; const void *hook_end; hook_end = &e->hooks[n]; /* this is *past* ->hooks[]! */ return (struct nf_hook_ops **)hook_end; } static inline int nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, struct nf_hook_state *state) { return entry->hook(entry->priv, skb, state); } static inline void nf_hook_state_init(struct nf_hook_state *p, unsigned int hook, u_int8_t pf, struct net_device *indev, struct net_device *outdev, struct sock *sk, struct net *net, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { p->hook = hook; p->pf = pf; p->in = indev; p->out = outdev; p->sk = sk; p->net = net; p->okfn = okfn; } struct nf_sockopt_ops { struct list_head list; u_int8_t pf; /* Non-inclusive ranges: use 0/0/NULL to never get called. */ int set_optmin; int set_optmax; int (*set)(struct sock *sk, int optval, sockptr_t arg, unsigned int len); int get_optmin; int get_optmax; int (*get)(struct sock *sk, int optval, void __user *user, int *len); /* Use the module struct to lock set/get code in place */ struct module *owner; }; /* Function to register/unregister hook points. */ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *ops); void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *ops); int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int n); void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int n); /* Functions to register get/setsockopt ranges (non-inclusive). You need to check permissions yourself! 
*/ int nf_register_sockopt(struct nf_sockopt_ops *reg); void nf_unregister_sockopt(struct nf_sockopt_ops *reg); #ifdef CONFIG_JUMP_LABEL extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #endif int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, const struct nf_hook_entries *e, unsigned int i); void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state, const struct nf_hook_entries *e); /** * nf_hook - call a netfilter hook * * Returns 1 if the hook has allowed the packet to pass. The function * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. */ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct nf_hook_entries *hook_head = NULL; int ret = 1; #ifdef CONFIG_JUMP_LABEL if (__builtin_constant_p(pf) && __builtin_constant_p(hook) && !static_key_false(&nf_hooks_needed[pf][hook])) return 1; #endif rcu_read_lock(); switch (pf) { case NFPROTO_IPV4: hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]); break; case NFPROTO_IPV6: hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]); break; case NFPROTO_ARP: #ifdef CONFIG_NETFILTER_FAMILY_ARP if (WARN_ON_ONCE(hook >= ARRAY_SIZE(net->nf.hooks_arp))) break; hook_head = rcu_dereference(net->nf.hooks_arp[hook]); #endif break; case NFPROTO_BRIDGE: #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE hook_head = rcu_dereference(net->nf.hooks_bridge[hook]); #endif break; default: WARN_ON_ONCE(1); break; } if (hook_head) { struct nf_hook_state state; nf_hook_state_init(&state, hook, pf, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state, hook_head, 0); } rcu_read_unlock(); return ret; } /* Activate hook; either okfn or kfree_skb called, unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with the consequences). Returns -ERRNO if packet dropped. Zero means queued, stolen or accepted. */ /* RR: > I don't want nf_hook to return anything because people might forget > about async and trust the return value to mean "packet was ok". 
AK: Just document it clearly, then you can expect some sense from kernel coders :) */ static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *), bool cond) { int ret; if (!cond || ((ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn)) == 1)) ret = okfn(net, sk, skb); return ret; } static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn); if (ret == 1) ret = okfn(net, sk, skb); return ret; } static inline void NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct list_head *head, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct nf_hook_entries *hook_head = NULL; #ifdef CONFIG_JUMP_LABEL if (__builtin_constant_p(pf) && __builtin_constant_p(hook) && !static_key_false(&nf_hooks_needed[pf][hook])) return; #endif rcu_read_lock(); switch (pf) { case NFPROTO_IPV4: hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]); break; case NFPROTO_IPV6: hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]); break; default: WARN_ON_ONCE(1); break; } if (hook_head) { struct nf_hook_state state; nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn); nf_hook_slow_list(head, &state, hook_head); } rcu_read_unlock(); } /* Call setsockopt() */ int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, sockptr_t opt, unsigned int len); int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); struct flowi; struct nf_queue_entry; __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol, unsigned short family); __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, unsigned int len, u_int8_t protocol, unsigned short family); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family); #include <net/flow.h> struct nf_conn; enum nf_nat_manip_type; struct nlattr; struct nf_nat_hook { int (*parse_nat_setup)(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr); void (*decode_session)(struct sk_buff *skb, struct flowi *fl); void (*remove_nat_bysrc)(struct nf_conn *ct); }; extern const struct nf_nat_hook __rcu *nf_nat_hook; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { #if IS_ENABLED(CONFIG_NF_NAT) const struct nf_nat_hook *nat_hook; rcu_read_lock(); nat_hook = rcu_dereference(nf_nat_hook); if (nat_hook && nat_hook->decode_session) nat_hook->decode_session(skb, fl); rcu_read_unlock(); #endif } #else /* !CONFIG_NETFILTER */ static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *), bool cond) { return okfn(net, sk, skb); } static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return okfn(net, sk, skb); } static inline void NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, 
struct sock *sk, struct list_head *head, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { /* nothing to do */ } static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return 1; } struct flowi; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { } #endif /*CONFIG_NETFILTER*/ #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_zones_common.h> void nf_ct_attach(struct sk_buff *, const struct sk_buff *); void nf_ct_set_closing(struct nf_conntrack *nfct); struct nf_conntrack_tuple; bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb); #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} static inline void nf_ct_set_closing(struct nf_conntrack *nfct) {} struct nf_conntrack_tuple; static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) { return false; } #endif struct nf_conn; enum ip_conntrack_info; struct nf_ct_hook { int (*update)(struct net *net, struct sk_buff *skb); void (*destroy)(struct nf_conntrack *); bool (*get_tuple_skb)(struct nf_conntrack_tuple *, const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); void (*set_closing)(struct nf_conntrack *nfct); int (*confirm)(struct sk_buff *skb); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; struct nlattr; struct nfnl_ct_hook { size_t (*build_size)(const struct nf_conn *ct); int (*build)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, u_int16_t ct_attr, u_int16_t ct_info_attr); int (*parse)(const struct nlattr *attr, struct nf_conn *ct); int (*attach_expect)(const struct nlattr *attr, struct nf_conn *ct, u32 portid, u32 report); void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, s32 off); }; extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook; struct nf_defrag_hook { struct module *owner; int (*enable)(struct net *net); void (*disable)(struct net *net); }; extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook; extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook; /* * nf_skb_duplicated - TEE target has sent a packet * * When a xtables target sends a packet, the OUTPUT and POSTROUTING * hooks are traversed again, i.e. nft and xtables are invoked recursively. * * This is used by xtables TEE target to prevent the duplicated skb from * being duplicated again. */ DECLARE_PER_CPU(bool, nf_skb_duplicated); /* * Contains bitmask of ctnetlink event subscribers, if any. * Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag. */ extern u8 nf_ctnetlink_has_listener; #endif /*__LINUX_NETFILTER_H*/
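The nf_hook_ops/nf_hookfn declarations above form the registration surface for hook users. Below is a minimal, hypothetical module-side sketch (names such as sample_hookfn are assumptions) of wiring a pass-through IPv4 PRE_ROUTING hook through nf_register_net_hook().

/* Minimal sketch of a hook user (hypothetical, not part of this header):
 * accept everything on IPv4 PRE_ROUTING.
 */
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

static unsigned int sample_hookfn(void *priv, struct sk_buff *skb,
				  const struct nf_hook_state *state)
{
	return NF_ACCEPT;
}

static const struct nf_hook_ops sample_ops = {
	.hook		= sample_hookfn,
	.pf		= NFPROTO_IPV4,
	.hooknum	= NF_INET_PRE_ROUTING,
	.priority	= NF_IP_PRI_FIRST,
};

static int sample_net_init(struct net *net)
{
	/* typically called from a pernet ->init() callback */
	return nf_register_net_hook(net, &sample_ops);
}

static void sample_net_exit(struct net *net)
{
	nf_unregister_net_hook(net, &sample_ops);
}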
4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 // SPDX-License-Identifier: GPL-2.0-only /* * Common interrupt code for 32 and 64 bit */ #include <linux/cpu.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <linux/of.h> #include <linux/seq_file.h> #include <linux/smp.h> #include <linux/ftrace.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/irq.h> #include <asm/irq_stack.h> #include <asm/apic.h> #include <asm/io_apic.h> #include <asm/irq.h> #include <asm/mce.h> #include <asm/hw_irq.h> #include <asm/desc.h> #include <asm/traps.h> #include <asm/thermal.h> #include <asm/posted_intr.h> #include <asm/irq_remapping.h> #define CREATE_TRACE_POINTS #include <asm/trace/irq_vectors.h> DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); atomic_t irq_err_count; /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. */ void ack_bad_irq(unsigned int irq) { if (printk_ratelimit()) pr_err("unexpected IRQ trap at vector %02x\n", irq); /* * Currently unexpected vectors happen only on SMP and APIC. * We _must_ ack these because every local APIC has only N * irq slots per priority level, and a 'hanging, unacked' IRQ * holds up an irq slot - in excessive cases (when multiple * unexpected vectors occur) that might lock up the APIC * completely. 
* But only ack when the APIC is enabled -AK */ apic_eoi(); } #define irq_stats(x) (&per_cpu(irq_stat, x)) /* * /proc/interrupts printing for arch specific interrupts */ int arch_show_interrupts(struct seq_file *p, int prec) { int j; seq_printf(p, "%*s: ", prec, "NMI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); seq_puts(p, " Non-maskable interrupts\n"); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "%*s: ", prec, "LOC"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); seq_puts(p, " Local timer interrupts\n"); seq_printf(p, "%*s: ", prec, "SPU"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); seq_puts(p, " Spurious interrupts\n"); seq_printf(p, "%*s: ", prec, "PMI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); seq_puts(p, " Performance monitoring interrupts\n"); seq_printf(p, "%*s: ", prec, "IWI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); seq_puts(p, " IRQ work interrupts\n"); seq_printf(p, "%*s: ", prec, "RTR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count); seq_puts(p, " APIC ICR read retries\n"); if (x86_platform_ipi_callback) { seq_printf(p, "%*s: ", prec, "PLT"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); seq_puts(p, " Platform interrupts\n"); } #endif #ifdef CONFIG_SMP seq_printf(p, "%*s: ", prec, "RES"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); seq_puts(p, " Rescheduling interrupts\n"); seq_printf(p, "%*s: ", prec, "CAL"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); seq_puts(p, " Function call interrupts\n"); seq_printf(p, "%*s: ", prec, "TLB"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); seq_puts(p, " TLB shootdowns\n"); #endif #ifdef CONFIG_X86_THERMAL_VECTOR seq_printf(p, "%*s: ", prec, "TRM"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); seq_puts(p, " Thermal event interrupts\n"); #endif #ifdef CONFIG_X86_MCE_THRESHOLD seq_printf(p, "%*s: ", prec, "THR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_puts(p, " Threshold APIC interrupts\n"); #endif #ifdef CONFIG_X86_MCE_AMD seq_printf(p, "%*s: ", prec, "DFR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count); seq_puts(p, " Deferred Error APIC interrupts\n"); #endif #ifdef CONFIG_X86_MCE seq_printf(p, "%*s: ", prec, "MCE"); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); seq_puts(p, " Machine check exceptions\n"); seq_printf(p, "%*s: ", prec, "MCP"); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); seq_puts(p, " Machine check polls\n"); #endif #ifdef CONFIG_X86_HV_CALLBACK_VECTOR if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HYP"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count); seq_puts(p, " Hypervisor callback interrupts\n"); } #endif #if IS_ENABLED(CONFIG_HYPERV) if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HRE"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_hv_reenlightenment_count); seq_puts(p, " Hyper-V reenlightenment interrupts\n"); } if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HVS"); for_each_online_cpu(j) seq_printf(p, "%10u ", 
irq_stats(j)->hyperv_stimer0_count); seq_puts(p, " Hyper-V stimer0 interrupts\n"); } #endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); #endif #if IS_ENABLED(CONFIG_KVM) seq_printf(p, "%*s: ", prec, "PIN"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis); seq_puts(p, " Posted-interrupt notification event\n"); seq_printf(p, "%*s: ", prec, "NPI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_nested_ipis); seq_puts(p, " Nested posted-interrupt event\n"); seq_printf(p, "%*s: ", prec, "PIW"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_wakeup_ipis); seq_puts(p, " Posted-interrupt wakeup event\n"); #endif #ifdef CONFIG_X86_POSTED_MSI seq_printf(p, "%*s: ", prec, "PMN"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->posted_msi_notification_count); seq_puts(p, " Posted MSI notification event\n"); #endif return 0; } /* * /proc/stat helpers */ u64 arch_irq_stat_cpu(unsigned int cpu) { u64 sum = irq_stats(cpu)->__nmi_count; #ifdef CONFIG_X86_LOCAL_APIC sum += irq_stats(cpu)->apic_timer_irqs; sum += irq_stats(cpu)->irq_spurious_count; sum += irq_stats(cpu)->apic_perf_irqs; sum += irq_stats(cpu)->apic_irq_work_irqs; sum += irq_stats(cpu)->icr_read_retry_count; if (x86_platform_ipi_callback) sum += irq_stats(cpu)->x86_platform_ipis; #endif #ifdef CONFIG_SMP sum += irq_stats(cpu)->irq_resched_count; sum += irq_stats(cpu)->irq_call_count; #endif #ifdef CONFIG_X86_THERMAL_VECTOR sum += irq_stats(cpu)->irq_thermal_count; #endif #ifdef CONFIG_X86_MCE_THRESHOLD sum += irq_stats(cpu)->irq_threshold_count; #endif #ifdef CONFIG_X86_HV_CALLBACK_VECTOR sum += irq_stats(cpu)->irq_hv_callback_count; #endif #if IS_ENABLED(CONFIG_HYPERV) sum += irq_stats(cpu)->irq_hv_reenlightenment_count; sum += irq_stats(cpu)->hyperv_stimer0_count; #endif #ifdef CONFIG_X86_MCE sum += per_cpu(mce_exception_count, cpu); sum += per_cpu(mce_poll_count, cpu); #endif return sum; } u64 arch_irq_stat(void) { u64 sum = atomic_read(&irq_err_count); return sum; } static __always_inline void handle_irq(struct irq_desc *desc, struct pt_regs *regs) { if (IS_ENABLED(CONFIG_X86_64)) generic_handle_irq_desc(desc); else __handle_irq(desc, regs); } static __always_inline int call_irq_handler(int vector, struct pt_regs *regs) { struct irq_desc *desc; int ret = 0; desc = __this_cpu_read(vector_irq[vector]); if (likely(!IS_ERR_OR_NULL(desc))) { handle_irq(desc, regs); } else { ret = -EINVAL; if (desc == VECTOR_UNUSED) { pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n", __func__, smp_processor_id(), vector); } else { __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); } } return ret; } /* * common_interrupt() handles all normal device IRQ's (the special SMP * cross-CPU interrupts have their own entry points). */ DEFINE_IDTENTRY_IRQ(common_interrupt) { struct pt_regs *old_regs = set_irq_regs(regs); /* entry code tells RCU that we're not quiescent. Check it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); if (unlikely(call_irq_handler(vector, regs))) apic_eoi(); set_irq_regs(old_regs); } #ifdef CONFIG_X86_LOCAL_APIC /* Function pointer for generic interrupt vector handling */ void (*x86_platform_ipi_callback)(void) = NULL; /* * Handler for X86_PLATFORM_IPI_VECTOR. 
*/ DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi) { struct pt_regs *old_regs = set_irq_regs(regs); apic_eoi(); trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR); inc_irq_stat(x86_platform_ipis); if (x86_platform_ipi_callback) x86_platform_ipi_callback(); trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR); set_irq_regs(old_regs); } #endif #if IS_ENABLED(CONFIG_KVM) static void dummy_handler(void) {} static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler; void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) { if (handler) kvm_posted_intr_wakeup_handler = handler; else { kvm_posted_intr_wakeup_handler = dummy_handler; synchronize_rcu(); } } EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler); /* * Handler for POSTED_INTERRUPT_VECTOR. */ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi) { apic_eoi(); inc_irq_stat(kvm_posted_intr_ipis); } /* * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. */ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi) { apic_eoi(); inc_irq_stat(kvm_posted_intr_wakeup_ipis); kvm_posted_intr_wakeup_handler(); } /* * Handler for POSTED_INTERRUPT_NESTED_VECTOR. */ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi) { apic_eoi(); inc_irq_stat(kvm_posted_intr_nested_ipis); } #endif #ifdef CONFIG_X86_POSTED_MSI /* Posted Interrupt Descriptors for coalesced MSIs to be posted */ DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); void intel_posted_msi_init(void) { u32 destination; u32 apic_id; this_cpu_write(posted_msi_pi_desc.nv, POSTED_MSI_NOTIFICATION_VECTOR); /* * APIC destination ID is stored in bit 8:15 while in XAPIC mode. * VT-d spec. CH 9.11 */ apic_id = this_cpu_read(x86_cpu_to_apicid); destination = x2apic_enabled() ? apic_id : apic_id << 8; this_cpu_write(posted_msi_pi_desc.ndst, destination); } /* * De-multiplexing posted interrupts is on the performance path, the code * below is written to optimize the cache performance based on the following * considerations: * 1.Posted interrupt descriptor (PID) fits in a cache line that is frequently * accessed by both CPU and IOMMU. * 2.During posted MSI processing, the CPU needs to do 64-bit read and xchg * for checking and clearing posted interrupt request (PIR), a 256 bit field * within the PID. * 3.On the other side, the IOMMU does atomic swaps of the entire PID cache * line when posting interrupts and setting control bits. * 4.The CPU can access the cache line a magnitude faster than the IOMMU. * 5.Each time the IOMMU does interrupt posting to the PIR will evict the PID * cache line. The cache line states after each operation are as follows: * CPU IOMMU PID Cache line state * --------------------------------------------------------------- *...read64 exclusive *...lock xchg64 modified *... post/atomic swap invalid *...------------------------------------------------------------- * * To reduce L1 data cache miss, it is important to avoid contention with * IOMMU's interrupt posting/atomic swap. Therefore, a copy of PIR is used * to dispatch interrupt handlers. * * In addition, the code is trying to keep the cache line state consistent * as much as possible. e.g. 
when making a copy and clearing the PIR * (assuming non-zero PIR bits are present in the entire PIR), it does: * read, read, read, read, xchg, xchg, xchg, xchg * instead of: * read, xchg, read, xchg, read, xchg, read, xchg */ static __always_inline bool handle_pending_pir(u64 *pir, struct pt_regs *regs) { int i, vec = FIRST_EXTERNAL_VECTOR; unsigned long pir_copy[4]; bool handled = false; for (i = 0; i < 4; i++) pir_copy[i] = pir[i]; for (i = 0; i < 4; i++) { if (!pir_copy[i]) continue; pir_copy[i] = arch_xchg(&pir[i], 0); handled = true; } if (handled) { for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR) call_irq_handler(vec, regs); } return handled; } /* * Performance data shows that 3 is good enough to harvest 90+% of the benefit * on high IRQ rate workload. */ #define MAX_POSTED_MSI_COALESCING_LOOP 3 /* * For MSIs that are delivered as posted interrupts, the CPU notifications * can be coalesced if the MSIs arrive in high frequency bursts. */ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification) { struct pt_regs *old_regs = set_irq_regs(regs); struct pi_desc *pid; int i = 0; pid = this_cpu_ptr(&posted_msi_pi_desc); inc_irq_stat(posted_msi_notification_count); irq_enter(); /* * Max coalescing count includes the extra round of handle_pending_pir * after clearing the outstanding notification bit. Hence, at most * MAX_POSTED_MSI_COALESCING_LOOP - 1 loops are executed here. */ while (++i < MAX_POSTED_MSI_COALESCING_LOOP) { if (!handle_pending_pir(pid->pir64, regs)) break; } /* * Clear outstanding notification bit to allow new IRQ notifications, * do this last to maximize the window of interrupt coalescing. */ pi_clear_on(pid); /* * There could be a race of PI notification and the clearing of ON bit, * process PIR bits one last time such that handling the new interrupts * are not delayed until the next IRQ. */ handle_pending_pir(pid->pir64, regs); apic_eoi(); irq_exit(); set_irq_regs(old_regs); } #endif /* X86_POSTED_MSI */ #ifdef CONFIG_HOTPLUG_CPU /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { unsigned int vector; struct irq_desc *desc; struct irq_data *data; struct irq_chip *chip; irq_migrate_all_off_this_cpu(); /* * We can remove mdelay() and then send spurious interrupts to * new cpu targets for all the irqs that were handled previously by * this cpu. While it works, I have seen spurious interrupt messages * (nothing wrong but still...). * * So for now, retain mdelay(1) and check the IRR and then send those * interrupts to new targets as this cpu is already offlined... */ mdelay(1); /* * We can walk the vector array of this cpu without holding * vector_lock because the cpu is already marked !online, so * nothing else will touch it. 
*/ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector]))) continue; if (is_vector_pending(vector)) { desc = __this_cpu_read(vector_irq[vector]); raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); chip = irq_data_get_irq_chip(data); if (chip->irq_retrigger) { chip->irq_retrigger(data); __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); } raw_spin_unlock(&desc->lock); } if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED) __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); } } #endif #ifdef CONFIG_X86_THERMAL_VECTOR static void smp_thermal_vector(void) { if (x86_thermal_enabled()) intel_thermal_interrupt(); else pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", smp_processor_id()); } DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) { trace_thermal_apic_entry(THERMAL_APIC_VECTOR); inc_irq_stat(irq_thermal_count); smp_thermal_vector(); trace_thermal_apic_exit(THERMAL_APIC_VECTOR); apic_eoi(); } #endif
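kvm_set_posted_intr_wakeup_handler() above is exported for a single consumer; the following hypothetical sketch (function names are assumptions) shows the register/unregister pairing it is designed for, including the NULL case that reinstalls the dummy handler and synchronizes RCU before the caller's handler may be freed.

/* Hypothetical consumer sketch for the exported setter above. */
static void sample_pi_wakeup_handler(void)
{
	/* wake vCPUs blocked waiting for posted interrupts */
}

static void sample_pi_enable(void)
{
	kvm_set_posted_intr_wakeup_handler(sample_pi_wakeup_handler);
}

static void sample_pi_disable(void)
{
	/* NULL restores the dummy handler; the setter synchronizes RCU */
	kvm_set_posted_intr_wakeup_handler(NULL);
}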
// SPDX-License-Identifier: GPL-2.0
/*
 * Parts of this file are
 * Copyright (C) 2022-2023 Intel Corporation
 */
#include <linux/ieee80211.h>
#include <linux/export.h>
#include <net/cfg80211.h>
#include "nl80211.h"
#include "core.h"
#include "rdev-ops.h"

static int ___cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
			       struct net_device *dev, unsigned int link_id,
			       bool notify)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	int err;

	lockdep_assert_wiphy(wdev->wiphy);

	if (!rdev->ops->stop_ap)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
		return -EOPNOTSUPP;

	if (!wdev->links[link_id].ap.beacon_interval)
		return -ENOENT;

	err = rdev_stop_ap(rdev, dev, link_id);
	if (!err) {
		wdev->conn_owner_nlportid = 0;
		wdev->links[link_id].ap.beacon_interval = 0;
		memset(&wdev->links[link_id].ap.chandef, 0,
		       sizeof(wdev->links[link_id].ap.chandef));
		wdev->u.ap.ssid_len = 0;
		rdev_set_qos_map(rdev, dev, NULL);
		if (notify)
			nl80211_send_ap_stopped(wdev, link_id);

		/* Should we apply the grace period during beaconing interface
		 * shutdown also?
		 */
		cfg80211_sched_dfs_chan_update(rdev);
	}

	schedule_work(&cfg80211_disconnect_work);

	return err;
}

int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
		     struct net_device *dev, int link_id,
		     bool notify)
{
	unsigned int link;
	int ret = 0;

	if (link_id >= 0)
		return ___cfg80211_stop_ap(rdev, dev, link_id, notify);

	for_each_valid_link(dev->ieee80211_ptr, link) {
		int ret1 = ___cfg80211_stop_ap(rdev, dev, link, notify);

		if (ret1)
			ret = ret1;
		/* try the next one also if one errored */
	}

	return ret;
}
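A short hypothetical caller sketch for the function above: a negative link_id stops every valid link, continuing past per-link failures and returning the last error.

/* Hypothetical caller: tear down all AP links on an interface and notify
 * userspace about each stopped link.
 */
static int sample_stop_all_links(struct cfg80211_registered_device *rdev,
				 struct net_device *dev)
{
	return cfg80211_stop_ap(rdev, dev, -1, true);
}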
// SPDX-License-Identifier: GPL-2.0-only
/* iptables module for the packet checksum mangling
 *
 * (C) 2002 by Harald Welte <laforge@netfilter.org>
 * (C) 2010 Red Hat, Inc.
 *
 * Author: Michael S. Tsirkin <mst@redhat.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/skbuff.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_CHECKSUM.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Michael S. Tsirkin <mst@redhat.com>");
MODULE_DESCRIPTION("Xtables: checksum modification");
MODULE_ALIAS("ipt_CHECKSUM");
MODULE_ALIAS("ip6t_CHECKSUM");

static unsigned int
checksum_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
	if (skb->ip_summed == CHECKSUM_PARTIAL && !skb_is_gso(skb))
		skb_checksum_help(skb);

	return XT_CONTINUE;
}

static int checksum_tg_check(const struct xt_tgchk_param *par)
{
	const struct xt_CHECKSUM_info *einfo = par->targinfo;
	const struct ip6t_ip6 *i6 = par->entryinfo;
	const struct ipt_ip *i4 = par->entryinfo;

	if (einfo->operation & ~XT_CHECKSUM_OP_FILL) {
		pr_info_ratelimited("unsupported CHECKSUM operation %x\n",
				    einfo->operation);
		return -EINVAL;
	}
	if (!einfo->operation)
		return -EINVAL;

	switch (par->family) {
	case NFPROTO_IPV4:
		if (i4->proto == IPPROTO_UDP &&
		    (i4->invflags & XT_INV_PROTO) == 0)
			return 0;
		break;
	case NFPROTO_IPV6:
		if ((i6->flags & IP6T_F_PROTO) &&
		    i6->proto == IPPROTO_UDP &&
		    (i6->invflags & XT_INV_PROTO) == 0)
			return 0;
		break;
	}

	pr_warn_once("CHECKSUM should be avoided. If really needed, restrict with \"-p udp\" and only use in OUTPUT\n");
	return 0;
}

static struct xt_target checksum_tg_reg __read_mostly = {
	.name		= "CHECKSUM",
	.family		= NFPROTO_UNSPEC,
	.target		= checksum_tg,
	.targetsize	= sizeof(struct xt_CHECKSUM_info),
	.table		= "mangle",
	.checkentry	= checksum_tg_check,
	.me		= THIS_MODULE,
};

static int __init checksum_tg_init(void)
{
	return xt_register_target(&checksum_tg_reg);
}

static void __exit checksum_tg_exit(void)
{
	xt_unregister_target(&checksum_tg_reg);
}

module_init(checksum_tg_init);
module_exit(checksum_tg_exit);
74 68 68 6 6 68 68 68 68 68 6 6 11 6 15 15 15 4 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 Syncookies implementation for the Linux kernel * * Authors: * Glenn Griffin <ggriffin.kernel@gmail.com> * * Based on IPv4 implementation by Andi Kleen * linux/net/ipv4/syncookies.c */ #include <linux/tcp.h> #include <linux/random.h> #include <linux/siphash.h> #include <linux/kernel.h> #include <net/secure_seq.h> #include <net/ipv6.h> #include <net/tcp.h> #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) static siphash_aligned_key_t syncookie6_secret[2]; /* RFC 2460, Section 8.3: * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..] * * Due to IPV6_MIN_MTU=1280 the lowest possible MSS is 1220, which allows * using higher values than ipv4 tcp syncookies. * The other values are chosen based on ethernet (1500 and 9k MTU), plus * one that accounts for common encap (PPPoe) overhead. Table must be sorted. 
*/ static __u16 const msstab[] = { 1280 - 60, /* IPV6_MIN_MTU - 60 */ 1480 - 60, 1500 - 60, 9000 - 60, }; static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, __be16 dport, u32 count, int c) { const struct { struct in6_addr saddr; struct in6_addr daddr; u32 count; __be16 sport; __be16 dport; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *saddr, .daddr = *daddr, .count = count, .sport = sport, .dport = dport }; net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret)); return siphash(&combined, offsetofend(typeof(combined), dport), &syncookie6_secret[c]); } static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, __be16 dport, __u32 sseq, __u32 data) { u32 count = tcp_cookie_time(); return (cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq + (count << COOKIEBITS) + ((cookie_hash(saddr, daddr, sport, dport, count, 1) + data) & COOKIEMASK)); } static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, __be16 dport, __u32 sseq) { __u32 diff, count = tcp_cookie_time(); cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS); if (diff >= MAX_SYNCOOKIE_AGE) return (__u32)-1; return (cookie - cookie_hash(saddr, daddr, sport, dport, count - diff, 1)) & COOKIEMASK; } u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, const struct tcphdr *th, __u16 *mssp) { int mssind; const __u16 mss = *mssp; for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) if (mss >= msstab[mssind]) break; *mssp = msstab[mssind]; return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source, th->dest, ntohl(th->seq), mssind); } EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence); __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); return __cookie_v6_init_sequence(iph, th, mssp); } int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th) { __u32 cookie = ntohl(th->ack_seq) - 1; __u32 seq = ntohl(th->seq) - 1; __u32 mssind; mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, th->source, th->dest, seq); return mssind < ARRAY_SIZE(msstab) ? 
msstab[mssind] : 0; } EXPORT_SYMBOL_GPL(__cookie_v6_check); static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; u32 tsoff = 0; int mss; if (tcp_synq_no_recent_overflow(sk)) goto out; mss = __cookie_v6_check(ipv6_hdr(skb), tcp_hdr(skb)); if (!mss) { __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; } __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(net, skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { tsoff = secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32); tcp_opt.rcv_tsecr -= tsoff; } if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; return cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb, &tcp_opt, mss, tsoff); out: return ERR_PTR(-EINVAL); } struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct inet_request_sock *ireq; struct net *net = sock_net(sk); struct request_sock *req; struct dst_entry *dst; struct sock *ret = sk; __u8 rcv_wscale; int full_space; SKB_DR(reason); if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) goto out; if (cookie_bpf_ok(skb)) { req = cookie_bpf_check(sk, skb); } else { req = cookie_tcp_check(net, sk, skb); if (IS_ERR(req)) goto out; } if (!req) { SKB_DR_SET(reason, NO_SOCKET); goto out_drop; } ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; if (security_inet_conn_request(sk, skb, req)) { SKB_DR_SET(reason, SECURITY_HOOK); goto out_free; } if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { refcount_inc(&skb->users); ireq->pktopts = skb; } /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); tcp_ao_syncookie(sk, skb, req, AF_INET6); /* * We need to lookup the dst_entry to get the correct window size. * This is taken from tcp_v6_syn_recv_sock. Somebody please enlighten * me if there is a preferred way. */ { struct in6_addr *final_p, final; struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = ireq->ir_iif; fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi_common(&fl6)); dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); if (IS_ERR(dst)) { SKB_DR_SET(reason, IP_OUTNOROUTES); goto out_free; } } req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? 
:dst_metric(dst, RTAX_WINDOW); /* limit the window selection if the user enforce a smaller rx buffer */ full_space = tcp_full_space(sk); if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) req->rsk_window_clamp = full_space; tcp_select_initial_window(sk, full_space, req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); /* req->syncookie is set true only if ACK is validated * by BPF kfunc, then, rcv_wscale is already configured. */ if (!req->syncookie) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok &= cookie_ecn_ok(net, dst); ret = tcp_get_cookie_sock(sk, skb, req, dst); if (!ret) { SKB_DR_SET(reason, NO_SOCKET); goto out_drop; } out: return ret; out_free: reqsk_free(req); out_drop: sk_skb_reason_drop(sk, skb, reason); return NULL; }
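The encode/decode arithmetic in secure_tcp_syn_cookie() and check_tcp_syn_cookie() is easier to see outside the kernel. This is a standalone userspace sketch with a toy hash standing in for cookie_hash() (the constants and helper names are illustrative only), showing that the low COOKIEBITS bits carry the MSS index and the top bits carry the coarse timestamp.

/* Standalone userspace sketch of the cookie arithmetic above; toy_hash()
 * is a stand-in for cookie_hash(), and the check_tcp_syn_cookie() age test
 * (diff >= MAX_SYNCOOKIE_AGE) is omitted for brevity.
 */
#include <stdint.h>
#include <stdio.h>

#define COOKIEBITS 24
#define COOKIEMASK (((uint32_t)1 << COOKIEBITS) - 1)

static uint32_t toy_hash(uint32_t count, int c)
{
	return (count * 2654435761u) ^ (c ? 0x9e3779b9u : 0x7f4a7c15u);
}

static uint32_t cookie_encode(uint32_t sseq, uint32_t data, uint32_t count)
{
	return toy_hash(0, 0) + sseq + (count << COOKIEBITS) +
	       ((toy_hash(count, 1) + data) & COOKIEMASK);
}

static uint32_t cookie_decode(uint32_t cookie, uint32_t sseq, uint32_t count)
{
	uint32_t diff;

	cookie -= toy_hash(0, 0) + sseq;
	diff = (count - (cookie >> COOKIEBITS)) & ((uint32_t)-1 >> COOKIEBITS);

	return (cookie - toy_hash(count - diff, 1)) & COOKIEMASK;
}

int main(void)
{
	uint32_t sseq = 0x12345678, mssind = 3, count = 1000;
	uint32_t cookie = cookie_encode(sseq, mssind, count);

	/* decoded one "tick" later, the mss index still comes back intact */
	printf("mssind = %u\n", cookie_decode(cookie, sseq, count + 1));
	return 0;
}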
// SPDX-License-Identifier: GPL-2.0 #include "blk-rq-qos.h" /* * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, * false if 'v' + 1 would be bigger than 'below'. */ static bool atomic_inc_below(atomic_t *v, unsigned int below) { unsigned int cur = atomic_read(v); do { if (cur >= below) return false; } while (!atomic_try_cmpxchg(v, &cur, cur + 1)); return true; } bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit) { return atomic_inc_below(&rq_wait->inflight, limit); } void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio) { do { if (rqos->ops->cleanup) rqos->ops->cleanup(rqos, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_done(struct rq_qos *rqos, struct request *rq) { do { if (rqos->ops->done) rqos->ops->done(rqos, rq); rqos = rqos->next; } while (rqos); } void __rq_qos_issue(struct rq_qos *rqos, struct request *rq) { do { if (rqos->ops->issue) rqos->ops->issue(rqos, rq); rqos = rqos->next; } while (rqos); } void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq) { do { if (rqos->ops->requeue) rqos->ops->requeue(rqos, rq); rqos = rqos->next; } while (rqos); } void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio) { do { if (rqos->ops->throttle) rqos->ops->throttle(rqos, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio) { do { if (rqos->ops->track) rqos->ops->track(rqos, rq, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio) { do { if (rqos->ops->merge) rqos->ops->merge(rqos, rq, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio) { do { if (rqos->ops->done_bio) rqos->ops->done_bio(rqos, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_queue_depth_changed(struct rq_qos *rqos) { do { if (rqos->ops->queue_depth_changed) rqos->ops->queue_depth_changed(rqos); rqos = rqos->next; } while (rqos); } /* * Return true, if we can't increase the depth further by scaling */ bool rq_depth_calc_max_depth(struct rq_depth *rqd) { unsigned int
depth; bool ret = false; /* * For QD=1 devices, this is a special case. It's important for those * to have one request ready when one completes, so force a depth of * 2 for those devices. On the backend, it'll be a depth of 1 anyway, * since the device can't have more than that in flight. If we're * scaling down, then keep a setting of 1/1/1. */ if (rqd->queue_depth == 1) { if (rqd->scale_step > 0) rqd->max_depth = 1; else { rqd->max_depth = 2; ret = true; } } else { /* * scale_step == 0 is our default state. If we have suffered * latency spikes, step will be > 0, and we shrink the * allowed write depths. If step is < 0, we're only doing * writes, and we allow a temporarily higher depth to * increase performance. */ depth = min_t(unsigned int, rqd->default_depth, rqd->queue_depth); if (rqd->scale_step > 0) depth = 1 + ((depth - 1) >> min(31, rqd->scale_step)); else if (rqd->scale_step < 0) { unsigned int maxd = 3 * rqd->queue_depth / 4; depth = 1 + ((depth - 1) << -rqd->scale_step); if (depth > maxd) { depth = maxd; ret = true; } } rqd->max_depth = depth; } return ret; } /* Returns true on success and false if scaling up wasn't possible */ bool rq_depth_scale_up(struct rq_depth *rqd) { /* * Hit max in previous round, stop here */ if (rqd->scaled_max) return false; rqd->scale_step--; rqd->scaled_max = rq_depth_calc_max_depth(rqd); return true; } /* * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we * had a latency violation. Returns true on success and returns false if * scaling down wasn't possible. */ bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) { /* * Stop scaling down when we've hit the limit. This also prevents * ->scale_step from going to crazy values, if the device can't * keep up. */ if (rqd->max_depth == 1) return false; if (rqd->scale_step < 0 && hard_throttle) rqd->scale_step = 0; else rqd->scale_step++; rqd->scaled_max = false; rq_depth_calc_max_depth(rqd); return true; } struct rq_qos_wait_data { struct wait_queue_entry wq; struct task_struct *task; struct rq_wait *rqw; acquire_inflight_cb_t *cb; void *private_data; bool got_token; }; static int rq_qos_wake_function(struct wait_queue_entry *curr, unsigned int mode, int wake_flags, void *key) { struct rq_qos_wait_data *data = container_of(curr, struct rq_qos_wait_data, wq); /* * If we fail to get a budget, return -1 to interrupt the wake up loop * in __wake_up_common. */ if (!data->cb(data->rqw, data->private_data)) return -1; data->got_token = true; smp_wmb(); list_del_init(&curr->entry); wake_up_process(data->task); return 1; } /** * rq_qos_wait - throttle on a rqw if we need to * @rqw: rqw to throttle on * @private_data: caller provided specific data * @acquire_inflight_cb: inc the rqw->inflight counter if we can * @cleanup_cb: the callback to cleanup in case we race with a waker * * This provides a uniform place for the rq_qos users to do their throttling. * Since you can end up with a lot of things sleeping at once, this manages the * waking up based on the resources available. The acquire_inflight_cb should * inc the rqw->inflight if we have the ability to do so, or return false if not * and then we will sleep until the room becomes available. * * cleanup_cb is in case that we race with a waker and need to cleanup the * inflight count accordingly. 
*/ void rq_qos_wait(struct rq_wait *rqw, void *private_data, acquire_inflight_cb_t *acquire_inflight_cb, cleanup_cb_t *cleanup_cb) { struct rq_qos_wait_data data = { .wq = { .func = rq_qos_wake_function, .entry = LIST_HEAD_INIT(data.wq.entry), }, .task = current, .rqw = rqw, .cb = acquire_inflight_cb, .private_data = private_data, }; bool has_sleeper; has_sleeper = wq_has_sleeper(&rqw->wait); if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) return; has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); do { /* The memory barrier in set_current_state saves us here. */ if (data.got_token) break; if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { finish_wait(&rqw->wait, &data.wq); /* * We raced with rq_qos_wake_function() getting a token, * which means we now have two. Put our local token * and wake anyone else potentially waiting for one. */ smp_rmb(); if (data.got_token) cleanup_cb(rqw, private_data); break; } io_schedule(); has_sleeper = true; set_current_state(TASK_UNINTERRUPTIBLE); } while (1); finish_wait(&rqw->wait, &data.wq); } void rq_qos_exit(struct request_queue *q) { mutex_lock(&q->rq_qos_mutex); while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; rqos->ops->exit(rqos); } mutex_unlock(&q->rq_qos_mutex); } int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, const struct rq_qos_ops *ops) { struct request_queue *q = disk->queue; lockdep_assert_held(&q->rq_qos_mutex); rqos->disk = disk; rqos->id = id; rqos->ops = ops; /* * No IO can be in-flight when adding rqos, so freeze queue, which * is fine since we only support rq_qos for blk-mq queue. */ blk_mq_freeze_queue(q); if (rq_qos_id(q, rqos->id)) goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; blk_mq_unfreeze_queue(q); if (rqos->ops->debugfs_attrs) { mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_rqos(rqos); mutex_unlock(&q->debugfs_mutex); } return 0; ebusy: blk_mq_unfreeze_queue(q); return -EBUSY; } void rq_qos_del(struct rq_qos *rqos) { struct request_queue *q = rqos->disk->queue; struct rq_qos **cur; lockdep_assert_held(&q->rq_qos_mutex); blk_mq_freeze_queue(q); for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { if (*cur == rqos) { *cur = rqos->next; break; } } blk_mq_unfreeze_queue(q); mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); mutex_unlock(&q->debugfs_mutex); }
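The throttling entry points above sit on top of atomic_inc_below(), which grants an in-flight slot only while the counter stays below the limit. A small userspace sketch of the same compare-and-swap loop, using C11 atomics in place of the kernel's atomic_t helpers:

/*
 * Userspace sketch of the atomic_inc_below() idiom: increment a counter only
 * if it is still below a limit, using a compare-and-swap loop instead of a
 * lock.  On CAS failure, 'cur' is reloaded with the current value and the
 * limit check is retried.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool inc_below(atomic_uint *v, unsigned int below)
{
	unsigned int cur = atomic_load(v);

	do {
		if (cur >= below)
			return false;	/* already at the limit, give up */
	} while (!atomic_compare_exchange_weak(v, &cur, cur + 1));

	return true;
}

int main(void)
{
	atomic_uint inflight = 0;
	unsigned int granted = 0;

	/* Try to take 5 slots with a limit of 3; only 3 should succeed. */
	for (int i = 0; i < 5; i++)
		if (inc_below(&inflight, 3))
			granted++;

	printf("granted %u of 5 requests\n", granted);	/* prints 3 */
	return 0;
}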
// SPDX-License-Identifier: GPL-2.0 /****************************************************************************** * rtl871x_cmd.c * * Copyright(c) 2007 - 2010 Realtek Corporation. All rights reserved. * Linux device driver for RTL8192SU * * Modifications for inclusion into the Linux staging tree are * Copyright(c) 2010 Larry Finger. All rights reserved.
* * Contact information: * WLAN FAE <wlanfae@realtek.com> * Larry Finger <Larry.Finger@lwfinger.net> * ******************************************************************************/ #define _RTL871X_CMD_C_ #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/kref.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/usb.h> #include <linux/usb/ch9.h> #include <linux/circ_buf.h> #include <linux/uaccess.h> #include <asm/byteorder.h> #include <linux/atomic.h> #include <linux/semaphore.h> #include <linux/rtnetlink.h> #include "osdep_service.h" #include "drv_types.h" #include "recv_osdep.h" #include "mlme_osdep.h" /* * Caller and the r8712_cmd_thread can protect cmd_q by spin_lock. * No irqsave is necessary. */ int r8712_init_cmd_priv(struct cmd_priv *pcmdpriv) { init_completion(&pcmdpriv->cmd_queue_comp); init_completion(&pcmdpriv->terminate_cmdthread_comp); _init_queue(&(pcmdpriv->cmd_queue)); /* allocate DMA-able/Non-Page memory for cmd_buf and rsp_buf */ pcmdpriv->cmd_seq = 1; pcmdpriv->cmd_allocated_buf = kmalloc(MAX_CMDSZ + CMDBUFF_ALIGN_SZ, GFP_ATOMIC); if (!pcmdpriv->cmd_allocated_buf) return -ENOMEM; pcmdpriv->cmd_buf = pcmdpriv->cmd_allocated_buf + CMDBUFF_ALIGN_SZ - ((addr_t)(pcmdpriv->cmd_allocated_buf) & (CMDBUFF_ALIGN_SZ - 1)); pcmdpriv->rsp_allocated_buf = kmalloc(MAX_RSPSZ + 4, GFP_ATOMIC); if (!pcmdpriv->rsp_allocated_buf) { kfree(pcmdpriv->cmd_allocated_buf); pcmdpriv->cmd_allocated_buf = NULL; return -ENOMEM; } pcmdpriv->rsp_buf = pcmdpriv->rsp_allocated_buf + 4 - ((addr_t)(pcmdpriv->rsp_allocated_buf) & 3); pcmdpriv->cmd_issued_cnt = 0; pcmdpriv->cmd_done_cnt = 0; pcmdpriv->rsp_cnt = 0; return 0; } int r8712_init_evt_priv(struct evt_priv *pevtpriv) { /* allocate DMA-able/Non-Page memory for cmd_buf and rsp_buf */ pevtpriv->event_seq = 0; pevtpriv->evt_allocated_buf = kmalloc(MAX_EVTSZ + 4, GFP_ATOMIC); if (!pevtpriv->evt_allocated_buf) return -ENOMEM; pevtpriv->evt_buf = pevtpriv->evt_allocated_buf + 4 - ((addr_t)(pevtpriv->evt_allocated_buf) & 3); pevtpriv->evt_done_cnt = 0; return 0; } void r8712_free_evt_priv(struct evt_priv *pevtpriv) { kfree(pevtpriv->evt_allocated_buf); } void r8712_free_cmd_priv(struct cmd_priv *pcmdpriv) { if (pcmdpriv) { kfree(pcmdpriv->cmd_allocated_buf); kfree(pcmdpriv->rsp_allocated_buf); } } /* * Calling Context: * * r8712_enqueue_cmd can only be called between kernel thread, * since only spin_lock is used. * * ISR/Call-Back functions can't call this sub-function. 
* */ void r8712_enqueue_cmd(struct cmd_priv *pcmdpriv, struct cmd_obj *obj) { struct __queue *queue; unsigned long irqL; if (pcmdpriv->padapter->eeprompriv.bautoload_fail_flag) return; if (!obj) return; queue = &pcmdpriv->cmd_queue; spin_lock_irqsave(&queue->lock, irqL); list_add_tail(&obj->list, &queue->queue); spin_unlock_irqrestore(&queue->lock, irqL); complete(&pcmdpriv->cmd_queue_comp); } struct cmd_obj *r8712_dequeue_cmd(struct __queue *queue) { unsigned long irqL; struct cmd_obj *obj; spin_lock_irqsave(&queue->lock, irqL); obj = list_first_entry_or_null(&queue->queue, struct cmd_obj, list); if (obj) list_del_init(&obj->list); spin_unlock_irqrestore(&queue->lock, irqL); return obj; } void r8712_enqueue_cmd_ex(struct cmd_priv *pcmdpriv, struct cmd_obj *obj) { unsigned long irqL; struct __queue *queue; if (!obj) return; if (pcmdpriv->padapter->eeprompriv.bautoload_fail_flag) return; queue = &pcmdpriv->cmd_queue; spin_lock_irqsave(&queue->lock, irqL); list_add_tail(&obj->list, &queue->queue); spin_unlock_irqrestore(&queue->lock, irqL); complete(&pcmdpriv->cmd_queue_comp); } void r8712_free_cmd_obj(struct cmd_obj *pcmd) { if ((pcmd->cmdcode != _JoinBss_CMD_) && (pcmd->cmdcode != _CreateBss_CMD_)) kfree(pcmd->parmbuf); if (pcmd->rsp) { if (pcmd->rspsz != 0) kfree(pcmd->rsp); } kfree(pcmd); } u8 r8712_sitesurvey_cmd(struct _adapter *padapter, struct ndis_802_11_ssid *pssid) __must_hold(&padapter->mlmepriv.lock) { struct cmd_obj *ph2c; struct sitesurvey_parm *psurveyPara; struct cmd_priv *pcmdpriv = &padapter->cmdpriv; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; ph2c = kmalloc(sizeof(*ph2c), GFP_ATOMIC); if (!ph2c) return _FAIL; psurveyPara = kmalloc(sizeof(*psurveyPara), GFP_ATOMIC); if (!psurveyPara) { kfree(ph2c); return _FAIL; } init_h2fwcmd_w_parm_no_rsp(ph2c, psurveyPara, GEN_CMD_CODE(_SiteSurvey)); psurveyPara->bsslimit = cpu_to_le32(48); psurveyPara->passive_mode = cpu_to_le32(pmlmepriv->passive_mode); psurveyPara->ss_ssidlen = 0; memset(psurveyPara->ss_ssid, 0, IW_ESSID_MAX_SIZE + 1); if (pssid && pssid->SsidLength) { int len = min_t(int, pssid->SsidLength, IW_ESSID_MAX_SIZE); memcpy(psurveyPara->ss_ssid, pssid->Ssid, len); psurveyPara->ss_ssidlen = cpu_to_le32(len); } set_fwstate(pmlmepriv, _FW_UNDER_SURVEY); r8712_enqueue_cmd(pcmdpriv, ph2c); mod_timer(&pmlmepriv->scan_to_timer, jiffies + msecs_to_jiffies(SCANNING_TIMEOUT)); padapter->ledpriv.LedControlHandler(padapter, LED_CTL_SITE_SURVEY); complete(&padapter->rx_filter_ready); return _SUCCESS; } int r8712_setdatarate_cmd(struct _adapter *padapter, u8 *rateset) { struct cmd_obj *ph2c; struct setdatarate_parm *pbsetdataratepara; struct cmd_priv *pcmdpriv = &padapter->cmdpriv; ph2c = kmalloc(sizeof(*ph2c), GFP_ATOMIC); if (!ph2c) return -ENOMEM; pbsetdataratepara = kmalloc(sizeof(*pbsetdataratepara), GFP_ATOMIC); if (!pbsetdataratepara) { kfree(ph2c); return -ENOMEM; } init_h2fwcmd_w_parm_no_rsp(ph2c, pbsetdataratepara, GEN_CMD_CODE(_SetDataRate)); pbsetdataratepara->mac_id = 5; memcpy(pbsetdataratepara->datarates, rateset, NumRates); r8712_enqueue_cmd(pcmdpriv, ph2c); return 0; } void r8712_set_chplan_cmd(struct _adapter *padapter, int chplan) { struct cmd_obj *ph2c; struct SetChannelPlan_param *psetchplanpara; struct cmd_priv *pcmdpriv = &padapter->cmdpriv; ph2c = kmalloc(sizeof(*ph2c), GFP_ATOMIC); if (!ph2c) return; psetchplanpara = kmalloc(sizeof(*psetchplanpara), GFP_ATOMIC); if (!psetchplanpara) { kfree(ph2c); return; } init_h2fwcmd_w_parm_no_rsp(ph2c, psetchplanpara, GEN_CMD_CODE(_SetChannelPlan)); 
psetchplanpara->ChannelPlan = chplan; r8712_enqueue_cmd(pcmdpriv, ph2c); } int r8712_setrfreg_cmd(struct _adapter *padapter, u8 offset, u32 val) { struct cmd_obj *ph2c; struct writeRF_parm *pwriterfparm; struct cmd_priv *pcmdpriv = &padapter->cmdpriv; ph2c = kmalloc(sizeof(*ph2c), GFP_ATOMIC); if (!ph2c) return -ENOMEM; pwriterfparm = kmalloc(sizeof(*pwriterfparm), GFP_ATOMIC); if (!pwriterfparm) { kfree(ph2c); return -ENOMEM; } init_h2fwcmd_w_parm_no_rsp(ph2c, pwriterfparm, GEN_CMD_CODE(_SetRFReg)); pwriterfparm->offset = offset; pwriterfparm->value = val; r8712_enqueue_cmd(pcmdpriv, ph2c); return 0; } int r8712_getrfreg_cmd(struct _adapter *padapter, u8 offset, u8 *pval) { struct cmd_obj *ph2c; struct readRF_parm *prdrfparm; struct cmd_priv *pcmdpriv = &padapter->cmdpriv; ph2c = kmalloc(sizeof(*ph2c), GFP_ATOMIC); if (!ph2c) return -ENOMEM; prdrfparm = kmalloc(sizeof(*prdrfparm), GFP_ATOMIC); if (!prdrfparm) { kfree(ph2c); return -ENOMEM; } INIT_LIST_HEAD(&ph2c->list); ph2c->cmdcode = GEN_CMD_CODE(_GetRFReg); ph2c->parmbuf = (unsigned char *)prdrfparm; ph2c->cmdsz = sizeof(struct readRF_parm); ph2c->rsp = pval; ph2c->rspsz = sizeof(struct readRF_rsp); prdrfparm->offset = offset; r8712_enqueue_cmd(pcmdpriv, ph2c); return 0; } void r8712_getbbrfreg_cmdrsp_callback(struct _adapter *padapter, struct cmd_obj *pcmd) { kfree(pcmd->parmbuf); kfree(pcmd); padapter->mppriv.workparam.bcompleted = true; } void r8712_readtssi_cmdrsp_callback(struct _adapter *padapter, struct cmd_obj *pcmd) { kfree(pcmd->parmbuf); kfree(pcmd); padapter->mppriv.workparam.bcompleted = true; } int r8712_createbss_cmd(struct _adapter *padapter) { struct cmd_obj *pcmd; struct cmd_priv *pcmdpriv = &padapter->cmdpriv; struct wlan_bssid_ex *pdev_network = &padapter->registrypriv.dev_network; padapter->ledpriv.LedControlHandler(padapter, LED_CTL_START_TO_LINK); pcmd = kmalloc(sizeof(*pcmd), GFP_ATOMIC); if (!pcmd) return -ENOMEM; INIT_LIST_HEAD(&pcmd->list); pcmd->cmdcode = _CreateBss_CMD_; pcmd->parmbuf = (unsigned char *)pdev_network; pcmd->cmdsz = r8712_get_wlan_bssid_ex_sz(pdev_network); pcmd->rsp = NULL; pcmd->rspsz = 0; /* notes: translate IELength & Length after assign to cmdsz; */ pdev_network->Length = pcmd->cmdsz; pdev_network->IELength = pdev_network->IELength; pdev_network->Ssid.SsidLength = pdev_network->Ssid.SsidLength; r8712_enqueue_cmd(pcmdpriv, pcmd); return 0; } int r8712_joinbss_cmd(struct _adapter *padapter, struct wlan_network *pnetwork) { struct wlan_bssid_ex *psecnetwork; struct cmd_obj *pcmd; struct cmd_priv *pcmdpriv = &padapter->cmdpriv; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct qos_priv *pqospriv = &pmlmepriv->qospriv; struct security_priv *psecuritypriv = &padapter->securitypriv; struct registry_priv *pregistrypriv = &padapter->registrypriv; enum NDIS_802_11_NETWORK_INFRASTRUCTURE ndis_network_mode = pnetwork->network.InfrastructureMode; padapter->ledpriv.LedControlHandler(padapter, LED_CTL_START_TO_LINK); pcmd = kmalloc(sizeof(*pcmd), GFP_ATOMIC); if (!pcmd) return -ENOMEM; /* for hidden ap to set fw_state here */ if (check_fwstate(pmlmepriv, WIFI_STATION_STATE | WIFI_ADHOC_STATE) != true) { switch (ndis_network_mode) { case Ndis802_11IBSS: pmlmepriv->fw_state |= WIFI_ADHOC_STATE; break; case Ndis802_11Infrastructure: pmlmepriv->fw_state |= WIFI_STATION_STATE; break; case Ndis802_11APMode: case Ndis802_11AutoUnknown: case Ndis802_11InfrastructureMax: break; } } psecnetwork = &psecuritypriv->sec_bss; memcpy(psecnetwork, &pnetwork->network, sizeof(*psecnetwork)); 
psecuritypriv->authenticator_ie[0] = (unsigned char) psecnetwork->IELength; if ((psecnetwork->IELength - 12) < (256 - 1)) memcpy(&psecuritypriv->authenticator_ie[1], &psecnetwork->IEs[12], psecnetwork->IELength - 12); else memcpy(&psecuritypriv->authenticator_ie[1], &psecnetwork->IEs[12], (256 - 1)); psecnetwork->IELength = 0; /* * If the driver wants to use the bssid to create the connection. * If not, we copy the connecting AP's MAC address to it so that * the driver just has the bssid information for PMKIDList searching. */ if (!pmlmepriv->assoc_by_bssid) ether_addr_copy(&pmlmepriv->assoc_bssid[0], &pnetwork->network.MacAddress[0]); psecnetwork->IELength = r8712_restruct_sec_ie(padapter, &pnetwork->network.IEs[0], &psecnetwork->IEs[0], pnetwork->network.IELength); pqospriv->qos_option = 0; if (pregistrypriv->wmm_enable) { u32 tmp_len; tmp_len = r8712_restruct_wmm_ie(padapter, &pnetwork->network.IEs[0], &psecnetwork->IEs[0], pnetwork->network.IELength, psecnetwork->IELength); if (psecnetwork->IELength != tmp_len) { psecnetwork->IELength = tmp_len; pqospriv->qos_option = 1; /* WMM IE in beacon */ } else { pqospriv->qos_option = 0; /* no WMM IE in beacon */ } } if (pregistrypriv->ht_enable) { /* * For WEP mode, we will use the bg mode to do the connection * to avoid some IOT issues, especially for Realtek 8192u * SoftAP. */ if ((padapter->securitypriv.PrivacyAlgrthm != _WEP40_) && (padapter->securitypriv.PrivacyAlgrthm != _WEP104_)) { /* restructure_ht_ie */ r8712_restructure_ht_ie(padapter, &pnetwork->network.IEs[0], &psecnetwork->IEs[0], pnetwork->network.IELength, &psecnetwork->IELength); } } psecuritypriv->supplicant_ie[0] = (u8)psecnetwork->IELength; if (psecnetwork->IELength < 255) memcpy(&psecuritypriv->supplicant_ie[1], &psecnetwork->IEs[0], psecnetwork->IELength); else memcpy(&psecuritypriv->supplicant_ie[1], &psecnetwork->IEs[0], 255); /* get cmdsz before endian conversion */ pcmd->cmdsz = r8712_get_wlan_bssid_ex_sz(psecnetwork); #ifdef __BIG_ENDIAN /* wlan_network endian conversion */ psecnetwork->Length = cpu_to_le32(psecnetwork->Length); psecnetwork->Ssid.SsidLength = cpu_to_le32(psecnetwork->Ssid.SsidLength); psecnetwork->Privacy = cpu_to_le32(psecnetwork->Privacy); psecnetwork->Rssi = cpu_to_le32(psecnetwork->Rssi); psecnetwork->NetworkTypeInUse = cpu_to_le32(psecnetwork->NetworkTypeInUse); psecnetwork->Configuration.ATIMWindow = cpu_to_le32(psecnetwork->Configuration.ATIMWindow); psecnetwork->Configuration.BeaconPeriod = cpu_to_le32(psecnetwork->Configuration.BeaconPeriod); psecnetwork->Configuration.DSConfig = cpu_to_le32(psecnetwork->Configuration.DSConfig); psecnetwork->Configuration.FHConfig.DwellTime = cpu_to_le32(psecnetwork->Configuration.FHConfig.DwellTime); psecnetwork->Configuration.FHConfig.HopPattern = cpu_to_le32(psecnetwork->Configuration.FHConfig.HopPattern); psecnetwork->Configuration.FHConfig.HopSet = cpu_to_le32(psecnetwork->Configuration.FHConfig.HopSet); psecnetwork->Configuration.FHConfig.Length = cpu_to_le32(psecnetwork->Configuration.FHConfig.Length); psecnetwork->Configuration.Length = cpu_to_le32(psecnetwork->Configuration.Length); psecnetwork->InfrastructureMode = cpu_to_le32(psecnetwork->InfrastructureMode); psecnetwork->IELength = cpu_to_le32(psecnetwork->IELength); #endif INIT_LIST_HEAD(&pcmd->list); pcmd->cmdcode = _JoinBss_CMD_; pcmd->parmbuf = (unsigned char *)psecnetwork; pcmd->rsp = NULL; pcmd->rspsz = 0; r8712_enqueue_cmd(pcmdpriv, pcmd); return 0; } void r8712_disassoc_cmd(struct _adapter *padapter) /* for sta_mode */ { struct cmd_obj 
*pdisconnect_cmd; struct disconnect_parm *pdisconnect; struct cmd_priv *pcmdpriv = &padapter->cmdpriv; pdisconnect_cmd = kmalloc(sizeof(*pdisconnect_cmd), GFP_ATOMIC); if (!pdisconnect_cmd) return; pdisconnect = kmalloc(sizeof(*pdisconnect), GFP_ATOMIC); if (!pdisconnect) { kfree(pdisconnect_cmd); return; } init_h2fwcmd_w_parm_no_rsp(pdisconnect_cmd, pdisconnect, _DisConnect_CMD_); r8712_enqueue_cmd(pcmdpriv, pdisconnect_cmd); } void r8712_setopmode_cmd(struct _adapter *padapter, enum NDIS_802_11_NETWORK_INFRASTRUCTURE networktype) { struct cmd_obj *ph2c; struct setopmode_parm *psetop; struct cmd_priv *pcmdpriv = &padapter->cmdpriv; ph2c = kmalloc(sizeof(*ph2c), GFP_ATOMIC); if (!ph2c) return; psetop = kmalloc(sizeof(*psetop), GFP_ATOMIC); if (!psetop) { kfree(ph2c); return; } init_h2fwcmd_w_parm_no_rsp(ph2c, psetop, _SetOpMode_CMD_); psetop->mode = (u8)networktype; r8712_enqueue_cmd(pcmdpriv, ph2c); } void r8712_setstakey_cmd(struct _adapter *padapter, u8 *psta, u8 unicast_key) { struct cmd_obj *ph2c; struct set_stakey_parm *psetstakey_para; struct cmd_priv *pcmdpriv = &padapter->cmdpriv; struct set_stakey_rsp *psetstakey_rsp = NULL; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct security_priv *psecuritypriv = &padapter->securitypriv; struct sta_info *sta = (struct sta_info *)psta; ph2c = kmalloc(sizeof(*ph2c), GFP_ATOMIC); if (!ph2c) return; psetstakey_para = kmalloc(sizeof(*psetstakey_para), GFP_ATOMIC); if (!psetstakey_para) { kfree(ph2c); return; } psetstakey_rsp = kmalloc(sizeof(*psetstakey_rsp), GFP_ATOMIC); if (!psetstakey_rsp) { kfree(ph2c); kfree(psetstakey_para); return; } init_h2fwcmd_w_parm_no_rsp(ph2c, psetstakey_para, _SetStaKey_CMD_); ph2c->rsp = (u8 *)psetstakey_rsp; ph2c->rspsz = sizeof(struct set_stakey_rsp); ether_addr_copy(psetstakey_para->addr, sta->hwaddr); if (check_fwstate(pmlmepriv, WIFI_STATION_STATE)) psetstakey_para->algorithm = (unsigned char) psecuritypriv->PrivacyAlgrthm; else GET_ENCRY_ALGO(psecuritypriv, sta, psetstakey_para->algorithm, false); if (unicast_key) memcpy(&psetstakey_para->key, &sta->x_UncstKey, 16); else memcpy(&psetstakey_para->key, &psecuritypriv->XGrpKey[psecuritypriv->XGrpKeyid - 1].skey, 16); r8712_enqueue_cmd(pcmdpriv, ph2c); } void r8712_setMacAddr_cmd(struct _adapter *padapter, const u8 *mac_addr) { struct cmd_priv *pcmdpriv = &padapter->cmdpriv; struct cmd_obj *ph2c; struct SetMacAddr_param *psetMacAddr_para; ph2c = kmalloc(sizeof(*ph2c), GFP_ATOMIC); if (!ph2c) return; psetMacAddr_para = kmalloc(sizeof(*psetMacAddr_para), GFP_ATOMIC); if (!psetMacAddr_para) { kfree(ph2c); return; } init_h2fwcmd_w_parm_no_rsp(ph2c, psetMacAddr_para, _SetMacAddress_CMD_); ether_addr_copy(psetMacAddr_para->MacAddr, mac_addr); r8712_enqueue_cmd(pcmdpriv, ph2c); } void r8712_addbareq_cmd(struct _adapter *padapter, u8 tid) { struct cmd_priv *pcmdpriv = &padapter->cmdpriv; struct cmd_obj *ph2c; struct addBaReq_parm *paddbareq_parm; ph2c = kmalloc(sizeof(*ph2c), GFP_ATOMIC); if (!ph2c) return; paddbareq_parm = kmalloc(sizeof(*paddbareq_parm), GFP_ATOMIC); if (!paddbareq_parm) { kfree(ph2c); return; } paddbareq_parm->tid = tid; init_h2fwcmd_w_parm_no_rsp(ph2c, paddbareq_parm, GEN_CMD_CODE(_AddBAReq)); r8712_enqueue_cmd_ex(pcmdpriv, ph2c); } void r8712_wdg_wk_cmd(struct _adapter *padapter) { struct cmd_obj *ph2c; struct drvint_cmd_parm *pdrvintcmd_param; struct cmd_priv *pcmdpriv = &padapter->cmdpriv; ph2c = kmalloc(sizeof(*ph2c), GFP_ATOMIC); if (!ph2c) return; pdrvintcmd_param = kmalloc(sizeof(*pdrvintcmd_param), GFP_ATOMIC); if 
(!pdrvintcmd_param) { kfree(ph2c); return; } pdrvintcmd_param->i_cid = WDG_WK_CID; pdrvintcmd_param->sz = 0; pdrvintcmd_param->pbuf = NULL; init_h2fwcmd_w_parm_no_rsp(ph2c, pdrvintcmd_param, _DRV_INT_CMD_); r8712_enqueue_cmd_ex(pcmdpriv, ph2c); } void r8712_survey_cmd_callback(struct _adapter *padapter, struct cmd_obj *pcmd) { struct mlme_priv *pmlmepriv = &padapter->mlmepriv; if (pcmd->res != H2C_SUCCESS) clr_fwstate(pmlmepriv, _FW_UNDER_SURVEY); r8712_free_cmd_obj(pcmd); } void r8712_disassoc_cmd_callback(struct _adapter *padapter, struct cmd_obj *pcmd) { unsigned long irqL; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; if (pcmd->res != H2C_SUCCESS) { spin_lock_irqsave(&pmlmepriv->lock, irqL); set_fwstate(pmlmepriv, _FW_LINKED); spin_unlock_irqrestore(&pmlmepriv->lock, irqL); return; } r8712_free_cmd_obj(pcmd); } void r8712_joinbss_cmd_callback(struct _adapter *padapter, struct cmd_obj *pcmd) { struct mlme_priv *pmlmepriv = &padapter->mlmepriv; if (pcmd->res != H2C_SUCCESS) mod_timer(&pmlmepriv->assoc_timer, jiffies + msecs_to_jiffies(1)); r8712_free_cmd_obj(pcmd); } void r8712_createbss_cmd_callback(struct _adapter *padapter, struct cmd_obj *pcmd) { unsigned long irqL; struct sta_info *psta = NULL; struct wlan_network *pwlan = NULL; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct wlan_bssid_ex *pnetwork = (struct wlan_bssid_ex *)pcmd->parmbuf; struct wlan_network *tgt_network = &(pmlmepriv->cur_network); if (pcmd->res != H2C_SUCCESS) mod_timer(&pmlmepriv->assoc_timer, jiffies + msecs_to_jiffies(1)); del_timer(&pmlmepriv->assoc_timer); #ifdef __BIG_ENDIAN /* endian_convert */ pnetwork->Length = le32_to_cpu(pnetwork->Length); pnetwork->Ssid.SsidLength = le32_to_cpu(pnetwork->Ssid.SsidLength); pnetwork->Privacy = le32_to_cpu(pnetwork->Privacy); pnetwork->Rssi = le32_to_cpu(pnetwork->Rssi); pnetwork->NetworkTypeInUse = le32_to_cpu(pnetwork->NetworkTypeInUse); pnetwork->Configuration.ATIMWindow = le32_to_cpu(pnetwork->Configuration.ATIMWindow); pnetwork->Configuration.DSConfig = le32_to_cpu(pnetwork->Configuration.DSConfig); pnetwork->Configuration.FHConfig.DwellTime = le32_to_cpu(pnetwork->Configuration.FHConfig.DwellTime); pnetwork->Configuration.FHConfig.HopPattern = le32_to_cpu(pnetwork->Configuration.FHConfig.HopPattern); pnetwork->Configuration.FHConfig.HopSet = le32_to_cpu(pnetwork->Configuration.FHConfig.HopSet); pnetwork->Configuration.FHConfig.Length = le32_to_cpu(pnetwork->Configuration.FHConfig.Length); pnetwork->Configuration.Length = le32_to_cpu(pnetwork->Configuration.Length); pnetwork->InfrastructureMode = le32_to_cpu(pnetwork->InfrastructureMode); pnetwork->IELength = le32_to_cpu(pnetwork->IELength); #endif spin_lock_irqsave(&pmlmepriv->lock, irqL); if ((pmlmepriv->fw_state) & WIFI_AP_STATE) { psta = r8712_get_stainfo(&padapter->stapriv, pnetwork->MacAddress); if (!psta) { psta = r8712_alloc_stainfo(&padapter->stapriv, pnetwork->MacAddress); if (!psta) goto createbss_cmd_fail; } r8712_indicate_connect(padapter); } else { pwlan = _r8712_alloc_network(pmlmepriv); if (!pwlan) { pwlan = r8712_get_oldest_wlan_network(&pmlmepriv->scanned_queue); if (!pwlan) goto createbss_cmd_fail; pwlan->last_scanned = jiffies; } else { list_add_tail(&(pwlan->list), &pmlmepriv->scanned_queue.queue); } pnetwork->Length = r8712_get_wlan_bssid_ex_sz(pnetwork); memcpy(&(pwlan->network), pnetwork, pnetwork->Length); pwlan->fixed = true; memcpy(&tgt_network->network, pnetwork, (r8712_get_wlan_bssid_ex_sz(pnetwork))); if (pmlmepriv->fw_state & _FW_UNDER_LINKING) pmlmepriv->fw_state ^= 
_FW_UNDER_LINKING; /* * we will set _FW_LINKED when there is one more sat to * join us (stassoc_event_callback) */ } createbss_cmd_fail: spin_unlock_irqrestore(&pmlmepriv->lock, irqL); r8712_free_cmd_obj(pcmd); } void r8712_setstaKey_cmdrsp_callback(struct _adapter *padapter, struct cmd_obj *pcmd) { struct sta_priv *pstapriv = &padapter->stapriv; struct set_stakey_rsp *psetstakey_rsp = (struct set_stakey_rsp *) (pcmd->rsp); struct sta_info *psta = r8712_get_stainfo(pstapriv, psetstakey_rsp->addr); if (!psta) goto exit; psta->aid = psta->mac_id = psetstakey_rsp->keyid; /*CAM_ID(CAM_ENTRY)*/ exit: r8712_free_cmd_obj(pcmd); } void r8712_setassocsta_cmdrsp_callback(struct _adapter *padapter, struct cmd_obj *pcmd) { unsigned long irqL; struct sta_priv *pstapriv = &padapter->stapriv; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct set_assocsta_parm *passocsta_parm = (struct set_assocsta_parm *)(pcmd->parmbuf); struct set_assocsta_rsp *passocsta_rsp = (struct set_assocsta_rsp *) (pcmd->rsp); struct sta_info *psta = r8712_get_stainfo(pstapriv, passocsta_parm->addr); if (!psta) return; psta->aid = psta->mac_id = passocsta_rsp->cam_id; spin_lock_irqsave(&pmlmepriv->lock, irqL); if ((check_fwstate(pmlmepriv, WIFI_MP_STATE)) && (check_fwstate(pmlmepriv, _FW_UNDER_LINKING))) pmlmepriv->fw_state ^= _FW_UNDER_LINKING; set_fwstate(pmlmepriv, _FW_LINKED); spin_unlock_irqrestore(&pmlmepriv->lock, irqL); r8712_free_cmd_obj(pcmd); } void r8712_disconnectCtrlEx_cmd(struct _adapter *adapter, u32 enableDrvCtrl, u32 tryPktCnt, u32 tryPktInterval, u32 firstStageTO) { struct cmd_obj *ph2c; struct DisconnectCtrlEx_param *param; struct cmd_priv *pcmdpriv = &adapter->cmdpriv; ph2c = kmalloc(sizeof(*ph2c), GFP_ATOMIC); if (!ph2c) return; param = kzalloc(sizeof(*param), GFP_ATOMIC); if (!param) { kfree(ph2c); return; } param->EnableDrvCtrl = (unsigned char)enableDrvCtrl; param->TryPktCnt = (unsigned char)tryPktCnt; param->TryPktInterval = (unsigned char)tryPktInterval; param->FirstStageTO = (unsigned int)firstStageTO; init_h2fwcmd_w_parm_no_rsp(ph2c, param, GEN_CMD_CODE(_DisconnectCtrlEx)); r8712_enqueue_cmd(pcmdpriv, ph2c); }
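r8712_init_cmd_priv() above aligns cmd_buf inside an over-allocated cmd_allocated_buf by adding CMDBUFF_ALIGN_SZ and subtracting the low address bits. A standalone sketch of that alignment arithmetic; ALIGN_SZ here is an arbitrary power of two chosen only for illustration:

/*
 * Userspace sketch of the buffer-alignment trick used in r8712_init_cmd_priv():
 * over-allocate by the alignment size, then bump the pointer so its low bits
 * are clear.  The bump always advances by 1..ALIGN_SZ bytes, so the aligned
 * pointer stays inside the allocation.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define ALIGN_SZ 512u	/* must be a power of two */

int main(void)
{
	unsigned char *raw, *aligned;

	raw = malloc(1024 + ALIGN_SZ);
	if (!raw)
		return 1;

	/* Same arithmetic as the driver's cmd_buf computation. */
	aligned = raw + ALIGN_SZ - ((uintptr_t)raw & (ALIGN_SZ - 1));

	printf("raw low bits %lu, aligned low bits %lu\n",
	       (unsigned long)((uintptr_t)raw & (ALIGN_SZ - 1)),
	       (unsigned long)((uintptr_t)aligned & (ALIGN_SZ - 1)));

	free(raw);	/* free the original pointer, not the aligned one */
	return 0;
}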
// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_gact.c Generic actions * * copyright Jamal Hadi Salim (2002-4) */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <linux/tc_act/tc_gact.h> #include <net/tc_act/tc_gact.h> #include <net/tc_wrapper.h> static struct tc_action_ops act_gact_ops; #ifdef CONFIG_GACT_PROB static int gact_net_rand(struct tcf_gact *gact) { smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */ if (get_random_u32_below(gact->tcfg_pval)) return gact->tcf_action; return gact->tcfg_paction; } static int gact_determ(struct tcf_gact *gact) { u32 pack = atomic_inc_return(&gact->packets); smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */ if (pack % gact->tcfg_pval) return gact->tcf_action; return gact->tcfg_paction; } typedef int (*g_rand)(struct tcf_gact *gact); static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ }; #endif /* CONFIG_GACT_PROB */ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { [TCA_GACT_PARMS] = { .len = sizeof(struct tc_gact) }, [TCA_GACT_PROB] = { .len = sizeof(struct tc_gact_p) }, }; static int tcf_gact_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_gact_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GACT_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_gact *parm; struct tcf_gact *gact; int ret = 0; u32 index; int err; #ifdef CONFIG_GACT_PROB struct tc_gact_p *p_parm = NULL; #endif if (nla == NULL) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_GACT_MAX, nla, gact_policy, NULL); if (err < 0) return err; if (tb[TCA_GACT_PARMS] == NULL) return -EINVAL; parm = nla_data(tb[TCA_GACT_PARMS]); index = parm->index; #ifndef CONFIG_GACT_PROB if (tb[TCA_GACT_PROB] != NULL) return -EOPNOTSUPP; #else if (tb[TCA_GACT_PROB]) { p_parm = nla_data(tb[TCA_GACT_PROB]); if (p_parm->ptype >= MAX_RAND) return -EINVAL; if
(TC_ACT_EXT_CMP(p_parm->paction, TC_ACT_GOTO_CHAIN)) { NL_SET_ERR_MSG(extack, "goto chain not allowed on fallback"); return -EINVAL; } } #endif err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { ret = tcf_idr_create_from_flags(tn, index, est, a, &act_gact_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; } else if (err > 0) { if (bind)/* dont override defaults */ return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } } else { return err; } err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; gact = to_gact(*a); spin_lock_bh(&gact->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); #ifdef CONFIG_GACT_PROB if (p_parm) { gact->tcfg_paction = p_parm->paction; gact->tcfg_pval = max_t(u16, 1, p_parm->pval); /* Make sure tcfg_pval is written before tcfg_ptype * coupled with smp_rmb() in gact_net_rand() & gact_determ() */ smp_wmb(); gact->tcfg_ptype = p_parm->ptype; } #endif spin_unlock_bh(&gact->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); return ret; release_idr: tcf_idr_release(*a, bind); return err; } TC_INDIRECT_SCOPE int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_gact *gact = to_gact(a); int action = READ_ONCE(gact->tcf_action); #ifdef CONFIG_GACT_PROB { u32 ptype = READ_ONCE(gact->tcfg_ptype); if (ptype) action = gact_rand[ptype](gact); } #endif tcf_action_update_bstats(&gact->common, skb); if (action == TC_ACT_SHOT) tcf_action_inc_drop_qstats(&gact->common); tcf_lastuse_update(&gact->tcf_tm); return action; } static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { struct tcf_gact *gact = to_gact(a); int action = READ_ONCE(gact->tcf_action); struct tcf_t *tm = &gact->tcf_tm; tcf_action_update_stats(a, bytes, packets, action == TC_ACT_SHOT ? 
packets : drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_gact *gact = to_gact(a); struct tc_gact opt = { .index = gact->tcf_index, .refcnt = refcount_read(&gact->tcf_refcnt) - ref, .bindcnt = atomic_read(&gact->tcf_bindcnt) - bind, }; struct tcf_t t; spin_lock_bh(&gact->tcf_lock); opt.action = gact->tcf_action; if (nla_put(skb, TCA_GACT_PARMS, sizeof(opt), &opt)) goto nla_put_failure; #ifdef CONFIG_GACT_PROB if (gact->tcfg_ptype) { struct tc_gact_p p_opt = { .paction = gact->tcfg_paction, .pval = gact->tcfg_pval, .ptype = gact->tcfg_ptype, }; if (nla_put(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt)) goto nla_put_failure; } #endif tcf_tm_dump(&t, &gact->tcf_tm); if (nla_put_64bit(skb, TCA_GACT_TM, sizeof(t), &t, TCA_GACT_PAD)) goto nla_put_failure; spin_unlock_bh(&gact->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&gact->tcf_lock); nlmsg_trim(skb, b); return -1; } static size_t tcf_gact_get_fill_size(const struct tc_action *act) { size_t sz = nla_total_size(sizeof(struct tc_gact)); /* TCA_GACT_PARMS */ #ifdef CONFIG_GACT_PROB if (to_gact(act)->tcfg_ptype) /* TCA_GACT_PROB */ sz += nla_total_size(sizeof(struct tc_gact_p)); #endif return sz; } static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, bool bind, struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; if (is_tcf_gact_ok(act)) { entry->id = FLOW_ACTION_ACCEPT; } else if (is_tcf_gact_shot(act)) { entry->id = FLOW_ACTION_DROP; } else if (is_tcf_gact_trap(act)) { entry->id = FLOW_ACTION_TRAP; } else if (is_tcf_gact_goto_chain(act)) { entry->id = FLOW_ACTION_GOTO; entry->chain_index = tcf_gact_goto_chain_index(act); } else if (is_tcf_gact_continue(act)) { NL_SET_ERR_MSG_MOD(extack, "Offload of \"continue\" action is not supported"); return -EOPNOTSUPP; } else if (is_tcf_gact_reclassify(act)) { NL_SET_ERR_MSG_MOD(extack, "Offload of \"reclassify\" action is not supported"); return -EOPNOTSUPP; } else if (is_tcf_gact_pipe(act)) { NL_SET_ERR_MSG_MOD(extack, "Offload of \"pipe\" action is not supported"); return -EOPNOTSUPP; } else { NL_SET_ERR_MSG_MOD(extack, "Unsupported generic action offload"); return -EOPNOTSUPP; } *index_inc = 1; } else { struct flow_offload_action *fl_action = entry_data; if (is_tcf_gact_ok(act)) fl_action->id = FLOW_ACTION_ACCEPT; else if (is_tcf_gact_shot(act)) fl_action->id = FLOW_ACTION_DROP; else if (is_tcf_gact_trap(act)) fl_action->id = FLOW_ACTION_TRAP; else if (is_tcf_gact_goto_chain(act)) fl_action->id = FLOW_ACTION_GOTO; else return -EOPNOTSUPP; } return 0; } static struct tc_action_ops act_gact_ops = { .kind = "gact", .id = TCA_ID_GACT, .owner = THIS_MODULE, .act = tcf_gact_act, .stats_update = tcf_gact_stats_update, .dump = tcf_gact_dump, .init = tcf_gact_init, .get_fill_size = tcf_gact_get_fill_size, .offload_act_setup = tcf_gact_offload_act_setup, .size = sizeof(struct tcf_gact), }; MODULE_ALIAS_NET_ACT("gact"); static __net_init int gact_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_gact_ops.net_id); return tc_action_net_init(net, tn, &act_gact_ops); } static void __net_exit gact_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_gact_ops.net_id); } static struct pernet_operations gact_net_ops = { .init = gact_init_net, .exit_batch = gact_exit_net, .id = &act_gact_ops.net_id, .size = sizeof(struct 
tc_action_net), }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); MODULE_DESCRIPTION("Generic Classifier actions"); MODULE_LICENSE("GPL"); static int __init gact_init_module(void) { #ifdef CONFIG_GACT_PROB pr_info("GACT probability on\n"); #else pr_info("GACT probability NOT on\n"); #endif return tcf_register_action(&act_gact_ops, &gact_net_ops); } static void __exit gact_cleanup_module(void) { tcf_unregister_action(&act_gact_ops, &gact_net_ops); } module_init(gact_init_module); module_exit(gact_cleanup_module);
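Under CONFIG_GACT_PROB, gact_net_rand() returns the fallback tcfg_paction for roughly one in tcfg_pval packets, while gact_determ() returns it for exactly every tcfg_pval-th packet. A hedged userspace simulation of those two distributions; PVAL and the helper names here are illustrative only, not part of the module:

/*
 * Illustrative sketch (userspace, not kernel code) of the two gact probability
 * modes: "random" hits the fallback action with probability 1/PVAL per packet,
 * "deterministic" hits it on every PVALth packet.
 */
#include <stdio.h>
#include <stdlib.h>

#define PVAL 4	/* stand-in for gact->tcfg_pval */

static int net_rand_hits(unsigned int packets)
{
	int hits = 0;

	for (unsigned int i = 0; i < packets; i++)
		if (rand() % PVAL == 0)	/* ~1/PVAL of packets take paction */
			hits++;
	return hits;
}

static int determ_hits(unsigned int packets)
{
	int hits = 0;

	for (unsigned int i = 1; i <= packets; i++)
		if (i % PVAL == 0)	/* exactly every PVALth packet */
			hits++;
	return hits;
}

int main(void)
{
	printf("random mode:        %d of 1000 packets hit paction\n",
	       net_rand_hits(1000));
	printf("deterministic mode: %d of 1000 packets hit paction\n",
	       determ_hits(1000));
	return 0;
}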
/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _NET_CORE_DEV_H #define _NET_CORE_DEV_H #include <linux/types.h> #include <linux/rwsem.h> #include <linux/netdevice.h> struct net; struct netlink_ext_ack; struct cpumask; /* Random bits of netdevice that don't need to be exposed */ #define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ struct sd_flow_limit { u64 count; unsigned int num_buckets; unsigned int history_head; u16 history[FLOW_LIMIT_HISTORY]; u8 buckets[]; }; extern int netdev_flow_limit_table_len; #ifdef CONFIG_PROC_FS int __init dev_proc_init(void); #else #define dev_proc_init() 0 #endif void linkwatch_init_dev(struct net_device *dev); void linkwatch_run_queue(void); void dev_addr_flush(struct net_device *dev); int dev_addr_init(struct net_device *dev); void dev_addr_check(struct net_device *dev); /* sysctls not referred to from outside net/core/ */ extern int netdev_unregister_timeout_secs; extern int weight_p; extern int dev_weight_rx_bias; extern int dev_weight_tx_bias; extern struct rw_semaphore dev_addr_sem; /* rtnl helpers */ extern struct list_head net_todo_list; void netdev_run_todo(void); /* netdev management, shared between various uAPI entry points */ struct netdev_name_node { struct hlist_node hlist; struct list_head list; struct net_device *dev; const char *name; struct rcu_head rcu; }; int netdev_get_name(struct net *net, char *name, int ifindex); int dev_change_name(struct net_device *dev, const char *newname); #define netdev_for_each_altname(dev, namenode) \ list_for_each_entry((namenode), &(dev)->name_node->list, list) #define netdev_for_each_altname_safe(dev, namenode, next) \ list_for_each_entry_safe((namenode), (next), &(dev)->name_node->list, \ list) int netdev_name_node_alt_create(struct net_device *dev, const char *name); int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); int dev_validate_mtu(struct net_device *dev, int mtu, struct netlink_ext_ack *extack); int dev_set_mtu_ext(struct net_device *dev, int mtu, struct netlink_ext_ack *extack); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, u32 value); typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags); int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len); void dev_set_group(struct net_device *dev, int new_group); int dev_change_carrier(struct net_device *dev, bool new_carrier); void __dev_set_rx_mode(struct net_device *dev); void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
unsigned int gchanges, u32 portid, const struct nlmsghdr *nlh); void unregister_netdevice_many_notify(struct list_head *head, u32 portid, const struct nlmsghdr *nlh); static inline void netif_set_gso_max_size(struct net_device *dev, unsigned int size) { /* dev->gso_max_size is read locklessly from sk_setup_caps() */ WRITE_ONCE(dev->gso_max_size, size); if (size <= GSO_LEGACY_MAX_SIZE) WRITE_ONCE(dev->gso_ipv4_max_size, size); } static inline void netif_set_gso_max_segs(struct net_device *dev, unsigned int segs) { /* dev->gso_max_segs is read locklessly from sk_setup_caps() */ WRITE_ONCE(dev->gso_max_segs, segs); } static inline void netif_set_gro_max_size(struct net_device *dev, unsigned int size) { /* This pairs with the READ_ONCE() in skb_gro_receive() */ WRITE_ONCE(dev->gro_max_size, size); if (size <= GRO_LEGACY_MAX_SIZE) WRITE_ONCE(dev->gro_ipv4_max_size, size); } static inline void netif_set_gso_ipv4_max_size(struct net_device *dev, unsigned int size) { /* dev->gso_ipv4_max_size is read locklessly from sk_setup_caps() */ WRITE_ONCE(dev->gso_ipv4_max_size, size); } static inline void netif_set_gro_ipv4_max_size(struct net_device *dev, unsigned int size) { /* This pairs with the READ_ONCE() in skb_gro_receive() */ WRITE_ONCE(dev->gro_ipv4_max_size, size); } int rps_cpumask_housekeeping(struct cpumask *mask); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) void xdp_do_check_flushed(struct napi_struct *napi); #else static inline void xdp_do_check_flushed(struct napi_struct *napi) { } #endif struct napi_struct *napi_by_id(unsigned int napi_id); void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu); #define XMIT_RECURSION_LIMIT 8 #ifndef CONFIG_PREEMPT_RT static inline bool dev_xmit_recursion(void) { return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > XMIT_RECURSION_LIMIT); } static inline void dev_xmit_recursion_inc(void) { __this_cpu_inc(softnet_data.xmit.recursion); } static inline void dev_xmit_recursion_dec(void) { __this_cpu_dec(softnet_data.xmit.recursion); } #else static inline bool dev_xmit_recursion(void) { return unlikely(current->net_xmit.recursion > XMIT_RECURSION_LIMIT); } static inline void dev_xmit_recursion_inc(void) { current->net_xmit.recursion++; } static inline void dev_xmit_recursion_dec(void) { current->net_xmit.recursion--; } #endif int dev_set_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack); #endif
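dev_xmit_recursion(), dev_xmit_recursion_inc() and dev_xmit_recursion_dec() above bound how deeply the transmit path may re-enter itself before packets are dropped. A userspace sketch of that guard, with a thread-local counter standing in for the per-CPU softnet_data (or per-task, on PREEMPT_RT) counter:

/*
 * Sketch of the XMIT_RECURSION_LIMIT guard: bump a counter around each nested
 * "transmit" and refuse the call once the depth exceeds the limit, so a
 * pathological stack of virtual devices cannot overflow the stack.
 */
#include <stdbool.h>
#include <stdio.h>

#define XMIT_RECURSION_LIMIT 8

static _Thread_local unsigned int xmit_recursion;

static bool xmit_recursion_exceeded(void)
{
	return xmit_recursion > XMIT_RECURSION_LIMIT;
}

/* A transmit path that (pathologically) re-enters itself, e.g. a virtual
 * device stacked on another virtual device. */
static int xmit(int depth)
{
	int ret;

	if (xmit_recursion_exceeded()) {
		printf("dropped at depth %d to avoid unbounded recursion\n",
		       depth);
		return -1;
	}

	xmit_recursion++;
	ret = xmit(depth + 1);	/* re-entrant transmit */
	xmit_recursion--;

	return ret;
}

int main(void)
{
	return xmit(0) == -1 ? 0 : 1;
}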
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Kernel-based Virtual Machine driver for Linux * * This module enables machines with Intel VT-x extensions to run virtual * machines without emulation or binary translation. * * MMU support * * Copyright (C) 2006 Qumranet, Inc. * Copyright 2010 Red Hat, Inc. and/or its affiliates. * * Authors: * Yaniv Kamay <yaniv@qumranet.com> * Avi Kivity <avi@qumranet.com> */ /* * The MMU needs to be able to access/walk 32-bit and 64-bit guest page tables, * as well as guest EPT tables, so the code in this file is compiled thrice, * once per guest PTE type. The per-type defines are #undef'd at the end. */ #if PTTYPE == 64 #define pt_element_t u64 #define guest_walker guest_walker64 #define FNAME(name) paging##64_##name #define PT_LEVEL_BITS 9 #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL #else #define PT_MAX_FULL_LEVELS 2 #endif #elif PTTYPE == 32 #define pt_element_t u32 #define guest_walker guest_walker32 #define FNAME(name) paging##32_##name #define PT_LEVEL_BITS 10 #define PT_MAX_FULL_LEVELS 2 #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true #define PT32_DIR_PSE36_SIZE 4 #define PT32_DIR_PSE36_SHIFT 13 #define PT32_DIR_PSE36_MASK \ (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) #elif PTTYPE == PTTYPE_EPT #define pt_element_t u64 #define guest_walker guest_walkerEPT #define FNAME(name) ept_##name #define PT_LEVEL_BITS 9 #define PT_GUEST_DIRTY_SHIFT 9 #define PT_GUEST_ACCESSED_SHIFT 8 #define PT_HAVE_ACCESSED_DIRTY(mmu) (!(mmu)->cpu_role.base.ad_disabled) #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL #else #error Invalid PTTYPE value #endif /* Common logic, but per-type values. These also need to be undefined. */ #define PT_BASE_ADDR_MASK ((pt_element_t)__PT_BASE_ADDR_MASK) #define PT_LVL_ADDR_MASK(lvl) __PT_LVL_ADDR_MASK(PT_BASE_ADDR_MASK, lvl, PT_LEVEL_BITS) #define PT_LVL_OFFSET_MASK(lvl) __PT_LVL_OFFSET_MASK(PT_BASE_ADDR_MASK, lvl, PT_LEVEL_BITS) #define PT_INDEX(addr, lvl) __PT_INDEX(addr, lvl, PT_LEVEL_BITS) #define PT_GUEST_DIRTY_MASK (1 << PT_GUEST_DIRTY_SHIFT) #define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT) #define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl) #define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PG_LEVEL_4K) /* * The guest_walker structure emulates the behavior of the hardware page * table walker.
*/ struct guest_walker { int level; unsigned max_level; gfn_t table_gfn[PT_MAX_FULL_LEVELS]; pt_element_t ptes[PT_MAX_FULL_LEVELS]; pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; bool pte_writable[PT_MAX_FULL_LEVELS]; unsigned int pt_access[PT_MAX_FULL_LEVELS]; unsigned int pte_access; gfn_t gfn; struct x86_exception fault; }; #if PTTYPE == 32 static inline gfn_t pse36_gfn_delta(u32 gpte) { int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT; return (gpte & PT32_DIR_PSE36_MASK) << shift; } #endif static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) { return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; } static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access, unsigned gpte) { unsigned mask; /* dirty bit is not supported, so no need to track it */ if (!PT_HAVE_ACCESSED_DIRTY(mmu)) return; BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); mask = (unsigned)~ACC_WRITE_MASK; /* Allow write access to dirty gptes */ mask |= (gpte >> (PT_GUEST_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK; *access &= mask; } static inline int FNAME(is_present_gpte)(unsigned long pte) { #if PTTYPE != PTTYPE_EPT return pte & PT_PRESENT_MASK; #else return pte & 7; #endif } static bool FNAME(is_bad_mt_xwr)(struct rsvd_bits_validate *rsvd_check, u64 gpte) { #if PTTYPE != PTTYPE_EPT return false; #else return __is_bad_mt_xwr(rsvd_check, gpte); #endif } static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level) { return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level) || FNAME(is_bad_mt_xwr)(&mmu->guest_rsvd_check, gpte); } static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, u64 gpte) { if (!FNAME(is_present_gpte)(gpte)) goto no_present; /* Prefetch only accessed entries (unless A/D bits are disabled). */ if (PT_HAVE_ACCESSED_DIRTY(vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK)) goto no_present; if (FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PG_LEVEL_4K)) goto no_present; return false; no_present: drop_spte(vcpu->kvm, spte); return true; } /* * For PTTYPE_EPT, a page table can be executable but not readable * on supported processors. Therefore, set_spte does not automatically * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK * to signify readability since it isn't used in the EPT case */ static inline unsigned FNAME(gpte_access)(u64 gpte) { unsigned access; #if PTTYPE == PTTYPE_EPT access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) | ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | ((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0); #else BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK); BUILD_BUG_ON(ACC_EXEC_MASK != 1); access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK); /* Combine NX with P (which is set here) to get ACC_EXEC_MASK. 
*/ access ^= (gpte >> PT64_NX_SHIFT); #endif return access; } static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, struct guest_walker *walker, gpa_t addr, int write_fault) { unsigned level, index; pt_element_t pte, orig_pte; pt_element_t __user *ptep_user; gfn_t table_gfn; int ret; /* dirty/accessed bits are not supported, so no need to update them */ if (!PT_HAVE_ACCESSED_DIRTY(mmu)) return 0; for (level = walker->max_level; level >= walker->level; --level) { pte = orig_pte = walker->ptes[level - 1]; table_gfn = walker->table_gfn[level - 1]; ptep_user = walker->ptep_user[level - 1]; index = offset_in_page(ptep_user) / sizeof(pt_element_t); if (!(pte & PT_GUEST_ACCESSED_MASK)) { trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); pte |= PT_GUEST_ACCESSED_MASK; } if (level == walker->level && write_fault && !(pte & PT_GUEST_DIRTY_MASK)) { trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); #if PTTYPE == PTTYPE_EPT if (kvm_x86_ops.nested_ops->write_log_dirty(vcpu, addr)) return -EINVAL; #endif pte |= PT_GUEST_DIRTY_MASK; } if (pte == orig_pte) continue; /* * If the slot is read-only, simply do not process the accessed * and dirty bits. This is the correct thing to do if the slot * is ROM, and page tables in read-as-ROM/write-as-MMIO slots * are only supported if the accessed and dirty bits are already * set in the ROM (so that MMIO writes are never needed). * * Note that NPT does not allow this at all and faults, since * it always wants nested page table entries for the guest * page tables to be writable. And EPT works but will simply * overwrite the read-only memory to set the accessed and dirty * bits. */ if (unlikely(!walker->pte_writable[level - 1])) continue; ret = __try_cmpxchg_user(ptep_user, &orig_pte, pte, fault); if (ret) return ret; kvm_vcpu_mark_page_dirty(vcpu, table_gfn); walker->ptes[level - 1] = pte; } return 0; } static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte) { unsigned pkeys = 0; #if PTTYPE == 64 pte_t pte = {.pte = gpte}; pkeys = pte_flags_pkey(pte_flags(pte)); #endif return pkeys; } static inline bool FNAME(is_last_gpte)(struct kvm_mmu *mmu, unsigned int level, unsigned int gpte) { /* * For EPT and PAE paging (both variants), bit 7 is either reserved at * all level or indicates a huge page (ignoring CR3/EPTP). In either * case, bit 7 being set terminates the walk. */ #if PTTYPE == 32 /* * 32-bit paging requires special handling because bit 7 is ignored if * CR4.PSE=0, not reserved. Clear bit 7 in the gpte if the level is * greater than the last level for which bit 7 is the PAGE_SIZE bit. * * The RHS has bit 7 set iff level < (2 + PSE). If it is clear, bit 7 * is not reserved and does not indicate a large page at this level, * so clear PT_PAGE_SIZE_MASK in gpte if that is the case. */ gpte &= level - (PT32_ROOT_LEVEL + mmu->cpu_role.ext.cr4_pse); #endif /* * PG_LEVEL_4K always terminates. The RHS has bit 7 set * iff level <= PG_LEVEL_4K, which for our purpose means * level == PG_LEVEL_4K; set PT_PAGE_SIZE_MASK in gpte then. */ gpte |= level - PG_LEVEL_4K - 1; return gpte & PT_PAGE_SIZE_MASK; } /* * Fetch a guest pte for a guest virtual address, or for an L2's GPA. 
*/ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gpa_t addr, u64 access) { int ret; pt_element_t pte; pt_element_t __user *ptep_user; gfn_t table_gfn; u64 pt_access, pte_access; unsigned index, accessed_dirty, pte_pkey; u64 nested_access; gpa_t pte_gpa; bool have_ad; int offset; u64 walk_nx_mask = 0; const int write_fault = access & PFERR_WRITE_MASK; const int user_fault = access & PFERR_USER_MASK; const int fetch_fault = access & PFERR_FETCH_MASK; u16 errcode = 0; gpa_t real_gpa; gfn_t gfn; trace_kvm_mmu_pagetable_walk(addr, access); retry_walk: walker->level = mmu->cpu_role.base.level; pte = kvm_mmu_get_guest_pgd(vcpu, mmu); have_ad = PT_HAVE_ACCESSED_DIRTY(mmu); #if PTTYPE == 64 walk_nx_mask = 1ULL << PT64_NX_SHIFT; if (walker->level == PT32E_ROOT_LEVEL) { pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); trace_kvm_mmu_paging_element(pte, walker->level); if (!FNAME(is_present_gpte)(pte)) goto error; --walker->level; } #endif walker->max_level = walker->level; /* * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging * by the MOV to CR instruction are treated as reads and do not cause the * processor to set the dirty flag in any EPT paging-structure entry. */ nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK; pte_access = ~0; /* * Queue a page fault for injection if this assertion fails, as callers * assume that walker.fault contains sane info on a walk failure. I.e. * avoid making the situation worse by inducing even worse badness * between when the assertion fails and when KVM kicks the vCPU out to * userspace (because the VM is bugged). */ if (KVM_BUG_ON(is_long_mode(vcpu) && !is_pae(vcpu), vcpu->kvm)) goto error; ++walker->level; do { struct kvm_memory_slot *slot; unsigned long host_addr; pt_access = pte_access; --walker->level; index = PT_INDEX(addr, walker->level); table_gfn = gpte_to_gfn(pte); offset = index * sizeof(pt_element_t); pte_gpa = gfn_to_gpa(table_gfn) + offset; BUG_ON(walker->level < 1); walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(table_gfn), nested_access, &walker->fault); /* * FIXME: This can happen if emulation (for of an INS/OUTS * instruction) triggers a nested page fault. The exit * qualification / exit info field will incorrectly have * "guest page access" as the nested page fault's cause, * instead of "guest page structure access". To fix this, * the x86_exception struct should be augmented with enough * information to fix the exit_qualification or exit_info_1 * fields. */ if (unlikely(real_gpa == INVALID_GPA)) return 0; slot = kvm_vcpu_gfn_to_memslot(vcpu, gpa_to_gfn(real_gpa)); if (!kvm_is_visible_memslot(slot)) goto error; host_addr = gfn_to_hva_memslot_prot(slot, gpa_to_gfn(real_gpa), &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; ptep_user = (pt_element_t __user *)((void *)host_addr + offset); if (unlikely(__get_user(pte, ptep_user))) goto error; walker->ptep_user[walker->level - 1] = ptep_user; trace_kvm_mmu_paging_element(pte, walker->level); /* * Inverting the NX it lets us AND it like other * permission bits. 
*/ pte_access = pt_access & (pte ^ walk_nx_mask); if (unlikely(!FNAME(is_present_gpte)(pte))) goto error; if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, walker->level))) { errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK; goto error; } walker->ptes[walker->level - 1] = pte; /* Convert to ACC_*_MASK flags for struct guest_walker. */ walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask); } while (!FNAME(is_last_gpte)(mmu, walker->level, pte)); pte_pkey = FNAME(gpte_pkeys)(vcpu, pte); accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0; /* Convert to ACC_*_MASK flags for struct guest_walker. */ walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask); errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access); if (unlikely(errcode)) goto error; gfn = gpte_to_gfn_lvl(pte, walker->level); gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT; #if PTTYPE == 32 if (walker->level > PG_LEVEL_4K && is_cpuid_PSE36()) gfn += pse36_gfn_delta(pte); #endif real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(gfn), access, &walker->fault); if (real_gpa == INVALID_GPA) return 0; walker->gfn = real_gpa >> PAGE_SHIFT; if (!write_fault) FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte); else /* * On a write fault, fold the dirty bit into accessed_dirty. * For modes without A/D bits support accessed_dirty will be * always clear. */ accessed_dirty &= pte >> (PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT); if (unlikely(!accessed_dirty)) { ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, addr, write_fault); if (unlikely(ret < 0)) goto error; else if (ret) goto retry_walk; } return 1; error: errcode |= write_fault | user_fault; if (fetch_fault && (is_efer_nx(mmu) || is_cr4_smep(mmu))) errcode |= PFERR_FETCH_MASK; walker->fault.vector = PF_VECTOR; walker->fault.error_code_valid = true; walker->fault.error_code = errcode; #if PTTYPE == PTTYPE_EPT /* * Use PFERR_RSVD_MASK in error_code to tell if EPT * misconfiguration requires to be injected. The detection is * done by is_rsvd_bits_set() above. * * We set up the value of exit_qualification to inject: * [2:0] - Derive from the access bits. The exit_qualification might be * out of date if it is serving an EPT misconfiguration. * [5:3] - Calculated by the page walk of the guest EPT page tables * [7:8] - Derived from [7:8] of real exit_qualification * * The other bits are set to 0. */ if (!(errcode & PFERR_RSVD_MASK)) { walker->fault.exit_qualification = 0; if (write_fault) walker->fault.exit_qualification |= EPT_VIOLATION_ACC_WRITE; if (user_fault) walker->fault.exit_qualification |= EPT_VIOLATION_ACC_READ; if (fetch_fault) walker->fault.exit_qualification |= EPT_VIOLATION_ACC_INSTR; /* * Note, pte_access holds the raw RWX bits from the EPTE, not * ACC_*_MASK flags! 
*/ walker->fault.exit_qualification |= (pte_access & VMX_EPT_RWX_MASK) << EPT_VIOLATION_RWX_SHIFT; } #endif walker->fault.address = addr; walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; walker->fault.async_page_fault = false; trace_kvm_mmu_walker_error(walker->fault.error_code); return 0; } static int FNAME(walk_addr)(struct guest_walker *walker, struct kvm_vcpu *vcpu, gpa_t addr, u64 access) { return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, access); } static bool FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, pt_element_t gpte) { struct kvm_memory_slot *slot; unsigned pte_access; gfn_t gfn; kvm_pfn_t pfn; if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) return false; gfn = gpte_to_gfn(gpte); pte_access = sp->role.access & FNAME(gpte_access)(gpte); FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte); slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, pte_access & ACC_WRITE_MASK); if (!slot) return false; pfn = gfn_to_pfn_memslot_atomic(slot, gfn); if (is_error_pfn(pfn)) return false; mmu_set_spte(vcpu, slot, spte, pte_access, gfn, pfn, NULL); kvm_release_pfn_clean(pfn); return true; } static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, struct guest_walker *gw, int level) { pt_element_t curr_pte; gpa_t base_gpa, pte_gpa = gw->pte_gpa[level - 1]; u64 mask; int r, index; if (level == PG_LEVEL_4K) { mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1; base_gpa = pte_gpa & ~mask; index = (pte_gpa - base_gpa) / sizeof(pt_element_t); r = kvm_vcpu_read_guest_atomic(vcpu, base_gpa, gw->prefetch_ptes, sizeof(gw->prefetch_ptes)); curr_pte = gw->prefetch_ptes[index]; } else r = kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &curr_pte, sizeof(curr_pte)); return r || curr_pte != gw->ptes[level - 1]; } static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, u64 *sptep) { struct kvm_mmu_page *sp; pt_element_t *gptep = gw->prefetch_ptes; u64 *spte; int i; sp = sptep_to_sp(sptep); if (sp->role.level > PG_LEVEL_4K) return; /* * If addresses are being invalidated, skip prefetching to avoid * accidentally prefetching those addresses. */ if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) return; if (sp->role.direct) return __direct_pte_prefetch(vcpu, sp, sptep); i = spte_index(sptep) & ~(PTE_PREFETCH_NUM - 1); spte = sp->spt + i; for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) { if (spte == sptep) continue; if (is_shadow_present_pte(*spte)) continue; if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i])) break; } } /* * Fetch a shadow pte for a specific level in the paging hierarchy. * If the guest tries to write a write-protected page, we need to * emulate this operation, return 1 to indicate this case. */ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, struct guest_walker *gw) { struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; unsigned int direct_access, access; int top_level, ret; gfn_t base_gfn = fault->gfn; WARN_ON_ONCE(gw->gfn != base_gfn); direct_access = gw->pte_access; top_level = vcpu->arch.mmu->cpu_role.base.level; if (top_level == PT32E_ROOT_LEVEL) top_level = PT32_ROOT_LEVEL; /* * Verify that the top-level gpte is still there. Since the page * is a root page, it is either write protected (and cannot be * changed from now on) or it is invalid (in which case, we don't * really care if it changes underneath us after this point). 
*/ if (FNAME(gpte_changed)(vcpu, gw, top_level)) return RET_PF_RETRY; if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) return RET_PF_RETRY; /* * Load a new root and retry the faulting instruction in the extremely * unlikely scenario that the guest root gfn became visible between * loading a dummy root and handling the resulting page fault, e.g. if * userspace create a memslot in the interim. */ if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) { kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu); return RET_PF_RETRY; } for_each_shadow_entry(vcpu, fault->addr, it) { gfn_t table_gfn; clear_sp_write_flooding_count(it.sptep); if (it.level == gw->level) break; table_gfn = gw->table_gfn[it.level - 2]; access = gw->pt_access[it.level - 2]; sp = kvm_mmu_get_child_sp(vcpu, it.sptep, table_gfn, false, access); /* * Synchronize the new page before linking it, as the CPU (KVM) * is architecturally disallowed from inserting non-present * entries into the TLB, i.e. the guest isn't required to flush * the TLB when changing the gPTE from non-present to present. * * For PG_LEVEL_4K, kvm_mmu_find_shadow_page() has already * synchronized the page via kvm_sync_page(). * * For higher level pages, which cannot be unsync themselves * but can have unsync children, synchronize via the slower * mmu_sync_children(). If KVM needs to drop mmu_lock due to * contention or to reschedule, instruct the caller to retry * the #PF (mmu_sync_children() ensures forward progress will * be made). */ if (sp != ERR_PTR(-EEXIST) && sp->unsync_children && mmu_sync_children(vcpu, sp, false)) return RET_PF_RETRY; /* * Verify that the gpte in the page, which is now either * write-protected or unsync, wasn't modified between the fault * and acquiring mmu_lock. This needs to be done even when * reusing an existing shadow page to ensure the information * gathered by the walker matches the information stored in the * shadow page (which could have been modified by a different * vCPU even if the page was already linked). Holding mmu_lock * prevents the shadow page from changing after this point. */ if (FNAME(gpte_changed)(vcpu, gw, it.level - 1)) return RET_PF_RETRY; if (sp != ERR_PTR(-EEXIST)) link_shadow_page(vcpu, it.sptep, sp); if (fault->write && table_gfn == fault->gfn) fault->write_fault_to_shadow_pgtable = true; } /* * Adjust the hugepage size _after_ resolving indirect shadow pages. * KVM doesn't support mapping hugepages into the guest for gfns that * are being shadowed by KVM, i.e. allocating a new shadow page may * affect the allowed hugepage size. */ kvm_mmu_hugepage_adjust(vcpu, fault); trace_kvm_mmu_spte_requested(fault); for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { /* * We cannot overwrite existing page tables with an NX * large page, as the leaf could be executable. 
*/ if (fault->nx_huge_page_workaround_enabled) disallowed_hugepage_adjust(fault, *it.sptep, it.level); base_gfn = gfn_round_for_level(fault->gfn, it.level); if (it.level == fault->goal_level) break; validate_direct_spte(vcpu, it.sptep, direct_access); sp = kvm_mmu_get_child_sp(vcpu, it.sptep, base_gfn, true, direct_access); if (sp == ERR_PTR(-EEXIST)) continue; link_shadow_page(vcpu, it.sptep, sp); if (fault->huge_page_disallowed) account_nx_huge_page(vcpu->kvm, sp, fault->req_level >= it.level); } if (WARN_ON_ONCE(it.level != fault->goal_level)) return -EFAULT; ret = mmu_set_spte(vcpu, fault->slot, it.sptep, gw->pte_access, base_gfn, fault->pfn, fault); if (ret == RET_PF_SPURIOUS) return ret; FNAME(pte_prefetch)(vcpu, gw, it.sptep); return ret; } /* * Page fault handler. There are several causes for a page fault: * - there is no shadow pte for the guest pte * - write access through a shadow pte marked read only so that we can set * the dirty bit * - write access to a shadow pte marked read only so we can update the page * dirty bitmap, when userspace requests it * - mmio access; in this case we will never install a present shadow pte * - normal guest page fault due to the guest pte marked not present, not * writable, or not executable * * Returns: 1 if we need to emulate the instruction, 0 otherwise, or * a negative value on error. */ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct guest_walker walker; int r; WARN_ON_ONCE(fault->is_tdp); /* * Look up the guest pte for the faulting address. * If PFEC.RSVD is set, this is a shadow page fault. * The bit needs to be cleared before walking guest page tables. */ r = FNAME(walk_addr)(&walker, vcpu, fault->addr, fault->error_code & ~PFERR_RSVD_MASK); /* * The page is not mapped by the guest. Let the guest handle it. */ if (!r) { if (!fault->prefetch) kvm_inject_emulated_page_fault(vcpu, &walker.fault); return RET_PF_RETRY; } fault->gfn = walker.gfn; fault->max_level = walker.level; fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn); if (page_fault_handle_page_track(vcpu, fault)) { shadow_page_table_clear_flood(vcpu, fault->addr); return RET_PF_WRITE_PROTECTED; } r = mmu_topup_memory_caches(vcpu, true); if (r) return r; r = kvm_faultin_pfn(vcpu, fault, walker.pte_access); if (r != RET_PF_CONTINUE) return r; /* * Do not change pte_access if the pfn is a mmio page, otherwise * we will cache the incorrect access into mmio spte. */ if (fault->write && !(walker.pte_access & ACC_WRITE_MASK) && !is_cr0_wp(vcpu->arch.mmu) && !fault->user && fault->slot) { walker.pte_access |= ACC_WRITE_MASK; walker.pte_access &= ~ACC_USER_MASK; /* * If we converted a user page to a kernel page, * so that the kernel can write to it when cr0.wp=0, * then we should prevent the kernel from executing it * if SMEP is enabled. */ if (is_cr4_smep(vcpu->arch.mmu)) walker.pte_access &= ~ACC_EXEC_MASK; } r = RET_PF_RETRY; write_lock(&vcpu->kvm->mmu_lock); if (is_page_fault_stale(vcpu, fault)) goto out_unlock; r = make_mmu_pages_available(vcpu); if (r) goto out_unlock; r = FNAME(fetch)(vcpu, fault, &walker); out_unlock: write_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(fault->pfn); return r; } static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) { int offset = 0; WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K); if (PTTYPE == 32) offset = sp->role.quadrant << SPTE_LEVEL_BITS; return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); } /* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. 
*/ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gpa_t addr, u64 access, struct x86_exception *exception) { struct guest_walker walker; gpa_t gpa = INVALID_GPA; int r; #ifndef CONFIG_X86_64 /* A 64-bit GVA should be impossible on 32-bit KVM. */ WARN_ON_ONCE((addr >> 32) && mmu == vcpu->arch.walk_mmu); #endif r = FNAME(walk_addr_generic)(&walker, vcpu, mmu, addr, access); if (r) { gpa = gfn_to_gpa(walker.gfn); gpa |= addr & ~PAGE_MASK; } else if (exception) *exception = walker.fault; return gpa; } /* * Using the information in sp->shadowed_translation (kvm_mmu_page_get_gfn()) is * safe because: * - The spte has a reference to the struct page, so the pfn for a given gfn * can't change unless all sptes pointing to it are nuked first. * * Returns * < 0: failed to sync spte * 0: the spte is synced and no tlb flushing is required * > 0: the spte is synced and tlb flushing is required */ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i) { bool host_writable; gpa_t first_pte_gpa; u64 *sptep, spte; struct kvm_memory_slot *slot; unsigned pte_access; pt_element_t gpte; gpa_t pte_gpa; gfn_t gfn; if (WARN_ON_ONCE(sp->spt[i] == SHADOW_NONPRESENT_VALUE || !sp->shadowed_translation)) return 0; first_pte_gpa = FNAME(get_level1_sp_gpa)(sp); pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte, sizeof(pt_element_t))) return -1; if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) return 1; gfn = gpte_to_gfn(gpte); pte_access = sp->role.access; pte_access &= FNAME(gpte_access)(gpte); FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte); if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access)) return 0; /* * Drop the SPTE if the new protections result in no effective * "present" bit or if the gfn is changing. The former case * only affects EPT with execute-only support with pte_access==0; * all other paging modes will create a read-only SPTE if * pte_access is zero. */ if ((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE || gfn != kvm_mmu_page_get_gfn(sp, i)) { drop_spte(vcpu->kvm, &sp->spt[i]); return 1; } /* * Do nothing if the permissions are unchanged. The existing SPTE is * still, and prefetch_invalid_gpte() has verified that the A/D bits * are set in the "new" gPTE, i.e. there is no danger of missing an A/D * update due to A/D bits being set in the SPTE but not the gPTE. */ if (kvm_mmu_page_get_access(sp, i) == pte_access) return 0; /* Update the shadowed access bits in case they changed. */ kvm_mmu_page_set_access(sp, i, pte_access); sptep = &sp->spt[i]; spte = *sptep; host_writable = spte & shadow_host_writable_mask; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); make_spte(vcpu, sp, slot, pte_access, gfn, spte_to_pfn(spte), spte, true, false, host_writable, &spte); return mmu_spte_update(sptep, spte); } #undef pt_element_t #undef guest_walker #undef FNAME #undef PT_BASE_ADDR_MASK #undef PT_INDEX #undef PT_LVL_ADDR_MASK #undef PT_LVL_OFFSET_MASK #undef PT_LEVEL_BITS #undef PT_MAX_FULL_LEVELS #undef gpte_to_gfn #undef gpte_to_gfn_lvl #undef PT_GUEST_ACCESSED_MASK #undef PT_GUEST_DIRTY_MASK #undef PT_GUEST_DIRTY_SHIFT #undef PT_GUEST_ACCESSED_SHIFT #undef PT_HAVE_ACCESSED_DIRTY
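The header comment above notes that this template is compiled three times, once per guest PTE type, with FNAME() generating the paging64_*/paging32_*/ept_* symbol names. The following is a minimal, self-contained sketch of that multiple-inclusion technique using made-up names (walker_tmpl.h, WNAME, wt_element_t); it is not KVM code, only an illustration of the pattern.

/*
 * Sketch only: the "compile one template per PTTYPE" pattern. All names
 * below are hypothetical and chosen to avoid clashing with real KVM symbols.
 */

/* --- hypothetical walker_tmpl.h, included once per PTTYPE --- */
#if PTTYPE == 64
#define wt_element_t	u64
#define WNAME(name)	walk64_##name
#elif PTTYPE == 32
#define wt_element_t	u32
#define WNAME(name)	walk32_##name
#else
#error Invalid PTTYPE value
#endif

static int WNAME(is_present)(wt_element_t pte)
{
	return pte & 1;			/* bit 0 as a stand-in present bit */
}

/* per-type defines are #undef'd at the end, exactly as paging_tmpl.h does */
#undef wt_element_t
#undef WNAME

/* --- hypothetical user of the template, in a separate .c file --- */
#define PTTYPE 64
#include "walker_tmpl.h"		/* emits walk64_is_present() */
#undef  PTTYPE

#define PTTYPE 32
#include "walker_tmpl.h"		/* emits walk32_is_present() */
#undef  PTTYPE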
5 4 4 2 2 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 /* * Cryptographic API. * * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions * * Copyright (C) 2013 Intel Corporation * Author: Tim Chen <tim.c.chen@linux.intel.com> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/types.h> #include <linux/module.h> #include <linux/crc-t10dif.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/string.h> #include <linux/kernel.h> #include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> #include <asm/simd.h> asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len); struct chksum_desc_ctx { __u16 crc; }; static int chksum_init(struct shash_desc *desc) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); ctx->crc = 0; return 0; } static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned int length) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); if (length >= 16 && crypto_simd_usable()) { kernel_fpu_begin(); ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); kernel_fpu_end(); } else ctx->crc = crc_t10dif_generic(ctx->crc, data, length); return 0; } static int chksum_final(struct shash_desc *desc, u8 *out) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); *(__u16 *)out = ctx->crc; return 0; } static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) { if (len >= 16 && crypto_simd_usable()) { kernel_fpu_begin(); *(__u16 *)out = crc_t10dif_pcl(crc, data, len); kernel_fpu_end(); } else *(__u16 *)out = crc_t10dif_generic(crc, data, len); return 0; } static int chksum_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); return __chksum_finup(ctx->crc, data, len, out); } static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) { return __chksum_finup(0, data, length, out); } static struct shash_alg alg = { .digestsize = CRC_T10DIF_DIGEST_SIZE, .init = chksum_init, .update = chksum_update, .final = chksum_final, .finup = chksum_finup, .digest = chksum_digest, .descsize = sizeof(struct chksum_desc_ctx), .base = { .cra_name = "crct10dif", .cra_driver_name = "crct10dif-pclmul", .cra_priority = 200, .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static const struct x86_cpu_id crct10dif_cpu_id[] = { 
X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id); static int __init crct10dif_intel_mod_init(void) { if (!x86_match_cpu(crct10dif_cpu_id)) return -ENODEV; return crypto_register_shash(&alg); } static void __exit crct10dif_intel_mod_fini(void) { crypto_unregister_shash(&alg); } module_init(crct10dif_intel_mod_init); module_exit(crct10dif_intel_mod_fini); MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); MODULE_LICENSE("GPL"); MODULE_ALIAS_CRYPTO("crct10dif"); MODULE_ALIAS_CRYPTO("crct10dif-pclmul");
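The glue code above registers "crct10dif-pclmul" as a higher-priority shash, using the PCLMULQDQ routine when the FPU is usable and falling back to crc_t10dif_generic() otherwise. Below is a hedged usage sketch (not part of this driver, and example_t10dif() is a hypothetical caller) showing how kernel code could compute the same checksum through the shash API so the highest-priority "crct10dif" implementation is selected automatically.

/*
 * Hedged sketch: compute a T10-DIF CRC via the shash API. Error handling
 * is abbreviated; the 2-byte digest is stored as a CPU-endian u16, matching
 * chksum_final() above.
 */
#include <crypto/hash.h>
#include <linux/err.h>

static int example_t10dif(const u8 *data, unsigned int len, u16 *crc_out)
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("crct10dif", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		err = crypto_shash_digest(desc, data, len, (u8 *)crc_out);
	}

	crypto_free_shash(tfm);
	return err;
}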
6 4 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2009 Patrick McHardy <kaber@trash.net> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/list.h> #include <linux/rbtree.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> struct nft_lookup { struct nft_set *set; u8 sreg; u8 dreg; bool dreg_set; bool invert; struct nft_set_binding binding; }; #ifdef CONFIG_MITIGATION_RETPOLINE bool nft_set_do_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { if (set->ops == &nft_set_hash_fast_type.ops) return nft_hash_lookup_fast(net, set, key, ext); if (set->ops == &nft_set_hash_type.ops) return nft_hash_lookup(net, set, key, ext); if (set->ops == &nft_set_rhash_type.ops) return nft_rhash_lookup(net, set, key, ext); if (set->ops == &nft_set_bitmap_type.ops) return nft_bitmap_lookup(net, set, key, ext); if (set->ops == &nft_set_pipapo_type.ops) return nft_pipapo_lookup(net, set, key, ext); #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) if (set->ops == &nft_set_pipapo_avx2_type.ops) return nft_pipapo_avx2_lookup(net, set, key, ext); #endif if (set->ops == &nft_set_rbtree_type.ops) return nft_rbtree_lookup(net, set, key, ext); WARN_ON_ONCE(1); return set->ops->lookup(net, set, key, ext); } EXPORT_SYMBOL_GPL(nft_set_do_lookup); #endif void nft_lookup_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_lookup *priv = nft_expr_priv(expr); const struct nft_set *set = priv->set; const struct nft_set_ext *ext = NULL; const struct net *net = nft_net(pkt); bool found; found = nft_set_do_lookup(net, set, &regs->data[priv->sreg], &ext) ^ priv->invert; if (!found) { ext = nft_set_catchall_lookup(net, set); if (!ext) { regs->verdict.code = NFT_BREAK; return; } } if (ext) { if (priv->dreg_set) nft_data_copy(&regs->data[priv->dreg], nft_set_ext_data(ext), set->dlen); nft_set_elem_update_expr(ext, regs, pkt); } } static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { [NFTA_LOOKUP_SET] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 }, [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, [NFTA_LOOKUP_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_LOOKUP_F_INV), }; static int nft_lookup_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct 
nlattr * const tb[]) { struct nft_lookup *priv = nft_expr_priv(expr); u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; u32 flags; int err; if (tb[NFTA_LOOKUP_SET] == NULL || tb[NFTA_LOOKUP_SREG] == NULL) return -EINVAL; set = nft_set_lookup_global(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET], tb[NFTA_LOOKUP_SET_ID], genmask); if (IS_ERR(set)) return PTR_ERR(set); err = nft_parse_register_load(ctx, tb[NFTA_LOOKUP_SREG], &priv->sreg, set->klen); if (err < 0) return err; if (tb[NFTA_LOOKUP_FLAGS]) { flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS])); if (flags & NFT_LOOKUP_F_INV) priv->invert = true; } if (tb[NFTA_LOOKUP_DREG] != NULL) { if (priv->invert) return -EINVAL; if (!(set->flags & NFT_SET_MAP)) return -EINVAL; err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG], &priv->dreg, NULL, nft_set_datatype(set), set->dlen); if (err < 0) return err; priv->dreg_set = true; } else if (set->flags & NFT_SET_MAP) { /* Map given, but user asks for lookup only (i.e. to * ignore value assoicated with key). * * This makes no sense for anonymous maps since they are * scoped to the rule, but for named sets this can be useful. */ if (set->flags & NFT_SET_ANONYMOUS) return -EINVAL; } priv->binding.flags = set->flags & NFT_SET_MAP; err = nf_tables_bind_set(ctx, set, &priv->binding); if (err < 0) return err; priv->set = set; return 0; } static void nft_lookup_deactivate(const struct nft_ctx *ctx, const struct nft_expr *expr, enum nft_trans_phase phase) { struct nft_lookup *priv = nft_expr_priv(expr); nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); } static void nft_lookup_activate(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_lookup *priv = nft_expr_priv(expr); nf_tables_activate_set(ctx, priv->set); } static void nft_lookup_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_lookup *priv = nft_expr_priv(expr); nf_tables_destroy_set(ctx, priv->set); } static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_lookup *priv = nft_expr_priv(expr); u32 flags = priv->invert ? 
NFT_LOOKUP_F_INV : 0; if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name)) goto nla_put_failure; if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg)) goto nla_put_failure; if (priv->dreg_set) if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static int nft_lookup_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_lookup *priv = nft_expr_priv(expr); struct nft_set_iter iter; if (!(priv->set->flags & NFT_SET_MAP) || priv->set->dtype != NFT_DATA_VERDICT) return 0; iter.genmask = nft_genmask_next(ctx->net); iter.type = NFT_ITER_UPDATE; iter.skip = 0; iter.count = 0; iter.err = 0; iter.fn = nft_setelem_validate; priv->set->ops->walk(ctx, priv->set, &iter); if (!iter.err) iter.err = nft_set_catchall_validate(ctx, priv->set); if (iter.err < 0) return iter.err; return 0; } static bool nft_lookup_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { const struct nft_lookup *priv = nft_expr_priv(expr); if (priv->set->flags & NFT_SET_MAP) nft_reg_track_cancel(track, priv->dreg, priv->set->dlen); return false; } static const struct nft_expr_ops nft_lookup_ops = { .type = &nft_lookup_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), .eval = nft_lookup_eval, .init = nft_lookup_init, .activate = nft_lookup_activate, .deactivate = nft_lookup_deactivate, .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, .validate = nft_lookup_validate, .reduce = nft_lookup_reduce, }; struct nft_expr_type nft_lookup_type __read_mostly = { .name = "lookup", .ops = &nft_lookup_ops, .policy = nft_lookup_policy, .maxattr = NFTA_LOOKUP_MAX, .owner = THIS_MODULE, };
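The nft_set_do_lookup() wrapper above exists only when retpolines are enabled: indirect calls through set->ops->lookup are then comparatively expensive, so the hot path compares the ops pointer against the known set backends and issues direct calls, keeping the indirect call as a last-resort fallback. Below is a generic, hypothetical sketch of that dispatch pattern (example_ops, impl_a_ops, etc. are made-up names, not nf_tables symbols).

/*
 * Sketch of the retpoline-avoidance dispatch pattern: test the ops pointer
 * and call the implementation directly; fall back to the indirect call only
 * for unknown backends.
 */
struct example_ops {
	int (*fn)(int);
};

extern const struct example_ops impl_a_ops, impl_b_ops;
int impl_a_fn(int x);
int impl_b_fn(int x);

static int example_dispatch(const struct example_ops *ops, int x)
{
	if (ops == &impl_a_ops)
		return impl_a_fn(x);	/* direct call, no retpoline thunk */
	if (ops == &impl_b_ops)
		return impl_b_fn(x);

	return ops->fn(x);		/* fallback: indirect call */
}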
22 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_IA32_H #define _ASM_X86_IA32_H #ifdef CONFIG_IA32_EMULATION #include <linux/compat.h> /* * 32 bit structures for IA32 support. */ #include <uapi/asm/sigcontext.h> /* signal.h */ struct ucontext_ia32 { unsigned int uc_flags; unsigned int uc_link; compat_stack_t uc_stack; struct sigcontext_32 uc_mcontext; compat_sigset_t uc_sigmask; /* mask last for extensibility */ }; /* This matches struct stat64 in glibc2.2, hence the absolutely * insane amounts of padding around dev_t's. */ struct stat64 { unsigned long long st_dev; unsigned char __pad0[4]; #define STAT64_HAS_BROKEN_ST_INO 1 unsigned int __st_ino; unsigned int st_mode; unsigned int st_nlink; unsigned int st_uid; unsigned int st_gid; unsigned long long st_rdev; unsigned char __pad3[4]; long long st_size; unsigned int st_blksize; long long st_blocks;/* Number 512-byte blocks allocated */ unsigned st_atime; unsigned st_atime_nsec; unsigned st_mtime; unsigned st_mtime_nsec; unsigned st_ctime; unsigned st_ctime_nsec; unsigned long long st_ino; } __attribute__((packed)); extern bool __ia32_enabled; static __always_inline bool ia32_enabled(void) { return __ia32_enabled; } static inline void ia32_disable(void) { __ia32_enabled = false; } #else /* !CONFIG_IA32_EMULATION */ static __always_inline bool ia32_enabled(void) { return IS_ENABLED(CONFIG_X86_32); } static inline void ia32_disable(void) {} #endif static inline bool ia32_enabled_verbose(void) { bool enabled = ia32_enabled(); if (IS_ENABLED(CONFIG_IA32_EMULATION) && !enabled) pr_notice_once("32-bit emulation disabled. You can reenable with ia32_emulation=on\n"); return enabled; } #endif /* _ASM_X86_IA32_H */
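The header above exposes ia32_enabled()/ia32_enabled_verbose() so callers can gate 32-bit compat paths at run time when CONFIG_IA32_EMULATION is built in but disabled on the command line. The following is a hedged sketch of a hypothetical caller (example_setup_compat() is not a real kernel function), only to show the intended calling convention.

/*
 * Hypothetical caller: bail out of compat setup when 32-bit emulation is
 * disabled; ia32_enabled_verbose() also prints the one-time notice above.
 */
#include <asm/ia32.h>
#include <linux/errno.h>

static int example_setup_compat(void)
{
	if (!ia32_enabled_verbose())
		return -ENOSYS;		/* emulation compiled in but disabled */

	/* ... set up compat entry points here ... */
	return 0;
}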
1 1 1 1 1 1 1 1 1 2 1 1 2 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 // SPDX-License-Identifier: GPL-2.0-or-later /* Kernel cryptographic api. * cast6.c - Cast6 cipher algorithm [rfc2612]. * * CAST-256 (*cast6*) is a DES like Substitution-Permutation Network (SPN) * cryptosystem built upon the CAST-128 (*cast5*) [rfc2144] encryption * algorithm. * * Copyright (C) 2003 Kartikey Mahendra Bhatt <kartik_me@hotmail.com>. */ #include <linux/unaligned.h> #include <crypto/algapi.h> #include <linux/init.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/types.h> #include <crypto/cast6.h> #define s1 cast_s1 #define s2 cast_s2 #define s3 cast_s3 #define s4 cast_s4 #define F1(D, r, m) ((I = ((m) + (D))), (I = rol32(I, (r))), \ (((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff])) #define F2(D, r, m) ((I = ((m) ^ (D))), (I = rol32(I, (r))), \ (((s1[I >> 24] - s2[(I>>16)&0xff]) + s3[(I>>8)&0xff]) ^ s4[I&0xff])) #define F3(D, r, m) ((I = ((m) - (D))), (I = rol32(I, (r))), \ (((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff])) static const u32 Tm[24][8] = { { 0x5a827999, 0xc95c653a, 0x383650db, 0xa7103c7c, 0x15ea281d, 0x84c413be, 0xf39dff5f, 0x6277eb00 } , { 0xd151d6a1, 0x402bc242, 0xaf05ade3, 0x1ddf9984, 0x8cb98525, 0xfb9370c6, 0x6a6d5c67, 0xd9474808 } , { 0x482133a9, 0xb6fb1f4a, 0x25d50aeb, 0x94aef68c, 0x0388e22d, 0x7262cdce, 0xe13cb96f, 0x5016a510 } , { 0xbef090b1, 0x2dca7c52, 0x9ca467f3, 0x0b7e5394, 0x7a583f35, 0xe9322ad6, 0x580c1677, 0xc6e60218 } , { 0x35bfedb9, 0xa499d95a, 0x1373c4fb, 0x824db09c, 0xf1279c3d, 0x600187de, 0xcedb737f, 0x3db55f20 } , { 0xac8f4ac1, 0x1b693662, 0x8a432203, 0xf91d0da4, 0x67f6f945, 0xd6d0e4e6, 0x45aad087, 0xb484bc28 } , { 0x235ea7c9, 0x9238936a, 0x01127f0b, 0x6fec6aac, 0xdec6564d, 0x4da041ee, 0xbc7a2d8f, 0x2b541930 } , { 0x9a2e04d1, 0x0907f072, 0x77e1dc13, 0xe6bbc7b4, 0x5595b355, 0xc46f9ef6, 0x33498a97, 0xa2237638 } , { 0x10fd61d9, 0x7fd74d7a, 0xeeb1391b, 0x5d8b24bc, 0xcc65105d, 0x3b3efbfe, 0xaa18e79f, 0x18f2d340 } , { 0x87ccbee1, 0xf6a6aa82, 0x65809623, 0xd45a81c4, 0x43346d65, 0xb20e5906, 0x20e844a7, 0x8fc23048 } , { 0xfe9c1be9, 0x6d76078a, 0xdc4ff32b, 0x4b29decc, 0xba03ca6d, 0x28ddb60e, 0x97b7a1af, 0x06918d50 } , { 0x756b78f1, 0xe4456492, 0x531f5033, 0xc1f93bd4, 0x30d32775, 0x9fad1316, 0x0e86feb7, 0x7d60ea58 } , { 0xec3ad5f9, 0x5b14c19a, 0xc9eead3b, 0x38c898dc, 0xa7a2847d, 0x167c701e, 0x85565bbf, 0xf4304760 } , { 0x630a3301, 0xd1e41ea2, 0x40be0a43, 0xaf97f5e4, 0x1e71e185, 0x8d4bcd26, 0xfc25b8c7, 0x6affa468 } , { 0xd9d99009, 0x48b37baa, 
0xb78d674b, 0x266752ec, 0x95413e8d, 0x041b2a2e, 0x72f515cf, 0xe1cf0170 } , { 0x50a8ed11, 0xbf82d8b2, 0x2e5cc453, 0x9d36aff4, 0x0c109b95, 0x7aea8736, 0xe9c472d7, 0x589e5e78 } , { 0xc7784a19, 0x365235ba, 0xa52c215b, 0x14060cfc, 0x82dff89d, 0xf1b9e43e, 0x6093cfdf, 0xcf6dbb80 } , { 0x3e47a721, 0xad2192c2, 0x1bfb7e63, 0x8ad56a04, 0xf9af55a5, 0x68894146, 0xd7632ce7, 0x463d1888 } , { 0xb5170429, 0x23f0efca, 0x92cadb6b, 0x01a4c70c, 0x707eb2ad, 0xdf589e4e, 0x4e3289ef, 0xbd0c7590 } , { 0x2be66131, 0x9ac04cd2, 0x099a3873, 0x78742414, 0xe74e0fb5, 0x5627fb56, 0xc501e6f7, 0x33dbd298 } , { 0xa2b5be39, 0x118fa9da, 0x8069957b, 0xef43811c, 0x5e1d6cbd, 0xccf7585e, 0x3bd143ff, 0xaaab2fa0 } , { 0x19851b41, 0x885f06e2, 0xf738f283, 0x6612de24, 0xd4ecc9c5, 0x43c6b566, 0xb2a0a107, 0x217a8ca8 } , { 0x90547849, 0xff2e63ea, 0x6e084f8b, 0xdce23b2c, 0x4bbc26cd, 0xba96126e, 0x296ffe0f, 0x9849e9b0 } , { 0x0723d551, 0x75fdc0f2, 0xe4d7ac93, 0x53b19834, 0xc28b83d5, 0x31656f76, 0xa03f5b17, 0x0f1946b8 } }; static const u8 Tr[4][8] = { { 0x13, 0x04, 0x15, 0x06, 0x17, 0x08, 0x19, 0x0a } , { 0x1b, 0x0c, 0x1d, 0x0e, 0x1f, 0x10, 0x01, 0x12 } , { 0x03, 0x14, 0x05, 0x16, 0x07, 0x18, 0x09, 0x1a } , { 0x0b, 0x1c, 0x0d, 0x1e, 0x0f, 0x00, 0x11, 0x02 } }; /* forward octave */ static inline void W(u32 *key, unsigned int i) { u32 I; key[6] ^= F1(key[7], Tr[i % 4][0], Tm[i][0]); key[5] ^= F2(key[6], Tr[i % 4][1], Tm[i][1]); key[4] ^= F3(key[5], Tr[i % 4][2], Tm[i][2]); key[3] ^= F1(key[4], Tr[i % 4][3], Tm[i][3]); key[2] ^= F2(key[3], Tr[i % 4][4], Tm[i][4]); key[1] ^= F3(key[2], Tr[i % 4][5], Tm[i][5]); key[0] ^= F1(key[1], Tr[i % 4][6], Tm[i][6]); key[7] ^= F2(key[0], Tr[i % 4][7], Tm[i][7]); } int __cast6_setkey(struct cast6_ctx *c, const u8 *in_key, unsigned int key_len) { int i; u32 key[8]; __be32 p_key[8]; /* padded key */ if (key_len % 4 != 0) return -EINVAL; memset(p_key, 0, 32); memcpy(p_key, in_key, key_len); key[0] = be32_to_cpu(p_key[0]); /* A */ key[1] = be32_to_cpu(p_key[1]); /* B */ key[2] = be32_to_cpu(p_key[2]); /* C */ key[3] = be32_to_cpu(p_key[3]); /* D */ key[4] = be32_to_cpu(p_key[4]); /* E */ key[5] = be32_to_cpu(p_key[5]); /* F */ key[6] = be32_to_cpu(p_key[6]); /* G */ key[7] = be32_to_cpu(p_key[7]); /* H */ for (i = 0; i < 12; i++) { W(key, 2 * i); W(key, 2 * i + 1); c->Kr[i][0] = key[0] & 0x1f; c->Kr[i][1] = key[2] & 0x1f; c->Kr[i][2] = key[4] & 0x1f; c->Kr[i][3] = key[6] & 0x1f; c->Km[i][0] = key[7]; c->Km[i][1] = key[5]; c->Km[i][2] = key[3]; c->Km[i][3] = key[1]; } return 0; } EXPORT_SYMBOL_GPL(__cast6_setkey); int cast6_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { return __cast6_setkey(crypto_tfm_ctx(tfm), key, keylen); } EXPORT_SYMBOL_GPL(cast6_setkey); /*forward quad round*/ static inline void Q(u32 *block, const u8 *Kr, const u32 *Km) { u32 I; block[2] ^= F1(block[3], Kr[0], Km[0]); block[1] ^= F2(block[2], Kr[1], Km[1]); block[0] ^= F3(block[1], Kr[2], Km[2]); block[3] ^= F1(block[0], Kr[3], Km[3]); } /*reverse quad round*/ static inline void QBAR(u32 *block, const u8 *Kr, const u32 *Km) { u32 I; block[3] ^= F1(block[0], Kr[3], Km[3]); block[0] ^= F3(block[1], Kr[2], Km[2]); block[1] ^= F2(block[2], Kr[1], Km[1]); block[2] ^= F1(block[3], Kr[0], Km[0]); } void __cast6_encrypt(const void *ctx, u8 *outbuf, const u8 *inbuf) { const struct cast6_ctx *c = ctx; u32 block[4]; const u32 *Km; const u8 *Kr; block[0] = get_unaligned_be32(inbuf); block[1] = get_unaligned_be32(inbuf + 4); block[2] = get_unaligned_be32(inbuf + 8); block[3] = get_unaligned_be32(inbuf + 12); Km = c->Km[0]; Kr = 
c->Kr[0]; Q(block, Kr, Km); Km = c->Km[1]; Kr = c->Kr[1]; Q(block, Kr, Km); Km = c->Km[2]; Kr = c->Kr[2]; Q(block, Kr, Km); Km = c->Km[3]; Kr = c->Kr[3]; Q(block, Kr, Km); Km = c->Km[4]; Kr = c->Kr[4]; Q(block, Kr, Km); Km = c->Km[5]; Kr = c->Kr[5]; Q(block, Kr, Km); Km = c->Km[6]; Kr = c->Kr[6]; QBAR(block, Kr, Km); Km = c->Km[7]; Kr = c->Kr[7]; QBAR(block, Kr, Km); Km = c->Km[8]; Kr = c->Kr[8]; QBAR(block, Kr, Km); Km = c->Km[9]; Kr = c->Kr[9]; QBAR(block, Kr, Km); Km = c->Km[10]; Kr = c->Kr[10]; QBAR(block, Kr, Km); Km = c->Km[11]; Kr = c->Kr[11]; QBAR(block, Kr, Km); put_unaligned_be32(block[0], outbuf); put_unaligned_be32(block[1], outbuf + 4); put_unaligned_be32(block[2], outbuf + 8); put_unaligned_be32(block[3], outbuf + 12); } EXPORT_SYMBOL_GPL(__cast6_encrypt); static void cast6_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) { __cast6_encrypt(crypto_tfm_ctx(tfm), outbuf, inbuf); } void __cast6_decrypt(const void *ctx, u8 *outbuf, const u8 *inbuf) { const struct cast6_ctx *c = ctx; u32 block[4]; const u32 *Km; const u8 *Kr; block[0] = get_unaligned_be32(inbuf); block[1] = get_unaligned_be32(inbuf + 4); block[2] = get_unaligned_be32(inbuf + 8); block[3] = get_unaligned_be32(inbuf + 12); Km = c->Km[11]; Kr = c->Kr[11]; Q(block, Kr, Km); Km = c->Km[10]; Kr = c->Kr[10]; Q(block, Kr, Km); Km = c->Km[9]; Kr = c->Kr[9]; Q(block, Kr, Km); Km = c->Km[8]; Kr = c->Kr[8]; Q(block, Kr, Km); Km = c->Km[7]; Kr = c->Kr[7]; Q(block, Kr, Km); Km = c->Km[6]; Kr = c->Kr[6]; Q(block, Kr, Km); Km = c->Km[5]; Kr = c->Kr[5]; QBAR(block, Kr, Km); Km = c->Km[4]; Kr = c->Kr[4]; QBAR(block, Kr, Km); Km = c->Km[3]; Kr = c->Kr[3]; QBAR(block, Kr, Km); Km = c->Km[2]; Kr = c->Kr[2]; QBAR(block, Kr, Km); Km = c->Km[1]; Kr = c->Kr[1]; QBAR(block, Kr, Km); Km = c->Km[0]; Kr = c->Kr[0]; QBAR(block, Kr, Km); put_unaligned_be32(block[0], outbuf); put_unaligned_be32(block[1], outbuf + 4); put_unaligned_be32(block[2], outbuf + 8); put_unaligned_be32(block[3], outbuf + 12); } EXPORT_SYMBOL_GPL(__cast6_decrypt); static void cast6_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) { __cast6_decrypt(crypto_tfm_ctx(tfm), outbuf, inbuf); } static struct crypto_alg alg = { .cra_name = "cast6", .cra_driver_name = "cast6-generic", .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = CAST6_BLOCK_SIZE, .cra_ctxsize = sizeof(struct cast6_ctx), .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = CAST6_MIN_KEY_SIZE, .cia_max_keysize = CAST6_MAX_KEY_SIZE, .cia_setkey = cast6_setkey, .cia_encrypt = cast6_encrypt, .cia_decrypt = cast6_decrypt} } }; static int __init cast6_mod_init(void) { return crypto_register_alg(&alg); } static void __exit cast6_mod_fini(void) { crypto_unregister_alg(&alg); } subsys_initcall(cast6_mod_init); module_exit(cast6_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cast6 Cipher Algorithm"); MODULE_ALIAS_CRYPTO("cast6"); MODULE_ALIAS_CRYPTO("cast6-generic");
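Besides the crypto_alg registration, this file exports library-style entry points (__cast6_setkey(), __cast6_encrypt(), __cast6_decrypt(), declared in <crypto/cast6.h>) for use by the assembler glue modules. Below is a hedged usage sketch (example_cast6_one_block() is a hypothetical caller) that only illustrates their calling convention on a single 16-byte block; normal users would go through the crypto API's "cast6" cipher instead.

/*
 * Sketch: key the context and encrypt one CAST6_BLOCK_SIZE block in place
 * on the stack. Key length must be 16..32 bytes and a multiple of 4, as
 * enforced by __cast6_setkey() above.
 */
#include <crypto/cast6.h>

static int example_cast6_one_block(const u8 *key, unsigned int keylen,
				   const u8 in[CAST6_BLOCK_SIZE],
				   u8 out[CAST6_BLOCK_SIZE])
{
	struct cast6_ctx ctx;
	int err;

	err = __cast6_setkey(&ctx, key, keylen);
	if (err)
		return err;

	__cast6_encrypt(&ctx, out, in);
	return 0;
}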
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_IO_H #define _ASM_X86_IO_H /* * This file contains the definitions for the x86 IO instructions * inb/inw/inl/outb/outw/outl and the "string versions" of the same * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" * versions of the single-IO instructions (inb_p/inw_p/..). * * This file is not meant to be obfuscating: it's just complicated * to (a) handle it all in a way that makes gcc able to optimize it * as well as possible and (b) trying to avoid writing the same thing * over and over again with slight variations and possibly making a * mistake somewhere. */ /* * Thanks to James van Artsdalen for a better timing-fix than * the two short jumps: using outb's to a nonexistent port seems * to guarantee better timings even on fast machines. * * On the other hand, I'd like to be sure of a non-existent port: * I feel a bit unsafe about using 0x80 (should be safe, though) * * Linus */ /* * Bit simplified and optimized by Jan Hubicka * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. 
* * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, * isa_read[wl] and isa_write[wl] fixed * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #include <linux/string.h> #include <linux/compiler.h> #include <linux/cc_platform.h> #include <asm/page.h> #include <asm/early_ioremap.h> #include <asm/pgtable_types.h> #include <asm/shared/io.h> #include <asm/special_insns.h> #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ { type ret; asm volatile("mov" size " %1,%0":reg (ret) \ :"m" (*(volatile type __force *)addr) barrier); return ret; } #define build_mmio_write(name, size, type, reg, barrier) \ static inline void name(type val, volatile void __iomem *addr) \ { asm volatile("mov" size " %0,%1": :reg (val), \ "m" (*(volatile type __force *)addr) barrier); } build_mmio_read(readb, "b", unsigned char, "=q", :"memory") build_mmio_read(readw, "w", unsigned short, "=r", :"memory") build_mmio_read(readl, "l", unsigned int, "=r", :"memory") build_mmio_read(__readb, "b", unsigned char, "=q", ) build_mmio_read(__readw, "w", unsigned short, "=r", ) build_mmio_read(__readl, "l", unsigned int, "=r", ) build_mmio_write(writeb, "b", unsigned char, "q", :"memory") build_mmio_write(writew, "w", unsigned short, "r", :"memory") build_mmio_write(writel, "l", unsigned int, "r", :"memory") build_mmio_write(__writeb, "b", unsigned char, "q", ) build_mmio_write(__writew, "w", unsigned short, "r", ) build_mmio_write(__writel, "l", unsigned int, "r", ) #define readb readb #define readw readw #define readl readl #define readb_relaxed(a) __readb(a) #define readw_relaxed(a) __readw(a) #define readl_relaxed(a) __readl(a) #define __raw_readb __readb #define __raw_readw __readw #define __raw_readl __readl #define writeb writeb #define writew writew #define writel writel #define writeb_relaxed(v, a) __writeb(v, a) #define writew_relaxed(v, a) __writew(v, a) #define writel_relaxed(v, a) __writel(v, a) #define __raw_writeb __writeb #define __raw_writew __writew #define __raw_writel __writel #ifdef CONFIG_X86_64 build_mmio_read(readq, "q", u64, "=r", :"memory") build_mmio_read(__readq, "q", u64, "=r", ) build_mmio_write(writeq, "q", u64, "r", :"memory") build_mmio_write(__writeq, "q", u64, "r", ) #define readq_relaxed(a) __readq(a) #define writeq_relaxed(v, a) __writeq(v, a) #define __raw_readq __readq #define __raw_writeq __writeq /* Let people know that we have them */ #define readq readq #define writeq writeq #endif #define ARCH_HAS_VALID_PHYS_ADDR_RANGE extern int valid_phys_addr_range(phys_addr_t addr, size_t size); extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); /** * virt_to_phys - map virtual addresses to physical * @address: address to remap * * The returned physical address is the physical (CPU) mapping for * the memory address given. It is only valid to use this function on * addresses directly mapped or allocated via kmalloc. * * This function does not give bus mappings for DMA transfers. In * almost all conceivable cases a device driver should not be using * this function */ static inline phys_addr_t virt_to_phys(volatile void *address) { return __pa(address); } #define virt_to_phys virt_to_phys /** * phys_to_virt - map physical address to virtual * @address: address to remap * * The returned virtual address is a current CPU mapping for * the memory address given. It is only valid to use this function on * addresses that have a kernel mapping * * This function does not handle bus mappings for DMA transfers. 
In * almost all conceivable cases a device driver should not be using * this function */ static inline void *phys_to_virt(phys_addr_t address) { return __va(address); } #define phys_to_virt phys_to_virt /* * Change "struct page" to physical address. */ #define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) /* * ISA I/O bus memory addresses are 1:1 with the physical address. * However, we truncate the address to unsigned int to avoid undesirable * promotions in legacy drivers. */ static inline unsigned int isa_virt_to_bus(volatile void *address) { return (unsigned int)virt_to_phys(address); } #define isa_bus_to_virt phys_to_virt /* * The default ioremap() behavior is non-cached; if you need something * else, you probably want one of the following. */ extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); #define ioremap_uc ioremap_uc extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); #define ioremap_cache ioremap_cache extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); #define ioremap_prot ioremap_prot extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size); #define ioremap_encrypted ioremap_encrypted /** * ioremap - map bus memory into CPU space * @offset: bus address of the memory * @size: size of the resource to map * * ioremap performs a platform specific sequence of operations to * make bus memory CPU accessible via the readb/readw/readl/writeb/ * writew/writel functions and the other mmio helpers. The returned * address is not guaranteed to be usable directly as a virtual * address. * * If the area you are trying to map is a PCI BAR you should have a * look at pci_iomap(). */ void __iomem *ioremap(resource_size_t offset, unsigned long size); #define ioremap ioremap extern void iounmap(volatile void __iomem *addr); #define iounmap iounmap #ifdef __KERNEL__ void memcpy_fromio(void *, const volatile void __iomem *, size_t); void memcpy_toio(volatile void __iomem *, const void *, size_t); void memset_io(volatile void __iomem *, int, size_t); #define memcpy_fromio memcpy_fromio #define memcpy_toio memcpy_toio #define memset_io memset_io #ifdef CONFIG_X86_64 /* * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy * loop. */ static inline void __iowrite32_copy(void __iomem *to, const void *from, size_t count) { asm volatile("rep ; movsl" : "=&c"(count), "=&D"(to), "=&S"(from) : "0"(count), "1"(to), "2"(from) : "memory"); } #define __iowrite32_copy __iowrite32_copy #endif /* * ISA space is 'always mapped' on a typical x86 system, no need to * explicitly ioremap() it. The fact that the ISA IO space is mapped * to PAGE_OFFSET is pure coincidence - it does not mean ISA values * are physical addresses. 
The following constant pointer can be * used as the IO-area pointer (it can be iounmapped as well, so the * analogy with PCI is quite large): */ #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) #endif /* __KERNEL__ */ extern void native_io_delay(void); extern int io_delay_type; extern void io_delay_init(void); #if defined(CONFIG_PARAVIRT) #include <asm/paravirt.h> #else static inline void slow_down_io(void) { native_io_delay(); #ifdef REALLY_SLOW_IO native_io_delay(); native_io_delay(); native_io_delay(); #endif } #endif #define BUILDIO(bwl, type) \ static inline void out##bwl##_p(type value, u16 port) \ { \ out##bwl(value, port); \ slow_down_io(); \ } \ \ static inline type in##bwl##_p(u16 port) \ { \ type value = in##bwl(port); \ slow_down_io(); \ return value; \ } \ \ static inline void outs##bwl(u16 port, const void *addr, unsigned long count) \ { \ if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \ type *value = (type *)addr; \ while (count) { \ out##bwl(*value, port); \ value++; \ count--; \ } \ } else { \ asm volatile("rep; outs" #bwl \ : "+S"(addr), "+c"(count) \ : "d"(port) : "memory"); \ } \ } \ \ static inline void ins##bwl(u16 port, void *addr, unsigned long count) \ { \ if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \ type *value = (type *)addr; \ while (count) { \ *value = in##bwl(port); \ value++; \ count--; \ } \ } else { \ asm volatile("rep; ins" #bwl \ : "+D"(addr), "+c"(count) \ : "d"(port) : "memory"); \ } \ } BUILDIO(b, u8) BUILDIO(w, u16) BUILDIO(l, u32) #undef BUILDIO #define inb_p inb_p #define inw_p inw_p #define inl_p inl_p #define insb insb #define insw insw #define insl insl #define outb_p outb_p #define outw_p outw_p #define outl_p outl_p #define outsb outsb #define outsw outsw #define outsl outsl extern void *xlate_dev_mem_ptr(phys_addr_t phys); extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define xlate_dev_mem_ptr xlate_dev_mem_ptr #define unxlate_dev_mem_ptr unxlate_dev_mem_ptr extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, enum page_cache_mode pcm); extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); #define ioremap_wc ioremap_wc extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size); #define ioremap_wt ioremap_wt extern bool is_early_ioremap_ptep(pte_t *ptep); #define IO_SPACE_LIMIT 0xffff #include <asm-generic/io.h> #undef PCI_IOBASE #ifdef CONFIG_MTRR extern int __must_check arch_phys_wc_index(int handle); #define arch_phys_wc_index arch_phys_wc_index extern int __must_check arch_phys_wc_add(unsigned long base, unsigned long size); extern void arch_phys_wc_del(int handle); #define arch_phys_wc_add arch_phys_wc_add #endif #ifdef CONFIG_X86_PAT extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size); extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size); #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc #endif #ifdef CONFIG_AMD_MEM_ENCRYPT extern bool arch_memremap_can_ram_remap(resource_size_t offset, unsigned long size, unsigned long flags); #define arch_memremap_can_ram_remap arch_memremap_can_ram_remap extern bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size); #else static inline bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size) { return true; } #endif /** * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units * @dst: destination, in MMIO space (must be 512-bit aligned) * @src: source * @count: number of 512 bits 
quantities to submit * * Submit data from kernel space to MMIO space, in units of 512 bits at a * time. Order of access is not guaranteed, nor is a memory barrier * performed afterwards. * * Warning: Do not use this helper unless your driver has checked that the CPU * instruction is supported on the platform. */ static inline void iosubmit_cmds512(void __iomem *dst, const void *src, size_t count) { const u8 *from = src; const u8 *end = from + count * 64; while (from < end) { movdir64b_io(dst, from); from += 64; } } #endif /* _ASM_X86_IO_H */
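The header above only declares the x86 MMIO accessors; the following is a minimal usage sketch (not part of the kernel sources covered by this report) of how a driver would typically pair ioremap()/iounmap() with the readl()/writel() helpers declared above. The physical base address, window size and register offset are hypothetical placeholders.

#include <linux/io.h>
#include <linux/errno.h>
#include <linux/types.h>

#define EXAMPLE_BAR_PHYS	0xfed00000UL	/* hypothetical MMIO base */
#define EXAMPLE_BAR_SIZE	0x1000UL	/* hypothetical window size */
#define EXAMPLE_CTRL_REG	0x04		/* hypothetical register offset */

static int example_mmio_poke(void)
{
	void __iomem *regs;
	u32 val;

	/* ioremap() gives a non-cached mapping of the device's MMIO window. */
	regs = ioremap(EXAMPLE_BAR_PHYS, EXAMPLE_BAR_SIZE);
	if (!regs)
		return -ENOMEM;

	val = readl(regs + EXAMPLE_CTRL_REG);		/* ordered MMIO read */
	writel(val | 0x1, regs + EXAMPLE_CTRL_REG);	/* ordered MMIO write */

	iounmap(regs);
	return 0;
}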
// SPDX-License-Identifier: GPL-2.0 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/errno.h> #include <linux/smp.h> #include "../cpuid.h" #include "hyperv.h" #include "nested.h" #include "vmcs.h" #include "vmx.h" #include "trace.h" #define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK u64 nested_get_evmptr(struct kvm_vcpu *vcpu) { struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); if (unlikely(kvm_hv_get_assist_page(vcpu))) return EVMPTR_INVALID; if (unlikely(!hv_vcpu->vp_assist_page.enlighten_vmentry)) return EVMPTR_INVALID; return hv_vcpu->vp_assist_page.current_nested_vmcs; } uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) { /* * vmcs_version represents the range of supported Enlightened VMCS * versions: lower 8 bits is the minimal version, higher 8 bits is the * maximum supported version. KVM supports versions from 1 to * KVM_EVMCS_VERSION. * * Note, do not check the Hyper-V is fully enabled in guest CPUID, this * helper is used to _get_ the vCPU's supported CPUID. */ if (kvm_cpu_cap_get(X86_FEATURE_VMX) && (!vcpu || to_vmx(vcpu)->nested.enlightened_vmcs_enabled)) return (KVM_EVMCS_VERSION << 8) | 1; return 0; } enum evmcs_revision { EVMCSv1_LEGACY, NR_EVMCS_REVISIONS, }; enum evmcs_ctrl_type { EVMCS_EXIT_CTRLS, EVMCS_ENTRY_CTRLS, EVMCS_EXEC_CTRL, EVMCS_2NDEXEC, EVMCS_3RDEXEC, EVMCS_PINCTRL, EVMCS_VMFUNC, NR_EVMCS_CTRLS, }; static const u32 evmcs_supported_ctrls[NR_EVMCS_CTRLS][NR_EVMCS_REVISIONS] = { [EVMCS_EXIT_CTRLS] = { [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_VMEXIT_CTRL, }, [EVMCS_ENTRY_CTRLS] = { [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_VMENTRY_CTRL, }, [EVMCS_EXEC_CTRL] = { [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_EXEC_CTRL, }, [EVMCS_2NDEXEC] = { [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_2NDEXEC & ~SECONDARY_EXEC_TSC_SCALING, }, [EVMCS_3RDEXEC] = { [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_3RDEXEC, }, [EVMCS_PINCTRL] = { [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_PINCTRL, }, [EVMCS_VMFUNC] = { [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_VMFUNC, }, }; static u32 evmcs_get_supported_ctls(enum evmcs_ctrl_type ctrl_type) { enum evmcs_revision evmcs_rev = EVMCSv1_LEGACY; return evmcs_supported_ctrls[ctrl_type][evmcs_rev]; } static bool evmcs_has_perf_global_ctrl(struct kvm_vcpu *vcpu) { struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); /* * PERF_GLOBAL_CTRL has a quirk where some Windows guests may fail to * boot if a PV CPUID feature flag is not also set. Treat the fields * as unsupported if the flag is not set in guest CPUID. This should * be called only for guest accesses, and all guest accesses should be * gated on Hyper-V being enabled and initialized.
*/ if (WARN_ON_ONCE(!hv_vcpu)) return false; return hv_vcpu->cpuid_cache.nested_ebx & HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL; } void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) { u32 ctl_low = (u32)*pdata; u32 ctl_high = (u32)(*pdata >> 32); u32 supported_ctrls; /* * Hyper-V 2016 and 2019 try using these features even when eVMCS * is enabled but there are no corresponding fields. */ switch (msr_index) { case MSR_IA32_VMX_EXIT_CTLS: case MSR_IA32_VMX_TRUE_EXIT_CTLS: supported_ctrls = evmcs_get_supported_ctls(EVMCS_EXIT_CTRLS); if (!evmcs_has_perf_global_ctrl(vcpu)) supported_ctrls &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; ctl_high &= supported_ctrls; break; case MSR_IA32_VMX_ENTRY_CTLS: case MSR_IA32_VMX_TRUE_ENTRY_CTLS: supported_ctrls = evmcs_get_supported_ctls(EVMCS_ENTRY_CTRLS); if (!evmcs_has_perf_global_ctrl(vcpu)) supported_ctrls &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; ctl_high &= supported_ctrls; break; case MSR_IA32_VMX_PROCBASED_CTLS: case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: ctl_high &= evmcs_get_supported_ctls(EVMCS_EXEC_CTRL); break; case MSR_IA32_VMX_PROCBASED_CTLS2: ctl_high &= evmcs_get_supported_ctls(EVMCS_2NDEXEC); break; case MSR_IA32_VMX_TRUE_PINBASED_CTLS: case MSR_IA32_VMX_PINBASED_CTLS: ctl_high &= evmcs_get_supported_ctls(EVMCS_PINCTRL); break; case MSR_IA32_VMX_VMFUNC: ctl_low &= evmcs_get_supported_ctls(EVMCS_VMFUNC); break; } *pdata = ctl_low | ((u64)ctl_high << 32); } static bool nested_evmcs_is_valid_controls(enum evmcs_ctrl_type ctrl_type, u32 val) { return !(val & ~evmcs_get_supported_ctls(ctrl_type)); } int nested_evmcs_check_controls(struct vmcs12 *vmcs12) { if (CC(!nested_evmcs_is_valid_controls(EVMCS_PINCTRL, vmcs12->pin_based_vm_exec_control))) return -EINVAL; if (CC(!nested_evmcs_is_valid_controls(EVMCS_EXEC_CTRL, vmcs12->cpu_based_vm_exec_control))) return -EINVAL; if (CC(!nested_evmcs_is_valid_controls(EVMCS_2NDEXEC, vmcs12->secondary_vm_exec_control))) return -EINVAL; if (CC(!nested_evmcs_is_valid_controls(EVMCS_EXIT_CTRLS, vmcs12->vm_exit_controls))) return -EINVAL; if (CC(!nested_evmcs_is_valid_controls(EVMCS_ENTRY_CTRLS, vmcs12->vm_entry_controls))) return -EINVAL; /* * VM-Func controls are 64-bit, but KVM currently doesn't support any * controls in bits 63:32, i.e. dropping those bits on the consistency * check is intentional. */ if (WARN_ON_ONCE(vmcs12->vm_function_control >> 32)) return -EINVAL; if (CC(!nested_evmcs_is_valid_controls(EVMCS_VMFUNC, vmcs12->vm_function_control))) return -EINVAL; return 0; } int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version) { struct vcpu_vmx *vmx = to_vmx(vcpu); vmx->nested.enlightened_vmcs_enabled = true; if (vmcs_version) *vmcs_version = nested_get_evmcs_version(vcpu); return 0; } bool nested_evmcs_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu) { struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; if (!hv_vcpu || !evmcs) return false; if (!evmcs->hv_enlightenments_control.nested_flush_hypercall) return false; return hv_vcpu->vp_assist_page.nested_control.features.directhypercall; } void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) { nested_vmx_vmexit(vcpu, HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH, 0, 0); }
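As a side note, the arithmetic in nested_evmcs_filter_control_msr() above is a plain split-and-mask of the 64-bit control capability MSR: bits 31:0 carry the allowed-0 settings, bits 63:32 the allowed-1 settings, and for the exit/entry/execution/pin-based controls only the allowed-1 (high) half is clamped to what eVMCSv1 supports (MSR_IA32_VMX_VMFUNC instead clamps the low half). A standalone user-space sketch of that arithmetic, using an arbitrary placeholder mask, might look like this:

#include <stdint.h>
#include <stdio.h>

/* Clamp the allowed-1 (high) half of a VMX control MSR to a supported mask. */
static uint64_t filter_ctl_msr(uint64_t msr, uint32_t supported_high)
{
	uint32_t ctl_low  = (uint32_t)msr;		/* allowed-0 settings */
	uint32_t ctl_high = (uint32_t)(msr >> 32);	/* allowed-1 settings */

	ctl_high &= supported_high;			/* drop unsupported controls */
	return (uint64_t)ctl_low | ((uint64_t)ctl_high << 32);
}

int main(void)
{
	uint64_t raw = 0x00ff00ff0000ffffULL;		/* hypothetical raw MSR value */

	printf("filtered: %#llx\n",
	       (unsigned long long)filter_ctl_msr(raw, 0x000000ffu));
	return 0;
}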
// SPDX-License-Identifier: GPL-2.0-or-later /* * TCP over IPv6 * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on: * linux/net/ipv4/tcp.c * linux/net/ipv4/tcp_input.c * linux/net/ipv4/tcp_output.c * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind * a single port at the same time. * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. */ #include <linux/bottom_half.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/jiffies.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/init.h> #include <linux/jhash.h> #include <linux/ipsec.h> #include <linux/times.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> #include <linux/indirect_call_wrapper.h> #include <net/tcp.h> #include <net/ndisc.h> #include <net/inet6_hashtables.h> #include <net/inet6_connection_sock.h> #include <net/ipv6.h> #include <net/transp_v6.h> #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/ip6_checksum.h> #include <net/inet_ecn.h> #include <net/protocol.h> #include <net/xfrm.h> #include <net/snmp.h> #include <net/dsfield.h> #include <net/timewait_sock.h> #include <net/inet_common.h> #include <net/secure_seq.h> #include <net/hotdata.h> #include <net/busy_poll.h> #include <net/rstreason.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <crypto/hash.h> #include <linux/scatterlist.h> #include <trace/events/tcp.h> static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, enum sk_rst_reason reason); static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static const struct inet_connection_sock_af_ops ipv6_mapped; const struct inet_connection_sock_af_ops ipv6_specific; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; #endif /* Helper returning the inet6 address from a given tcp
socket. * It can be used in TCP stack instead of inet6_sk(sk). * This avoids a dereference and allow compiler optimizations. * It is a specialized version of inet6_sk_generic(). */ #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \ struct tcp6_sock, tcp)->inet6) static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); if (dst && dst_hold_safe(dst)) { rcu_assign_pointer(sk->sk_rx_dst, dst); sk->sk_rx_dst_ifindex = skb->skb_iif; sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); } } static u32 tcp_v6_init_seq(const struct sk_buff *skb) { return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, tcp_hdr(skb)->dest, tcp_hdr(skb)->source); } static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) { return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32); } static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { /* This check is replicated from tcp_v6_connect() and intended to * prevent BPF program called below from accessing bytes that are out * of the bound specified by user in addr_len. */ if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; sock_owned_by_me(sk); return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_connection_sock *icsk = inet_csk(sk); struct in6_addr *saddr = NULL, *final_p, final; struct inet_timewait_death_row *tcp_death_row; struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct ipv6_txoptions *opt; struct dst_entry *dst; struct flowi6 fl6; int addr_type; int err; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; memset(&fl6, 0, sizeof(fl6)); if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; fl6_sock_release(flowlabel); } } /* * connect() to INADDR_ANY means loopback (BSD'ism). */ if (ipv6_addr_any(&usin->sin6_addr)) { if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), &usin->sin6_addr); else usin->sin6_addr = in6addr_loopback; } addr_type = ipv6_addr_type(&usin->sin6_addr); if (addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; if (addr_type&IPV6_ADDR_LINKLOCAL) { if (addr_len >= sizeof(struct sockaddr_in6) && usin->sin6_scope_id) { /* If interface is set while binding, indices * must coincide. 
*/ if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) return -EINVAL; sk->sk_bound_dev_if = usin->sin6_scope_id; } /* Connect to link-local address requires an interface */ if (!sk->sk_bound_dev_if) return -EINVAL; } if (tp->rx_opt.ts_recent_stamp && !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; WRITE_ONCE(tp->write_seq, 0); } sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* * TCP over IPv4 */ if (addr_type & IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; if (ipv6_only_sock(sk)) return -ENETUNREACH; sin.sin_family = AF_INET; sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { icsk->icsk_ext_hdr_len = exthdrlen; /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_specific; #endif goto failure; } np->saddr = sk->sk_v6_rcv_saddr; return err; } if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) saddr = &sk->sk_v6_rcv_saddr; fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; fl6.flowi6_uid = sk->sk_uid; opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; } tp->tcp_usec_ts = dst_tcp_usec_ts(dst); tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!saddr) { saddr = &fl6.saddr; err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); if (err) goto failure; } /* set the source address */ np->saddr = *saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (opt) icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); inet->inet_dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); err = inet6_hash_connect(tcp_death_row, sk); if (err) goto late_failure; sk_set_txhash(sk); if (likely(!tp->repair)) { if (!tp->write_seq) WRITE_ONCE(tp->write_seq, secure_tcpv6_seq(np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport)); tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32); } if (tcp_fastopen_defer_connect(sk, &err)) return err; if (err) goto late_failure; err = tcp_connect(sk); if (err) goto late_failure; return 0; late_failure: tcp_set_state(sk, TCP_CLOSE); inet_bhash2_reset_saddr(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; return err; } static void tcp_v6_mtu_reduced(struct sock *sk) { struct dst_entry *dst; u32 mtu; if ((1 << 
sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) return; mtu = READ_ONCE(tcp_sk(sk)->mtu_info); /* Drop requests trying to increase our current mss. * Check done in __ip6_rt_update_pmtu() is too late. */ if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache) return; dst = inet6_csk_update_pmtu(sk, mtu); if (!dst) return; if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); } } static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct net *net = dev_net(skb->dev); struct request_sock *fastopen; struct ipv6_pinfo *np; struct tcp_sock *tp; __u32 seq, snd_una; struct sock *sk; bool fatal; int err; sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &hdr->daddr, th->dest, &hdr->saddr, ntohs(th->source), skb->dev->ifindex, inet6_sdif(skb)); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return -ENOENT; } if (sk->sk_state == TCP_TIME_WAIT) { /* To increase the counter of ignored icmps for TCP-AO */ tcp_ao_ignore_icmp(sk, AF_INET6, type, code); inet_twsk_put(inet_twsk(sk)); return 0; } seq = ntohl(th->seq); fatal = icmpv6_err_convert(type, code, &err); if (sk->sk_state == TCP_NEW_SYN_RECV) { tcp_req_err(sk, seq, fatal); return 0; } if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) { sock_put(sk); return 0; } bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; if (static_branch_unlikely(&ip6_min_hopcount)) { /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto out; } } tp = tcp_sk(sk); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ fastopen = rcu_dereference(tp->fastopen_rsk); snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && !between(seq, snd_una, tp->snd_nxt)) { __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = tcp_inet6_sk(sk); if (type == NDISC_REDIRECT) { if (!sock_owned_by_user(sk)) { struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) dst->ops->redirect(dst, sk, skb); } goto out; } if (type == ICMPV6_PKT_TOOBIG) { u32 mtu = ntohl(info); /* We are not interested in TCP_LISTEN and open_requests * (SYN-ACKs send out by Linux are always <576bytes so * they should go through unfragmented). */ if (sk->sk_state == TCP_LISTEN) goto out; if (!ip6_sk_accept_pmtu(sk)) goto out; if (mtu < IPV6_MIN_MTU) goto out; WRITE_ONCE(tp->mtu_info, mtu); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); goto out; } /* Might be for an request_sock */ switch (sk->sk_state) { case TCP_SYN_SENT: case TCP_SYN_RECV: /* Only in fast or simultaneous open. If a fast open socket is * already accepted it is treated as a connected one below. */ if (fastopen && !fastopen->sk) break; ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); if (!sock_owned_by_user(sk)) tcp_done_with_error(sk, err); else WRITE_ONCE(sk->sk_err_soft, err); goto out; case TCP_LISTEN: break; default: /* check if this ICMP message allows revert of backoff. 
* (see RFC 6069) */ if (!fastopen && type == ICMPV6_DEST_UNREACH && code == ICMPV6_NOROUTE) tcp_ld_RTO_revert(sk, seq); } if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { WRITE_ONCE(sk->sk_err_soft, err); } out: bh_unlock_sock(sk); sock_put(sk); return 0; } static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, enum tcp_synack_type synack_type, struct sk_buff *syn_skb) { struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct flowi6 *fl6 = &fl->u.ip6; struct sk_buff *skb; int err = -ENOMEM; u8 tclass; /* First, grab a route. */ if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, IPPROTO_TCP)) == NULL) goto done; skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); if (skb) { __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr); fl6->daddr = ireq->ir_v6_rmt_addr; if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | (np->tclass & INET_ECN_MASK) : np->tclass; if (!INET_ECN_is_capable(tclass) && tcp_bpf_ca_needs_ecn((struct sock *)req)) tclass |= INET_ECN_ECT_0; rcu_read_lock(); opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark), opt, tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); err = net_xmit_eval(err); } done: return err; } static void tcp_v6_reqsk_destructor(struct request_sock *req) { kfree(inet_rsk(req)->ipv6_opt); consume_skb(inet_rsk(req)->pktopts); } #ifdef CONFIG_TCP_MD5SIG static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, const struct in6_addr *addr, int l3index) { return tcp_md5_do_lookup(sk, l3index, (union tcp_md5_addr *)addr, AF_INET6); } static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, const struct sock *addr_sk) { int l3index; l3index = l3mdev_master_ifindex_by_index(sock_net(sk), addr_sk->sk_bound_dev_if); return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, l3index); } static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, sockptr_t optval, int optlen) { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; union tcp_ao_addr *addr; int l3index = 0; u8 prefixlen; bool l3flag; u8 flags; if (optlen < sizeof(cmd)) return -EINVAL; if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) return -EFAULT; if (sin6->sin6_family != AF_INET6) return -EINVAL; flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; if (optname == TCP_MD5SIG_EXT && cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { prefixlen = cmd.tcpm_prefixlen; if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && prefixlen > 32)) return -EINVAL; } else { prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 
32 : 128; } if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { struct net_device *dev; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); if (dev && netif_is_l3_master(dev)) l3index = dev->ifindex; rcu_read_unlock(); /* ok to reference set/not set outside of rcu; * right now device MUST be an L3 master */ if (!dev || !l3index) return -EINVAL; } if (!cmd.tcpm_keylen) { if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], AF_INET, prefixlen, l3index, flags); return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, AF_INET6, prefixlen, l3index, flags); } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3]; /* Don't allow keys for peers that have a matching TCP-AO key. * See the comment in tcp_ao_add_cmd() */ if (tcp_ao_required(sk, addr, AF_INET, l3flag ? l3index : -1, false)) return -EKEYREJECTED; return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); } addr = (union tcp_md5_addr *)&sin6->sin6_addr; /* Don't allow keys for peers that have a matching TCP-AO key. * See the comment in tcp_ao_add_cmd() */ if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false)) return -EKEYREJECTED; return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); } static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp, const struct in6_addr *daddr, const struct in6_addr *saddr, const struct tcphdr *th, int nbytes) { struct tcp6_pseudohdr *bp; struct scatterlist sg; struct tcphdr *_th; bp = hp->scratch; /* 1. TCP pseudo-header (RFC2460) */ bp->saddr = *saddr; bp->daddr = *daddr; bp->protocol = cpu_to_be32(IPPROTO_TCP); bp->len = cpu_to_be32(nbytes); _th = (struct tcphdr *)(bp + 1); memcpy(_th, th, sizeof(*th)); _th->check = 0; sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp) + sizeof(*th)); return crypto_ahash_update(hp->req); } static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, const struct in6_addr *daddr, struct in6_addr *saddr, const struct tcphdr *th) { struct tcp_sigpool hp; if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) goto clear_hash_nostart; if (crypto_ahash_init(hp.req)) goto clear_hash; if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; if (tcp_md5_hash_key(&hp, key)) goto clear_hash; ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); if (crypto_ahash_final(hp.req)) goto clear_hash; tcp_sigpool_end(&hp); return 0; clear_hash: tcp_sigpool_end(&hp); clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } static int tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); const struct in6_addr *saddr, *daddr; struct tcp_sigpool hp; if (sk) { /* valid for establish/request sockets */ saddr = &sk->sk_v6_rcv_saddr; daddr = &sk->sk_v6_daddr; } else { const struct ipv6hdr *ip6h = ipv6_hdr(skb); saddr = &ip6h->saddr; daddr = &ip6h->daddr; } if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) goto clear_hash_nostart; if (crypto_ahash_init(hp.req)) goto clear_hash; if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) goto clear_hash; if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) goto clear_hash; if (tcp_md5_hash_key(&hp, 
key)) goto clear_hash; ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); if (crypto_ahash_final(hp.req)) goto clear_hash; tcp_sigpool_end(&hp); return 0; clear_hash: tcp_sigpool_end(&hp); clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } #endif static void tcp_v6_init_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb, u32 tw_isn) { bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; /* So that link locals have meaning */ if ((!sk_listener->sk_bound_dev_if || l3_slave) && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); if (!tw_isn && (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { refcount_inc(&skb->users); ireq->pktopts = skb; } } static struct dst_entry *tcp_v6_route_req(const struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct request_sock *req, u32 tw_isn) { tcp_v6_init_req(req, sk, skb, tw_isn); if (security_inet_conn_request(sk, skb, req)) return NULL; return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); } struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, .syn_ack_timeout = tcp_syn_ack_timeout, }; const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr), #ifdef CONFIG_TCP_MD5SIG .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, #endif #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v6_ao_lookup_rsk, .ao_calc_key = tcp_v6_ao_calc_key_rsk, .ao_synack_hash = tcp_v6_ao_synack_hash, #endif #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v6_init_sequence, #endif .route_req = tcp_v6_route_req, .init_seq = tcp_v6_init_seq, .init_ts_off = tcp_v6_init_ts_off, .send_synack = tcp_v6_send_synack, }; static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, int rst, u8 tclass, __be32 label, u32 priority, u32 txhash, struct tcp_key *key) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; struct sk_buff *buff; struct flowi6 fl6; struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); __be32 mrst = 0, *topt; struct dst_entry *dst; __u32 mark = 0; if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; if (tcp_key_is_md5(key)) tot_len += TCPOLEN_MD5SIG_ALIGNED; if (tcp_key_is_ao(key)) tot_len += tcp_ao_len_aligned(key->ao_key); #ifdef CONFIG_MPTCP if (rst && !tcp_key_is_md5(key)) { mrst = mptcp_reset_option(skb); if (mrst) tot_len += sizeof(__be32); } #endif buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); if (!buff) return; skb_reserve(buff, MAX_TCP_HEADER); t1 = skb_push(buff, tot_len); skb_reset_transport_header(buff); /* Swap the send and the receive. 
*/ memset(t1, 0, sizeof(*t1)); t1->dest = th->source; t1->source = th->dest; t1->doff = tot_len / 4; t1->seq = htonl(seq); t1->ack_seq = htonl(ack); t1->ack = !rst || !th->ack; t1->rst = rst; t1->window = htons(win); topt = (__be32 *)(t1 + 1); if (tsecr) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); *topt++ = htonl(tsval); *topt++ = htonl(tsecr); } if (mrst) *topt++ = mrst; #ifdef CONFIG_TCP_MD5SIG if (tcp_key_is_md5(key)) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, t1); } #endif #ifdef CONFIG_TCP_AO if (tcp_key_is_ao(key)) { *topt++ = htonl((TCPOPT_AO << 24) | (tcp_ao_len(key->ao_key) << 16) | (key->ao_key->sndid << 8) | (key->rcv_next)); tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key, key->traffic_key, (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr, (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr, t1, key->sne); } #endif memset(&fl6, 0, sizeof(fl6)); fl6.daddr = ipv6_hdr(skb)->saddr; fl6.saddr = ipv6_hdr(skb)->daddr; fl6.flowlabel = label; buff->ip_summed = CHECKSUM_PARTIAL; __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); else { if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) oif = skb->skb_iif; fl6.flowi6_oif = oif; } if (sk) { if (sk->sk_state == TCP_TIME_WAIT) mark = inet_twsk(sk)->tw_mark; else mark = READ_ONCE(sk->sk_mark); skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC); } if (txhash) { /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4); } fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST * Underlying function will use this to retrieve the network * namespace */ if (sk && sk->sk_state != TCP_TIME_WAIT) dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ else dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass & ~INET_ECN_MASK, priority); TCP_INC_STATS(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS(net, TCP_MIB_OUTRSTS); return; } kfree_skb(buff); } static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, enum sk_rst_reason reason) { const struct tcphdr *th = tcp_hdr(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); const __u8 *md5_hash_location = NULL; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) bool allocated_traffic_key = false; #endif const struct tcp_ao_hdr *aoh; struct tcp_key key = {}; u32 seq = 0, ack_seq = 0; __be32 label = 0; u32 priority = 0; struct net *net; u32 txhash = 0; int oif = 0; #ifdef CONFIG_TCP_MD5SIG unsigned char newhash[16]; int genhash; struct sock *sk1 = NULL; #endif if (th->rst) return; /* If sk not NULL, it means we did a successful lookup and incoming * route had to be correct. prequeue might have dropped our dst. */ if (!sk && !ipv6_unicast_destination(skb)) return; net = sk ? 
sock_net(sk) : dev_net(skb_dst(skb)->dev); /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) return; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) rcu_read_lock(); #endif #ifdef CONFIG_TCP_MD5SIG if (sk && sk_fullsock(sk)) { int l3index; /* sdif set, means packet ingressed via a device * in an L3 domain and inet_iif is set to it. */ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); if (key.md5_key) key.type = TCP_KEY_MD5; } else if (md5_hash_location) { int dif = tcp_v6_iif_l3_slave(skb); int sdif = tcp_v6_sdif(skb); int l3index; /* * active side is lost. Try to find listening socket through * source port, and then find md5 key through listening socket. * we are not loose security here: * Incoming packet is checked with md5 hash with finding key, * no RST generated if md5 hash doesn't match. */ sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, NULL, 0, &ipv6h->saddr, th->source, &ipv6h->daddr, ntohs(th->source), dif, sdif); if (!sk1) goto out; /* sdif set, means packet ingressed via a device * in an L3 domain and dif is set to it. */ l3index = tcp_v6_sdif(skb) ? dif : 0; key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); if (!key.md5_key) goto out; key.type = TCP_KEY_MD5; genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) goto out; } #endif if (th->ack) seq = ntohl(th->ack_seq); else ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); #ifdef CONFIG_TCP_AO if (aoh) { int l3index; l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq, &key.ao_key, &key.traffic_key, &allocated_traffic_key, &key.rcv_next, &key.sne)) goto out; key.type = TCP_KEY_AO; } #endif if (sk) { oif = sk->sk_bound_dev_if; if (sk_fullsock(sk)) { if (inet6_test_bit(REPFLOW, sk)) label = ip6_flowlabel(ipv6h); priority = READ_ONCE(sk->sk_priority); txhash = sk->sk_txhash; } if (sk->sk_state == TCP_TIME_WAIT) { label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); priority = inet_twsk(sk)->tw_priority; txhash = inet_twsk(sk)->tw_txhash; } } else { if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) label = ip6_flowlabel(ipv6h); } trace_tcp_send_reset(sk, skb, reason); tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, ipv6_get_dsfield(ipv6h), label, priority, txhash, &key); #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) out: if (allocated_traffic_key) kfree(key.traffic_key); rcu_read_unlock(); #endif } static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_key *key, u8 tclass, __be32 label, u32 priority, u32 txhash) { tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, tclass, label, priority, txhash, key); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); struct tcp_key key = {}; #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao_info; if (static_branch_unlikely(&tcp_ao_needed.key)) { /* FIXME: the segment to-be-acked is not verified yet */ ao_info = rcu_dereference(tcptw->ao_info); if (ao_info) { const struct tcp_ao_hdr *aoh; /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) goto out; if (aoh) key.ao_key 
= tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); } } if (key.ao_key) { struct tcp_ao_key *rnext_key; key.traffic_key = snd_other_key(key.ao_key); /* rcv_next switches to our rcv_next */ rnext_key = READ_ONCE(ao_info->rnext_key); key.rcv_next = rnext_key->rcvid; key.sne = READ_ONCE(ao_info->snd_sne); key.type = TCP_KEY_AO; #else if (0) { #endif #ifdef CONFIG_TCP_MD5SIG } else if (static_branch_unlikely(&tcp_md5_needed.key)) { key.md5_key = tcp_twsk_md5_key(tcptw); if (key.md5_key) key.type = TCP_KEY_MD5; #endif } tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, READ_ONCE(tcptw->tw_rcv_nxt), tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_tw_tsval(tcptw), READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, tw->tw_txhash); #ifdef CONFIG_TCP_AO out: #endif inet_twsk_put(tw); } static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { struct tcp_key key = {}; #ifdef CONFIG_TCP_AO if (static_branch_unlikely(&tcp_ao_needed.key) && tcp_rsk_used_ao(req)) { const struct in6_addr *addr = &ipv6_hdr(skb)->saddr; const struct tcp_ao_hdr *aoh; int l3index; l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) return; if (!aoh) return; key.ao_key = tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, AF_INET6, aoh->rnext_keyid, -1); if (unlikely(!key.ao_key)) { /* Send ACK with any matching MKT for the peer */ key.ao_key = tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, AF_INET6, -1, -1); /* Matching key disappeared (user removed the key?) * let the handshake timeout. */ if (!key.ao_key) { net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n", addr, ntohs(tcp_hdr(skb)->source), &ipv6_hdr(skb)->daddr, ntohs(tcp_hdr(skb)->dest)); return; } } key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); if (!key.traffic_key) return; key.type = TCP_KEY_AO; key.rcv_next = aoh->keyid; tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); #else if (0) { #endif #ifdef CONFIG_TCP_MD5SIG } else if (static_branch_unlikely(&tcp_md5_needed.key)) { int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index); if (key.md5_key)