Total coverage: 262914 (15%)of 1814351
16 18 19 8 16 3 1 16 46 46 41 41 59 58 5 57 59 39 1 1 37 13 3 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 // SPDX-License-Identifier: GPL-2.0 OR MIT /* * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA * Copyright (c) 2012 David Airlie <airlied@linux.ie> * Copyright (c) 2013 David Herrmann <dh.herrmann@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the 
"Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/mm.h> #include <linux/module.h> #include <linux/rbtree.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/types.h> #include <drm/drm_mm.h> #include <drm/drm_vma_manager.h> /** * DOC: vma offset manager * * The vma-manager is responsible to map arbitrary driver-dependent memory * regions into the linear user address-space. It provides offsets to the * caller which can then be used on the address_space of the drm-device. It * takes care to not overlap regions, size them appropriately and to not * confuse mm-core by inconsistent fake vm_pgoff fields. * Drivers shouldn't use this for object placement in VMEM. This manager should * only be used to manage mappings into linear user-space VMs. * * We use drm_mm as backend to manage object allocations. But it is highly * optimized for alloc/free calls, not lookups. Hence, we use an rb-tree to * speed up offset lookups. * * You must not use multiple offset managers on a single address_space. 
* Otherwise, mm-core will be unable to tear down memory mappings as the VM will * no longer be linear. * * This offset manager works on page-based addresses. That is, every argument * and return code (with the exception of drm_vma_node_offset_addr()) is given * in number of pages, not number of bytes. That means, object sizes and offsets * must always be page-aligned (as usual). * If you want to get a valid byte-based user-space address for a given offset, * please see drm_vma_node_offset_addr(). * * Additionally to offset management, the vma offset manager also handles access * management. For every open-file context that is allowed to access a given * node, you must call drm_vma_node_allow(). Otherwise, an mmap() call on this * open-file with the offset of the node will fail with -EACCES. To revoke * access again, use drm_vma_node_revoke(). However, the caller is responsible * for destroying already existing mappings, if required. */ /** * drm_vma_offset_manager_init - Initialize new offset-manager * @mgr: Manager object * @page_offset: Offset of available memory area (page-based) * @size: Size of available address space range (page-based) * * Initialize a new offset-manager. The offset and area size available for the * manager are given as @page_offset and @size. Both are interpreted as * page-numbers, not bytes. * * Adding/removing nodes from the manager is locked internally and protected * against concurrent access. However, node allocation and destruction is left * for the caller. While calling into the vma-manager, a given node must * always be guaranteed to be referenced. 
*/
void drm_vma_offset_manager_init(struct drm_vma_offset_manager *mgr,
				 unsigned long page_offset, unsigned long size)
{
	/* vm_lock is the rwlock taken by drm_vma_offset_add()/_remove(). */
	rwlock_init(&mgr->vm_lock);
	/* Back the manager with a drm_mm of @size pages starting at @page_offset. */
	drm_mm_init(&mgr->vm_addr_space_mm, page_offset, size);
}
EXPORT_SYMBOL(drm_vma_offset_manager_init);

/**
 * drm_vma_offset_manager_destroy() - Destroy offset manager
 * @mgr: Manager object
 *
 * Destroy an object manager which was previously created via
 * drm_vma_offset_manager_init(). The caller must remove all allocated nodes
 * before destroying the manager. Otherwise, drm_mm will refuse to free the
 * requested resources.
 *
 * The manager must not be accessed after this function is called.
 */
void drm_vma_offset_manager_destroy(struct drm_vma_offset_manager *mgr)
{
	/* Only the drm_mm backend is torn down here; nothing else is released. */
	drm_mm_takedown(&mgr->vm_addr_space_mm);
}
EXPORT_SYMBOL(drm_vma_offset_manager_destroy);

/**
 * drm_vma_offset_lookup_locked() - Find node in offset space
 * @mgr: Manager object
 * @start: Start address for object (page-based)
 * @pages: Size of object (page-based)
 *
 * Find a node given a start address and object size. This returns the _best_
 * match for the given node. That is, @start may point somewhere into a valid
 * region and the given node will be returned, as long as the node spans the
 * whole requested area (given the size in number of pages as @pages).
 *
 * Note that before lookup the vma offset manager lookup lock must be acquired
 * with drm_vma_offset_lock_lookup(). See there for an example. This can then be
 * used to implement weakly referenced lookups using kref_get_unless_zero().
 *
 * Example:
 *
 * ::
 *
 *     drm_vma_offset_lock_lookup(mgr);
 *     node = drm_vma_offset_lookup_locked(mgr, start, pages);
 *     if (node)
 *         kref_get_unless_zero(container_of(node, sth, entr));
 *     drm_vma_offset_unlock_lookup(mgr);
 *
 * RETURNS:
 * Returns NULL if no suitable node can be found. Otherwise, the best match
 * is returned. It's the caller's responsibility to make sure the node doesn't
 * get destroyed before the caller can access it.
*/
struct drm_vma_offset_node *drm_vma_offset_lookup_locked(struct drm_vma_offset_manager *mgr,
							  unsigned long start,
							  unsigned long pages)
{
	struct rb_node *cur = mgr->vm_addr_space_mm.interval_tree.rb_root.rb_node;
	struct drm_mm_node *candidate = NULL;
	unsigned long node_start, node_end;

	/*
	 * Walk down the tree and remember the last node that begins at or
	 * before @start; an exact hit on the start offset ends the search.
	 */
	while (likely(cur)) {
		struct drm_mm_node *mm_node = rb_entry(cur, struct drm_mm_node, rb);

		node_start = mm_node->start;
		if (start < node_start) {
			cur = cur->rb_left;
			continue;
		}

		candidate = mm_node;
		if (start == node_start)
			break;
		cur = cur->rb_right;
	}

	if (!candidate)
		return NULL;

	/* Reject the candidate unless it spans the whole requested area. */
	node_end = candidate->start + candidate->size;
	if (node_end < start + pages)
		return NULL;

	return container_of(candidate, struct drm_vma_offset_node, vm_node);
}
EXPORT_SYMBOL(drm_vma_offset_lookup_locked);

/**
 * drm_vma_offset_add() - Add offset node to manager
 * @mgr: Manager object
 * @node: Node to be added
 * @pages: Allocation size visible to user-space (in number of pages)
 *
 * Add a node to the offset-manager. If the node was already added, this does
 * nothing and returns 0. @pages is the size of the object given in number of
 * pages.
 * After this call succeeds, you can access the offset of the node until it
 * is removed again.
 *
 * If this call fails, it is safe to retry the operation or call
 * drm_vma_offset_remove(), anyway. However, no cleanup is required in that
 * case.
 *
 * @pages is not required to be the same size as the underlying memory object
 * that you want to map. It only limits the size that user-space can map into
 * their address space.
 *
 * RETURNS:
 * 0 on success, negative error code on failure.
*/ int drm_vma_offset_add(struct drm_vma_offset_manager *mgr, struct drm_vma_offset_node *node, unsigned long pages) { int ret = 0; write_lock(&mgr->vm_lock); if (!drm_mm_node_allocated(&node->vm_node)) ret = drm_mm_insert_node(&mgr->vm_addr_space_mm, &node->vm_node, pages); write_unlock(&mgr->vm_lock); return ret; } EXPORT_SYMBOL(drm_vma_offset_add); /** * drm_vma_offset_remove() - Remove offset node from manager * @mgr: Manager object * @node: Node to be removed * * Remove a node from the offset manager. If the node wasn't added before, this * does nothing. After this call returns, the offset and size will be 0 until a * new offset is allocated via drm_vma_offset_add() again. Helper functions like * drm_vma_node_start() and drm_vma_node_offset_addr() will return 0 if no * offset is allocated. */ void drm_vma_offset_remove(struct drm_vma_offset_manager *mgr, struct drm_vma_offset_node *node) { write_lock(&mgr->vm_lock); if (drm_mm_node_allocated(&node->vm_node)) { drm_mm_remove_node(&node->vm_node); memset(&node->vm_node, 0, sizeof(node->vm_node)); } write_unlock(&mgr->vm_lock); } EXPORT_SYMBOL(drm_vma_offset_remove); static int vma_node_allow(struct drm_vma_offset_node *node, struct drm_file *tag, bool ref_counted) { struct rb_node **iter; struct rb_node *parent = NULL; struct drm_vma_offset_file *new, *entry; int ret = 0; /* Preallocate entry to avoid atomic allocations below. It is quite * unlikely that an open-file is added twice to a single node so we * don't optimize for this case. OOM is checked below only if the entry * is actually used. 
*/ new = kmalloc(sizeof(*entry), GFP_KERNEL); write_lock(&node->vm_lock); iter = &node->vm_files.rb_node; while (likely(*iter)) { parent = *iter; entry = rb_entry(*iter, struct drm_vma_offset_file, vm_rb); if (tag == entry->vm_tag) { if (ref_counted) entry->vm_count++; goto unlock; } else if (tag > entry->vm_tag) { iter = &(*iter)->rb_right; } else { iter = &(*iter)->rb_left; } } if (!new) { ret = -ENOMEM; goto unlock; } new->vm_tag = tag; new->vm_count = 1; rb_link_node(&new->vm_rb, parent, iter); rb_insert_color(&new->vm_rb, &node->vm_files); new = NULL; unlock: write_unlock(&node->vm_lock); kfree(new); return ret; } /** * drm_vma_node_allow - Add open-file to list of allowed users * @node: Node to modify * @tag: Tag of file to remove * * Add @tag to the list of allowed open-files for this node. If @tag is * already on this list, the ref-count is incremented. * * The list of allowed-users is preserved across drm_vma_offset_add() and * drm_vma_offset_remove() calls. You may even call it if the node is currently * not added to any offset-manager. * * You must remove all open-files the same number of times as you added them * before destroying the node. Otherwise, you will leak memory. * * This is locked against concurrent access internally. * * RETURNS: * 0 on success, negative error code on internal failure (out-of-mem) */ int drm_vma_node_allow(struct drm_vma_offset_node *node, struct drm_file *tag) { return vma_node_allow(node, tag, true); } EXPORT_SYMBOL(drm_vma_node_allow); /** * drm_vma_node_allow_once - Add open-file to list of allowed users * @node: Node to modify * @tag: Tag of file to remove * * Add @tag to the list of allowed open-files for this node. * * The list of allowed-users is preserved across drm_vma_offset_add() and * drm_vma_offset_remove() calls. You may even call it if the node is currently * not added to any offset-manager. 
* * This is not ref-counted unlike drm_vma_node_allow() hence drm_vma_node_revoke() * should only be called once after this. * * This is locked against concurrent access internally. * * RETURNS: * 0 on success, negative error code on internal failure (out-of-mem) */ int drm_vma_node_allow_once(struct drm_vma_offset_node *node, struct drm_file *tag) { return vma_node_allow(node, tag, false); } EXPORT_SYMBOL(drm_vma_node_allow_once); /** * drm_vma_node_revoke - Remove open-file from list of allowed users * @node: Node to modify * @tag: Tag of file to remove * * Decrement the ref-count of @tag in the list of allowed open-files on @node. * If the ref-count drops to zero, remove @tag from the list. You must call * this once for every drm_vma_node_allow() on @tag. * * This is locked against concurrent access internally. * * If @tag is not on the list, nothing is done. */ void drm_vma_node_revoke(struct drm_vma_offset_node *node, struct drm_file *tag) { struct drm_vma_offset_file *entry; struct rb_node *iter; write_lock(&node->vm_lock); iter = node->vm_files.rb_node; while (likely(iter)) { entry = rb_entry(iter, struct drm_vma_offset_file, vm_rb); if (tag == entry->vm_tag) { if (!--entry->vm_count) { rb_erase(&entry->vm_rb, &node->vm_files); kfree(entry); } break; } else if (tag > entry->vm_tag) { iter = iter->rb_right; } else { iter = iter->rb_left; } } write_unlock(&node->vm_lock); } EXPORT_SYMBOL(drm_vma_node_revoke); /** * drm_vma_node_is_allowed - Check whether an open-file is granted access * @node: Node to check * @tag: Tag of file to remove * * Search the list in @node whether @tag is currently on the list of allowed * open-files (see drm_vma_node_allow()). * * This is locked against concurrent access internally. 
*
 * RETURNS:
 * true if @tag is on the list
 */
bool drm_vma_node_is_allowed(struct drm_vma_offset_node *node,
			     struct drm_file *tag)
{
	struct rb_node *cur;
	bool allowed = false;

	read_lock(&node->vm_lock);

	/* Binary search of the per-node tree, which is keyed by tag pointer. */
	for (cur = node->vm_files.rb_node; likely(cur); ) {
		struct drm_vma_offset_file *file =
			rb_entry(cur, struct drm_vma_offset_file, vm_rb);

		if (tag == file->vm_tag) {
			allowed = true;
			break;
		}

		if (tag > file->vm_tag)
			cur = cur->rb_right;
		else
			cur = cur->rb_left;
	}

	read_unlock(&node->vm_lock);

	return allowed;
}
EXPORT_SYMBOL(drm_vma_node_is_allowed);
19 19 1 18 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 /* * llc_s_ac.c - actions performed during sap state transition. * * Description : * Functions in this module are implementation of sap component actions. * Details of actions can be found in IEEE-802.2 standard document. * All functions have one sap and one event as input argument. All of * them return 0 On success and 1 otherwise. * * Copyright (c) 1997 by Procom Technology, Inc. * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. 
*/ #include <linux/netdevice.h> #include <net/llc.h> #include <net/llc_pdu.h> #include <net/llc_s_ac.h> #include <net/llc_s_ev.h> #include <net/llc_sap.h> /** * llc_sap_action_unitdata_ind - forward UI PDU to network layer * @sap: SAP * @skb: the event to forward * * Received a UI PDU from MAC layer; forward to network layer as a * UNITDATA INDICATION; verify our event is the kind we expect */ int llc_sap_action_unitdata_ind(struct llc_sap *sap, struct sk_buff *skb) { llc_sap_rtn_pdu(sap, skb); return 0; } /** * llc_sap_action_send_ui - sends UI PDU resp to UNITDATA REQ to MAC layer * @sap: SAP * @skb: the event to send * * Sends a UI PDU to the MAC layer in response to a UNITDATA REQUEST * primitive from the network layer. Verifies event is a primitive type of * event. Verify the primitive is a UNITDATA REQUEST. */ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); if (likely(!rc)) { skb_get(skb); rc = dev_queue_xmit(skb); } return rc; } /** * llc_sap_action_send_xid_c - send XID PDU as response to XID REQ * @sap: SAP * @skb: the event to send * * Send a XID command PDU to MAC layer in response to a XID REQUEST * primitive from the network layer. Verify event is a primitive type * event. Verify the primitive is a XID REQUEST. 
*/ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); if (likely(!rc)) { skb_get(skb); rc = dev_queue_xmit(skb); } return rc; } /** * llc_sap_action_send_xid_r - send XID PDU resp to MAC for received XID * @sap: SAP * @skb: the event to send * * Send XID response PDU to MAC in response to an earlier received XID * command PDU. Verify event is a PDU type event */ int llc_sap_action_send_xid_r(struct llc_sap *sap, struct sk_buff *skb) { u8 mac_da[ETH_ALEN], mac_sa[ETH_ALEN], dsap; int rc = 1; struct sk_buff *nskb; llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, sizeof(struct llc_xid_info)); if (!nskb) goto out; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, LLC_PDU_RSP); llc_pdu_init_as_xid_rsp(nskb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(nskb, mac_sa, mac_da); if (likely(!rc)) rc = dev_queue_xmit(nskb); out: return rc; } /** * llc_sap_action_send_test_c - send TEST PDU to MAC in resp to TEST REQ * @sap: SAP * @skb: the event to send * * Send a TEST command PDU to the MAC layer in response to a TEST REQUEST * primitive from the network layer. Verify event is a primitive type * event; verify the primitive is a TEST REQUEST. 
*/ int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_test_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); if (likely(!rc)) { skb_get(skb); rc = dev_queue_xmit(skb); } return rc; } int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) { u8 mac_da[ETH_ALEN], mac_sa[ETH_ALEN], dsap; struct sk_buff *nskb; int rc = 1; u32 data_size; if (skb->mac_len < ETH_HLEN) return 1; llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); /* The test request command is type U (llc_len = 3) */ data_size = ntohs(eth_hdr(skb)->h_proto) - 3; nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, data_size); if (!nskb) goto out; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, LLC_PDU_RSP); llc_pdu_init_as_test_rsp(nskb, skb); rc = llc_mac_hdr_init(nskb, mac_sa, mac_da); if (likely(!rc)) rc = dev_queue_xmit(nskb); out: return rc; } /** * llc_sap_action_report_status - report data link status to layer mgmt * @sap: SAP * @skb: the event to send * * Report data link status to layer management. Verify our event is the * kind we expect. */ int llc_sap_action_report_status(struct llc_sap *sap, struct sk_buff *skb) { return 0; } /** * llc_sap_action_xid_ind - send XID PDU resp to net layer via XID IND * @sap: SAP * @skb: the event to send * * Send a XID response PDU to the network layer via a XID INDICATION * primitive. */ int llc_sap_action_xid_ind(struct llc_sap *sap, struct sk_buff *skb) { llc_sap_rtn_pdu(sap, skb); return 0; } /** * llc_sap_action_test_ind - send TEST PDU to net layer via TEST IND * @sap: SAP * @skb: the event to send * * Send a TEST response PDU to the network layer via a TEST INDICATION * primitive. Verify our event is a PDU type event. 
*/
int llc_sap_action_test_ind(struct llc_sap *sap, struct sk_buff *skb)
{
	/* Pass the PDU on through llc_sap_rtn_pdu(); this action always returns 0. */
	llc_sap_rtn_pdu(sap, skb);
	return 0;
}
96 96 96 95 96 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 
523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 // SPDX-License-Identifier: GPL-2.0-only /* * HT handling * * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright 2017 Intel Deutschland GmbH * Copyright(c) 2020-2024 Intel Corporation */ #include <linux/ieee80211.h> #include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "rate.h" static void __check_htcap_disable(struct ieee80211_ht_cap *ht_capa, struct ieee80211_ht_cap *ht_capa_mask, struct ieee80211_sta_ht_cap *ht_cap, u16 flag) { __le16 le_flag = cpu_to_le16(flag); if (ht_capa_mask->cap_info & le_flag) { if (!(ht_capa->cap_info & le_flag)) ht_cap->cap &= ~flag; } } static void __check_htcap_enable(struct ieee80211_ht_cap *ht_capa, struct ieee80211_ht_cap *ht_capa_mask, struct ieee80211_sta_ht_cap *ht_cap, u16 flag) { __le16 le_flag = cpu_to_le16(flag); if ((ht_capa_mask->cap_info & le_flag) && (ht_capa->cap_info & le_flag)) ht_cap->cap |= flag; } void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap) { struct ieee80211_ht_cap *ht_capa, *ht_capa_mask; u8 *scaps, *smask; int i; if (!ht_cap->ht_supported) return; switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: ht_capa = &sdata->u.mgd.ht_capa; ht_capa_mask = &sdata->u.mgd.ht_capa_mask; break; case NL80211_IFTYPE_ADHOC: ht_capa = &sdata->u.ibss.ht_capa; ht_capa_mask = &sdata->u.ibss.ht_capa_mask; break; default: WARN_ON_ONCE(1); return; } scaps = 
(u8 *)(&ht_capa->mcs.rx_mask); smask = (u8 *)(&ht_capa_mask->mcs.rx_mask); /* NOTE: If you add more over-rides here, update register_hw * ht_capa_mod_mask logic in main.c as well. * And, if this method can ever change ht_cap.ht_supported, fix * the check in ieee80211_add_ht_ie. */ /* check for HT over-rides, MCS rates first. */ for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) { u8 m = smask[i]; ht_cap->mcs.rx_mask[i] &= ~m; /* turn off all masked bits */ /* Add back rates that are supported */ ht_cap->mcs.rx_mask[i] |= (m & scaps[i]); } /* Force removal of HT-40 capabilities? */ __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_SUP_WIDTH_20_40); __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_SGI_40); /* Allow user to disable SGI-20 (SGI-40 is handled above) */ __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_SGI_20); /* Allow user to disable the max-AMSDU bit. */ __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_MAX_AMSDU); /* Allow user to disable LDPC */ __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_LDPC_CODING); /* Allow user to enable 40 MHz intolerant bit. */ __check_htcap_enable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_40MHZ_INTOLERANT); /* Allow user to enable TX STBC bit */ __check_htcap_enable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_TX_STBC); /* Allow user to configure RX STBC bits */ if (ht_capa_mask->cap_info & cpu_to_le16(IEEE80211_HT_CAP_RX_STBC)) ht_cap->cap |= le16_to_cpu(ht_capa->cap_info) & IEEE80211_HT_CAP_RX_STBC; /* Allow user to decrease AMPDU factor */ if (ht_capa_mask->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_FACTOR) { u8 n = ht_capa->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_FACTOR; if (n < ht_cap->ampdu_factor) ht_cap->ampdu_factor = n; } /* Allow the user to increase AMPDU density. 
*/ if (ht_capa_mask->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_DENSITY) { u8 n = (ht_capa->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT; if (n > ht_cap->ampdu_density) ht_cap->ampdu_density = n; } } bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const struct ieee80211_ht_cap *ht_cap_ie, struct link_sta_info *link_sta) { struct ieee80211_bss_conf *link_conf; struct sta_info *sta = link_sta->sta; struct ieee80211_sta_ht_cap ht_cap, own_cap; u8 ampdu_info, tx_mcs_set_cap; int i, max_tx_streams; bool changed; enum ieee80211_sta_rx_bandwidth bw; enum nl80211_chan_width width; memset(&ht_cap, 0, sizeof(ht_cap)); if (!ht_cap_ie || !sband->ht_cap.ht_supported) goto apply; ht_cap.ht_supported = true; own_cap = sband->ht_cap; /* * If user has specified capability over-rides, take care * of that if the station we're setting up is the AP or TDLS peer that * we advertised a restricted capability set to. Override * our own capabilities and then use those below. */ if (sdata->vif.type == NL80211_IFTYPE_STATION || sdata->vif.type == NL80211_IFTYPE_ADHOC) ieee80211_apply_htcap_overrides(sdata, &own_cap); /* * The bits listed in this expression should be * the same for the peer and us, if the station * advertises more then we can't use those thus * we mask them out. */ ht_cap.cap = le16_to_cpu(ht_cap_ie->cap_info) & (own_cap.cap | ~(IEEE80211_HT_CAP_LDPC_CODING | IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_GRN_FLD | IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_DSSSCCK40)); /* * The STBC bits are asymmetric -- if we don't have * TX then mask out the peer's RX and vice versa. 
*/ if (!(own_cap.cap & IEEE80211_HT_CAP_TX_STBC)) ht_cap.cap &= ~IEEE80211_HT_CAP_RX_STBC; if (!(own_cap.cap & IEEE80211_HT_CAP_RX_STBC)) ht_cap.cap &= ~IEEE80211_HT_CAP_TX_STBC; ampdu_info = ht_cap_ie->ampdu_params_info; ht_cap.ampdu_factor = ampdu_info & IEEE80211_HT_AMPDU_PARM_FACTOR; ht_cap.ampdu_density = (ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2; /* own MCS TX capabilities */ tx_mcs_set_cap = own_cap.mcs.tx_params; /* Copy peer MCS TX capabilities, the driver might need them. */ ht_cap.mcs.tx_params = ht_cap_ie->mcs.tx_params; /* can we TX with MCS rates? */ if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED)) goto apply; /* Counting from 0, therefore +1 */ if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_RX_DIFF) max_tx_streams = ((tx_mcs_set_cap & IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK) >> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1; else max_tx_streams = IEEE80211_HT_MCS_TX_MAX_STREAMS; /* * 802.11n-2009 20.3.5 / 20.6 says: * - indices 0 to 7 and 32 are single spatial stream * - 8 to 31 are multiple spatial streams using equal modulation * [8..15 for two streams, 16..23 for three and 24..31 for four] * - remainder are multiple spatial streams using unequal modulation */ for (i = 0; i < max_tx_streams; i++) ht_cap.mcs.rx_mask[i] = own_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION) for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE; i < IEEE80211_HT_MCS_MASK_LEN; i++) ht_cap.mcs.rx_mask[i] = own_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; /* handle MCS rate 32 too */ if (own_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) ht_cap.mcs.rx_mask[32/8] |= 1; /* set Rx highest rate */ ht_cap.mcs.rx_highest = ht_cap_ie->mcs.rx_highest; if (ht_cap.cap & IEEE80211_HT_CAP_MAX_AMSDU) link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_7935; else link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839; ieee80211_sta_recalc_aggregates(&sta->sta); apply: changed = 
memcmp(&link_sta->pub->ht_cap, &ht_cap, sizeof(ht_cap)); memcpy(&link_sta->pub->ht_cap, &ht_cap, sizeof(ht_cap)); rcu_read_lock(); link_conf = rcu_dereference(sdata->vif.link_conf[link_sta->link_id]); if (WARN_ON(!link_conf)) width = NL80211_CHAN_WIDTH_20_NOHT; else width = link_conf->chanreq.oper.width; switch (width) { default: WARN_ON_ONCE(1); fallthrough; case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: bw = IEEE80211_STA_RX_BW_20; break; case NL80211_CHAN_WIDTH_40: case NL80211_CHAN_WIDTH_80: case NL80211_CHAN_WIDTH_80P80: case NL80211_CHAN_WIDTH_160: case NL80211_CHAN_WIDTH_320: bw = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; break; } rcu_read_unlock(); link_sta->pub->bandwidth = bw; link_sta->cur_max_bandwidth = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { enum ieee80211_smps_mode smps_mode; switch ((ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >> IEEE80211_HT_CAP_SM_PS_SHIFT) { case WLAN_HT_CAP_SM_PS_INVALID: case WLAN_HT_CAP_SM_PS_STATIC: smps_mode = IEEE80211_SMPS_STATIC; break; case WLAN_HT_CAP_SM_PS_DYNAMIC: smps_mode = IEEE80211_SMPS_DYNAMIC; break; case WLAN_HT_CAP_SM_PS_DISABLED: smps_mode = IEEE80211_SMPS_OFF; break; } if (smps_mode != link_sta->pub->smps_mode) changed = true; link_sta->pub->smps_mode = smps_mode; } else { link_sta->pub->smps_mode = IEEE80211_SMPS_OFF; } return changed; } void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, enum ieee80211_agg_stop_reason reason) { int i; lockdep_assert_wiphy(sta->local->hw.wiphy); for (i = 0; i < IEEE80211_NUM_TIDS; i++) __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_LEAVE_QBSS, reason != AGG_STOP_DESTROY_STA && reason != AGG_STOP_PEER_REQUEST); for (i = 0; i < IEEE80211_NUM_TIDS; i++) __ieee80211_stop_tx_ba_session(sta, i, reason); /* * In case the tear 
down is part of a reconfigure due to HW restart * request, it is possible that the low level driver requested to stop * the BA session, so handle it to properly clean tid_tx data. */ if(reason == AGG_STOP_DESTROY_STA) { wiphy_work_cancel(sta->local->hw.wiphy, &sta->ampdu_mlme.work); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { struct tid_ampdu_tx *tid_tx = rcu_dereference_protected_tid_tx(sta, i); if (!tid_tx) continue; if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state)) ieee80211_stop_tx_ba_cb(sta, i, tid_tx); } } } void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work) { struct sta_info *sta = container_of(work, struct sta_info, ampdu_mlme.work); struct tid_ampdu_tx *tid_tx; bool blocked; int tid; lockdep_assert_wiphy(sta->local->hw.wiphy); /* When this flag is set, new sessions should be blocked. */ blocked = test_sta_flag(sta, WLAN_STA_BLOCK_BA); for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) { if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired)) __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_TIMEOUT, true); if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_stop_requested)) __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_UNSPECIFIED, true); if (!blocked && test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl)) __ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, IEEE80211_MAX_AMPDU_BUF_HT, false, true, 0); if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS, sta->ampdu_mlme.tid_rx_manage_offl)) __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, 0, false); spin_lock_bh(&sta->lock); tid_tx = sta->ampdu_mlme.tid_start_tx[tid]; if (!blocked && tid_tx) { struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); struct ieee80211_sub_if_data *sdata = vif_to_sdata(txqi->txq.vif); struct fq *fq = &sdata->local->fq; spin_lock_bh(&fq->lock); /* Allow only frags to be dequeued */ set_bit(IEEE80211_TXQ_STOP, &txqi->flags); if (!skb_queue_empty(&txqi->frags)) 
{ /* Fragmented Tx is ongoing, wait for it to * finish. Reschedule worker to retry later. */ spin_unlock_bh(&fq->lock); spin_unlock_bh(&sta->lock); /* Give the task working on the txq a chance * to send out the queued frags */ synchronize_net(); wiphy_work_queue(sdata->local->hw.wiphy, work); return; } spin_unlock_bh(&fq->lock); /* * Assign it over to the normal tid_tx array * where it "goes live". */ sta->ampdu_mlme.tid_start_tx[tid] = NULL; /* could there be a race? */ if (sta->ampdu_mlme.tid_tx[tid]) kfree(tid_tx); else ieee80211_assign_tid_tx(sta, tid, tid_tx); spin_unlock_bh(&sta->lock); ieee80211_tx_ba_session_handle_start(sta, tid); continue; } spin_unlock_bh(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx) continue; if (!blocked && test_and_clear_bit(HT_AGG_STATE_START_CB, &tid_tx->state)) ieee80211_start_tx_ba_cb(sta, tid, tid_tx); if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state)) __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_LOCAL_REQUEST); if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state)) ieee80211_stop_tx_ba_cb(sta, tid, tid_tx); } } void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u16 params; skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = ieee80211_mgmt_ba(skb, da, sdata); skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba)); mgmt->u.action.category = WLAN_CATEGORY_BACK; mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA; params = (u16)(initiator << 11); /* bit 11 initiator */ params |= (u16)(tid << 12); /* bit 15:12 TID number */ mgmt->u.action.u.delba.params = cpu_to_le16(params); mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code); ieee80211_tx_skb(sdata, skb); } void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, 
struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len) { u16 tid, params; u16 initiator; params = le16_to_cpu(mgmt->u.action.u.delba.params); tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12; initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11; ht_dbg_ratelimited(sdata, "delba from %pM (%s) tid %d reason code %d\n", mgmt->sa, initiator ? "initiator" : "recipient", tid, le16_to_cpu(mgmt->u.action.u.delba.reason_code)); if (initiator == WLAN_BACK_INITIATOR) __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0, true); else __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_PEER_REQUEST); } enum nl80211_smps_mode ieee80211_smps_mode_to_smps_mode(enum ieee80211_smps_mode smps) { switch (smps) { case IEEE80211_SMPS_OFF: return NL80211_SMPS_OFF; case IEEE80211_SMPS_STATIC: return NL80211_SMPS_STATIC; case IEEE80211_SMPS_DYNAMIC: return NL80211_SMPS_DYNAMIC; default: return NL80211_SMPS_OFF; } } int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid, int link_id) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *action_frame; struct ieee80211_tx_info *info; u8 status_link_id = link_id < 0 ? 
0 : link_id; /* 27 = header + category + action + smps mode */ skb = dev_alloc_skb(27 + local->hw.extra_tx_headroom); if (!skb) return -ENOMEM; skb_reserve(skb, local->hw.extra_tx_headroom); action_frame = skb_put(skb, 27); memcpy(action_frame->da, da, ETH_ALEN); memcpy(action_frame->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(action_frame->bssid, bssid, ETH_ALEN); action_frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); action_frame->u.action.category = WLAN_CATEGORY_HT; action_frame->u.action.u.ht_smps.action = WLAN_HT_ACTION_SMPS; switch (smps) { case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_NUM_MODES: WARN_ON(1); smps = IEEE80211_SMPS_OFF; fallthrough; case IEEE80211_SMPS_OFF: action_frame->u.action.u.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_DISABLED; break; case IEEE80211_SMPS_STATIC: action_frame->u.action.u.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_STATIC; break; case IEEE80211_SMPS_DYNAMIC: action_frame->u.action.u.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_DYNAMIC; break; } /* we'll do more on status of this frame */ info = IEEE80211_SKB_CB(skb); info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; /* we have 13 bits, and need 6: link_id 4, smps 2 */ info->status_data = IEEE80211_STATUS_TYPE_SMPS | u16_encode_bits(status_link_id << 2 | smps, IEEE80211_STATUS_SUBDATA_MASK); ieee80211_tx_skb_tid(sdata, skb, 7, link_id); return 0; } void ieee80211_request_smps(struct ieee80211_vif *vif, unsigned int link_id, enum ieee80211_smps_mode smps_mode) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_link_data *link; if (WARN_ON_ONCE(vif->type != NL80211_IFTYPE_STATION)) return; rcu_read_lock(); link = rcu_dereference(sdata->link[link_id]); if (WARN_ON(!link)) goto out; trace_api_request_smps(sdata->local, sdata, link, smps_mode); if (link->u.mgd.driver_smps_mode == smps_mode) goto out; link->u.mgd.driver_smps_mode = smps_mode; wiphy_work_queue(sdata->local->hw.wiphy, &link->u.mgd.request_smps_work); 
out: rcu_read_unlock(); } /* this might change ... don't want non-open drivers using it */ EXPORT_SYMBOL_GPL(ieee80211_request_smps);
1 1 1 19 19 17 5 3 3 3 5 5 4 1 1 1 1 1 3 3 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 // SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2003-2008 Takahiro Hirofuchi * Copyright (C) 2015-2016 Samsung Electronics * Krzysztof Opasiak <k.opasiak@samsung.com> */ #include <asm/byteorder.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/stat.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <net/sock.h> #include "usbip_common.h" #define DRIVER_AUTHOR "Takahiro Hirofuchi <hirofuchi@users.sourceforge.net>" #define DRIVER_DESC "USB/IP Core" #ifdef CONFIG_USBIP_DEBUG unsigned long usbip_debug_flag = 0xffffffff; #else unsigned long 
usbip_debug_flag; #endif EXPORT_SYMBOL_GPL(usbip_debug_flag); module_param(usbip_debug_flag, ulong, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(usbip_debug_flag, "debug flags (defined in usbip_common.h)"); /* FIXME */ struct device_attribute dev_attr_usbip_debug; EXPORT_SYMBOL_GPL(dev_attr_usbip_debug); static ssize_t usbip_debug_show(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lx\n", usbip_debug_flag); } static ssize_t usbip_debug_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { if (sscanf(buf, "%lx", &usbip_debug_flag) != 1) return -EINVAL; return count; } DEVICE_ATTR_RW(usbip_debug); static void usbip_dump_buffer(char *buff, int bufflen) { print_hex_dump(KERN_DEBUG, "usbip-core", DUMP_PREFIX_OFFSET, 16, 4, buff, bufflen, false); } static void usbip_dump_pipe(unsigned int p) { unsigned char type = usb_pipetype(p); unsigned char ep = usb_pipeendpoint(p); unsigned char dev = usb_pipedevice(p); unsigned char dir = usb_pipein(p); pr_debug("dev(%d) ep(%d) [%s] ", dev, ep, dir ? "IN" : "OUT"); switch (type) { case PIPE_ISOCHRONOUS: pr_debug("ISO\n"); break; case PIPE_INTERRUPT: pr_debug("INT\n"); break; case PIPE_CONTROL: pr_debug("CTRL\n"); break; case PIPE_BULK: pr_debug("BULK\n"); break; default: pr_debug("ERR\n"); break; } } static void usbip_dump_usb_device(struct usb_device *udev) { struct device *dev = &udev->dev; int i; dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)", udev->devnum, udev->devpath, usb_speed_string(udev->speed)); pr_debug("tt hub ttport %d\n", udev->ttport); dev_dbg(dev, " "); for (i = 0; i < 16; i++) pr_debug(" %2u", i); pr_debug("\n"); dev_dbg(dev, " toggle0(IN) :"); for (i = 0; i < 16; i++) pr_debug(" %2u", (udev->toggle[0] & (1 << i)) ? 1 : 0); pr_debug("\n"); dev_dbg(dev, " toggle1(OUT):"); for (i = 0; i < 16; i++) pr_debug(" %2u", (udev->toggle[1] & (1 << i)) ? 
1 : 0); pr_debug("\n"); dev_dbg(dev, " epmaxp_in :"); for (i = 0; i < 16; i++) { if (udev->ep_in[i]) pr_debug(" %2u", le16_to_cpu(udev->ep_in[i]->desc.wMaxPacketSize)); } pr_debug("\n"); dev_dbg(dev, " epmaxp_out :"); for (i = 0; i < 16; i++) { if (udev->ep_out[i]) pr_debug(" %2u", le16_to_cpu(udev->ep_out[i]->desc.wMaxPacketSize)); } pr_debug("\n"); dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev), udev->bus->bus_name); dev_dbg(dev, "have_langid %d, string_langid %d\n", udev->have_langid, udev->string_langid); dev_dbg(dev, "maxchild %d\n", udev->maxchild); } static void usbip_dump_request_type(__u8 rt) { switch (rt & USB_RECIP_MASK) { case USB_RECIP_DEVICE: pr_debug("DEVICE"); break; case USB_RECIP_INTERFACE: pr_debug("INTERF"); break; case USB_RECIP_ENDPOINT: pr_debug("ENDPOI"); break; case USB_RECIP_OTHER: pr_debug("OTHER "); break; default: pr_debug("------"); break; } } static void usbip_dump_usb_ctrlrequest(struct usb_ctrlrequest *cmd) { if (!cmd) { pr_debug(" : null pointer\n"); return; } pr_debug(" "); pr_debug("bRequestType(%02X) bRequest(%02X) wValue(%04X) wIndex(%04X) wLength(%04X) ", cmd->bRequestType, cmd->bRequest, cmd->wValue, cmd->wIndex, cmd->wLength); pr_debug("\n "); if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_STANDARD) { pr_debug("STANDARD "); switch (cmd->bRequest) { case USB_REQ_GET_STATUS: pr_debug("GET_STATUS\n"); break; case USB_REQ_CLEAR_FEATURE: pr_debug("CLEAR_FEAT\n"); break; case USB_REQ_SET_FEATURE: pr_debug("SET_FEAT\n"); break; case USB_REQ_SET_ADDRESS: pr_debug("SET_ADDRRS\n"); break; case USB_REQ_GET_DESCRIPTOR: pr_debug("GET_DESCRI\n"); break; case USB_REQ_SET_DESCRIPTOR: pr_debug("SET_DESCRI\n"); break; case USB_REQ_GET_CONFIGURATION: pr_debug("GET_CONFIG\n"); break; case USB_REQ_SET_CONFIGURATION: pr_debug("SET_CONFIG\n"); break; case USB_REQ_GET_INTERFACE: pr_debug("GET_INTERF\n"); break; case USB_REQ_SET_INTERFACE: pr_debug("SET_INTERF\n"); break; case USB_REQ_SYNCH_FRAME: pr_debug("SYNC_FRAME\n"); 
break; default: pr_debug("REQ(%02X)\n", cmd->bRequest); break; } usbip_dump_request_type(cmd->bRequestType); } else if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_CLASS) { pr_debug("CLASS\n"); } else if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_VENDOR) { pr_debug("VENDOR\n"); } else if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_RESERVED) { pr_debug("RESERVED\n"); } } void usbip_dump_urb(struct urb *urb) { struct device *dev; if (!urb) { pr_debug("urb: null pointer!!\n"); return; } if (!urb->dev) { pr_debug("urb->dev: null pointer!!\n"); return; } dev = &urb->dev->dev; usbip_dump_usb_device(urb->dev); dev_dbg(dev, " pipe :%08x ", urb->pipe); usbip_dump_pipe(urb->pipe); dev_dbg(dev, " status :%d\n", urb->status); dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags); dev_dbg(dev, " transfer_buffer_length:%d\n", urb->transfer_buffer_length); dev_dbg(dev, " actual_length :%d\n", urb->actual_length); if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL) usbip_dump_usb_ctrlrequest( (struct usb_ctrlrequest *)urb->setup_packet); dev_dbg(dev, " start_frame :%d\n", urb->start_frame); dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets); dev_dbg(dev, " interval :%d\n", urb->interval); dev_dbg(dev, " error_count :%d\n", urb->error_count); } EXPORT_SYMBOL_GPL(usbip_dump_urb); void usbip_dump_header(struct usbip_header *pdu) { pr_debug("BASE: cmd %u seq %u devid %u dir %u ep %u\n", pdu->base.command, pdu->base.seqnum, pdu->base.devid, pdu->base.direction, pdu->base.ep); switch (pdu->base.command) { case USBIP_CMD_SUBMIT: pr_debug("USBIP_CMD_SUBMIT: x_flags %u x_len %u sf %u #p %d iv %d\n", pdu->u.cmd_submit.transfer_flags, pdu->u.cmd_submit.transfer_buffer_length, pdu->u.cmd_submit.start_frame, pdu->u.cmd_submit.number_of_packets, pdu->u.cmd_submit.interval); break; case USBIP_CMD_UNLINK: pr_debug("USBIP_CMD_UNLINK: seq %u\n", pdu->u.cmd_unlink.seqnum); break; case USBIP_RET_SUBMIT: pr_debug("USBIP_RET_SUBMIT: st %d al %u sf %d #p 
%d ec %d\n", pdu->u.ret_submit.status, pdu->u.ret_submit.actual_length, pdu->u.ret_submit.start_frame, pdu->u.ret_submit.number_of_packets, pdu->u.ret_submit.error_count); break; case USBIP_RET_UNLINK: pr_debug("USBIP_RET_UNLINK: status %d\n", pdu->u.ret_unlink.status); break; default: /* NOT REACHED */ pr_err("unknown command\n"); break; } } EXPORT_SYMBOL_GPL(usbip_dump_header); /* Receive data over TCP/IP. */ int usbip_recv(struct socket *sock, void *buf, int size) { int result; struct kvec iov = {.iov_base = buf, .iov_len = size}; struct msghdr msg = {.msg_flags = MSG_NOSIGNAL}; int total = 0; if (!sock || !buf || !size) return -EINVAL; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, size); usbip_dbg_xmit("enter\n"); do { sock->sk->sk_allocation = GFP_NOIO; sock->sk->sk_use_task_frag = false; result = sock_recvmsg(sock, &msg, MSG_WAITALL); if (result <= 0) goto err; total += result; } while (msg_data_left(&msg)); if (usbip_dbg_flag_xmit) { pr_debug("receiving....\n"); usbip_dump_buffer(buf, size); pr_debug("received, osize %d ret %d size %zd total %d\n", size, result, msg_data_left(&msg), total); } return total; err: return result; } EXPORT_SYMBOL_GPL(usbip_recv); /* there may be more cases to tweak the flags. */ static unsigned int tweak_transfer_flags(unsigned int flags) { flags &= ~URB_NO_TRANSFER_DMA_MAP; return flags; } /* * USBIP driver packs URB transfer flags in PDUs that are exchanged * between Server (usbip_host) and Client (vhci_hcd). URB_* flags * are internal to kernel and could change. Where as USBIP URB flags * exchanged in PDUs are USBIP user API must not change. * * USBIP_URB* flags are exported as explicit API and client and server * do mapping from kernel flags to USBIP_URB*. Details as follows: * * Client tx path (USBIP_CMD_SUBMIT): * - Maps URB_* to USBIP_URB_* when it sends USBIP_CMD_SUBMIT packet. * * Server rx path (USBIP_CMD_SUBMIT): * - Maps USBIP_URB_* to URB_* when it receives USBIP_CMD_SUBMIT packet. 
* * Flags aren't included in USBIP_CMD_UNLINK and USBIP_RET_SUBMIT packets * and no special handling is needed for them in the following cases: * - Server rx path (USBIP_CMD_UNLINK) * - Client rx path & Server tx path (USBIP_RET_SUBMIT) * * Code paths: * usbip_pack_pdu() is the common routine that handles packing pdu from * urb and unpack pdu to an urb. * * usbip_pack_cmd_submit() and usbip_pack_ret_submit() handle * USBIP_CMD_SUBMIT and USBIP_RET_SUBMIT respectively. * * usbip_map_urb_to_usbip() and usbip_map_usbip_to_urb() are used * by usbip_pack_cmd_submit() and usbip_pack_ret_submit() to map * flags. */ struct urb_to_usbip_flags { u32 urb_flag; u32 usbip_flag; }; #define NUM_USBIP_FLAGS 17 static const struct urb_to_usbip_flags flag_map[NUM_USBIP_FLAGS] = { {URB_SHORT_NOT_OK, USBIP_URB_SHORT_NOT_OK}, {URB_ISO_ASAP, USBIP_URB_ISO_ASAP}, {URB_NO_TRANSFER_DMA_MAP, USBIP_URB_NO_TRANSFER_DMA_MAP}, {URB_ZERO_PACKET, USBIP_URB_ZERO_PACKET}, {URB_NO_INTERRUPT, USBIP_URB_NO_INTERRUPT}, {URB_FREE_BUFFER, USBIP_URB_FREE_BUFFER}, {URB_DIR_IN, USBIP_URB_DIR_IN}, {URB_DIR_OUT, USBIP_URB_DIR_OUT}, {URB_DIR_MASK, USBIP_URB_DIR_MASK}, {URB_DMA_MAP_SINGLE, USBIP_URB_DMA_MAP_SINGLE}, {URB_DMA_MAP_PAGE, USBIP_URB_DMA_MAP_PAGE}, {URB_DMA_MAP_SG, USBIP_URB_DMA_MAP_SG}, {URB_MAP_LOCAL, USBIP_URB_MAP_LOCAL}, {URB_SETUP_MAP_SINGLE, USBIP_URB_SETUP_MAP_SINGLE}, {URB_SETUP_MAP_LOCAL, USBIP_URB_SETUP_MAP_LOCAL}, {URB_DMA_SG_COMBINED, USBIP_URB_DMA_SG_COMBINED}, {URB_ALIGNED_TEMP_BUFFER, USBIP_URB_ALIGNED_TEMP_BUFFER}, }; static unsigned int urb_to_usbip(unsigned int flags) { unsigned int map_flags = 0; int loop; for (loop = 0; loop < NUM_USBIP_FLAGS; loop++) { if (flags & flag_map[loop].urb_flag) map_flags |= flag_map[loop].usbip_flag; } return map_flags; } static unsigned int usbip_to_urb(unsigned int flags) { unsigned int map_flags = 0; int loop; for (loop = 0; loop < NUM_USBIP_FLAGS; loop++) { if (flags & flag_map[loop].usbip_flag) map_flags |= flag_map[loop].urb_flag; } return 
map_flags; } static void usbip_pack_cmd_submit(struct usbip_header *pdu, struct urb *urb, int pack) { struct usbip_header_cmd_submit *spdu = &pdu->u.cmd_submit; /* * Some members are not still implemented in usbip. I hope this issue * will be discussed when usbip is ported to other operating systems. */ if (pack) { /* map after tweaking the urb flags */ spdu->transfer_flags = urb_to_usbip(tweak_transfer_flags(urb->transfer_flags)); spdu->transfer_buffer_length = urb->transfer_buffer_length; spdu->start_frame = urb->start_frame; spdu->number_of_packets = urb->number_of_packets; spdu->interval = urb->interval; } else { urb->transfer_flags = usbip_to_urb(spdu->transfer_flags); urb->transfer_buffer_length = spdu->transfer_buffer_length; urb->start_frame = spdu->start_frame; urb->number_of_packets = spdu->number_of_packets; urb->interval = spdu->interval; } } static void usbip_pack_ret_submit(struct usbip_header *pdu, struct urb *urb, int pack) { struct usbip_header_ret_submit *rpdu = &pdu->u.ret_submit; if (pack) { rpdu->status = urb->status; rpdu->actual_length = urb->actual_length; rpdu->start_frame = urb->start_frame; rpdu->number_of_packets = urb->number_of_packets; rpdu->error_count = urb->error_count; } else { urb->status = rpdu->status; urb->actual_length = rpdu->actual_length; urb->start_frame = rpdu->start_frame; urb->number_of_packets = rpdu->number_of_packets; urb->error_count = rpdu->error_count; } } void usbip_pack_pdu(struct usbip_header *pdu, struct urb *urb, int cmd, int pack) { switch (cmd) { case USBIP_CMD_SUBMIT: usbip_pack_cmd_submit(pdu, urb, pack); break; case USBIP_RET_SUBMIT: usbip_pack_ret_submit(pdu, urb, pack); break; default: /* NOT REACHED */ pr_err("unknown command\n"); break; } } EXPORT_SYMBOL_GPL(usbip_pack_pdu); static void correct_endian_basic(struct usbip_header_basic *base, int send) { if (send) { base->command = cpu_to_be32(base->command); base->seqnum = cpu_to_be32(base->seqnum); base->devid = cpu_to_be32(base->devid); 
base->direction = cpu_to_be32(base->direction); base->ep = cpu_to_be32(base->ep); } else { base->command = be32_to_cpu(base->command); base->seqnum = be32_to_cpu(base->seqnum); base->devid = be32_to_cpu(base->devid); base->direction = be32_to_cpu(base->direction); base->ep = be32_to_cpu(base->ep); } } static void correct_endian_cmd_submit(struct usbip_header_cmd_submit *pdu, int send) { if (send) { pdu->transfer_flags = cpu_to_be32(pdu->transfer_flags); cpu_to_be32s(&pdu->transfer_buffer_length); cpu_to_be32s(&pdu->start_frame); cpu_to_be32s(&pdu->number_of_packets); cpu_to_be32s(&pdu->interval); } else { pdu->transfer_flags = be32_to_cpu(pdu->transfer_flags); be32_to_cpus(&pdu->transfer_buffer_length); be32_to_cpus(&pdu->start_frame); be32_to_cpus(&pdu->number_of_packets); be32_to_cpus(&pdu->interval); } } static void correct_endian_ret_submit(struct usbip_header_ret_submit *pdu, int send) { if (send) { cpu_to_be32s(&pdu->status); cpu_to_be32s(&pdu->actual_length); cpu_to_be32s(&pdu->start_frame); cpu_to_be32s(&pdu->number_of_packets); cpu_to_be32s(&pdu->error_count); } else { be32_to_cpus(&pdu->status); be32_to_cpus(&pdu->actual_length); be32_to_cpus(&pdu->start_frame); be32_to_cpus(&pdu->number_of_packets); be32_to_cpus(&pdu->error_count); } } static void correct_endian_cmd_unlink(struct usbip_header_cmd_unlink *pdu, int send) { if (send) pdu->seqnum = cpu_to_be32(pdu->seqnum); else pdu->seqnum = be32_to_cpu(pdu->seqnum); } static void correct_endian_ret_unlink(struct usbip_header_ret_unlink *pdu, int send) { if (send) cpu_to_be32s(&pdu->status); else be32_to_cpus(&pdu->status); } void usbip_header_correct_endian(struct usbip_header *pdu, int send) { __u32 cmd = 0; if (send) cmd = pdu->base.command; correct_endian_basic(&pdu->base, send); if (!send) cmd = pdu->base.command; switch (cmd) { case USBIP_CMD_SUBMIT: correct_endian_cmd_submit(&pdu->u.cmd_submit, send); break; case USBIP_RET_SUBMIT: correct_endian_ret_submit(&pdu->u.ret_submit, send); break; case 
USBIP_CMD_UNLINK: correct_endian_cmd_unlink(&pdu->u.cmd_unlink, send); break; case USBIP_RET_UNLINK: correct_endian_ret_unlink(&pdu->u.ret_unlink, send); break; default: /* NOT REACHED */ pr_err("unknown command\n"); break; } } EXPORT_SYMBOL_GPL(usbip_header_correct_endian); static void usbip_iso_packet_correct_endian( struct usbip_iso_packet_descriptor *iso, int send) { /* does not need all members. but copy all simply. */ if (send) { iso->offset = cpu_to_be32(iso->offset); iso->length = cpu_to_be32(iso->length); iso->status = cpu_to_be32(iso->status); iso->actual_length = cpu_to_be32(iso->actual_length); } else { iso->offset = be32_to_cpu(iso->offset); iso->length = be32_to_cpu(iso->length); iso->status = be32_to_cpu(iso->status); iso->actual_length = be32_to_cpu(iso->actual_length); } } static void usbip_pack_iso(struct usbip_iso_packet_descriptor *iso, struct usb_iso_packet_descriptor *uiso, int pack) { if (pack) { iso->offset = uiso->offset; iso->length = uiso->length; iso->status = uiso->status; iso->actual_length = uiso->actual_length; } else { uiso->offset = iso->offset; uiso->length = iso->length; uiso->status = iso->status; uiso->actual_length = iso->actual_length; } } /* must free buffer */ struct usbip_iso_packet_descriptor* usbip_alloc_iso_desc_pdu(struct urb *urb, ssize_t *bufflen) { struct usbip_iso_packet_descriptor *iso; int np = urb->number_of_packets; ssize_t size = np * sizeof(*iso); int i; iso = kzalloc(size, GFP_KERNEL); if (!iso) return NULL; for (i = 0; i < np; i++) { usbip_pack_iso(&iso[i], &urb->iso_frame_desc[i], 1); usbip_iso_packet_correct_endian(&iso[i], 1); } *bufflen = size; return iso; } EXPORT_SYMBOL_GPL(usbip_alloc_iso_desc_pdu); /* some members of urb must be substituted before. 
*/ int usbip_recv_iso(struct usbip_device *ud, struct urb *urb) { void *buff; struct usbip_iso_packet_descriptor *iso; int np = urb->number_of_packets; int size = np * sizeof(*iso); int i; int ret; int total_length = 0; if (!usb_pipeisoc(urb->pipe)) return 0; /* my Bluetooth dongle gets ISO URBs which are np = 0 */ if (np == 0) return 0; buff = kzalloc(size, GFP_KERNEL); if (!buff) return -ENOMEM; ret = usbip_recv(ud->tcp_socket, buff, size); if (ret != size) { dev_err(&urb->dev->dev, "recv iso_frame_descriptor, %d\n", ret); kfree(buff); if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); else usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); return -EPIPE; } iso = (struct usbip_iso_packet_descriptor *) buff; for (i = 0; i < np; i++) { usbip_iso_packet_correct_endian(&iso[i], 0); usbip_pack_iso(&iso[i], &urb->iso_frame_desc[i], 0); total_length += urb->iso_frame_desc[i].actual_length; } kfree(buff); if (total_length != urb->actual_length) { dev_err(&urb->dev->dev, "total length of iso packets %d not equal to actual length of buffer %d\n", total_length, urb->actual_length); if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); else usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); return -EPIPE; } return ret; } EXPORT_SYMBOL_GPL(usbip_recv_iso); /* * This functions restores the padding which was removed for optimizing * the bandwidth during transfer over tcp/ip * * buffer and iso packets need to be stored and be in propeper endian in urb * before calling this function */ void usbip_pad_iso(struct usbip_device *ud, struct urb *urb) { int np = urb->number_of_packets; int i; int actualoffset = urb->actual_length; if (!usb_pipeisoc(urb->pipe)) return; /* if no packets or length of data is 0, then nothing to unpack */ if (np == 0 || urb->actual_length == 0) return; /* * if actual_length is transfer_buffer_length then no padding is * present. 
*/ if (urb->actual_length == urb->transfer_buffer_length) return; /* * loop over all packets from last to first (to prevent overwriting * memory when padding) and move them into the proper place */ for (i = np-1; i > 0; i--) { actualoffset -= urb->iso_frame_desc[i].actual_length; memmove(urb->transfer_buffer + urb->iso_frame_desc[i].offset, urb->transfer_buffer + actualoffset, urb->iso_frame_desc[i].actual_length); } } EXPORT_SYMBOL_GPL(usbip_pad_iso); /* some members of urb must be substituted before. */ int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb) { struct scatterlist *sg; int ret = 0; int recv; int size; int copy; int i; if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) { /* the direction of urb must be OUT. */ if (usb_pipein(urb->pipe)) return 0; size = urb->transfer_buffer_length; } else { /* the direction of urb must be IN. */ if (usb_pipeout(urb->pipe)) return 0; size = urb->actual_length; } /* no need to recv xbuff */ if (!(size > 0)) return 0; if (size > urb->transfer_buffer_length) /* should not happen, probably malicious packet */ goto error; if (urb->num_sgs) { copy = size; for_each_sg(urb->sg, sg, urb->num_sgs, i) { int recv_size; if (copy < sg->length) recv_size = copy; else recv_size = sg->length; recv = usbip_recv(ud->tcp_socket, sg_virt(sg), recv_size); if (recv != recv_size) goto error; copy -= recv; ret += recv; if (!copy) break; } if (ret != size) goto error; } else { ret = usbip_recv(ud->tcp_socket, urb->transfer_buffer, size); if (ret != size) goto error; } return ret; error: dev_err(&urb->dev->dev, "recv xbuf, %d\n", ret); if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); else usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); return -EPIPE; } EXPORT_SYMBOL_GPL(usbip_recv_xbuff); static int __init usbip_core_init(void) { return usbip_init_eh(); } static void __exit usbip_core_exit(void) { usbip_finish_eh(); return; } module_init(usbip_core_init); module_exit(usbip_core_exit); 
MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
146 145 135 131 123 6 138 131 131 151 151 136 137 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 /* mpiutil.ac - Utility functions for MPI * Copyright (C) 1998, 1999 Free Software Foundation, Inc. * * This file is part of GnuPG. * * GnuPG is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * GnuPG is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ #include "mpi-internal.h" /**************** * Note: It was a bad idea to use the number of limbs to allocate * because on a alpha the limbs are large but we normally need * integers of n bits - So we should change this to bits (or bytes). 
* * But mpi_alloc is used in a lot of places :-) */ MPI mpi_alloc(unsigned nlimbs) { MPI a; a = kmalloc(sizeof *a, GFP_KERNEL); if (!a) return a; if (nlimbs) { a->d = mpi_alloc_limb_space(nlimbs); if (!a->d) { kfree(a); return NULL; } } else { a->d = NULL; } a->alloced = nlimbs; a->nlimbs = 0; a->sign = 0; a->flags = 0; a->nbits = 0; return a; } EXPORT_SYMBOL_GPL(mpi_alloc); mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs) { size_t len = nlimbs * sizeof(mpi_limb_t); if (!len) return NULL; return kmalloc(len, GFP_KERNEL); } void mpi_free_limb_space(mpi_ptr_t a) { if (!a) return; kfree_sensitive(a); } void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs) { mpi_free_limb_space(a->d); a->d = ap; a->alloced = nlimbs; } /**************** * Resize the array of A to NLIMBS. the additional space is cleared * (set to 0) [done by m_realloc()] */ int mpi_resize(MPI a, unsigned nlimbs) { void *p; if (nlimbs <= a->alloced) return 0; /* no need to do it */ if (a->d) { p = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL); if (!p) return -ENOMEM; memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t)); kfree_sensitive(a->d); a->d = p; } else { a->d = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL); if (!a->d) return -ENOMEM; } a->alloced = nlimbs; return 0; } void mpi_free(MPI a) { if (!a) return; if (a->flags & 4) kfree_sensitive(a->d); else mpi_free_limb_space(a->d); if (a->flags & ~7) pr_info("invalid flag value in mpi\n"); kfree(a); } EXPORT_SYMBOL_GPL(mpi_free); /**************** * Note: This copy function should not interpret the MPI * but copy it transparently. */ MPI mpi_copy(MPI a) { int i; MPI b; if (a) { b = mpi_alloc(a->nlimbs); if (!b) return NULL; b->nlimbs = a->nlimbs; b->sign = a->sign; b->flags = a->flags; b->flags &= ~(16|32); /* Reset the immutable and constant flags. */ for (i = 0; i < b->nlimbs; i++) b->d[i] = a->d[i]; } else b = NULL; return b; } MODULE_DESCRIPTION("Multiprecision maths library"); MODULE_LICENSE("GPL");
16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 18 18 18 18 18 18 10 1 2 8 8 7 2 4 3 2 2 5 1 33 34 29 1 22 6 28 277 277 272 77 76 4 4 254 3 15 15 15 15 1 1 1 2 1 1 1 2 2 7 1 2 1 3 9 1 1 2 5 2 9 1 3 1 2 58 1 22 1 9 10 10 10 2 1 3 4 3 7 49 45 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 
464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 
964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 /* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #include <linux/export.h> #include <linux/uaccess.h> #include <drm/drm_crtc.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_framebuffer.h> #include <drm/drm_print.h> #include <drm/drm_property.h> #include "drm_crtc_internal.h" /** * DOC: overview * * Properties as represented by &drm_property are used to extend the modeset * interface exposed to userspace. For the atomic modeset IOCTL properties are * even the only way to transport metadata about the desired new modeset * configuration from userspace to the kernel. Properties have a well-defined * value range, which is enforced by the drm core. 
See the documentation of the * flags member of &struct drm_property for an overview of the different * property types and ranges. * * Properties don't store the current value directly, but need to be * instantiated by attaching them to a &drm_mode_object with * drm_object_attach_property(). * * Property values are only 64bit. To support bigger piles of data (like gamma * tables, color correction matrices or large structures) a property can instead * point at a &drm_property_blob with that additional data. * * Properties are defined by their symbolic name, userspace must keep a * per-object mapping from those names to the property ID used in the atomic * IOCTL and in the get/set property IOCTL. */ static bool drm_property_flags_valid(u32 flags) { u32 legacy_type = flags & DRM_MODE_PROP_LEGACY_TYPE; u32 ext_type = flags & DRM_MODE_PROP_EXTENDED_TYPE; /* Reject undefined/deprecated flags */ if (flags & ~(DRM_MODE_PROP_LEGACY_TYPE | DRM_MODE_PROP_EXTENDED_TYPE | DRM_MODE_PROP_IMMUTABLE | DRM_MODE_PROP_ATOMIC)) return false; /* We want either a legacy type or an extended type, but not both */ if (!legacy_type == !ext_type) return false; /* Only one legacy type at a time please */ if (legacy_type && !is_power_of_2(legacy_type)) return false; return true; } /** * drm_property_create - create a new property type * @dev: drm device * @flags: flags specifying the property type * @name: name of the property * @num_values: number of pre-defined values * * This creates a new generic drm property which can then be attached to a drm * object with drm_object_attach_property(). The returned property object must * be freed with drm_property_destroy(), which is done automatically when * calling drm_mode_config_cleanup(). * * Returns: * A pointer to the newly created property on success, NULL on failure. 
*/
struct drm_property *drm_property_create(struct drm_device *dev,
					 u32 flags, const char *name,
					 int num_values)
{
	struct drm_property *prop;

	/* Reject bad flag combinations and names that would not fit. */
	if (WARN_ON(!drm_property_flags_valid(flags)))
		return NULL;

	if (WARN_ON(strlen(name) >= DRM_PROP_NAME_LEN))
		return NULL;

	prop = kzalloc(sizeof(*prop), GFP_KERNEL);
	if (!prop)
		return NULL;

	prop->dev = dev;

	if (num_values) {
		prop->values = kcalloc(num_values, sizeof(*prop->values),
				       GFP_KERNEL);
		if (!prop->values)
			goto err_free;
	}

	if (drm_mode_object_add(dev, &prop->base, DRM_MODE_OBJECT_PROPERTY))
		goto err_free;

	prop->flags = flags;
	prop->num_values = num_values;
	INIT_LIST_HEAD(&prop->enum_list);

	strscpy_pad(prop->name, name, DRM_PROP_NAME_LEN);

	/* Make the property reachable from the device's property list. */
	list_add_tail(&prop->head, &dev->mode_config.property_list);

	return prop;

err_free:
	/* prop->values is still NULL if its allocation was skipped or failed. */
	kfree(prop->values);
	kfree(prop);
	return NULL;
}
EXPORT_SYMBOL(drm_property_create);

/**
 * drm_property_create_enum - create a new enumeration property type
 * @dev: drm device
 * @flags: flags specifying the property type
 * @name: name of the property
 * @props: enumeration lists with property values
 * @num_values: number of pre-defined values
 *
 * This creates a new generic drm property which can then be attached to a drm
 * object with drm_object_attach_property(). The returned property object must
 * be freed with drm_property_destroy(), which is done automatically when
 * calling drm_mode_config_cleanup().
 *
 * Userspace is only allowed to set one of the predefined values for enumeration
 * properties.
 *
 * Returns:
 * A pointer to the newly created property on success, NULL on failure.
*/
struct drm_property *drm_property_create_enum(struct drm_device *dev,
					      u32 flags, const char *name,
					      const struct drm_prop_enum_list *props,
					      int num_values)
{
	const struct drm_prop_enum_list *entry;
	struct drm_property *property;
	int idx;

	property = drm_property_create(dev, flags | DRM_MODE_PROP_ENUM, name,
				       num_values);
	if (!property)
		return NULL;

	/* Register every entry of @props; any failure undoes the whole property. */
	for (idx = 0; idx < num_values; idx++) {
		entry = &props[idx];

		if (drm_property_add_enum(property, entry->type, entry->name))
			goto err_destroy;
	}

	return property;

err_destroy:
	drm_property_destroy(dev, property);
	return NULL;
}
EXPORT_SYMBOL(drm_property_create_enum);

/**
 * drm_property_create_bitmask - create a new bitmask property type
 * @dev: drm device
 * @flags: flags specifying the property type
 * @name: name of the property
 * @props: enumeration lists with property bitflags
 * @num_props: size of the @props array
 * @supported_bits: bitmask of all supported enumeration values
 *
 * This creates a new bitmask drm property which can then be attached to a drm
 * object with drm_object_attach_property(). The returned property object must
 * be freed with drm_property_destroy(), which is done automatically when
 * calling drm_mode_config_cleanup().
 *
 * Compared to plain enumeration properties userspace is allowed to set any
 * or'ed together combination of the predefined property bitflag values
 *
 * Returns:
 * A pointer to the newly created property on success, NULL on failure.
*/ struct drm_property *drm_property_create_bitmask(struct drm_device *dev, u32 flags, const char *name, const struct drm_prop_enum_list *props, int num_props, uint64_t supported_bits) { struct drm_property *property; int i, ret; int num_values = hweight64(supported_bits); flags |= DRM_MODE_PROP_BITMASK; property = drm_property_create(dev, flags, name, num_values); if (!property) return NULL; for (i = 0; i < num_props; i++) { if (!(supported_bits & (1ULL << props[i].type))) continue; ret = drm_property_add_enum(property, props[i].type, props[i].name); if (ret) { drm_property_destroy(dev, property); return NULL; } } return property; } EXPORT_SYMBOL(drm_property_create_bitmask); static struct drm_property *property_create_range(struct drm_device *dev, u32 flags, const char *name, uint64_t min, uint64_t max) { struct drm_property *property; property = drm_property_create(dev, flags, name, 2); if (!property) return NULL; property->values[0] = min; property->values[1] = max; return property; } /** * drm_property_create_range - create a new unsigned ranged property type * @dev: drm device * @flags: flags specifying the property type * @name: name of the property * @min: minimum value of the property * @max: maximum value of the property * * This creates a new generic drm property which can then be attached to a drm * object with drm_object_attach_property(). The returned property object must * be freed with drm_property_destroy(), which is done automatically when * calling drm_mode_config_cleanup(). * * Userspace is allowed to set any unsigned integer value in the (min, max) * range inclusive. * * Returns: * A pointer to the newly created property on success, NULL on failure. 
*/
struct drm_property *drm_property_create_range(struct drm_device *dev,
					       u32 flags, const char *name,
					       uint64_t min, uint64_t max)
{
	/* Tag the property as an unsigned range and let the helper store the limits. */
	return property_create_range(dev, DRM_MODE_PROP_RANGE | flags,
			name, min, max);
}
EXPORT_SYMBOL(drm_property_create_range);

/**
 * drm_property_create_signed_range - create a new signed ranged property type
 * @dev: drm device
 * @flags: flags specifying the property type
 * @name: name of the property
 * @min: minimum value of the property
 * @max: maximum value of the property
 *
 * This creates a new generic drm property which can then be attached to a drm
 * object with drm_object_attach_property(). The returned property object must
 * be freed with drm_property_destroy(), which is done automatically when
 * calling drm_mode_config_cleanup().
 *
 * Userspace is allowed to set any signed integer value in the inclusive
 * range from @min to @max.
 *
 * Returns:
 * A pointer to the newly created property on success, NULL on failure.
 */
struct drm_property *drm_property_create_signed_range(struct drm_device *dev,
						      u32 flags, const char *name,
						      int64_t min, int64_t max)
{
	/* The signed limits are passed through I642U64() before being stored. */
	return property_create_range(dev, DRM_MODE_PROP_SIGNED_RANGE | flags,
			name, I642U64(min), I642U64(max));
}
EXPORT_SYMBOL(drm_property_create_signed_range);

/**
 * drm_property_create_object - create a new object property type
 * @dev: drm device
 * @flags: flags specifying the property type
 * @name: name of the property
 * @type: object type from DRM_MODE_OBJECT_* defines
 *
 * This creates a new generic drm property which can then be attached to a drm
 * object with drm_object_attach_property(). The returned property object must
 * be freed with drm_property_destroy(), which is done automatically when
 * calling drm_mode_config_cleanup().
 *
 * Userspace is only allowed to set this to any property value of the given
 * @type. Only useful for atomic properties, which is enforced.
 *
 * Returns:
 * A pointer to the newly created property on success, NULL on failure.
*/
struct drm_property *drm_property_create_object(struct drm_device *dev,
						u32 flags, const char *name,
						uint32_t type)
{
	struct drm_property *property;

	flags |= DRM_MODE_PROP_OBJECT;

	/* Object properties are refused unless DRM_MODE_PROP_ATOMIC is set. */
	if (WARN_ON(!(flags & DRM_MODE_PROP_ATOMIC)))
		return NULL;

	property = drm_property_create(dev, flags, name, 1);
	if (!property)
		return NULL;

	/* The single value slot records which object type the property accepts. */
	property->values[0] = type;

	return property;
}
EXPORT_SYMBOL(drm_property_create_object);

/**
 * drm_property_create_bool - create a new boolean property type
 * @dev: drm device
 * @flags: flags specifying the property type
 * @name: name of the property
 *
 * This creates a new generic drm property which can then be attached to a drm
 * object with drm_object_attach_property(). The returned property object must
 * be freed with drm_property_destroy(), which is done automatically when
 * calling drm_mode_config_cleanup().
 *
 * This is implemented as a ranged property with only {0, 1} as valid values.
 *
 * Returns:
 * A pointer to the newly created property on success, NULL on failure.
 */
struct drm_property *drm_property_create_bool(struct drm_device *dev,
					      u32 flags, const char *name)
{
	/* A boolean is simply the unsigned range from 0 to 1. */
	return drm_property_create_range(dev, flags, name, 0, 1);
}
EXPORT_SYMBOL(drm_property_create_bool);

/**
 * drm_property_add_enum - add a possible value to an enumeration property
 * @property: enumeration property to change
 * @value: value of the new enumeration
 * @name: symbolic name of the new enumeration
 *
 * This function adds enumerations to a property.
 *
 * Its use is deprecated, drivers should use one of the more specific helpers
 * to directly create the property with all enumerations already attached.
 *
 * Returns:
 * Zero on success, error code on failure.
*/
int drm_property_add_enum(struct drm_property *property,
			  uint64_t value, const char *name)
{
	const bool is_bitmask =
		drm_property_type_is(property, DRM_MODE_PROP_BITMASK);
	struct drm_property_enum *entry;
	int count = 0;

	if (WARN_ON(strlen(name) >= DRM_PROP_NAME_LEN))
		return -EINVAL;

	/* Only enum and bitmask properties carry an enumeration list. */
	if (WARN_ON(!is_bitmask &&
		    !drm_property_type_is(property, DRM_MODE_PROP_ENUM)))
		return -EINVAL;

	/* For bitmask properties the value is a bit number, so 0 to 63 only. */
	if (WARN_ON(is_bitmask && value > 63))
		return -EINVAL;

	/* Refuse duplicates and count the entries already present. */
	list_for_each_entry(entry, &property->enum_list, head) {
		if (WARN_ON(entry->value == value))
			return -EINVAL;
		count++;
	}

	/* All value slots reserved at creation time are taken. */
	if (WARN_ON(count >= property->num_values))
		return -EINVAL;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->value = value;
	strscpy_pad(entry->name, name, DRM_PROP_NAME_LEN);

	property->values[count] = value;
	list_add_tail(&entry->head, &property->enum_list);

	return 0;
}
EXPORT_SYMBOL(drm_property_add_enum);

/**
 * drm_property_destroy - destroy a drm property
 * @dev: drm device
 * @property: property to destroy
 *
 * This function frees a property including any attached resources like
 * enumeration values.
*/ void drm_property_destroy(struct drm_device *dev, struct drm_property *property) { struct drm_property_enum *prop_enum, *pt; list_for_each_entry_safe(prop_enum, pt, &property->enum_list, head) { list_del(&prop_enum->head); kfree(prop_enum); } if (property->num_values) kfree(property->values); drm_mode_object_unregister(dev, &property->base); list_del(&property->head); kfree(property); } EXPORT_SYMBOL(drm_property_destroy); int drm_mode_getproperty_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_get_property *out_resp = data; struct drm_property *property; int enum_count = 0; int value_count = 0; int i, copied; struct drm_property_enum *prop_enum; struct drm_mode_property_enum __user *enum_ptr; uint64_t __user *values_ptr; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; property = drm_property_find(dev, file_priv, out_resp->prop_id); if (!property) return -ENOENT; strscpy_pad(out_resp->name, property->name, DRM_PROP_NAME_LEN); out_resp->flags = property->flags; value_count = property->num_values; values_ptr = u64_to_user_ptr(out_resp->values_ptr); for (i = 0; i < value_count; i++) { if (i < out_resp->count_values && put_user(property->values[i], values_ptr + i)) { return -EFAULT; } } out_resp->count_values = value_count; copied = 0; enum_ptr = u64_to_user_ptr(out_resp->enum_blob_ptr); if (drm_property_type_is(property, DRM_MODE_PROP_ENUM) || drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) { list_for_each_entry(prop_enum, &property->enum_list, head) { enum_count++; if (out_resp->count_enum_blobs < enum_count) continue; if (copy_to_user(&enum_ptr[copied].value, &prop_enum->value, sizeof(uint64_t))) return -EFAULT; if (copy_to_user(&enum_ptr[copied].name, &prop_enum->name, DRM_PROP_NAME_LEN)) return -EFAULT; copied++; } out_resp->count_enum_blobs = enum_count; } /* * NOTE: The idea seems to have been to use this to read all the blob * property values. 
But nothing ever added them to the corresponding * list, userspace always used the special-purpose get_blob ioctl to * read the value for a blob property. It also doesn't make a lot of * sense to return values here when everything else is just metadata for * the property itself. */ if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) out_resp->count_enum_blobs = 0; return 0; } static void drm_property_free_blob(struct kref *kref) { struct drm_property_blob *blob = container_of(kref, struct drm_property_blob, base.refcount); mutex_lock(&blob->dev->mode_config.blob_lock); list_del(&blob->head_global); mutex_unlock(&blob->dev->mode_config.blob_lock); drm_mode_object_unregister(blob->dev, &blob->base); kvfree(blob); } /** * drm_property_create_blob - Create new blob property * @dev: DRM device to create property for * @length: Length to allocate for blob data * @data: If specified, copies data into blob * * Creates a new blob property for a specified DRM device, optionally * copying data. Note that blob properties are meant to be invariant, hence the * data must be filled out before the blob is used as the value of any property. * * Returns: * New blob property with a single reference on success, or an ERR_PTR * value on failure. */ struct drm_property_blob * drm_property_create_blob(struct drm_device *dev, size_t length, const void *data) { struct drm_property_blob *blob; int ret; if (!length || length > INT_MAX - sizeof(struct drm_property_blob)) return ERR_PTR(-EINVAL); blob = kvzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL); if (!blob) return ERR_PTR(-ENOMEM); /* This must be explicitly initialised, so we can safely call list_del * on it in the removal handler, even if it isn't in a file list. 
*/ INIT_LIST_HEAD(&blob->head_file); blob->data = (void *)blob + sizeof(*blob); blob->length = length; blob->dev = dev; if (data) memcpy(blob->data, data, length); ret = __drm_mode_object_add(dev, &blob->base, DRM_MODE_OBJECT_BLOB, true, drm_property_free_blob); if (ret) { kvfree(blob); return ERR_PTR(-EINVAL); } mutex_lock(&dev->mode_config.blob_lock); list_add_tail(&blob->head_global, &dev->mode_config.property_blob_list); mutex_unlock(&dev->mode_config.blob_lock); return blob; } EXPORT_SYMBOL(drm_property_create_blob); /** * drm_property_blob_put - release a blob property reference * @blob: DRM blob property * * Releases a reference to a blob property. May free the object. */ void drm_property_blob_put(struct drm_property_blob *blob) { if (!blob) return; drm_mode_object_put(&blob->base); } EXPORT_SYMBOL(drm_property_blob_put); void drm_property_destroy_user_blobs(struct drm_device *dev, struct drm_file *file_priv) { struct drm_property_blob *blob, *bt; /* * When the file gets released that means no one else can access the * blob list any more, so no need to grab dev->blob_lock. */ list_for_each_entry_safe(blob, bt, &file_priv->blobs, head_file) { list_del_init(&blob->head_file); drm_property_blob_put(blob); } } /** * drm_property_blob_get - acquire blob property reference * @blob: DRM blob property * * Acquires a reference to an existing blob property. Returns @blob, which * allows this to be used as a shorthand in assignments. */ struct drm_property_blob *drm_property_blob_get(struct drm_property_blob *blob) { drm_mode_object_get(&blob->base); return blob; } EXPORT_SYMBOL(drm_property_blob_get); /** * drm_property_lookup_blob - look up a blob property and take a reference * @dev: drm device * @id: id of the blob property * * If successful, this takes an additional reference to the blob property. * callers need to make sure to eventually unreferenced the returned property * again, using drm_property_blob_put(). 
*
 * Return:
 * NULL on failure, pointer to the blob on success.
 */
struct drm_property_blob *drm_property_lookup_blob(struct drm_device *dev,
					           uint32_t id)
{
	struct drm_mode_object *obj =
		__drm_mode_object_find(dev, NULL, id, DRM_MODE_OBJECT_BLOB);

	/* No object with that id and type means there is no blob to return. */
	if (!obj)
		return NULL;

	return obj_to_blob(obj);
}
EXPORT_SYMBOL(drm_property_lookup_blob);

/**
 * drm_property_replace_global_blob - replace existing blob property
 * @dev: drm device
 * @replace: location of blob property pointer to be replaced
 * @length: length of data for new blob, or 0 for no data
 * @data: content for new blob, or NULL for no data
 * @obj_holds_id: optional object for property holding blob ID
 * @prop_holds_id: optional property holding blob ID
 * @return 0 on success or error on failure
 *
 * This function will replace a global property in the blob list, optionally
 * updating a property which holds the ID of that property.
 *
 * If length is 0 or data is NULL, no new blob will be created, and the holding
 * property, if specified, will be set to 0.
 *
 * Access to the replace pointer is assumed to be protected by the caller, e.g.
 * by holding the relevant modesetting object lock for its parent.
 *
 * For example, a drm_connector has a 'PATH' property, which contains the ID
 * of a blob property with the value of the MST path information. Calling this
 * function with replace pointing to the connector's path_blob_ptr, length and
 * data set for the new path information, obj_holds_id set to the connector's
 * base object, and prop_holds_id set to the path property name, will perform
 * a completely atomic update. The access to path_blob_ptr is protected by the
 * caller holding a lock on the connector.
*/ int drm_property_replace_global_blob(struct drm_device *dev, struct drm_property_blob **replace, size_t length, const void *data, struct drm_mode_object *obj_holds_id, struct drm_property *prop_holds_id) { struct drm_property_blob *new_blob = NULL; struct drm_property_blob *old_blob = NULL; int ret; WARN_ON(replace == NULL); old_blob = *replace; if (length && data) { new_blob = drm_property_create_blob(dev, length, data); if (IS_ERR(new_blob)) return PTR_ERR(new_blob); } if (obj_holds_id) { ret = drm_object_property_set_value(obj_holds_id, prop_holds_id, new_blob ? new_blob->base.id : 0); if (ret != 0) goto err_created; } drm_property_blob_put(old_blob); *replace = new_blob; return 0; err_created: drm_property_blob_put(new_blob); return ret; } EXPORT_SYMBOL(drm_property_replace_global_blob); /** * drm_property_replace_blob - replace a blob property * @blob: a pointer to the member blob to be replaced * @new_blob: the new blob to replace with * * Return: true if the blob was in fact replaced. */ bool drm_property_replace_blob(struct drm_property_blob **blob, struct drm_property_blob *new_blob) { struct drm_property_blob *old_blob = *blob; if (old_blob == new_blob) return false; drm_property_blob_put(old_blob); if (new_blob) drm_property_blob_get(new_blob); *blob = new_blob; return true; } EXPORT_SYMBOL(drm_property_replace_blob); /** * drm_property_replace_blob_from_id - replace a blob property taking a reference * @dev: DRM device * @blob: a pointer to the member blob to be replaced * @blob_id: the id of the new blob to replace with * @expected_size: expected size of the blob property * @expected_elem_size: expected size of an element in the blob property * @replaced: if the blob was in fact replaced * * Look up the new blob from id, take its reference, check expected sizes of * the blob and its element and replace the old blob by the new one. Advertise * if the replacement operation was successful. * * Return: true if the blob was in fact replaced. 
-EINVAL if the new blob was * not found or sizes don't match. */ int drm_property_replace_blob_from_id(struct drm_device *dev, struct drm_property_blob **blob, uint64_t blob_id, ssize_t expected_size, ssize_t expected_elem_size, bool *replaced) { struct drm_property_blob *new_blob = NULL; if (blob_id != 0) { new_blob = drm_property_lookup_blob(dev, blob_id); if (new_blob == NULL) { drm_dbg_atomic(dev, "cannot find blob ID %llu\n", blob_id); return -EINVAL; } if (expected_size > 0 && new_blob->length != expected_size) { drm_dbg_atomic(dev, "[BLOB:%d] length %zu different from expected %zu\n", new_blob->base.id, new_blob->length, expected_size); drm_property_blob_put(new_blob); return -EINVAL; } if (expected_elem_size > 0 && new_blob->length % expected_elem_size != 0) { drm_dbg_atomic(dev, "[BLOB:%d] length %zu not divisible by element size %zu\n", new_blob->base.id, new_blob->length, expected_elem_size); drm_property_blob_put(new_blob); return -EINVAL; } } *replaced |= drm_property_replace_blob(blob, new_blob); drm_property_blob_put(new_blob); return 0; } EXPORT_SYMBOL(drm_property_replace_blob_from_id); int drm_mode_getblob_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_get_blob *out_resp = data; struct drm_property_blob *blob; int ret = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; blob = drm_property_lookup_blob(dev, out_resp->blob_id); if (!blob) return -ENOENT; if (out_resp->length == blob->length) { if (copy_to_user(u64_to_user_ptr(out_resp->data), blob->data, blob->length)) { ret = -EFAULT; goto unref; } } out_resp->length = blob->length; unref: drm_property_blob_put(blob); return ret; } int drm_mode_createblob_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_create_blob *out_resp = data; struct drm_property_blob *blob; int ret = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; blob = drm_property_create_blob(dev, 
out_resp->length, NULL); if (IS_ERR(blob)) return PTR_ERR(blob); if (copy_from_user(blob->data, u64_to_user_ptr(out_resp->data), out_resp->length)) { ret = -EFAULT; goto out_blob; } /* Dropping the lock between create_blob and our access here is safe * as only the same file_priv can remove the blob; at this point, it is * not associated with any file_priv. */ mutex_lock(&dev->mode_config.blob_lock); out_resp->blob_id = blob->base.id; list_add_tail(&blob->head_file, &file_priv->blobs); mutex_unlock(&dev->mode_config.blob_lock); return 0; out_blob: drm_property_blob_put(blob); return ret; } int drm_mode_destroyblob_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_destroy_blob *out_resp = data; struct drm_property_blob *blob = NULL, *bt; bool found = false; int ret = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; blob = drm_property_lookup_blob(dev, out_resp->blob_id); if (!blob) return -ENOENT; mutex_lock(&dev->mode_config.blob_lock); /* Ensure the property was actually created by this user. */ list_for_each_entry(bt, &file_priv->blobs, head_file) { if (bt == blob) { found = true; break; } } if (!found) { ret = -EPERM; goto err; } /* We must drop head_file here, because we may not be the last * reference on the blob. */ list_del_init(&blob->head_file); mutex_unlock(&dev->mode_config.blob_lock); /* One reference from lookup, and one from the filp. */ drm_property_blob_put(blob); drm_property_blob_put(blob); return 0; err: mutex_unlock(&dev->mode_config.blob_lock); drm_property_blob_put(blob); return ret; } /* Some properties could refer to dynamic refcnt'd objects, or things that * need special locking to handle lifetime issues (ie. to ensure the prop * value doesn't become invalid part way through the property update due to * race). 
The value returned by reference via 'obj' should be passed back * to drm_property_change_valid_put() after the property is set (and the * object to which the property is attached has a chance to take its own * reference). */ bool drm_property_change_valid_get(struct drm_property *property, uint64_t value, struct drm_mode_object **ref) { int i; if (property->flags & DRM_MODE_PROP_IMMUTABLE) return false; *ref = NULL; if (drm_property_type_is(property, DRM_MODE_PROP_RANGE)) { if (value < property->values[0] || value > property->values[1]) return false; return true; } else if (drm_property_type_is(property, DRM_MODE_PROP_SIGNED_RANGE)) { int64_t svalue = U642I64(value); if (svalue < U642I64(property->values[0]) || svalue > U642I64(property->values[1])) return false; return true; } else if (drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) { uint64_t valid_mask = 0; for (i = 0; i < property->num_values; i++) valid_mask |= (1ULL << property->values[i]); return !(value & ~valid_mask); } else if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) { struct drm_property_blob *blob; if (value == 0) return true; blob = drm_property_lookup_blob(property->dev, value); if (blob) { *ref = &blob->base; return true; } else { return false; } } else if (drm_property_type_is(property, DRM_MODE_PROP_OBJECT)) { /* a zero value for an object property translates to null: */ if (value == 0) return true; *ref = __drm_mode_object_find(property->dev, NULL, value, property->values[0]); return *ref != NULL; } for (i = 0; i < property->num_values; i++) if (property->values[i] == value) return true; return false; } void drm_property_change_valid_put(struct drm_property *property, struct drm_mode_object *ref) { if (!ref) return; if (drm_property_type_is(property, DRM_MODE_PROP_OBJECT)) { drm_mode_object_put(ref); } else if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) drm_property_blob_put(obj_to_blob(ref)); }
5 5 3 2 5 2 3 5 5 5 5 1 5 5 2 3 5 5 8 8 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 /* * Compressed rom filesystem for Linux. 
*
 * Copyright (C) 1999 Linus Torvalds.
 *
 * This file is released under the GPL.
 */

/*
 * These are the VFS interfaces to the compressed rom filesystem.
 * The actual compression is based on zlib, see the other files.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/pfn_t.h>
#include <linux/ramfs.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/blkdev.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/super.h>
#include <linux/fs_context.h>
#include <linux/slab.h>
#include <linux/vfs.h>
#include <linux/mutex.h>
#include <uapi/linux/cramfs_fs.h>
#include <linux/uaccess.h>

#include "internal.h"

/*
 * cramfs super-block data in memory
 */
struct cramfs_sb_info {
	/* magic and flags are copied from the on-disk superblock. */
	unsigned long magic;
	/* Image size; cramfs_direct_read() bounds offset + len against it. */
	unsigned long size;
	/* blocks and files are the counts reported by cramfs_statfs(). */
	unsigned long blocks;
	unsigned long files;
	unsigned long flags;
	/* Filled in by mtd_point() when mounted on an MTD device. */
	void *linear_virt_addr;
	resource_size_t linear_phys_addr;
	size_t mtd_point_size;
};

/* Return the cramfs private data stored in the super block. */
static inline struct cramfs_sb_info *CRAMFS_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}

/* Forward declarations of the operation tables defined later in the file. */
static const struct super_operations cramfs_ops;
static const struct inode_operations cramfs_dir_inode_operations;
static const struct file_operations cramfs_directory_operations;
static const struct file_operations cramfs_physmem_fops;
static const struct address_space_operations cramfs_aops;

/* Held around every cramfs_read() call and use of its returned buffer. */
static DEFINE_MUTEX(read_mutex);


/* These macros may change in future, to provide better st_ino semantics. */
#define OFFSET(x)	((x)->i_ino)

/*
 * Compute the inode number for an on-disk inode found at byte 'offset'.
 *
 * Regular files, directories and symlinks with a non-zero offset and size
 * use their data offset (cino->offset << 2); every other entry uses
 * offset + 1.
 */
static unsigned long cramino(const struct cramfs_inode *cino, unsigned int offset)
{
	if (!cino->offset)
		return offset + 1;
	if (!cino->size)
		return offset + 1;

	/*
	 * The file mode test fixes buggy mkcramfs implementations where
	 * cramfs_inode->offset is set to a non zero value for entries
	 * which did not contain data, like devices node and fifos.
	 */
	switch (cino->mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		return cino->offset << 2;
	default:
		break;
	}
	return offset + 1;
}

/*
 * Find or set up the in-core inode for the on-disk inode 'cramfs_inode'
 * located at byte 'offset' in the image.
 *
 * Returns the inode (an already-initialised one is returned unchanged), or
 * ERR_PTR(-ENOMEM) if iget_locked() fails.
 */
static struct inode *get_cramfs_inode(struct super_block *sb,
	const struct cramfs_inode *cramfs_inode, unsigned int offset)
{
	struct inode *inode;
	static struct timespec64 zerotime;

	inode = iget_locked(sb, cramino(cramfs_inode, offset));
	if (!inode)
		return ERR_PTR(-ENOMEM);
	/* Not I_NEW: the inode was already set up, nothing more to do. */
	if (!(inode->i_state & I_NEW))
		return inode;

	switch (cramfs_inode->mode & S_IFMT) {
	case S_IFREG:
		inode->i_fop = &generic_ro_fops;
		inode->i_data.a_ops = &cramfs_aops;
		/*
		 * Use the physmem fops only with MTD support, extended
		 * block pointers and a known physical address.
		 */
		if (IS_ENABLED(CONFIG_CRAMFS_MTD) &&
		    CRAMFS_SB(sb)->flags & CRAMFS_FLAG_EXT_BLOCK_POINTERS &&
		    CRAMFS_SB(sb)->linear_phys_addr)
			inode->i_fop = &cramfs_physmem_fops;
		break;
	case S_IFDIR:
		inode->i_op = &cramfs_dir_inode_operations;
		inode->i_fop = &cramfs_directory_operations;
		break;
	case S_IFLNK:
		inode->i_op = &page_symlink_inode_operations;
		inode_nohighmem(inode);
		inode->i_data.a_ops = &cramfs_aops;
		break;
	default:
		/* For special files the size field is decoded as the dev_t. */
		init_special_inode(inode, cramfs_inode->mode,
				old_decode_dev(cramfs_inode->size));
	}

	inode->i_mode = cramfs_inode->mode;
	i_uid_write(inode, cramfs_inode->uid);
	i_gid_write(inode, cramfs_inode->gid);

	/* if the lower 2 bits are zero, the inode contains data */
	if (!(inode->i_ino & 3)) {
		inode->i_size = cramfs_inode->size;
		inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
	}

	/* Struct copy intentional */
	inode_set_mtime_to_ts(inode,
			      inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, zerotime)));
	/* inode->i_nlink is left 1 - arguably wrong for directories,
	   but it's the best we can do without reading the directory
	   contents.  1 yields the right result in GNU find, even
	   without -noleaf option. */

	unlock_new_inode(inode);

	return inode;
}

/*
 * We have our own block cache: don't fill up the buffer cache
 * with the rom-image, because the way the filesystem is set
 * up the accesses should be fairly regular and cached in the
 * page cache and dentry tree anyway..
*
 * This also acts as a way to guarantee contiguous areas of up to
 * BLKS_PER_BUF*PAGE_SIZE, so that the caller doesn't need to
 * worry about end-of-buffer issues even when decompressing a full
 * page cache.
 *
 * Note: This is all optimized away at compile time when
 *       CONFIG_CRAMFS_BLOCKDEV=n.
 */
#define READ_BUFFERS (2)
/* NEXT_BUFFER(): Loop over [0..(READ_BUFFERS-1)]. */
#define NEXT_BUFFER(_ix) ((_ix) ^ 1)

/*
 * BLKS_PER_BUF_SHIFT should be at least 2 to allow for "compressed"
 * data that takes up more space than the original and with unlucky
 * alignment.
 */
#define BLKS_PER_BUF_SHIFT	(2)
#define BLKS_PER_BUF		(1 << BLKS_PER_BUF_SHIFT)
#define BUFFER_SIZE		(BLKS_PER_BUF*PAGE_SIZE)

/* Cache storage, plus the start block and owning super block of each slot. */
static unsigned char read_buffers[READ_BUFFERS][BUFFER_SIZE];
static unsigned buffer_blocknr[READ_BUFFERS];
static struct super_block *buffer_dev[READ_BUFFERS];
/* Index of the slot that the next cache miss will overwrite. */
static int next_buffer;

/*
 * Populate our block cache and return a pointer to it.
 *
 * Returns NULL when len is 0. Pages beyond the device size or that fail
 * to read are presented as zeroes.
 */
static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset,
				unsigned int len)
{
	struct address_space *mapping = sb->s_bdev->bd_mapping;
	struct file_ra_state ra = {};
	struct page *pages[BLKS_PER_BUF];
	unsigned i, blocknr, buffer;
	unsigned long devsize;
	char *data;

	if (!len)
		return NULL;
	/* Split the byte offset into a page number and an in-page offset. */
	blocknr = offset >> PAGE_SHIFT;
	offset &= PAGE_SIZE - 1;

	/* Check if an existing buffer already has the data.. */
	for (i = 0; i < READ_BUFFERS; i++) {
		unsigned int blk_offset;

		if (buffer_dev[i] != sb)
			continue;
		if (blocknr < buffer_blocknr[i])
			continue;
		blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_SHIFT;
		blk_offset += offset;
		/* The whole [blk_offset, blk_offset + len) must fit the slot. */
		if (blk_offset > BUFFER_SIZE ||
		    blk_offset + len > BUFFER_SIZE)
			continue;
		return read_buffers[i] + blk_offset;
	}

	/* Device size expressed in pages. */
	devsize = bdev_nr_bytes(sb->s_bdev) >> PAGE_SHIFT;

	/* Ok, read in BLKS_PER_BUF pages completely first. */
	file_ra_state_init(&ra, mapping);
	page_cache_sync_readahead(mapping, &ra, NULL, blocknr, BLKS_PER_BUF);

	for (i = 0; i < BLKS_PER_BUF; i++) {
		struct page *page = NULL;

		if (blocknr + i < devsize) {
			page = read_mapping_page(mapping, blocknr + i, NULL);
			/* synchronous error? */
			if (IS_ERR(page))
				page = NULL;
		}
		pages[i] = page;
	}

	/* Claim the next slot and advance the replacement index. */
	buffer = next_buffer;
	next_buffer = NEXT_BUFFER(buffer);
	buffer_blocknr[buffer] = blocknr;
	buffer_dev[buffer] = sb;

	data = read_buffers[buffer];
	for (i = 0; i < BLKS_PER_BUF; i++) {
		struct page *page = pages[i];

		if (page) {
			memcpy_from_page(data, page, 0, PAGE_SIZE);
			put_page(page);
		} else
			/* Missing page: present it as zeroes. */
			memset(data, 0, PAGE_SIZE);
		data += PAGE_SIZE;
	}
	return read_buffers[buffer] + offset;
}

/*
 * Return a pointer to the linearly addressed cramfs image in memory.
 *
 * Returns NULL when len is 0, and the address of the zero page when the
 * requested range does not fit inside sbi->size.
 */
static void *cramfs_direct_read(struct super_block *sb, unsigned int offset,
				unsigned int len)
{
	struct cramfs_sb_info *sbi = CRAMFS_SB(sb);

	if (!len)
		return NULL;
	/* Phrased so that the range check itself cannot overflow. */
	if (len > sbi->size || offset > sbi->size - len)
		return page_address(ZERO_PAGE(0));
	return sbi->linear_virt_addr + offset;
}

/*
 * Returns a pointer to a buffer containing at least LEN bytes of
 * filesystem starting at byte offset OFFSET into the filesystem.
 *
 * The linear (MTD) mapping is used when available, otherwise the block
 * device cache; NULL is returned if neither backend is compiled in.
 */
static void *cramfs_read(struct super_block *sb, unsigned int offset,
			 unsigned int len)
{
	struct cramfs_sb_info *sbi = CRAMFS_SB(sb);

	if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sbi->linear_virt_addr)
		return cramfs_direct_read(sb, offset, len);
	else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV))
		return cramfs_blkdev_read(sb, offset, len);
	else
		return NULL;
}

/*
 * For a mapping to be possible, we need a range of uncompressed and
 * contiguous blocks. Return the offset for the first block and number of
 * valid blocks for which that is true, or zero otherwise.
*/
static u32 cramfs_get_block_range(struct inode *inode, u32 pgoff, u32 *pages)
{
	struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb);
	int i;
	u32 *blockptrs, first_block_addr;

	/*
	 * We can dereference memory directly here as this code may be
	 * reached only when there is a direct filesystem image mapping
	 * available in memory.
	 */
	blockptrs = (u32 *)(sbi->linear_virt_addr + OFFSET(inode) + pgoff * 4);
	first_block_addr = blockptrs[0] & ~CRAMFS_BLK_FLAGS;
	i = 0;
	do {
		/* Each following block must sit exactly one page further on. */
		u32 block_off = i * (PAGE_SIZE >> CRAMFS_BLK_DIRECT_PTR_SHIFT);
		u32 expect = (first_block_addr + block_off) |
			     CRAMFS_BLK_FLAG_DIRECT_PTR |
			     CRAMFS_BLK_FLAG_UNCOMPRESSED;
		if (blockptrs[i] != expect) {
			pr_debug("range: block %d/%d got %#x expects %#x\n",
				 pgoff+i, pgoff + *pages - 1,
				 blockptrs[i], expect);
			/* Even the first block is unsuitable: report failure. */
			if (i == 0)
				return 0;
			break;
		}
	} while (++i < *pages);

	/* Report how many leading blocks passed the check. */
	*pages = i;
	return first_block_addr << CRAMFS_BLK_DIRECT_PTR_SHIFT;
}

#ifdef CONFIG_MMU

/*
 * Return true if the last page of a file in the filesystem image contains
 * some other data that doesn't belong to that file. It is assumed that the
 * last block is CRAMFS_BLK_FLAG_DIRECT_PTR | CRAMFS_BLK_FLAG_UNCOMPRESSED
 * (verified by cramfs_get_block_range()) and directly accessible in memory.
 */
static bool cramfs_last_page_is_shared(struct inode *inode)
{
	struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb);
	u32 partial, last_page, blockaddr, *blockptrs;
	char *tail_data;

	/* A file ending on a page boundary has no partial last page. */
	partial = offset_in_page(inode->i_size);
	if (!partial)
		return false;
	last_page = inode->i_size >> PAGE_SHIFT;
	blockptrs = (u32 *)(sbi->linear_virt_addr + OFFSET(inode));
	blockaddr = blockptrs[last_page] & ~CRAMFS_BLK_FLAGS;
	blockaddr <<= CRAMFS_BLK_DIRECT_PTR_SHIFT;
	tail_data = sbi->linear_virt_addr + blockaddr + partial;
	/* Any non-zero byte after the file's end means the page is shared. */
	return memchr_inv(tail_data, 0, PAGE_SIZE - partial) ? true : false;
}

/*
 * mmap handler for files on a linearly mapped image (MMU case).
 *
 * Sets up a generic read-only mapping first, then tries to map the image's
 * physical pages directly into the vma. When direct mapping is not possible
 * it returns 0 anyway and normal paging is used. An error is returned only
 * if generic_file_readonly_mmap() or the page insertion itself fails.
 */
static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file_inode(file);
	struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb);
	unsigned int pages, max_pages, offset;
	unsigned long address, pgoff = vma->vm_pgoff;
	char *bailout_reason;
	int ret;

	ret = generic_file_readonly_mmap(file, vma);
	if (ret)
		return ret;

	/*
	 * Now try to pre-populate ptes for this vma with a direct
	 * mapping avoiding memory allocation when possible.
	 */

	/* Could COW work here? */
	bailout_reason = "vma is writable";
	if (vma->vm_flags & VM_WRITE)
		goto bailout;

	max_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	bailout_reason = "beyond file limit";
	if (pgoff >= max_pages)
		goto bailout;
	/* Never look at more pages than the file has past pgoff. */
	pages = min(vma_pages(vma), max_pages - pgoff);

	/* On return, 'pages' holds the number of directly mappable pages. */
	offset = cramfs_get_block_range(inode, pgoff, &pages);
	bailout_reason = "unsuitable block layout";
	if (!offset)
		goto bailout;
	address = sbi->linear_phys_addr + offset;
	bailout_reason = "data is not page aligned";
	if (!PAGE_ALIGNED(address))
		goto bailout;

	/* Don't map the last page if it contains some other data */
	if (pgoff + pages == max_pages && cramfs_last_page_is_shared(inode)) {
		pr_debug("mmap: %pD: last page is shared\n", file);
		pages--;
	}

	if (!pages) {
		bailout_reason = "no suitable block remaining";
		goto bailout;
	}

	if (pages == vma_pages(vma)) {
		/*
		 * The entire vma is mappable. remap_pfn_range() will
		 * make it distinguishable from a non-direct mapping
		 * in /proc/<pid>/maps by substituting the file offset
		 * with the actual physical address.
		 */
		ret = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
				      pages * PAGE_SIZE, vma->vm_page_prot);
	} else {
		/*
		 * Let's create a mixed map if we can't map it all.
		 * The normal paging machinery will take care of the
		 * unpopulated ptes via cramfs_read_folio().
		 */
		int i;
		vm_flags_set(vma, VM_MIXEDMAP);
		/* Stop at the first insertion failure. */
		for (i = 0; i < pages && !ret; i++) {
			vm_fault_t vmf;
			unsigned long off = i * PAGE_SIZE;
			pfn_t pfn = phys_to_pfn_t(address + off, PFN_DEV);
			vmf = vmf_insert_mixed(vma, vma->vm_start + off, pfn);
			if (vmf & VM_FAULT_ERROR)
				ret = vm_fault_to_errno(vmf, 0);
		}
	}

	if (!ret)
		pr_debug("mapped %pD[%lu] at 0x%08lx (%u/%lu pages) "
			 "to vma 0x%08lx, page_prot 0x%llx\n", file,
			 pgoff, address, pages, vma_pages(vma), vma->vm_start,
			 (unsigned long long)pgprot_val(vma->vm_page_prot));
	return ret;

bailout:
	pr_debug("%pD[%lu]: direct mmap impossible: %s\n",
		 file, pgoff, bailout_reason);
	/* Didn't manage any direct map, but normal paging is still possible */
	return 0;
}

#else /* CONFIG_MMU */

/* Without an MMU only shared mappings are accepted; others get -ENOSYS. */
static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
{
	return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -ENOSYS;
}

/*
 * Return the physical address at which the requested file range can be
 * mapped directly.
 *
 * Returns -EINVAL when the range exceeds the file, and -ENOSYS when the
 * whole range is not made of contiguous uncompressed direct blocks.
 */
static unsigned long cramfs_physmem_get_unmapped_area(struct file *file,
			unsigned long addr, unsigned long len,
			unsigned long pgoff, unsigned long flags)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
	unsigned int pages, block_pages, max_pages, offset;

	pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	max_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (pgoff >= max_pages || pages > max_pages - pgoff)
		return -EINVAL;
	block_pages = pages;
	offset = cramfs_get_block_range(inode, pgoff, &block_pages);
	/* Every requested page must be directly mappable here. */
	if (!offset || block_pages != pages)
		return -ENOSYS;
	addr = sbi->linear_phys_addr + offset;
	pr_debug("get_unmapped for %pD ofs %#lx siz %lu at 0x%08lx\n",
		 file, pgoff*PAGE_SIZE, len, addr);
	return addr;
}

/* Report the NOMMU mapping capabilities for these files. */
static unsigned int cramfs_physmem_mmap_capabilities(struct file *file)
{
	return NOMMU_MAP_COPY | NOMMU_MAP_DIRECT |
	       NOMMU_MAP_READ | NOMMU_MAP_EXEC;
}

#endif /* CONFIG_MMU */

/* File operations used for regular files when direct mapping is possible. */
static const struct file_operations cramfs_physmem_fops = {
	.llseek			= generic_file_llseek,
	.read_iter		= generic_file_read_iter,
	.splice_read		= filemap_splice_read,
	.mmap			= cramfs_physmem_mmap,
#ifndef CONFIG_MMU
	.get_unmapped_area	= cramfs_physmem_get_unmapped_area,
	.mmap_capabilities	= cramfs_physmem_mmap_capabilities,
#endif
};

/*
 * Tear down a cramfs super block: shut down the generic super block,
 * release the MTD mapping and device or the block device, then free the
 * private info.
 */
static void cramfs_kill_sb(struct super_block *sb)
{
	struct cramfs_sb_info *sbi = CRAMFS_SB(sb);

	generic_shutdown_super(sb);

	if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) {
		/* Only unpoint when a mapping was actually established. */
		if (sbi && sbi->mtd_point_size)
			mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size);
		put_mtd_device(sb->s_mtd);
		sb->s_mtd = NULL;
	} else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
		sync_blockdev(sb->s_bdev);
		bdev_fput(sb->s_bdev_file);
	}
	kfree(sbi);
}

/* Reconfigure (remount): sync and force the read-only flag to stay set. */
static int cramfs_reconfigure(struct fs_context *fc)
{
	sync_filesystem(fc->root->d_sb);
	fc->sb_flags |= SB_RDONLY;
	return 0;
}

/*
 * Read and validate the on-disk superblock into *super, looking first at
 * offset 0 and then at offset 512, and fill in the in-memory sb info.
 *
 * Returns 0 on success or -EINVAL when the magic, feature flags, root
 * inode type or root offset are not acceptable.
 */
static int cramfs_read_super(struct super_block *sb, struct fs_context *fc,
			     struct cramfs_super *super)
{
	struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
	unsigned long root_offset;
	bool silent = fc->sb_flags & SB_SILENT;

	/* We don't know the real size yet */
	sbi->size = PAGE_SIZE;

	/* Read the first block and get the superblock from it */
	mutex_lock(&read_mutex);
	memcpy(super, cramfs_read(sb, 0, sizeof(*super)), sizeof(*super));
	mutex_unlock(&read_mutex);

	/* Do sanity checks on the superblock */
	if (super->magic != CRAMFS_MAGIC) {
		/* check for wrong endianness */
		if (super->magic == CRAMFS_MAGIC_WEND) {
			if (!silent)
				errorfc(fc, "wrong endianness");
			return -EINVAL;
		}

		/* check at 512 byte offset */
		mutex_lock(&read_mutex);
		memcpy(super,
		       cramfs_read(sb, 512, sizeof(*super)),
		       sizeof(*super));
		mutex_unlock(&read_mutex);
		if (super->magic != CRAMFS_MAGIC) {
			if (super->magic == CRAMFS_MAGIC_WEND && !silent)
				errorfc(fc, "wrong endianness");
			else if (!silent)
				errorfc(fc, "wrong magic");
			return -EINVAL;
		}
	}

	/*
	 * NOTE(review): the two errors below are reported even when 'silent'
	 * is set, unlike the magic checks above - confirm this is intended.
	 */
	/* get feature flags first */
	if (super->flags & ~CRAMFS_SUPPORTED_FLAGS) {
		errorfc(fc, "unsupported filesystem features");
		return -EINVAL;
	}

	/* Check that the root inode is in a sane state */
	if (!S_ISDIR(super->root.mode)) {
		errorfc(fc, "root is not a directory");
		return -EINVAL;
	}
	/* correct strange, hard-coded permissions of mkcramfs */
	super->root.mode |= 0555;

	root_offset = super->root.offset << 2;
	if (super->flags & CRAMFS_FLAG_FSID_VERSION_2) {
		sbi->size = super->size;
		sbi->blocks = super->fsid.blocks;
		sbi->files = super->fsid.files;
	} else {
		/* No FSID v2 data: use a fixed size and zero counts. */
		sbi->size = 1<<28;
		sbi->blocks = 0;
		sbi->files = 0;
	}
	sbi->magic = super->magic;
	sbi->flags = super->flags;
	if (root_offset == 0)
		infofc(fc, "empty filesystem");
	else if (!(super->flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) &&
		 ((root_offset != sizeof(struct cramfs_super)) &&
		  (root_offset != 512 + sizeof(struct cramfs_super))))
	{
		errorfc(fc, "bad root offset %lu", root_offset);
		return -EINVAL;
	}

	return 0;
}

/*
 * Finish super block setup: mark it read-only, pin the timestamp range to
 * zero, install the super operations and create the root dentry.
 *
 * Returns 0 on success, the error from get_cramfs_inode(), or -ENOMEM if
 * d_make_root() fails.
 */
static int cramfs_finalize_super(struct super_block *sb,
				 struct cramfs_inode *cramfs_root)
{
	struct inode *root;

	/* Set it all up.. */
	sb->s_flags |= SB_RDONLY;
	sb->s_time_min = 0;
	sb->s_time_max = 0;
	sb->s_op = &cramfs_ops;
	root = get_cramfs_inode(sb, cramfs_root, 0);
	if (IS_ERR(root))
		return PTR_ERR(root);
	sb->s_root = d_make_root(root);
	if (!sb->s_root)
		return -ENOMEM;
	return 0;
}

/*
 * fill_super callback for block-device mounts. sbi is attached to the
 * super block before any failure point and is freed by cramfs_kill_sb().
 */
static int cramfs_blkdev_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct cramfs_sb_info *sbi;
	struct cramfs_super super;
	int i, err;

	sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;
	sb->s_fs_info = sbi;

	/* Invalidate the read buffers on mount: think disk change.. */
	for (i = 0; i < READ_BUFFERS; i++)
		buffer_blocknr[i] = -1;

	err = cramfs_read_super(sb, fc, &super);
	if (err)
		return err;
	return cramfs_finalize_super(sb, &super.root);
}

/*
 * fill_super callback for MTD mounts: map one page to read the superblock,
 * then remap the whole image once its size is known.
 *
 * Returns -ENODATA when mtd_point() reports success but maps a different
 * length than requested.
 */
static int cramfs_mtd_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct cramfs_sb_info *sbi;
	struct cramfs_super super;
	int err;

	sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;
	sb->s_fs_info = sbi;

	/* Map only one page for now.  Will remap it when fs size is known. */
	err = mtd_point(sb->s_mtd, 0, PAGE_SIZE, &sbi->mtd_point_size,
			&sbi->linear_virt_addr, &sbi->linear_phys_addr);
	if (err || sbi->mtd_point_size != PAGE_SIZE) {
		pr_err("unable to get direct memory access to mtd:%s\n",
		       sb->s_mtd->name);
		return err ? : -ENODATA;
	}

	pr_info("checking physical address %pap for linear cramfs image\n",
		&sbi->linear_phys_addr);
	err = cramfs_read_super(sb, fc, &super);
	if (err)
		return err;

	/* Remap the whole filesystem now */
	pr_info("linear cramfs image on mtd:%s appears to be %lu KB in size\n",
		sb->s_mtd->name, sbi->size/1024);
	mtd_unpoint(sb->s_mtd, 0, PAGE_SIZE);
	err = mtd_point(sb->s_mtd, 0, sbi->size, &sbi->mtd_point_size,
			&sbi->linear_virt_addr, &sbi->linear_phys_addr);
	if (err || sbi->mtd_point_size != sbi->size) {
		pr_err("unable to get direct memory access to mtd:%s\n",
		       sb->s_mtd->name);
		return err ? : -ENODATA;
	}

	return cramfs_finalize_super(sb, &super.root);
}

/*
 * Fill in filesystem statistics. Free counts are always 0; the fsid is
 * derived from the block device number when there is one, else from s_dev.
 */
static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	u64 id = 0;

	if (sb->s_bdev)
		id = huge_encode_dev(sb->s_bdev->bd_dev);
	else if (sb->s_dev)
		id = huge_encode_dev(sb->s_dev);

	buf->f_type = CRAMFS_MAGIC;
	buf->f_bsize = PAGE_SIZE;
	buf->f_blocks = CRAMFS_SB(sb)->blocks;
	buf->f_bfree = 0;
	buf->f_bavail = 0;
	buf->f_files = CRAMFS_SB(sb)->files;
	buf->f_ffree = 0;
	buf->f_fsid = u64_to_fsid(id);
	buf->f_namelen = CRAMFS_MAXPATHLEN;
	return 0;
}

/*
 * Read a cramfs directory entry.
 *
 * Emits entries starting at ctx->pos until the directory ends or
 * dir_emit() declines one. Returns 0 normally, -EINVAL for a position
 * that is not 4-byte aligned, -ENOMEM if the name buffer cannot be
 * allocated, or -EIO for an entry whose name is empty.
 */
static int cramfs_readdir(struct file *file, struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	char *buf;
	unsigned int offset;

	/* Offset within the thing. */
	if (ctx->pos >= inode->i_size)
		return 0;
	offset = ctx->pos;
	/* Directory entries are always 4-byte aligned */
	if (offset & 3)
		return -EINVAL;

	buf = kmalloc(CRAMFS_MAXPATHLEN, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	while (offset < inode->i_size) {
		struct cramfs_inode *de;
		unsigned long nextoffset;
		char *name;
		ino_t ino;
		umode_t mode;
		int namelen;

		/*
		 * Everything needed from the entry is copied out while
		 * read_mutex is held; 'de' and 'name' are not used after
		 * the unlock.
		 */
		mutex_lock(&read_mutex);
		de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN);
		name = (char *)(de+1);

		/*
		 * Namelengths on disk are shifted by two
		 * and the name padded out to 4-byte boundaries
		 * with zeroes.
		 */
		namelen = de->namelen << 2;
		memcpy(buf, name, namelen);
		ino = cramino(de, OFFSET(inode) + offset);
		mode = de->mode;
		mutex_unlock(&read_mutex);
		nextoffset = offset + sizeof(*de) + namelen;
		/* Strip the zero padding; an all-zero name is an I/O error. */
		for (;;) {
			if (!namelen) {
				kfree(buf);
				return -EIO;
			}
			if (buf[namelen-1])
				break;
			namelen--;
		}
		if (!dir_emit(ctx, buf, namelen, ino, mode >> 12))
			break;

		ctx->pos = offset = nextoffset;
	}
	kfree(buf);
	return 0;
}

/*
 * Lookup and fill in the inode data..
*/
static struct dentry *cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
	unsigned int offset = 0;
	struct inode *inode = NULL;
	int sorted;

	mutex_lock(&read_mutex);
	sorted = CRAMFS_SB(dir->i_sb)->flags & CRAMFS_FLAG_SORTED_DIRS;
	while (offset < dir->i_size) {
		struct cramfs_inode *de;
		char *name;
		int namelen, retval;
		int dir_off = OFFSET(dir) + offset;

		de = cramfs_read(dir->i_sb, dir_off, sizeof(*de)+CRAMFS_MAXPATHLEN);
		name = (char *)(de+1);

		/* Try to take advantage of sorted directories */
		if (sorted && (dentry->d_name.name[0] < name[0]))
			break;

		/* Advance now so that every 'continue' moves to the next entry. */
		namelen = de->namelen << 2;
		offset += sizeof(*de) + namelen;

		/* Quick check that the name is roughly the right length */
		if (((dentry->d_name.len + 3) & ~3) != namelen)
			continue;

		/* Strip the zero padding; an all-zero name is an I/O error. */
		for (;;) {
			if (!namelen) {
				inode = ERR_PTR(-EIO);
				goto out;
			}
			if (name[namelen-1])
				break;
			namelen--;
		}
		if (namelen != dentry->d_name.len)
			continue;
		retval = memcmp(dentry->d_name.name, name, namelen);
		if (retval > 0)
			continue;
		if (!retval) {
			inode = get_cramfs_inode(dir->i_sb, de, dir_off);
			break;
		}
		/* else (retval < 0) */
		if (sorted)
			break;
	}
out:
	mutex_unlock(&read_mutex);
	/* inode is NULL (not found), an ERR_PTR, or the matching inode. */
	return d_splice_alias(inode, dentry);
}

/*
 * Fill one folio of a file: locate the block through the block pointer
 * table, copy or decompress it into the folio and zero the remainder.
 *
 * Always returns 0; success or failure is reported through
 * folio_end_read().
 */
static int cramfs_read_folio(struct file *file, struct folio *folio)
{
	struct inode *inode = folio->mapping->host;
	u32 maxblock;
	int bytes_filled;
	void *pgdata;
	bool success = false;

	maxblock = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	bytes_filled = 0;
	pgdata = kmap_local_folio(folio, 0);

	/* Folios past the last block are simply zero-filled below. */
	if (folio->index < maxblock) {
		struct super_block *sb = inode->i_sb;
		u32 blkptr_offset = OFFSET(inode) + folio->index * 4;
		u32 block_ptr, block_start, block_len;
		bool uncompressed, direct;

		mutex_lock(&read_mutex);
		block_ptr = *(u32 *) cramfs_read(sb, blkptr_offset, 4);
		uncompressed = (block_ptr & CRAMFS_BLK_FLAG_UNCOMPRESSED);
		direct = (block_ptr & CRAMFS_BLK_FLAG_DIRECT_PTR);
		block_ptr &= ~CRAMFS_BLK_FLAGS;

		if (direct) {
			/*
			 * The block pointer is an absolute start pointer,
			 * shifted by 2 bits. The size is included in the
			 * first 2 bytes of the data block when compressed,
			 * or PAGE_SIZE otherwise.
			 */
			block_start = block_ptr << CRAMFS_BLK_DIRECT_PTR_SHIFT;
			if (uncompressed) {
				block_len = PAGE_SIZE;
				/* if last block: cap to file length */
				if (folio->index == maxblock - 1)
					block_len =
						offset_in_page(inode->i_size);
			} else {
				block_len = *(u16 *)
					cramfs_read(sb, block_start, 2);
				block_start += 2;
			}
		} else {
			/*
			 * The block pointer indicates one past the end of
			 * the current block (start of next block). If this
			 * is the first block then it starts where the block
			 * pointer table ends, otherwise its start comes
			 * from the previous block's pointer.
			 */
			block_start = OFFSET(inode) + maxblock * 4;
			if (folio->index)
				block_start = *(u32 *)
					cramfs_read(sb, blkptr_offset - 4, 4);
			/* Beware... previous ptr might be a direct ptr */
			if (unlikely(block_start & CRAMFS_BLK_FLAG_DIRECT_PTR)) {
				/* See comments on earlier code. */
				u32 prev_start = block_start;
				block_start = prev_start & ~CRAMFS_BLK_FLAGS;
				block_start <<= CRAMFS_BLK_DIRECT_PTR_SHIFT;
				if (prev_start & CRAMFS_BLK_FLAG_UNCOMPRESSED) {
					block_start += PAGE_SIZE;
				} else {
					block_len = *(u16 *)
						cramfs_read(sb, block_start, 2);
					block_start += 2 + block_len;
				}
			}
			block_start &= ~CRAMFS_BLK_FLAGS;
			block_len = block_ptr - block_start;
		}

		if (block_len == 0)
			; /* hole */
		else if (unlikely(block_len > 2*PAGE_SIZE ||
				  (uncompressed && block_len > PAGE_SIZE))) {
			/* Drop the lock before leaving through the error path. */
			mutex_unlock(&read_mutex);
			pr_err("bad data blocksize %u\n", block_len);
			goto err;
		} else if (uncompressed) {
			memcpy(pgdata,
			       cramfs_read(sb, block_start, block_len),
			       block_len);
			bytes_filled = block_len;
		} else {
			bytes_filled = cramfs_uncompress_block(pgdata,
				 PAGE_SIZE,
				 cramfs_read(sb, block_start, block_len),
				 block_len);
		}
		mutex_unlock(&read_mutex);
		/* A negative result from the decompressor is a failure. */
		if (unlikely(bytes_filled < 0))
			goto err;
	}

	/* Zero whatever part of the page was not filled with data. */
	memset(pgdata + bytes_filled, 0, PAGE_SIZE - bytes_filled);
	flush_dcache_folio(folio);

	success = true;
err:
	kunmap_local(pgdata);
	folio_end_read(folio, success);
	return 0;
}

/* Address space operations: cramfs only supports reading folios. */
static const struct address_space_operations cramfs_aops = {
	.read_folio = cramfs_read_folio
};

/*
 * Our operations:
 */

/*
 * A directory can only readdir
 */
static const struct file_operations cramfs_directory_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= cramfs_readdir,
};

static const struct inode_operations cramfs_dir_inode_operations = {
	.lookup		= cramfs_lookup,
};

static const struct super_operations cramfs_ops = {
	.statfs		= cramfs_statfs,
};

/*
 * Obtain the super block for a mount: try MTD first when it is compiled
 * in, then fall back to a block device. Returns -ENOPROTOOPT when neither
 * backend is enabled.
 */
static int cramfs_get_tree(struct fs_context *fc)
{
	int ret = -ENOPROTOOPT;

	if (IS_ENABLED(CONFIG_CRAMFS_MTD)) {
		ret = get_tree_mtd(fc, cramfs_mtd_fill_super);
		if (!ret)
			return 0;
	}
	/* A failed MTD attempt is overridden by the block-device result. */
	if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV))
		ret = get_tree_bdev(fc, cramfs_blkdev_fill_super);
	return ret;
}

static const struct fs_context_operations cramfs_context_ops = {
	.get_tree	= cramfs_get_tree,
	.reconfigure	= cramfs_reconfigure,
};

/*
 * Set up the filesystem mount context.
 */
static int cramfs_init_fs_context(struct fs_context *fc)
{
	fc->ops = &cramfs_context_ops;
	return 0;
}

static struct file_system_type cramfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "cramfs",
	.init_fs_context = cramfs_init_fs_context,
	.kill_sb	= cramfs_kill_sb,
	.fs_flags	= FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("cramfs");

/*
 * Module init: set up the decompressor, then register the filesystem. The
 * decompressor is torn down again if registration fails.
 */
static int __init init_cramfs_fs(void)
{
	int rv;

	rv = cramfs_uncompress_init();
	if (rv < 0)
		return rv;
	rv = register_filesystem(&cramfs_fs_type);
	if (rv < 0)
		cramfs_uncompress_exit();
	return rv;
}

/*
 * Module exit.
 *
 * NOTE(review): teardown is not the reverse of init (the decompressor is
 * released before the filesystem is unregistered) - confirm this ordering
 * is intentional.
 */
static void __exit exit_cramfs_fs(void)
{
	cramfs_uncompress_exit();
	unregister_filesystem(&cramfs_fs_type);
}

module_init(init_cramfs_fs)
module_exit(exit_cramfs_fs)
MODULE_DESCRIPTION("Compressed ROM file system support");
MODULE_LICENSE("GPL");
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2006, Intel Corporation. * * Copyright (C) 2006-2008 Intel Corporation * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> */ #ifndef _IOVA_H_ #define _IOVA_H_ #include <linux/types.h> #include <linux/kernel.h> #include <linux/rbtree.h> #include <linux/dma-mapping.h> /* iova structure */ struct iova { struct rb_node node; unsigned long pfn_hi; /* Highest allocated pfn */ unsigned long pfn_lo; /* Lowest allocated pfn */ }; struct iova_rcache; /* holds all the iova translations for a domain */ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached_node; /* Save last alloced node */ struct rb_node *cached32_node; /* Save last 32-bit alloced node */ unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; unsigned long max32_alloc_size; /* Size of last failed allocation */ struct iova anchor; /* rbtree lookup anchor */ struct iova_rcache *rcaches; struct hlist_node cpuhp_dead; }; static inline unsigned long iova_size(struct iova *iova) { return iova->pfn_hi - iova->pfn_lo + 1; } static inline unsigned long iova_shift(struct iova_domain *iovad) { return __ffs(iovad->granule); } static inline unsigned long iova_mask(struct iova_domain 
*iovad) { return iovad->granule - 1; } static inline size_t iova_offset(struct iova_domain *iovad, dma_addr_t iova) { return iova & iova_mask(iovad); } static inline size_t iova_align(struct iova_domain *iovad, size_t size) { return ALIGN(size, iovad->granule); } static inline size_t iova_align_down(struct iova_domain *iovad, size_t size) { return ALIGN_DOWN(size, iovad->granule); } static inline dma_addr_t iova_dma_addr(struct iova_domain *iovad, struct iova *iova) { return (dma_addr_t)iova->pfn_lo << iova_shift(iovad); } static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) { return iova >> iova_shift(iovad); } #if IS_REACHABLE(CONFIG_IOMMU_IOVA) int iova_cache_get(void); void iova_cache_put(void); unsigned long iova_rcache_range(void); void free_iova(struct iova_domain *iovad, unsigned long pfn); void __free_iova(struct iova_domain *iovad, struct iova *iova); struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool size_aligned); void free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size); unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool flush_rcache); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); int iova_domain_init_rcaches(struct iova_domain *iovad); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); #else static inline int iova_cache_get(void) { return -ENOTSUPP; } static inline void iova_cache_put(void) { } static inline void free_iova(struct iova_domain *iovad, unsigned long pfn) { } static inline void __free_iova(struct iova_domain *iovad, struct iova *iova) { } static inline struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool size_aligned) 
{ return NULL; } static inline void free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size) { } static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool flush_rcache) { return 0; } static inline struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi) { return NULL; } static inline void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn) { } static inline struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) { return NULL; } static inline void put_iova_domain(struct iova_domain *iovad) { } #endif #endif
18 18 18 17 18 18 18 18 8 9 8 1 8 9 82 82 82 1 3 82 81 2 77 462 451 13 13 13 13 5 5 1 1 3 13 50 50 50 50 50 30 32 50 50 18 18 18 18 1 18 130 80 50 32 18 18 50 32 18 2 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 
485 486 487 488 // SPDX-License-Identifier: GPL-2.0-only /* * Yama Linux Security Module * * Author: Kees Cook <keescook@chromium.org> * * Copyright (C) 2010 Canonical, Ltd. * Copyright (C) 2011 The Chromium OS Authors. */ #include <linux/lsm_hooks.h> #include <linux/sysctl.h> #include <linux/ptrace.h> #include <linux/prctl.h> #include <linux/ratelimit.h> #include <linux/workqueue.h> #include <linux/string_helpers.h> #include <linux/task_work.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <uapi/linux/lsm.h> #define YAMA_SCOPE_DISABLED 0 #define YAMA_SCOPE_RELATIONAL 1 #define YAMA_SCOPE_CAPABILITY 2 #define YAMA_SCOPE_NO_ATTACH 3 static int ptrace_scope = YAMA_SCOPE_RELATIONAL; /* describe a ptrace relationship for potential exception */ struct ptrace_relation { struct task_struct *tracer; struct task_struct *tracee; bool invalid; struct list_head node; struct rcu_head rcu; }; static LIST_HEAD(ptracer_relations); static DEFINE_SPINLOCK(ptracer_relations_lock); static void yama_relation_cleanup(struct work_struct *work); static DECLARE_WORK(yama_relation_work, yama_relation_cleanup); struct access_report_info { struct callback_head work; const char *access; struct task_struct *target; struct task_struct *agent; }; static void __report_access(struct callback_head *work) { struct access_report_info *info = container_of(work, struct access_report_info, work); char *target_cmd, *agent_cmd; target_cmd = kstrdup_quotable_cmdline(info->target, GFP_KERNEL); agent_cmd = kstrdup_quotable_cmdline(info->agent, GFP_KERNEL); pr_notice_ratelimited( "ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n", info->access, target_cmd, info->target->pid, agent_cmd, info->agent->pid); kfree(agent_cmd); kfree(target_cmd); put_task_struct(info->agent); put_task_struct(info->target); kfree(info); } /* defers execution because cmdline access can sleep */ static void report_access(const char *access, struct task_struct *target, struct task_struct *agent) { struct 
access_report_info *info; char agent_comm[sizeof(agent->comm)]; assert_spin_locked(&target->alloc_lock); /* for target->comm */ if (current->flags & PF_KTHREAD) { /* I don't think kthreads call task_work_run() before exiting. * Imagine angry ranting about procfs here. */ pr_notice_ratelimited( "ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n", access, target->comm, target->pid, get_task_comm(agent_comm, agent), agent->pid); return; } info = kmalloc(sizeof(*info), GFP_ATOMIC); if (!info) return; init_task_work(&info->work, __report_access); get_task_struct(target); get_task_struct(agent); info->access = access; info->target = target; info->agent = agent; if (task_work_add(current, &info->work, TWA_RESUME) == 0) return; /* success */ WARN(1, "report_access called from exiting task"); put_task_struct(target); put_task_struct(agent); kfree(info); } /** * yama_relation_cleanup - remove invalid entries from the relation list * @work: unused * */ static void yama_relation_cleanup(struct work_struct *work) { struct ptrace_relation *relation; spin_lock(&ptracer_relations_lock); rcu_read_lock(); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) { list_del_rcu(&relation->node); kfree_rcu(relation, rcu); } } rcu_read_unlock(); spin_unlock(&ptracer_relations_lock); } /** * yama_ptracer_add - add/replace an exception for this tracer/tracee pair * @tracer: the task_struct of the process doing the ptrace * @tracee: the task_struct of the process to be ptraced * * Each tracee can have, at most, one tracer registered. Each time this * is called, the prior registered tracer will be replaced for the tracee. * * Returns 0 if relationship was added, -ve on error. 
*/
static int yama_ptracer_add(struct task_struct *tracer,
			    struct task_struct *tracee)
{
	struct ptrace_relation *relation, *added;

	/* Allocate before taking the spinlock: GFP_KERNEL may sleep. */
	added = kmalloc(sizeof(*added), GFP_KERNEL);
	if (!added)
		return -ENOMEM;

	added->tracee = tracee;
	added->tracer = tracer;
	added->invalid = false;

	spin_lock(&ptracer_relations_lock);
	rcu_read_lock();
	list_for_each_entry_rcu(relation, &ptracer_relations, node) {
		/* Entries already marked invalid are left for the cleanup work. */
		if (relation->invalid)
			continue;
		if (relation->tracee == tracee) {
			/* Swap the existing entry for the new one in place. */
			list_replace_rcu(&relation->node, &added->node);
			kfree_rcu(relation, rcu);
			goto out;
		}
	}

	/* No valid entry for this tracee yet: insert a fresh one. */
	list_add_rcu(&added->node, &ptracer_relations);

out:
	rcu_read_unlock();
	spin_unlock(&ptracer_relations_lock);
	return 0;
}

/**
 * yama_ptracer_del - remove exceptions related to the given tasks
 * @tracer: remove any relation where tracer task matches
 * @tracee: remove any relation where tracee task matches
 *
 * Matching entries are only flagged invalid here; they are unlinked and
 * freed later by the scheduled yama_relation_work.  A NULL @tracer
 * matches on @tracee alone.
 */
static void yama_ptracer_del(struct task_struct *tracer,
			     struct task_struct *tracee)
{
	struct ptrace_relation *relation;
	bool marked = false;

	rcu_read_lock();
	list_for_each_entry_rcu(relation, &ptracer_relations, node) {
		if (relation->invalid)
			continue;
		if (relation->tracee == tracee ||
		    (tracer && relation->tracer == tracer)) {
			relation->invalid = true;
			marked = true;
		}
	}
	rcu_read_unlock();

	/* Only kick the cleanup work when something was actually flagged. */
	if (marked)
		schedule_work(&yama_relation_work);
}

/**
 * yama_task_free - check for task_pid to remove from exception list
 * @task: task being removed
 *
 * Drops every relation in which @task appears as tracer or as tracee.
 */
static void yama_task_free(struct task_struct *task)
{
	yama_ptracer_del(task, task);
}

/**
 * yama_task_prctl - check for Yama-specific prctl operations
 * @option: operation
 * @arg2: argument
 * @arg3: argument
 * @arg4: argument
 * @arg5: argument
 *
 * Return 0 on success, -ve on error.  -ENOSYS is returned when Yama
 * does not handle the given option.
*/
static int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3,
			   unsigned long arg4, unsigned long arg5)
{
	struct task_struct *self = current;
	struct task_struct *tracer;
	int rc;

	/* PR_SET_PTRACER is the only option handled here. */
	if (option != PR_SET_PTRACER)
		return -ENOSYS;

	/* Since a thread can call prctl(), find the group leader
	 * before calling _add() or _del() on it, since we want
	 * process-level granularity of control. The tracer group
	 * leader checking is handled later when walking the ancestry
	 * at the time of PTRACE_ATTACH check.
	 */
	rcu_read_lock();
	if (!thread_group_leader(self))
		self = rcu_dereference(self->group_leader);
	get_task_struct(self);
	rcu_read_unlock();

	if (!arg2) {
		/* Zero clears any exception registered for this process. */
		yama_ptracer_del(NULL, self);
		rc = 0;
	} else if (arg2 == PR_SET_PTRACER_ANY || (int)arg2 == -1) {
		/* A NULL tracer is recorded for the "any" request. */
		rc = yama_ptracer_add(NULL, self);
	} else {
		/* Otherwise arg2 is looked up as a pid. */
		tracer = find_get_task_by_vpid(arg2);
		if (tracer) {
			rc = yama_ptracer_add(tracer, self);
			put_task_struct(tracer);
		} else {
			rc = -EINVAL;
		}
	}

	put_task_struct(self);
	return rc;
}

/**
 * task_is_descendant - walk up a process family tree looking for a match
 * @parent: the process to compare against while walking up from child
 * @child: the process to start from while looking upwards for parent
 *
 * Returns 1 if child is a descendant of parent, 0 if not.
*/ static int task_is_descendant(struct task_struct *parent, struct task_struct *child) { int rc = 0; struct task_struct *walker = child; if (!parent || !child) return 0; rcu_read_lock(); if (!thread_group_leader(parent)) parent = rcu_dereference(parent->group_leader); while (walker->pid > 0) { if (!thread_group_leader(walker)) walker = rcu_dereference(walker->group_leader); if (walker == parent) { rc = 1; break; } walker = rcu_dereference(walker->real_parent); } rcu_read_unlock(); return rc; } /** * ptracer_exception_found - tracer registered as exception for this tracee * @tracer: the task_struct of the process attempting ptrace * @tracee: the task_struct of the process to be ptraced * * Returns 1 if tracer has a ptracer exception ancestor for tracee. */ static int ptracer_exception_found(struct task_struct *tracer, struct task_struct *tracee) { int rc = 0; struct ptrace_relation *relation; struct task_struct *parent = NULL; bool found = false; rcu_read_lock(); /* * If there's already an active tracing relationship, then make an * exception for the sake of other accesses, like process_vm_rw(). */ parent = ptrace_parent(tracee); if (parent != NULL && same_thread_group(parent, tracer)) { rc = 1; goto unlock; } /* Look for a PR_SET_PTRACER relationship. */ if (!thread_group_leader(tracee)) tracee = rcu_dereference(tracee->group_leader); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) continue; if (relation->tracee == tracee) { parent = relation->tracer; found = true; break; } } if (found && (parent == NULL || task_is_descendant(parent, tracer))) rc = 1; unlock: rcu_read_unlock(); return rc; } /** * yama_ptrace_access_check - validate PTRACE_ATTACH calls * @child: task that current task is attempting to ptrace * @mode: ptrace attach mode * * Returns 0 if following the ptrace is allowed, -ve on error. 
*/
static int yama_ptrace_access_check(struct task_struct *child,
				    unsigned int mode)
{
	int rc = 0;

	/* require ptrace target be a child of ptracer on attach */
	if (mode & PTRACE_MODE_ATTACH) {
		switch (ptrace_scope) {
		case YAMA_SCOPE_DISABLED:
			/* No additional restrictions. */
			break;
		case YAMA_SCOPE_RELATIONAL:
			rcu_read_lock();
			if (!pid_alive(child))
				rc = -EPERM;
			/*
			 * Allowed when current is an ancestor of child, when
			 * an exception is registered, or when current holds
			 * CAP_SYS_PTRACE in child's user namespace.  The
			 * capability check is evaluated last.
			 */
			if (!rc && !task_is_descendant(current, child) &&
			    !ptracer_exception_found(current, child) &&
			    !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
				rc = -EPERM;
			rcu_read_unlock();
			break;
		case YAMA_SCOPE_CAPABILITY:
			/* Only the capability grants access in this scope. */
			rcu_read_lock();
			if (!ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
				rc = -EPERM;
			rcu_read_unlock();
			break;
		case YAMA_SCOPE_NO_ATTACH:
		default:
			rc = -EPERM;
			break;
		}
	}

	/* Log the denial unless the caller asked for no auditing. */
	if (rc && (mode & PTRACE_MODE_NOAUDIT) == 0)
		report_access("attach", child, current);

	return rc;
}

/**
 * yama_ptrace_traceme - validate PTRACE_TRACEME calls
 * @parent: task that will become the ptracer of the current task
 *
 * Returns 0 if following the ptrace is allowed, -ve on error.
 */
static int yama_ptrace_traceme(struct task_struct *parent)
{
	int rc = 0;

	/* Only disallow PTRACE_TRACEME on more aggressive settings. */
	switch (ptrace_scope) {
	case YAMA_SCOPE_CAPABILITY:
		if (!has_ns_capability(parent, current_user_ns(), CAP_SYS_PTRACE))
			rc = -EPERM;
		break;
	case YAMA_SCOPE_NO_ATTACH:
		rc = -EPERM;
		break;
	}

	if (rc) {
		/* report_access() asserts that the target's alloc_lock is held. */
		task_lock(current);
		report_access("traceme", current, parent);
		task_unlock(current);
	}

	return rc;
}

static const struct lsm_id yama_lsmid = {
	.name = "yama",
	.id = LSM_ID_YAMA,
};

/* LSM hooks implemented by Yama. */
static struct security_hook_list yama_hooks[] __ro_after_init = {
	LSM_HOOK_INIT(ptrace_access_check, yama_ptrace_access_check),
	LSM_HOOK_INIT(ptrace_traceme, yama_ptrace_traceme),
	LSM_HOOK_INIT(task_prctl, yama_task_prctl),
	LSM_HOOK_INIT(task_free, yama_task_free),
};

#ifdef CONFIG_SYSCTL
/*
 * Sysctl handler for ptrace_scope.  Writes require CAP_SYS_PTRACE.  Works
 * on a copy of the table so the lower bound can be raised to the upper
 * bound once the value has reached the maximum.
 */
static int yama_dointvec_minmax(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table table_copy;

	if (write && !capable(CAP_SYS_PTRACE))
		return -EPERM;

	/* Lock the max value if it ever gets set. */
	table_copy = *table;
	if (*(int *)table_copy.data == *(int *)table_copy.extra2)
		table_copy.extra1 = table_copy.extra2;

	return proc_dointvec_minmax(&table_copy, write, buffer, lenp, ppos);
}

static int max_scope = YAMA_SCOPE_NO_ATTACH;

static struct ctl_table yama_sysctl_table[] = {
	{
		.procname       = "ptrace_scope",
		.data           = &ptrace_scope,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = yama_dointvec_minmax,
		.extra1         = SYSCTL_ZERO,
		.extra2         = &max_scope,
	},
};

/* Register kernel/yama/ptrace_scope; failure to register is fatal. */
static void __init yama_init_sysctl(void)
{
	if (!register_sysctl("kernel/yama", yama_sysctl_table))
		panic("Yama: sysctl registration failed.\n");
}
#else
static inline void yama_init_sysctl(void) { }
#endif /* CONFIG_SYSCTL */

/* LSM init: install the hooks, then the sysctl. */
static int __init yama_init(void)
{
	pr_info("Yama: becoming mindful.\n");
	security_add_hooks(yama_hooks, ARRAY_SIZE(yama_hooks), &yama_lsmid);
	yama_init_sysctl();
	return 0;
}

DEFINE_LSM(yama) = {
	.name = "yama",
	.init = yama_init,
};
10 2 8 2 6 2 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6/ip6t_NPT.h> #include <linux/netfilter/x_tables.h> static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) { struct ip6t_npt_tginfo *npt = par->targinfo; struct in6_addr pfx; __wsum src_sum, dst_sum; if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) return -EINVAL; /* Ensure that LSB of prefix is zero */ ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len); if (!ipv6_addr_equal(&pfx, &npt->src_pfx.in6)) return -EINVAL; ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len); if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6)) return -EINVAL; src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0); dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0); npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum)); return 0; } static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, struct in6_addr *addr) { unsigned int pfx_len; unsigned int i, idx; __be32 mask; __sum16 sum; pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len); for (i = 0; i < pfx_len; i += 32) { if 
(pfx_len - i >= 32) mask = 0; else mask = htonl((1 << (i - pfx_len + 32)) - 1); idx = i / 32; addr->s6_addr32[idx] &= mask; addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx]; } if (pfx_len <= 48) idx = 3; else { for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) { if ((__force __sum16)addr->s6_addr16[idx] != CSUM_MANGLED_0) break; } if (idx == ARRAY_SIZE(addr->s6_addr16)) return false; } sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]), csum_unfold(npt->adjustment))); if (sum == CSUM_MANGLED_0) sum = 0; *(__force __sum16 *)&addr->s6_addr16[idx] = sum; return true; } static struct ipv6hdr *icmpv6_bounced_ipv6hdr(struct sk_buff *skb, struct ipv6hdr *_bounced_hdr) { if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) return NULL; if (!icmpv6_is_err(icmp6_hdr(skb)->icmp6_type)) return NULL; return skb_header_pointer(skb, skb_transport_offset(skb) + sizeof(struct icmp6hdr), sizeof(struct ipv6hdr), _bounced_hdr); } static unsigned int ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_npt_tginfo *npt = par->targinfo; struct ipv6hdr _bounced_hdr; struct ipv6hdr *bounced_hdr; struct in6_addr bounced_pfx; if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) { icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, saddr)); return NF_DROP; } /* rewrite dst addr of bounced packet which was sent to dst range */ bounced_hdr = icmpv6_bounced_ipv6hdr(skb, &_bounced_hdr); if (bounced_hdr) { ipv6_addr_prefix(&bounced_pfx, &bounced_hdr->daddr, npt->src_pfx_len); if (ipv6_addr_cmp(&bounced_pfx, &npt->src_pfx.in6) == 0) ip6t_npt_map_pfx(npt, &bounced_hdr->daddr); } return XT_CONTINUE; } static unsigned int ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_npt_tginfo *npt = par->targinfo; struct ipv6hdr _bounced_hdr; struct ipv6hdr *bounced_hdr; struct in6_addr bounced_pfx; if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) { icmpv6_send(skb, 
ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, daddr)); return NF_DROP; } /* rewrite src addr of bounced packet which was sent from dst range */ bounced_hdr = icmpv6_bounced_ipv6hdr(skb, &_bounced_hdr); if (bounced_hdr) { ipv6_addr_prefix(&bounced_pfx, &bounced_hdr->saddr, npt->src_pfx_len); if (ipv6_addr_cmp(&bounced_pfx, &npt->src_pfx.in6) == 0) ip6t_npt_map_pfx(npt, &bounced_hdr->saddr); } return XT_CONTINUE; } static struct xt_target ip6t_npt_target_reg[] __read_mostly = { { .name = "SNPT", .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_POST_ROUTING), .me = THIS_MODULE, }, { .name = "DNPT", .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), .me = THIS_MODULE, }, }; static int __init ip6t_npt_init(void) { return xt_register_targets(ip6t_npt_target_reg, ARRAY_SIZE(ip6t_npt_target_reg)); } static void __exit ip6t_npt_exit(void) { xt_unregister_targets(ip6t_npt_target_reg, ARRAY_SIZE(ip6t_npt_target_reg)); } module_init(ip6t_npt_init); module_exit(ip6t_npt_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS("ip6t_SNPT"); MODULE_ALIAS("ip6t_DNPT");
32 600 619 556 601 36 19 19 6 18 5 7 2 2 19 9 13 1 7 523 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2023 Isovalent */ #ifndef __NET_TCX_H #define __NET_TCX_H #include <linux/bpf.h> #include <linux/bpf_mprog.h> #include <net/sch_generic.h> struct mini_Qdisc; struct tcx_entry { struct mini_Qdisc __rcu *miniq; struct bpf_mprog_bundle bundle; u32 miniq_active; struct rcu_head rcu; }; struct tcx_link { struct bpf_link link; struct net_device *dev; u32 location; }; static inline void tcx_set_ingress(struct sk_buff *skb, bool ingress) { #ifdef CONFIG_NET_XGRESS skb->tc_at_ingress = ingress; #endif } #ifdef CONFIG_NET_XGRESS static inline struct tcx_entry *tcx_entry(struct bpf_mprog_entry *entry) { struct bpf_mprog_bundle *bundle = entry->parent; return container_of(bundle, struct tcx_entry, bundle); } static inline struct tcx_link *tcx_link(const struct bpf_link *link) { return container_of(link, struct tcx_link, link); } void tcx_inc(void); void tcx_dec(void); static inline void tcx_entry_sync(void) { /* bpf_mprog_entry got a/b swapped, therefore ensure that * there are no inflight users on the old one anymore. 
*/ synchronize_rcu(); } static inline void tcx_entry_update(struct net_device *dev, struct bpf_mprog_entry *entry, bool ingress) { ASSERT_RTNL(); if (ingress) rcu_assign_pointer(dev->tcx_ingress, entry); else rcu_assign_pointer(dev->tcx_egress, entry); } static inline struct bpf_mprog_entry * tcx_entry_fetch(struct net_device *dev, bool ingress) { ASSERT_RTNL(); if (ingress) return rcu_dereference_rtnl(dev->tcx_ingress); else return rcu_dereference_rtnl(dev->tcx_egress); } static inline struct bpf_mprog_entry *tcx_entry_create_noprof(void) { struct tcx_entry *tcx = kzalloc_noprof(sizeof(*tcx), GFP_KERNEL); if (tcx) { bpf_mprog_bundle_init(&tcx->bundle); return &tcx->bundle.a; } return NULL; } #define tcx_entry_create(...) alloc_hooks(tcx_entry_create_noprof(__VA_ARGS__)) static inline void tcx_entry_free(struct bpf_mprog_entry *entry) { kfree_rcu(tcx_entry(entry), rcu); } static inline struct bpf_mprog_entry * tcx_entry_fetch_or_create(struct net_device *dev, bool ingress, bool *created) { struct bpf_mprog_entry *entry = tcx_entry_fetch(dev, ingress); *created = false; if (!entry) { entry = tcx_entry_create(); if (!entry) return NULL; *created = true; } return entry; } static inline void tcx_skeys_inc(bool ingress) { tcx_inc(); if (ingress) net_inc_ingress_queue(); else net_inc_egress_queue(); } static inline void tcx_skeys_dec(bool ingress) { if (ingress) net_dec_ingress_queue(); else net_dec_egress_queue(); tcx_dec(); } static inline void tcx_miniq_inc(struct bpf_mprog_entry *entry) { ASSERT_RTNL(); tcx_entry(entry)->miniq_active++; } static inline void tcx_miniq_dec(struct bpf_mprog_entry *entry) { ASSERT_RTNL(); tcx_entry(entry)->miniq_active--; } static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry) { ASSERT_RTNL(); return bpf_mprog_total(entry) || tcx_entry(entry)->miniq_active; } static inline enum tcx_action_base tcx_action_code(struct sk_buff *skb, int code) { switch (code) { case TCX_PASS: skb->tc_index = qdisc_skb_cb(skb)->tc_classid; 
fallthrough; case TCX_DROP: case TCX_REDIRECT: return code; case TCX_NEXT: default: return TCX_NEXT; } } #endif /* CONFIG_NET_XGRESS */ #if defined(CONFIG_NET_XGRESS) && defined(CONFIG_BPF_SYSCALL) int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog); void tcx_uninstall(struct net_device *dev, bool ingress); int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); static inline void dev_tcx_uninstall(struct net_device *dev) { ASSERT_RTNL(); tcx_uninstall(dev, true); tcx_uninstall(dev, false); } #else static inline int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EINVAL; } static inline int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EINVAL; } static inline int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EINVAL; } static inline int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { return -EINVAL; } static inline void dev_tcx_uninstall(struct net_device *dev) { } #endif /* CONFIG_NET_XGRESS && CONFIG_BPF_SYSCALL */ #endif /* __NET_TCX_H */
2 6 283 286 286 282 283 725 117 447 542 717 506 446 31 728 610 608 606 372 6 313 604 10 10 10 10 2 2 1 8 1 5 3 278 1 277 3 259 590 22 447 9 261 3 49 586 5 585 594 22 593 594 197 464 40 17 599 23 126 265 233 265 230 1 169 168 230 253 125 6 587 60 596 1 3 592 246 589 188 168 19 169 12 4 181 184 176 8 5 20 3 32 1 5 1 1 25 4 5 22 19 19 8 5 26 129 122 1 19 5 6 2 6 3 2 2 6 2 6 28 28 232 2 182 72 2 7 10 255 2 32 240 63 37 28 198 199 199 231 200 28 50 189 79 64 64 64 57 6 2 63 1 20 5 6 11 4 1 3 2 21 2 19 17 1 7 6 4 3 17 17 4 1 2 2 3 5 2 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 
393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 
893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 
1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 
1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 // SPDX-License-Identifier: GPL-2.0-or-later /* Keyring handling * * Copyright (C) 2004-2005, 2008, 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/export.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/security.h> #include <linux/seq_file.h> #include <linux/err.h> #include <linux/user_namespace.h> #include <linux/nsproxy.h> #include <keys/keyring-type.h> #include <keys/user-type.h> #include <linux/assoc_array_priv.h> #include <linux/uaccess.h> #include <net/net_namespace.h> #include "internal.h" /* * When plumbing the depths of the key tree, this sets a hard limit * set on how deep we're willing to go. */ #define KEYRING_SEARCH_MAX_DEPTH 6 /* * We mark pointers we pass to the associative array with bit 1 set if * they're keyrings and clear otherwise. */ #define KEYRING_PTR_SUBTYPE 0x2UL static inline bool keyring_ptr_is_keyring(const struct assoc_array_ptr *x) { return (unsigned long)x & KEYRING_PTR_SUBTYPE; } static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x) { void *object = assoc_array_ptr_to_leaf(x); return (struct key *)((unsigned long)object & ~KEYRING_PTR_SUBTYPE); } static inline void *keyring_key_to_ptr(struct key *key) { if (key->type == &key_type_keyring) return (void *)((unsigned long)key | KEYRING_PTR_SUBTYPE); return key; } static DEFINE_RWLOCK(keyring_name_lock); /* * Clean up the bits of user_namespace that belong to us. 
*/
void key_free_user_ns(struct user_namespace *ns)
{
	/* Take the namespace's name list out of circulation under the lock. */
	write_lock(&keyring_name_lock);
	list_del_init(&ns->keyring_name_list);
	write_unlock(&keyring_name_lock);

	/* Drop the references held on the per-namespace register keyrings. */
	key_put(ns->user_keyring_register);
#ifdef CONFIG_PERSISTENT_KEYRINGS
	key_put(ns->persistent_keyring_register);
#endif
}

/*
 * The keyring key type definition.  Keyrings are simply keys of this type and
 * can be treated as ordinary keys in addition to having their own special
 * operations.
 */
static int keyring_preparse(struct key_preparsed_payload *prep);
static void keyring_free_preparse(struct key_preparsed_payload *prep);
static int keyring_instantiate(struct key *keyring,
			       struct key_preparsed_payload *prep);
static void keyring_revoke(struct key *keyring);
static void keyring_destroy(struct key *keyring);
static void keyring_describe(const struct key *keyring, struct seq_file *m);
static long keyring_read(const struct key *keyring,
			 char *buffer, size_t buflen);

struct key_type key_type_keyring = {
	.name		= "keyring",
	.def_datalen	= 0,
	.preparse	= keyring_preparse,
	.free_preparse	= keyring_free_preparse,
	.instantiate	= keyring_instantiate,
	.revoke		= keyring_revoke,
	.destroy	= keyring_destroy,
	.describe	= keyring_describe,
	.read		= keyring_read,
};
EXPORT_SYMBOL(key_type_keyring);

/*
 * Mutex to serialise link calls against one another, so that two link calls
 * running in parallel cannot between them introduce a cycle.
 */
static DEFINE_MUTEX(keyring_serialise_link_lock);

/*
 * Publish the name of a keyring so that it can be found by name (if it has
 * one and it doesn't begin with a dot).
 *
 * The keyring is appended to the name list of the current user namespace
 * under keyring_name_lock.
 */
static void keyring_publish_name(struct key *keyring)
{
	struct user_namespace *ns = current_user_ns();

	if (keyring->description &&
	    keyring->description[0] &&
	    keyring->description[0] != '.') {
		write_lock(&keyring_name_lock);
		list_add_tail(&keyring->name_link, &ns->keyring_name_list);
		write_unlock(&keyring_name_lock);
	}
}

/*
 * Preparse a keyring payload.
 *
 * A keyring carries no payload data: returns 0 if @prep->datalen is zero and
 * -EINVAL otherwise.
 */
static int keyring_preparse(struct key_preparsed_payload *prep)
{
	return prep->datalen != 0 ? -EINVAL : 0;
}

/*
 * Free a preparsed keyring payload.  Nothing is allocated by
 * keyring_preparse(), so there is nothing to release here.
 */
static void keyring_free_preparse(struct key_preparsed_payload *prep)
{
}

/*
 * Initialise a keyring.
 *
 * Sets up the keyring's (empty) associative array and publishes its name.
 * This function itself always returns 0; the -EINVAL for a non-empty payload
 * is produced by keyring_preparse().
 */
static int keyring_instantiate(struct key *keyring,
			       struct key_preparsed_payload *prep)
{
	assoc_array_init(&keyring->keys);
	/* make the keyring available by name if it has one */
	keyring_publish_name(keyring);
	return 0;
}

/*
 * Multiply 64-bits by 32-bits to 96-bits and fold back to 64-bit.  Ideally we'd
 * fold the carry back too, but that requires inline asm.
 *
 * The high and low 32-bit halves of @x are multiplied by @y separately; the
 * top 32 bits of the high product are added back into the bottom of the
 * result rather than being discarded.
 */
static u64 mult_64x32_and_fold(u64 x, u32 y)
{
	u64 hi = (u64)(u32)(x >> 32) * y;
	u64 lo = (u64)(u32)(x) * y;
	return lo + ((u64)(u32)hi << 32) + (u32)(hi >> 32);
}

/*
 * Hash a key type and description.
 *
 * The type pointer, description length, domain tag pointer and the
 * description bytes (consumed up to four at a time) are mixed into a 64-bit
 * accumulator, which is then folded and adjusted before being stored in
 * @index_key->hash.
 */
static void hash_key_type_and_desc(struct keyring_index_key *index_key)
{
	const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP;
	const unsigned long fan_mask = ASSOC_ARRAY_FAN_MASK;
	const char *description = index_key->description;
	unsigned long hash, type;
	u32 piece;
	u64 acc;
	int n, desc_len = index_key->desc_len;

	type = (unsigned long)index_key->type;
	acc = mult_64x32_and_fold(type, desc_len + 13);
	acc = mult_64x32_and_fold(acc, 9207);
	/* Only the low 32 bits of the tag pointer survive the u32 assignment. */
	piece = (unsigned long)index_key->domain_tag;
	acc = mult_64x32_and_fold(acc, piece);
	acc = mult_64x32_and_fold(acc, 9207);

	for (;;) {
		n = desc_len;
		if (n <= 0)
			break;
		if (n > 4)
			n = 4;
		/* Zero first so that a short final piece is zero-padded. */
		piece = 0;
		memcpy(&piece, description, n);
		description += n;
		desc_len -= n;
		acc = mult_64x32_and_fold(acc, piece);
		acc = mult_64x32_and_fold(acc, 9207);
	}

	/* Fold the hash down to 32 bits if need be. */
	hash = acc;
	if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32)
		hash ^= acc >> 32;

	/* Squidge all the keyrings into a separate part of the tree to
	 * ordinary keys by making sure the lowest level segment in the hash is
	 * zero for keyrings and non-zero otherwise.
	 */
	if (index_key->type != &key_type_keyring && (hash & fan_mask) == 0)
		hash |= (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1;
	else if (index_key->type == &key_type_keyring && (hash & fan_mask) != 0)
		hash = (hash + (hash << level_shift)) & ~fan_mask;
	index_key->hash = hash;
}

/*
 * Finalise an index key to include a part of the description actually in the
 * index key, to set the domain tag and to calculate the hash.
 *
 * If no domain tag has been set yet, key types flagged KEY_TYPE_NET_DOMAIN
 * get the key domain of the current task's network namespace; all other
 * types share a single static default tag.
 */
void key_set_index_key(struct keyring_index_key *index_key)
{
	static struct key_tag default_domain_tag = { .usage = REFCOUNT_INIT(1), };
	size_t n = min_t(size_t, index_key->desc_len, sizeof(index_key->desc));

	/* Cache the leading bytes of the description inside the index key. */
	memcpy(index_key->desc, index_key->description, n);

	if (!index_key->domain_tag) {
		if (index_key->type->flags & KEY_TYPE_NET_DOMAIN)
			index_key->domain_tag = current->nsproxy->net_ns->key_domain;
		else
			index_key->domain_tag = &default_domain_tag;
	}

	hash_key_type_and_desc(index_key);
}

/**
 * key_put_tag - Release a ref on a tag.
 * @tag: The tag to release.
 *
 * This releases a reference to the given tag and returns true if that ref was
 * the last one.  When the last reference goes, the tag is freed via
 * kfree_rcu().
 */
bool key_put_tag(struct key_tag *tag)
{
	if (refcount_dec_and_test(&tag->usage)) {
		kfree_rcu(tag, rcu);
		return true;
	}

	return false;
}

/**
 * key_remove_domain - Kill off a key domain and gc its keys
 * @domain_tag: The domain tag to release.
 *
 * This marks a domain tag as being dead and releases a ref on it.  If that
 * wasn't the last reference, the garbage collector is poked to try and delete
 * all keys that were in the domain.
 */
void key_remove_domain(struct key_tag *domain_tag)
{
	domain_tag->removed = true;
	if (!key_put_tag(domain_tag))
		key_schedule_gc_links();
}

/*
 * Build the next index key chunk.
 *
 * We return it one word-sized chunk at a time.
*/
static unsigned long keyring_get_key_chunk(const void *data, int level)
{
	const struct keyring_index_key *index_key = data;
	unsigned long chunk = 0;
	const u8 *d;
	int desc_len = index_key->desc_len, n = sizeof(chunk);

	/* Reduce @level to a chunk number. */
	level /= ASSOC_ARRAY_KEY_CHUNK_SIZE;
	switch (level) {
	case 0:
		return index_key->hash;
	case 1:
		return index_key->x;
	case 2:
		return (unsigned long)index_key->type;
	case 3:
		return (unsigned long)index_key->domain_tag;
	default:
		/* Chunks 4 and up come from the part of the description that
		 * lies beyond the bytes cached in index_key->desc.
		 */
		level -= 4;
		if (desc_len <= sizeof(index_key->desc))
			return 0;

		d = index_key->description + sizeof(index_key->desc);
		d += level * sizeof(long);
		/* NOTE(review): desc_len is reduced by sizeof(desc) only, not
		 * by level * sizeof(long) - presumably callers never ask for
		 * a chunk beyond the end of the description; confirm.
		 */
		desc_len -= sizeof(index_key->desc);
		if (desc_len > n)
			desc_len = n;
		/* Pack up to a word's worth of bytes, first byte uppermost. */
		do {
			chunk <<= 8;
			chunk |= *d++;
		} while (--desc_len > 0);
		return chunk;
	}
}

/*
 * Get an index key chunk from an object stored in the array: strip the
 * keyring tag from the pointer and defer to keyring_get_key_chunk().
 */
static unsigned long keyring_get_object_key_chunk(const void *object, int level)
{
	const struct key *key = keyring_ptr_to_key(object);
	return keyring_get_key_chunk(&key->index_key, level);
}

/*
 * Determine whether a stored object matches an index key.  Type, domain tag
 * and description length must be equal and the full descriptions must
 * compare byte-for-byte identical.
 */
static bool keyring_compare_object(const void *object, const void *data)
{
	const struct keyring_index_key *index_key = data;
	const struct key *key = keyring_ptr_to_key(object);

	return key->index_key.type == index_key->type &&
		key->index_key.domain_tag == index_key->domain_tag &&
		key->index_key.desc_len == index_key->desc_len &&
		memcmp(key->index_key.description, index_key->description,
		       index_key->desc_len) == 0;
}

/*
 * Compare the index keys of a pair of objects and determine the bit position
 * at which they differ - if they differ.
 *
 * Returns -1 if no difference is found, otherwise the bit offset computed as
 * eight times the byte offset of the differing segment plus the index of the
 * lowest differing bit within that segment.
 */
static int keyring_diff_objects(const void *object, const void *data)
{
	const struct key *key_a = keyring_ptr_to_key(object);
	const struct keyring_index_key *a = &key_a->index_key;
	const struct keyring_index_key *b = data;
	unsigned long seg_a, seg_b;
	int level, i;

	/* @level counts bytes of index key already found to be equal. */
	level = 0;
	seg_a = a->hash;
	seg_b = b->hash;
	if ((seg_a ^ seg_b) != 0)
		goto differ;
	level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8;

	/* The number of bits contributed by the hash is controlled by a
	 * constant in the assoc_array headers.  Everything else thereafter we
	 * can deal with as being machine word-size dependent.
	 */
	seg_a = a->x;
	seg_b = b->x;
	if ((seg_a ^ seg_b) != 0)
		goto differ;
	level += sizeof(unsigned long);

	/* The next bit may not work on big endian */
	seg_a = (unsigned long)a->type;
	seg_b = (unsigned long)b->type;
	if ((seg_a ^ seg_b) != 0)
		goto differ;
	level += sizeof(unsigned long);

	seg_a = (unsigned long)a->domain_tag;
	seg_b = (unsigned long)b->domain_tag;
	if ((seg_a ^ seg_b) != 0)
		goto differ;
	level += sizeof(unsigned long);

	/* Compare the description tail, one byte at a time, starting after
	 * the bytes that are cached in the desc field.
	 */
	i = sizeof(a->desc);
	if (a->desc_len <= i)
		goto same;

	for (; i < a->desc_len; i++) {
		seg_a = *(unsigned char *)(a->description + i);
		seg_b = *(unsigned char *)(b->description + i);
		if ((seg_a ^ seg_b) != 0)
			goto differ_plus_i;
	}

same:
	return -1;

differ_plus_i:
	level += i;
differ:
	i = level * 8 + __ffs(seg_a ^ seg_b);
	return i;
}

/*
 * Free an object after stripping the keyring flag off of the pointer.
 */
static void keyring_free_object(void *object)
{
	key_put(keyring_ptr_to_key(object));
}

/*
 * Operations for keyring management by the index-tree routines.
 */
static const struct assoc_array_ops keyring_assoc_array_ops = {
	.get_key_chunk		= keyring_get_key_chunk,
	.get_object_key_chunk	= keyring_get_object_key_chunk,
	.compare_object		= keyring_compare_object,
	.diff_objects		= keyring_diff_objects,
	.free_object		= keyring_free_object,
};

/*
 * Clean up a keyring when it is destroyed.  Unpublish its name if it had one
 * and dispose of its data.
 *
 * The garbage collector detects the final key_put(), removes the keyring from
 * the serial number tree and then does RCU synchronisation before coming here,
 * so we shouldn't need to worry about code poking around here with the RCU
 * readlock held by this time.
*/ static void keyring_destroy(struct key *keyring) { if (keyring->description) { write_lock(&keyring_name_lock); if (keyring->name_link.next != NULL && !list_empty(&keyring->name_link)) list_del(&keyring->name_link); write_unlock(&keyring_name_lock); } if (keyring->restrict_link) { struct key_restriction *keyres = keyring->restrict_link; key_put(keyres->key); kfree(keyres); } assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops); } /* * Describe a keyring for /proc. */ static void keyring_describe(const struct key *keyring, struct seq_file *m) { if (keyring->description) seq_puts(m, keyring->description); else seq_puts(m, "[anon]"); if (key_is_positive(keyring)) { if (keyring->keys.nr_leaves_on_tree != 0) seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree); else seq_puts(m, ": empty"); } } struct keyring_read_iterator_context { size_t buflen; size_t count; key_serial_t *buffer; }; static int keyring_read_iterator(const void *object, void *data) { struct keyring_read_iterator_context *ctx = data; const struct key *key = keyring_ptr_to_key(object); kenter("{%s,%d},,{%zu/%zu}", key->type->name, key->serial, ctx->count, ctx->buflen); if (ctx->count >= ctx->buflen) return 1; *ctx->buffer++ = key->serial; ctx->count += sizeof(key->serial); return 0; } /* * Read a list of key IDs from the keyring's contents in binary form * * The keyring's semaphore is read-locked by the caller. This prevents someone * from modifying it under us - which could cause us to read key IDs multiple * times. 
*/ static long keyring_read(const struct key *keyring, char *buffer, size_t buflen) { struct keyring_read_iterator_context ctx; long ret; kenter("{%d},,%zu", key_serial(keyring), buflen); if (buflen & (sizeof(key_serial_t) - 1)) return -EINVAL; /* Copy as many key IDs as fit into the buffer */ if (buffer && buflen) { ctx.buffer = (key_serial_t *)buffer; ctx.buflen = buflen; ctx.count = 0; ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx); if (ret < 0) { kleave(" = %ld [iterate]", ret); return ret; } } /* Return the size of the buffer needed */ ret = keyring->keys.nr_leaves_on_tree * sizeof(key_serial_t); if (ret <= buflen) kleave("= %ld [ok]", ret); else kleave("= %ld [buffer too small]", ret); return ret; } /* * Allocate a keyring and link into the destination keyring. */ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, const struct cred *cred, key_perm_t perm, unsigned long flags, struct key_restriction *restrict_link, struct key *dest) { struct key *keyring; int ret; keyring = key_alloc(&key_type_keyring, description, uid, gid, cred, perm, flags, restrict_link); if (!IS_ERR(keyring)) { ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL); if (ret < 0) { key_put(keyring); keyring = ERR_PTR(ret); } } return keyring; } EXPORT_SYMBOL(keyring_alloc); /** * restrict_link_reject - Give -EPERM to restrict link * @keyring: The keyring being added to. * @type: The type of key being added. * @payload: The payload of the key intended to be added. * @restriction_key: Keys providing additional data for evaluating restriction. * * Reject the addition of any links to a keyring. It can be overridden by * passing KEY_ALLOC_BYPASS_RESTRICTION to key_instantiate_and_link() when * adding a key to a keyring. * * This is meant to be stored in a key_restriction structure which is passed * in the restrict_link parameter to keyring_alloc(). 
*/
int restrict_link_reject(struct key *keyring,
			 const struct key_type *type,
			 const union key_payload *payload,
			 struct key *restriction_key)
{
	return -EPERM;
}

/*
 * By default, keys are found by getting an exact match on their descriptions.
 */
bool key_default_cmp(const struct key *key,
		     const struct key_match_data *match_data)
{
	return strcmp(key->description, match_data->raw_data) == 0;
}

/*
 * Iteration function to consider each key found.
 *
 * Returns 0 if the key is of the wrong type or does not match, 1 if the key
 * was accepted (ctx->result then holds the key reference), or
 * ctx->skipped_ret if the key was passed over because of its state or
 * permissions (ctx->result may then hold an error pointer).
 */
static int keyring_search_iterator(const void *object, void *iterator_data)
{
	struct keyring_search_context *ctx = iterator_data;
	const struct key *key = keyring_ptr_to_key(object);
	/* Snapshot flags and state once so all checks below see one value. */
	unsigned long kflags = READ_ONCE(key->flags);
	short state = READ_ONCE(key->state);

	kenter("{%d}", key->serial);

	/* ignore keys not of this type */
	if (key->type != ctx->index_key.type) {
		kleave(" = 0 [!type]");
		return 0;
	}

	/* skip invalidated, revoked and expired keys */
	if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
		time64_t expiry = READ_ONCE(key->expiry);

		if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
			      (1 << KEY_FLAG_REVOKED))) {
			ctx->result = ERR_PTR(-EKEYREVOKED);
			kleave(" = %d [invrev]", ctx->skipped_ret);
			goto skipped;
		}

		if (expiry && ctx->now >= expiry) {
			/* Expired keys only record an error if the caller did
			 * not ask for them to be skipped silently.
			 */
			if (!(ctx->flags & KEYRING_SEARCH_SKIP_EXPIRED))
				ctx->result = ERR_PTR(-EKEYEXPIRED);
			kleave(" = %d [expire]", ctx->skipped_ret);
			goto skipped;
		}
	}

	/* keys that don't match */
	if (!ctx->match_data.cmp(key, &ctx->match_data)) {
		kleave(" = 0 [!match]");
		return 0;
	}

	/* key must have search permissions */
	if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
	    key_task_permission(make_key_ref(key, ctx->possessed),
				ctx->cred, KEY_NEED_SEARCH) < 0) {
		ctx->result = ERR_PTR(-EACCES);
		kleave(" = %d [!perm]", ctx->skipped_ret);
		goto skipped;
	}

	if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
		/* we set a different error code if we pass a negative key */
		if (state < 0) {
			ctx->result = ERR_PTR(state);
			kleave(" = %d [neg]", ctx->skipped_ret);
			goto skipped;
		}
	}

	/* Found */
	ctx->result = make_key_ref(key, ctx->possessed);
	kleave(" = 1 [found]");
	return 1;

skipped:
	return ctx->skipped_ret;
}

/*
 * Search inside a keyring for a key.  We can search by walking to it
 * directly based on its index-key or we can iterate over the entire
 * tree looking for it, based on the match function.
 *
 * Returns whatever the iterator returned, or 0 if a direct lookup found no
 * object.
 */
static int search_keyring(struct key *keyring, struct keyring_search_context *ctx)
{
	if (ctx->match_data.lookup_type == KEYRING_SEARCH_LOOKUP_DIRECT) {
		const void *object;

		object = assoc_array_find(&keyring->keys,
					  &keyring_assoc_array_ops,
					  &ctx->index_key);
		return object ? ctx->iterator(object, ctx) : 0;
	}
	return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx);
}

/*
 * Search a tree of keyrings that point to other keyrings up to the maximum
 * depth.
 *
 * Returns true if a match was found (ctx->result holds the key reference)
 * and false otherwise (ctx->result is left as the iterator or this function
 * last set it).
 */
static bool search_nested_keyrings(struct key *keyring,
				   struct keyring_search_context *ctx)
{
	/* Saved positions in the parent keyrings, one per level of nesting. */
	struct {
		struct key *keyring;
		struct assoc_array_node *node;
		int slot;
	} stack[KEYRING_SEARCH_MAX_DEPTH];

	struct assoc_array_shortcut *shortcut;
	struct assoc_array_node *node;
	struct assoc_array_ptr *ptr;
	struct key *key;
	int sp = 0, slot;

	kenter("{%d},{%s,%s}",
	       keyring->serial,
	       ctx->index_key.type->name,
	       ctx->index_key.description);

	/* Exactly one of the two state-check flags must be set. */
#define STATE_CHECKS (KEYRING_SEARCH_NO_STATE_CHECK | KEYRING_SEARCH_DO_STATE_CHECK)
	BUG_ON((ctx->flags & STATE_CHECKS) == 0 ||
	       (ctx->flags & STATE_CHECKS) == STATE_CHECKS);

	if (ctx->index_key.description)
		key_set_index_key(&ctx->index_key);

	/* Check to see if this top-level keyring is what we are looking for
	 * and whether it is valid or not.
	 */
	if (ctx->match_data.lookup_type == KEYRING_SEARCH_LOOKUP_ITERATE ||
	    keyring_compare_object(keyring, &ctx->index_key)) {
		/* A "skipped" verdict on the root is reported as 2 so that it
		 * can be told apart from "no match" and ends the search.
		 */
		ctx->skipped_ret = 2;
		switch (ctx->iterator(keyring_key_to_ptr(keyring), ctx)) {
		case 1:
			goto found;
		case 2:
			return false;
		default:
			break;
		}
	}

	ctx->skipped_ret = 0;

	/* Start processing a new keyring */
descend_to_keyring:
	kdebug("descend to %d", keyring->serial);
	if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
			      (1 << KEY_FLAG_REVOKED)))
		goto not_this_keyring;

	/* Search through the keys in this keyring before searching its
	 * subtrees.
	 */
	if (search_keyring(keyring, ctx))
		goto found;

	/* Then manually iterate through the keyrings nested in this one.
	 *
	 * Start from the root node of the index tree.  Because of the way the
	 * hash function has been set up, keyrings cluster on the leftmost
	 * branch of the root node (root slot 0) or in the root node itself.
	 * Non-keyrings avoid the leftmost branch of the root entirely (root
	 * slots 1-15).
	 */
	if (!(ctx->flags & KEYRING_SEARCH_RECURSE))
		goto not_this_keyring;

	ptr = READ_ONCE(keyring->keys.root);
	if (!ptr)
		goto not_this_keyring;

	if (assoc_array_ptr_is_shortcut(ptr)) {
		/* If the root is a shortcut, either the keyring only contains
		 * keyring pointers (everything clusters behind root slot 0) or
		 * doesn't contain any keyring pointers.
		 */
		shortcut = assoc_array_ptr_to_shortcut(ptr);
		if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0)
			goto not_this_keyring;

		ptr = READ_ONCE(shortcut->next_node);
		node = assoc_array_ptr_to_node(ptr);
		goto begin_node;
	}

	node = assoc_array_ptr_to_node(ptr);
	ptr = node->slots[0];
	if (!assoc_array_ptr_is_meta(ptr))
		goto begin_node;

descend_to_node:
	/* Descend to a more distal node in this keyring's content tree and go
	 * through that.
	 */
	kdebug("descend");
	if (assoc_array_ptr_is_shortcut(ptr)) {
		shortcut = assoc_array_ptr_to_shortcut(ptr);
		ptr = READ_ONCE(shortcut->next_node);
		BUG_ON(!assoc_array_ptr_is_node(ptr));
	}
	node = assoc_array_ptr_to_node(ptr);

begin_node:
	kdebug("begin_node");
	slot = 0;
ascend_to_node:
	/* Go through the slots in a node */
	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
		ptr = READ_ONCE(node->slots[slot]);

		if (assoc_array_ptr_is_meta(ptr)) {
			if (node->back_pointer ||
			    assoc_array_ptr_is_shortcut(ptr))
				goto descend_to_node;
		}

		if (!keyring_ptr_is_keyring(ptr))
			continue;

		key = keyring_ptr_to_key(ptr);

		/* No room left on the stack: either report the excessive
		 * depth or just give up on this keyring.
		 */
		if (sp >= KEYRING_SEARCH_MAX_DEPTH) {
			if (ctx->flags & KEYRING_SEARCH_DETECT_TOO_DEEP) {
				ctx->result = ERR_PTR(-ELOOP);
				return false;
			}
			goto not_this_keyring;
		}

		/* Search a nested keyring */
		if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
		    key_task_permission(make_key_ref(key, ctx->possessed),
					ctx->cred, KEY_NEED_SEARCH) < 0)
			continue;

		/* stack the current position */
		stack[sp].keyring = keyring;
		stack[sp].node = node;
		stack[sp].slot = slot;
		sp++;

		/* begin again with the new keyring */
		keyring = key;
		goto descend_to_keyring;
	}

	/* We've dealt with all the slots in the current node, so now we need
	 * to ascend to the parent and continue processing there.
	 */
	ptr = READ_ONCE(node->back_pointer);
	slot = node->parent_slot;

	if (ptr && assoc_array_ptr_is_shortcut(ptr)) {
		shortcut = assoc_array_ptr_to_shortcut(ptr);
		ptr = READ_ONCE(shortcut->back_pointer);
		slot = shortcut->parent_slot;
	}
	if (!ptr)
		goto not_this_keyring;
	node = assoc_array_ptr_to_node(ptr);
	slot++;

	/* If we've ascended to the root (zero backpointer), we must have just
	 * finished processing the leftmost branch rather than the root slots -
	 * so there can't be any more keyrings for us to find.
	 */
	if (node->back_pointer) {
		kdebug("ascend %d", slot);
		goto ascend_to_node;
	}

	/* The keyring we're looking at was disqualified or didn't contain a
	 * matching key.
	 */
not_this_keyring:
	kdebug("not_this_keyring %d", sp);
	if (sp <= 0) {
		kleave(" = false");
		return false;
	}

	/* Resume the processing of a keyring higher up in the tree */
	sp--;
	keyring = stack[sp].keyring;
	node = stack[sp].node;
	slot = stack[sp].slot + 1;
	kdebug("ascend to %d [%d]", keyring->serial, slot);
	goto ascend_to_node;

	/* We found a viable match */
found:
	key = key_ref_to_ptr(ctx->result);
	key_check(key);
	if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) {
		/* Stamp the key, the keyring it was found in and every
		 * keyring still on the stack with the search time.
		 */
		key->last_used_at = ctx->now;
		keyring->last_used_at = ctx->now;
		while (sp > 0)
			stack[--sp].keyring->last_used_at = ctx->now;
	}
	kleave(" = true");
	return true;
}

/**
 * keyring_search_rcu - Search a keyring tree for a matching key under RCU
 * @keyring_ref: A pointer to the keyring with possession indicator.
 * @ctx: The keyring search context.
 *
 * Search the supplied keyring tree for a key that matches the criteria given.
 * The root keyring and any linked keyrings must grant Search permission to the
 * caller to be searchable and keys can only be found if they too grant Search
 * to the caller.  The possession flag on the root keyring pointer controls use
 * of the possessor bits in permissions checking of the entire tree.  In
 * addition, the LSM gets to forbid keyring searches and key matches.
 *
 * The search is performed as a breadth-then-depth search up to the prescribed
 * limit (KEYRING_SEARCH_MAX_DEPTH).  The caller must hold the RCU read lock to
 * prevent keyrings from being destroyed or rearranged whilst they are being
 * searched.
 *
 * Keys are matched to the type provided and are then filtered by the match
 * function, which is given the description to use in any way it sees fit.  The
 * match function may use any attributes of a key that it wishes to
 * determine the match.  Normally the match function from the key type would be
 * used.
 *
 * RCU can be used to prevent the keyring key lists from disappearing without
 * the need to take lots of locks.
* * Returns a pointer to the found key and increments the key usage count if * successful; -EAGAIN if no matching keys were found, or if expired or revoked * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the * specified keyring wasn't a keyring. * * In the case of a successful return, the possession attribute from * @keyring_ref is propagated to the returned key reference. */ key_ref_t keyring_search_rcu(key_ref_t keyring_ref, struct keyring_search_context *ctx) { struct key *keyring; long err; ctx->iterator = keyring_search_iterator; ctx->possessed = is_key_possessed(keyring_ref); ctx->result = ERR_PTR(-EAGAIN); keyring = key_ref_to_ptr(keyring_ref); key_check(keyring); if (keyring->type != &key_type_keyring) return ERR_PTR(-ENOTDIR); if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) { err = key_task_permission(keyring_ref, ctx->cred, KEY_NEED_SEARCH); if (err < 0) return ERR_PTR(err); } ctx->now = ktime_get_real_seconds(); if (search_nested_keyrings(keyring, ctx)) __key_get(key_ref_to_ptr(ctx->result)); return ctx->result; } /** * keyring_search - Search the supplied keyring tree for a matching key * @keyring: The root of the keyring tree to be searched. * @type: The type of keyring we want to find. * @description: The name of the keyring we want to find. * @recurse: True to search the children of @keyring also * * As keyring_search_rcu() above, but using the current task's credentials and * type's default matching function and preferred search method. 
*/ key_ref_t keyring_search(key_ref_t keyring, struct key_type *type, const char *description, bool recurse) { struct keyring_search_context ctx = { .index_key.type = type, .index_key.description = description, .index_key.desc_len = strlen(description), .cred = current_cred(), .match_data.cmp = key_default_cmp, .match_data.raw_data = description, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .flags = KEYRING_SEARCH_DO_STATE_CHECK, }; key_ref_t key; int ret; if (recurse) ctx.flags |= KEYRING_SEARCH_RECURSE; if (type->match_preparse) { ret = type->match_preparse(&ctx.match_data); if (ret < 0) return ERR_PTR(ret); } rcu_read_lock(); key = keyring_search_rcu(keyring, &ctx); rcu_read_unlock(); if (type->match_free) type->match_free(&ctx.match_data); return key; } EXPORT_SYMBOL(keyring_search); static struct key_restriction *keyring_restriction_alloc( key_restrict_link_func_t check) { struct key_restriction *keyres = kzalloc(sizeof(struct key_restriction), GFP_KERNEL); if (!keyres) return ERR_PTR(-ENOMEM); keyres->check = check; return keyres; } /* * Semaphore to serialise restriction setup to prevent reference count * cycles through restriction key pointers. */ static DECLARE_RWSEM(keyring_serialise_restrict_sem); /* * Check for restriction cycles that would prevent keyring garbage collection. * keyring_serialise_restrict_sem must be held. */ static bool keyring_detect_restriction_cycle(const struct key *dest_keyring, struct key_restriction *keyres) { while (keyres && keyres->key && keyres->key->type == &key_type_keyring) { if (keyres->key == dest_keyring) return true; keyres = keyres->key->restrict_link; } return false; } /** * keyring_restrict - Look up and apply a restriction to a keyring * @keyring_ref: The keyring to be restricted * @type: The key type that will provide the restriction checker. * @restriction: The restriction options to apply to the keyring * * Look up a keyring and apply a restriction to it. 
The restriction is managed
 * by the specific key type, but can be configured by the options specified in
 * the restriction string.
 */
int keyring_restrict(key_ref_t keyring_ref, const char *type,
		     const char *restriction)
{
	struct key *keyring;
	struct key_type *restrict_type = NULL;
	struct key_restriction *restrict_link;
	int ret = 0;

	keyring = key_ref_to_ptr(keyring_ref);
	key_check(keyring);

	if (keyring->type != &key_type_keyring)
		return -ENOTDIR;

	if (!type) {
		/* No type named: install a restriction using restrict_link_reject. */
		restrict_link = keyring_restriction_alloc(restrict_link_reject);
	} else {
		restrict_type = key_type_lookup(type);

		if (IS_ERR(restrict_type))
			return PTR_ERR(restrict_type);

		if (!restrict_type->lookup_restriction) {
			ret = -ENOENT;
			goto error;
		}

		restrict_link = restrict_type->lookup_restriction(restriction);
	}

	if (IS_ERR(restrict_link)) {
		ret = PTR_ERR(restrict_link);
		goto error;
	}

	/* The keyring's own sem is taken first, then the global restrict sem. */
	down_write(&keyring->sem);
	down_write(&keyring_serialise_restrict_sem);

	if (keyring->restrict_link) {
		ret = -EEXIST;
	} else if (keyring_detect_restriction_cycle(keyring, restrict_link)) {
		ret = -EDEADLK;
	} else {
		keyring->restrict_link = restrict_link;
		notify_key(keyring, NOTIFY_KEY_SETATTR, 0);
	}

	up_write(&keyring_serialise_restrict_sem);
	up_write(&keyring->sem);

	/* The restriction was not installed: drop its key and free it. */
	if (ret < 0) {
		key_put(restrict_link->key);
		kfree(restrict_link);
	}

error:
	/* restrict_type is only non-NULL after a successful key_type_lookup(). */
	if (restrict_type)
		key_type_put(restrict_type);

	return ret;
}
EXPORT_SYMBOL(keyring_restrict);

/*
 * Search the given keyring for a key that might be updated.
 *
 * The caller must guarantee that the keyring is a keyring and that the
 * permission is granted to modify the keyring as no check is made here. The
 * caller must also hold a lock on the keyring semaphore.
 *
 * Returns a pointer to the found key with usage count incremented if
 * successful and returns NULL if not found. Revoked and invalidated keys are
 * skipped over.
 *
 * If successful, the possession indicator is propagated from the keyring ref
 * to the returned key reference.
*/
key_ref_t find_key_to_update(key_ref_t keyring_ref,
			     const struct keyring_index_key *index_key)
{
	struct key *ring = key_ref_to_ptr(keyring_ref);
	struct key *candidate;
	const void *leaf;

	kenter("{%d},{%s,%s}",
	       ring->serial, index_key->type->name, index_key->description);

	/* Direct lookup of the index key in this keyring's array. */
	leaf = assoc_array_find(&ring->keys, &keyring_assoc_array_ops,
				index_key);
	if (!leaf) {
		kleave(" = NULL");
		return NULL;
	}

	/* A hit that is invalidated or revoked is reported as "not found". */
	candidate = keyring_ptr_to_key(leaf);
	if (candidate->flags & ((1 << KEY_FLAG_INVALIDATED) |
				(1 << KEY_FLAG_REVOKED))) {
		kleave(" = NULL [x]");
		return NULL;
	}

	/* Pin the key and hand it back with the keyring's possession bit. */
	__key_get(candidate);
	kleave(" = {%d}", candidate->serial);
	return make_key_ref(candidate, is_key_possessed(keyring_ref));
}

/*
 * Find a keyring with the specified name.
 *
 * Only keyrings that have nonzero refcount, are not revoked, and are owned by a
 * user in the current user namespace are considered. If @uid_keyring is %true,
 * the keyring additionally must have been allocated as a user or user session
 * keyring; otherwise, it must grant Search permission directly to the caller.
 *
 * Returns a pointer to the keyring with the keyring's refcount having being
 * incremented on success. -ENOKEY is returned if a key could not be found.
*/
struct key *find_keyring_by_name(const char *name, bool uid_keyring)
{
	struct user_namespace *ns = current_user_ns();
	struct key *keyring;

	if (!name)
		return ERR_PTR(-EINVAL);

	read_lock(&keyring_name_lock);

	/* Search the current user namespace's keyring name list for a keyring
	 * with a matching name that grants Search permission and that hasn't
	 * been revoked
	 */
	list_for_each_entry(keyring, &ns->keyring_name_list, name_link) {
		if (!kuid_has_mapping(ns, keyring->user->uid))
			continue;

		if (test_bit(KEY_FLAG_REVOKED, &keyring->flags))
			continue;

		if (strcmp(keyring->description, name) != 0)
			continue;

		if (uid_keyring) {
			if (!test_bit(KEY_FLAG_UID_KEYRING,
				      &keyring->flags))
				continue;
		} else {
			if (key_permission(make_key_ref(keyring, 0),
					   KEY_NEED_SEARCH) < 0)
				continue;
		}

		/* we've got a match but we might end up racing with
		 * key_cleanup() if the keyring is currently 'dead'
		 * (ie. it has a zero usage count) */
		if (!refcount_inc_not_zero(&keyring->usage))
			continue;
		keyring->last_used_at = ktime_get_real_seconds();
		goto out;
	}

	/* Fell off the end of the list without taking a reference. */
	keyring = ERR_PTR(-ENOKEY);
out:
	read_unlock(&keyring_name_lock);
	return keyring;
}

/*
 * Iterator used by keyring_detect_cycle(): returns 1 and records -EDEADLK in
 * the search context when the visited key is the very key stored in
 * ctx->match_data.raw_data, otherwise returns 0.
 */
static int keyring_detect_cycle_iterator(const void *object,
					 void *iterator_data)
{
	struct keyring_search_context *ctx = iterator_data;
	const struct key *key = keyring_ptr_to_key(object);

	kenter("{%d}", key->serial);

	/* We might get a keyring with matching index-key that is nonetheless a
	 * different keyring. */
	if (key != ctx->match_data.raw_data)
		return 0;

	ctx->result = ERR_PTR(-EDEADLK);
	return 1;
}

/*
 * See if a cycle will be created by inserting acyclic tree B in acyclic
 * tree A at the topmost level (ie: as a direct child of A).
 *
 * Since we are adding B to A at the top level, checking for cycles should just
 * be a matter of seeing if node A is somewhere in tree B.
*/
static int keyring_detect_cycle(struct key *A, struct key *B)
{
	struct keyring_search_context ctx = {
		.index_key		= A->index_key,
		.match_data.raw_data	= A,
		.match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
		.iterator		= keyring_detect_cycle_iterator,
		.flags			= (KEYRING_SEARCH_NO_STATE_CHECK |
					   KEYRING_SEARCH_NO_UPDATE_TIME |
					   KEYRING_SEARCH_NO_CHECK_PERM |
					   KEYRING_SEARCH_DETECT_TOO_DEEP |
					   KEYRING_SEARCH_RECURSE),
	};

	rcu_read_lock();
	search_nested_keyrings(B, &ctx);
	rcu_read_unlock();
	/* -EAGAIN in ctx.result is reported as 0; anything else is passed on. */
	return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result);
}

/*
 * Lock keyring for link.
 *
 * Returns -ENOTDIR without taking any lock if @keyring is not a keyring.
 */
int __key_link_lock(struct key *keyring,
		    const struct keyring_index_key *index_key)
	__acquires(&keyring->sem)
	__acquires(&keyring_serialise_link_lock)
{
	if (keyring->type != &key_type_keyring)
		return -ENOTDIR;

	down_write(&keyring->sem);

	/* Serialise link/link calls to prevent parallel calls causing a cycle
	 * when linking two keyrings in opposite orders.
	 */
	if (index_key->type == &key_type_keyring)
		mutex_lock(&keyring_serialise_link_lock);

	return 0;
}

/*
 * Lock keyrings for move (link/unlink combination).
 *
 * Returns -ENOTDIR without taking any lock if either argument is not a
 * keyring.
 */
int __key_move_lock(struct key *l_keyring, struct key *u_keyring,
		    const struct keyring_index_key *index_key)
	__acquires(&l_keyring->sem)
	__acquires(&u_keyring->sem)
	__acquires(&keyring_serialise_link_lock)
{
	if (l_keyring->type != &key_type_keyring ||
	    u_keyring->type != &key_type_keyring)
		return -ENOTDIR;

	/* We have to be very careful here to take the keyring locks in the
	 * right order, lest we open ourselves to deadlocking against another
	 * move operation.  The keyring at the lower address is locked first.
	 */
	if (l_keyring < u_keyring) {
		down_write(&l_keyring->sem);
		down_write_nested(&u_keyring->sem, 1);
	} else {
		down_write(&u_keyring->sem);
		down_write_nested(&l_keyring->sem, 1);
	}

	/* Serialise link/link calls to prevent parallel calls causing a cycle
	 * when linking two keyrings in opposite orders.
	 */
	if (index_key->type == &key_type_keyring)
		mutex_lock(&keyring_serialise_link_lock);

	return 0;
}

/*
 * Preallocate memory so that a key can be linked into a keyring.
 *
 * On success the prepared edit is stored in *@_edit, which must be NULL on
 * entry.  Returns 0, -EKEYREVOKED if the keyring is revoked, or the error
 * from assoc_array_insert() or key_payload_reserve().
 */
int __key_link_begin(struct key *keyring,
		     const struct keyring_index_key *index_key,
		     struct assoc_array_edit **_edit)
{
	struct assoc_array_edit *edit;
	int ret;

	kenter("%d,%s,%s,",
	       keyring->serial, index_key->type->name, index_key->description);

	BUG_ON(index_key->desc_len == 0);
	BUG_ON(*_edit != NULL);

	*_edit = NULL;

	ret = -EKEYREVOKED;
	if (test_bit(KEY_FLAG_REVOKED, &keyring->flags))
		goto error;

	/* Create an edit script that will insert/replace the key in the
	 * keyring tree.
	 */
	edit = assoc_array_insert(&keyring->keys,
				  &keyring_assoc_array_ops,
				  index_key,
				  NULL);
	if (IS_ERR(edit)) {
		ret = PTR_ERR(edit);
		goto error;
	}

	/* If we're not replacing a link in-place then we're going to need some
	 * extra quota.
	 */
	if (!edit->dead_leaf) {
		ret = key_payload_reserve(keyring,
					  keyring->datalen + KEYQUOTA_LINK_BYTES);
		if (ret < 0)
			goto error_cancel;
	}

	*_edit = edit;
	kleave(" = 0");
	return 0;

error_cancel:
	assoc_array_cancel_edit(edit);
error:
	kleave(" = %d", ret);
	return ret;
}

/*
 * Check already instantiated keys aren't going to be a problem.
 *
 * The caller must have called __key_link_begin(). Don't need to call this for
 * keys that were created since __key_link_begin() was called.
 */
int __key_link_check_live_key(struct key *keyring, struct key *key)
{
	if (key->type == &key_type_keyring)
		/* check that we aren't going to create a cycle by linking one
		 * keyring to another */
		return keyring_detect_cycle(keyring, key);
	return 0;
}

/*
 * Link a key into to a keyring.
 *
 * Must be called with __key_link_begin() having being called. Discards any
 * already extant link to matching key if there is one, so that each keyring
 * holds at most one link to any given key of a particular type+description
 * combination.
*/
void __key_link(struct key *keyring, struct key *key,
		struct assoc_array_edit **_edit)
{
	/* Take the reference that the keyring's link will hold on the key. */
	__key_get(key);
	assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key));
	assoc_array_apply_edit(*_edit);
	/* The edit is consumed; clear it so __key_link_end() skips the cancel. */
	*_edit = NULL;
	notify_key(keyring, NOTIFY_KEY_LINKED, key_serial(key));
}

/*
 * Finish linking a key into a keyring.
 *
 * Must be called after __key_link_begin() has been called.  If @edit is still
 * set (the link was not applied), the quota reserved for a new link is handed
 * back and the edit is cancelled.  The locks are then released.
 */
void __key_link_end(struct key *keyring,
		    const struct keyring_index_key *index_key,
		    struct assoc_array_edit *edit)
	__releases(&keyring->sem)
	__releases(&keyring_serialise_link_lock)
{
	BUG_ON(index_key->type == NULL);
	kenter("%d,%s,", keyring->serial, index_key->type->name);

	if (edit) {
		if (!edit->dead_leaf) {
			key_payload_reserve(keyring,
				keyring->datalen - KEYQUOTA_LINK_BYTES);
		}
		assoc_array_cancel_edit(edit);
	}
	up_write(&keyring->sem);

	if (index_key->type == &key_type_keyring)
		mutex_unlock(&keyring_serialise_link_lock);
}

/*
 * Check addition of keys to restricted keyrings.
 *
 * Returns 0 if the keyring has no restriction or no check function; otherwise
 * returns whatever the restriction's check function returns.
 */
static int __key_link_check_restriction(struct key *keyring, struct key *key)
{
	if (!keyring->restrict_link || !keyring->restrict_link->check)
		return 0;
	return keyring->restrict_link->check(keyring, key->type, &key->payload,
					     keyring->restrict_link->key);
}

/**
 * key_link - Link a key to a keyring
 * @keyring: The keyring to make the link in.
 * @key: The key to link to.
 *
 * Make a link in a keyring to a key, such that the keyring holds a reference
 * on that key and the key can potentially be found by searching that keyring.
 *
 * This function will write-lock the keyring's semaphore and will consume some
 * of the user's key data quota to hold the link.
 *
 * Returns 0 if successful, -ENOTDIR if the keyring isn't a keyring,
 * -EKEYREVOKED if the keyring has been revoked, -ENFILE if the keyring is
 * full, -EDQUOT if there is insufficient key data quota remaining to add
 * another link or -ENOMEM if there's insufficient memory.
*
 * It is assumed that the caller has checked that it is permitted for a link to
 * be made (the keyring should have Write permission and the key Link
 * permission).
 */
int key_link(struct key *keyring, struct key *key)
{
	struct assoc_array_edit *edit = NULL;
	int ret;

	kenter("{%d,%d}", keyring->serial, refcount_read(&keyring->usage));

	key_check(keyring);
	key_check(key);

	ret = __key_link_lock(keyring, &key->index_key);
	if (ret < 0)
		goto error;

	ret = __key_link_begin(keyring, &key->index_key, &edit);
	if (ret < 0)
		goto error_end;

	kdebug("begun {%d,%d}", keyring->serial, refcount_read(&keyring->usage));
	/* Each step only runs if every earlier one returned 0. */
	ret = __key_link_check_restriction(keyring, key);
	if (ret == 0)
		ret = __key_link_check_live_key(keyring, key);
	if (ret == 0)
		__key_link(keyring, key, &edit);

error_end:
	/* Also the success path: unlocks, and cancels the edit if still set. */
	__key_link_end(keyring, &key->index_key, edit);
error:
	kleave(" = %d {%d,%d}", ret, keyring->serial, refcount_read(&keyring->usage));
	return ret;
}
EXPORT_SYMBOL(key_link);

/*
 * Lock a keyring for unlink.
 *
 * Returns -ENOTDIR without locking if @keyring is not a keyring.
 */
static int __key_unlink_lock(struct key *keyring)
	__acquires(&keyring->sem)
{
	if (keyring->type != &key_type_keyring)
		return -ENOTDIR;

	down_write(&keyring->sem);
	return 0;
}

/*
 * Begin the process of unlinking a key from a keyring.
 *
 * Stores the prepared edit in *@_edit (which must be NULL on entry) and
 * returns 0; returns -ENOENT if assoc_array_delete() yields no edit, or its
 * error code if it fails.
 */
static int __key_unlink_begin(struct key *keyring, struct key *key,
			      struct assoc_array_edit **_edit)
{
	struct assoc_array_edit *edit;

	BUG_ON(*_edit != NULL);

	edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops,
				  &key->index_key);
	if (IS_ERR(edit))
		return PTR_ERR(edit);

	if (!edit)
		return -ENOENT;

	*_edit = edit;
	return 0;
}

/*
 * Apply an unlink change.
 *
 * Applies the edit, posts the unlink notification, clears *@_edit and gives
 * back the quota held for the link.
 */
static void __key_unlink(struct key *keyring, struct key *key,
			 struct assoc_array_edit **_edit)
{
	assoc_array_apply_edit(*_edit);
	notify_key(keyring, NOTIFY_KEY_UNLINKED, key_serial(key));
	*_edit = NULL;
	key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES);
}

/*
 * Finish unlinking a key from to a keyring.
*/
static void __key_unlink_end(struct key *keyring,
			     struct key *key,
			     struct assoc_array_edit *edit)
	__releases(&keyring->sem)
{
	/* An edit that is still set here was never applied; cancel it. */
	if (edit)
		assoc_array_cancel_edit(edit);
	up_write(&keyring->sem);
}

/**
 * key_unlink - Unlink the first link to a key from a keyring.
 * @keyring: The keyring to remove the link from.
 * @key: The key the link is to.
 *
 * Remove a link from a keyring to a key.
 *
 * This function will write-lock the keyring's semaphore.
 *
 * Returns 0 if successful, -ENOTDIR if the keyring isn't a keyring, -ENOENT if
 * the key isn't linked to by the keyring or -ENOMEM if there's insufficient
 * memory.
 *
 * It is assumed that the caller has checked that it is permitted for a link to
 * be removed (the keyring should have Write permission; no permissions are
 * required on the key).
 */
int key_unlink(struct key *keyring, struct key *key)
{
	struct assoc_array_edit *edit = NULL;
	int ret;

	key_check(keyring);
	key_check(key);

	ret = __key_unlink_lock(keyring);
	if (ret < 0)
		return ret;

	ret = __key_unlink_begin(keyring, key, &edit);
	if (ret == 0)
		__key_unlink(keyring, key, &edit);
	/* Always reached once the lock is held, so the sem is always dropped. */
	__key_unlink_end(keyring, key, edit);
	return ret;
}
EXPORT_SYMBOL(key_unlink);

/**
 * key_move - Move a key from one keyring to another
 * @key: The key to move
 * @from_keyring: The keyring to remove the link from.
 * @to_keyring: The keyring to make the link in.
 * @flags: Qualifying flags, such as KEYCTL_MOVE_EXCL.
 *
 * Make a link in @to_keyring to a key, such that the keyring holds a reference
 * on that key and the key can potentially be found by searching that keyring
 * whilst simultaneously removing a link to the key from @from_keyring.
 *
 * This function will write-lock both keyring's semaphores and will consume
 * some of the user's key data quota to hold the link on @to_keyring.
*
 * Returns 0 if successful, -ENOTDIR if either keyring isn't a keyring,
 * -EKEYREVOKED if either keyring has been revoked, -ENFILE if the second
 * keyring is full, -EDQUOT if there is insufficient key data quota remaining
 * to add another link or -ENOMEM if there's insufficient memory.  If
 * KEYCTL_MOVE_EXCL is set, then -EEXIST will be returned if there's already a
 * matching key in @to_keyring.
 *
 * It is assumed that the caller has checked that it is permitted for a link to
 * be made (the keyring should have Write permission and the key Link
 * permission).
 */
int key_move(struct key *key,
	     struct key *from_keyring,
	     struct key *to_keyring,
	     unsigned int flags)
{
	struct assoc_array_edit *from_edit = NULL, *to_edit = NULL;
	int ret;

	kenter("%d,%d,%d", key->serial, from_keyring->serial, to_keyring->serial);

	/* Moving within the same keyring is a no-op and takes no locks. */
	if (from_keyring == to_keyring)
		return 0;

	key_check(key);
	key_check(from_keyring);
	key_check(to_keyring);

	ret = __key_move_lock(from_keyring, to_keyring, &key->index_key);
	if (ret < 0)
		goto out;
	/* Prepare both edits before applying either one. */
	ret = __key_unlink_begin(from_keyring, key, &from_edit);
	if (ret < 0)
		goto error;
	ret = __key_link_begin(to_keyring, &key->index_key, &to_edit);
	if (ret < 0)
		goto error;

	/* A dead leaf in the link edit means a matching link would be replaced. */
	ret = -EEXIST;
	if (to_edit->dead_leaf && (flags & KEYCTL_MOVE_EXCL))
		goto error;

	ret = __key_link_check_restriction(to_keyring, key);
	if (ret < 0)
		goto error;
	ret = __key_link_check_live_key(to_keyring, key);
	if (ret < 0)
		goto error;

	__key_unlink(from_keyring, key, &from_edit);
	__key_link(to_keyring, key, &to_edit);
error:
	/* Also the success path: unlocks and cancels any edit still pending. */
	__key_link_end(to_keyring, &key->index_key, to_edit);
	__key_unlink_end(from_keyring, key, from_edit);
out:
	kleave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(key_move);

/**
 * keyring_clear - Clear a keyring
 * @keyring: The keyring to clear.
 *
 * Clear the contents of the specified keyring.
 *
 * Returns 0 if successful or -ENOTDIR if the keyring isn't a keyring.
*/ int keyring_clear(struct key *keyring) { struct assoc_array_edit *edit; int ret; if (keyring->type != &key_type_keyring) return -ENOTDIR; down_write(&keyring->sem); edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); if (IS_ERR(edit)) { ret = PTR_ERR(edit); } else { if (edit) assoc_array_apply_edit(edit); notify_key(keyring, NOTIFY_KEY_CLEARED, 0); key_payload_reserve(keyring, 0); ret = 0; } up_write(&keyring->sem); return ret; } EXPORT_SYMBOL(keyring_clear); /* * Dispose of the links from a revoked keyring. * * This is called with the key sem write-locked. */ static void keyring_revoke(struct key *keyring) { struct assoc_array_edit *edit; edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); if (!IS_ERR(edit)) { if (edit) assoc_array_apply_edit(edit); key_payload_reserve(keyring, 0); } } static bool keyring_gc_select_iterator(void *object, void *iterator_data) { struct key *key = keyring_ptr_to_key(object); time64_t *limit = iterator_data; if (key_is_dead(key, *limit)) return false; key_get(key); return true; } static int keyring_gc_check_iterator(const void *object, void *iterator_data) { const struct key *key = keyring_ptr_to_key(object); time64_t *limit = iterator_data; key_check(key); return key_is_dead(key, *limit); } /* * Garbage collect pointers from a keyring. * * Not called with any locks held. The keyring's key struct will not be * deallocated under us as only our caller may deallocate it. 
*/
void keyring_gc(struct key *keyring, time64_t limit)
{
	int result;

	kenter("%x{%s}", keyring->serial, keyring->description ?: "");

	/* Invalidated or revoked keyrings are not collected here. */
	if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
			      (1 << KEY_FLAG_REVOKED)))
		goto dont_gc;

	/* scan the keyring looking for dead keys */
	rcu_read_lock();
	result = assoc_array_iterate(&keyring->keys,
				     keyring_gc_check_iterator, &limit);
	rcu_read_unlock();
	if (result == true)
		goto do_gc;

dont_gc:
	kleave(" [no gc]");
	return;

do_gc:
	/* The write lock is only taken once the scan has reported a dead key. */
	down_write(&keyring->sem);
	assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops,
		       keyring_gc_select_iterator, &limit);
	up_write(&keyring->sem);
	kleave(" [gc]");
}

/*
 * Garbage collect restriction pointers from a keyring.
 *
 * Keyring restrictions are associated with a key type, and must be cleaned
 * up if the key type is unregistered. The restriction is altered to always
 * reject additional keys so a keyring cannot be opened up by unregistering
 * a key type.
 *
 * Not called with any keyring locks held. The keyring's key struct will not
 * be deallocated under us as only our caller may deallocate it.
 *
 * The caller is required to hold key_types_sem and dead_type->sem. This is
 * fulfilled by key_gc_keytype() holding the locks on behalf of
 * key_garbage_collector(), which it invokes on a workqueue.
 */
void keyring_restriction_gc(struct key *keyring, struct key_type *dead_type)
{
	struct key_restriction *keyres;

	kenter("%x{%s}", keyring->serial, keyring->description ?: "");

	/*
	 * keyring->restrict_link is only assigned at key allocation time
	 * or with the key type locked, so the only values that could be
	 * concurrently assigned to keyring->restrict_link are for key
	 * types other than dead_type. Given this, it's ok to check
	 * the key type before acquiring keyring->sem.
	 */
	if (!dead_type || !keyring->restrict_link ||
	    keyring->restrict_link->keytype != dead_type) {
		kleave(" [no restriction gc]");
		return;
	}

	/* Lock the keyring to ensure that a link is not in progress */
	down_write(&keyring->sem);

	keyres = keyring->restrict_link;

	/* Switch to reject-all, then drop the restriction's key and type. */
	keyres->check = restrict_link_reject;

	key_put(keyres->key);
	keyres->key = NULL;
	keyres->keytype = NULL;

	up_write(&keyring->sem);

	kleave(" [restriction gc]");
}
15 10 8 8 11 15 15 15 743 468 103 5 37 718 14 703 15 15 4 15 787 833 804 70 9 793 10 807 807 809 723 833 830 1 834 837 836 830 7 834 832 839 2 723 59 59 59 58 55 2 3 37 59 336 565 11 3 865 867 1 59 831 774 584 382 929 579 237 164 874 876 44 1 64 38 869 114 269 718 748 867 863 70 3 70 65 65 2 1 64 32 37 13 2 1 31 1 3 18 3 2 45 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 
446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 
946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 // SPDX-License-Identifier: GPL-2.0 #include <linux/pagewalk.h> #include <linux/highmem.h> #include <linux/sched.h> #include <linux/hugetlb.h> #include <linux/mmu_context.h> #include <linux/swap.h> #include <linux/swapops.h> #include <asm/tlbflush.h> #include "internal.h" /* * We want to know the real level where a entry is located ignoring any * folding of levels which may be happening. For example if p4d is folded then * a missing entry found at level 1 (p4d) is actually at level 0 (pgd). */ static int real_depth(int depth) { if (depth == 3 && PTRS_PER_PMD == 1) depth = 2; if (depth == 2 && PTRS_PER_PUD == 1) depth = 1; if (depth == 1 && PTRS_PER_P4D == 1) depth = 0; return depth; } static int walk_pte_range_inner(pte_t *pte, unsigned long addr, unsigned long end, struct mm_walk *walk) { const struct mm_walk_ops *ops = walk->ops; int err = 0; for (;;) { if (ops->install_pte && pte_none(ptep_get(pte))) { pte_t new_pte; err = ops->install_pte(addr, addr + PAGE_SIZE, &new_pte, walk); if (err) break; set_pte_at(walk->mm, addr, pte, new_pte); /* Non-present before, so for arches that need it. */ if (!WARN_ON_ONCE(walk->no_vma)) update_mmu_cache(walk->vma, addr, pte); } else { err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk); if (err) break; } if (addr >= end - PAGE_SIZE) break; addr += PAGE_SIZE; pte++; } return err; } static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { pte_t *pte; int err = 0; spinlock_t *ptl; if (walk->no_vma) { /* * pte_offset_map() might apply user-specific validation. * Indeed, on x86_64 the pmd entries set up by init_espfix_ap() * fit its pmd_bad() check (_PAGE_NX set and _PAGE_RW clear), * and CONFIG_EFI_PGT_DUMP efi_mm goes so far as to walk them. 
*/ if (walk->mm == &init_mm || addr >= TASK_SIZE) pte = pte_offset_kernel(pmd, addr); else pte = pte_offset_map(pmd, addr); if (pte) { err = walk_pte_range_inner(pte, addr, end, walk); if (walk->mm != &init_mm && addr < TASK_SIZE) pte_unmap(pte); } } else { pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); if (pte) { err = walk_pte_range_inner(pte, addr, end, walk); pte_unmap_unlock(pte, ptl); } } if (!pte) walk->action = ACTION_AGAIN; return err; } static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, struct mm_walk *walk) { pmd_t *pmd; unsigned long next; const struct mm_walk_ops *ops = walk->ops; bool has_handler = ops->pte_entry; bool has_install = ops->install_pte; int err = 0; int depth = real_depth(3); pmd = pmd_offset(pud, addr); do { again: next = pmd_addr_end(addr, end); if (pmd_none(*pmd)) { if (has_install) err = __pte_alloc(walk->mm, pmd); else if (ops->pte_hole) err = ops->pte_hole(addr, next, depth, walk); if (err) break; if (!has_install) continue; } walk->action = ACTION_SUBTREE; /* * This implies that each ->pmd_entry() handler * needs to know about pmd_trans_huge() pmds */ if (ops->pmd_entry) err = ops->pmd_entry(pmd, addr, next, walk); if (err) break; if (walk->action == ACTION_AGAIN) goto again; if (walk->action == ACTION_CONTINUE) continue; if (!has_handler) { /* No handlers for lower page tables. */ if (!has_install) continue; /* Nothing to do. */ /* * We are ONLY installing, so avoid unnecessarily * splitting a present huge page. */ if (pmd_present(*pmd) && (pmd_trans_huge(*pmd) || pmd_devmap(*pmd))) continue; } if (walk->vma) split_huge_pmd(walk->vma, pmd, addr); else if (pmd_leaf(*pmd) || !pmd_present(*pmd)) continue; /* Nothing to do. 
*/ err = walk_pte_range(pmd, addr, next, walk); if (err) break; if (walk->action == ACTION_AGAIN) goto again; } while (pmd++, addr = next, addr != end); return err; } static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, struct mm_walk *walk) { pud_t *pud; unsigned long next; const struct mm_walk_ops *ops = walk->ops; bool has_handler = ops->pmd_entry || ops->pte_entry; bool has_install = ops->install_pte; int err = 0; int depth = real_depth(2); pud = pud_offset(p4d, addr); do { again: next = pud_addr_end(addr, end); if (pud_none(*pud)) { if (has_install) err = __pmd_alloc(walk->mm, pud, addr); else if (ops->pte_hole) err = ops->pte_hole(addr, next, depth, walk); if (err) break; if (!has_install) continue; } walk->action = ACTION_SUBTREE; if (ops->pud_entry) err = ops->pud_entry(pud, addr, next, walk); if (err) break; if (walk->action == ACTION_AGAIN) goto again; if (walk->action == ACTION_CONTINUE) continue; if (!has_handler) { /* No handlers for lower page tables. */ if (!has_install) continue; /* Nothing to do. */ /* * We are ONLY installing, so avoid unnecessarily * splitting a present huge page. */ if (pud_present(*pud) && (pud_trans_huge(*pud) || pud_devmap(*pud))) continue; } if (walk->vma) split_huge_pud(walk->vma, pud, addr); else if (pud_leaf(*pud) || !pud_present(*pud)) continue; /* Nothing to do. 
*/
		if (pud_none(*pud))
			goto again;

		err = walk_pmd_range(pud, addr, next, walk);
		if (err)
			break;
	} while (pud++, addr = next, addr != end);

	return err;
}

/*
 * Walk the p4d entries below @pgd that cover [@addr, @end).
 *
 * For every entry for which p4d_none_or_clear_bad() is true, either
 * __pud_alloc() is called (when ->install_pte is set) or the range is
 * reported through ->pte_hole() with this level's depth and then skipped.
 * Otherwise ->p4d_entry() is invoked if present, and the walk descends via
 * walk_pud_range() when a lower-level handler or ->install_pte exists.
 *
 * Returns 0, or the first non-zero value produced by a callback,
 * __pud_alloc() or the lower-level walk.
 */
static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	p4d_t *p4d;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	bool has_handler = ops->pud_entry || ops->pmd_entry || ops->pte_entry;
	bool has_install = ops->install_pte;
	int err = 0;
	int depth = real_depth(1);

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d)) {
			if (has_install)
				err = __pud_alloc(walk->mm, p4d, addr);
			else if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			/* Nothing below a hole unless we are installing. */
			if (!has_install)
				continue;
		}
		if (ops->p4d_entry) {
			err = ops->p4d_entry(p4d, addr, next, walk);
			if (err)
				break;
		}
		if (has_handler || has_install)
			err = walk_pud_range(p4d, addr, next, walk);
		if (err)
			break;
	} while (p4d++, addr = next, addr != end);

	return err;
}

/*
 * Top level of the page table walk over [@addr, @end).
 *
 * Starts from walk->pgd when it is set, otherwise from the pgd of walk->mm.
 * Holes are handled as in walk_p4d_range(), except that __p4d_alloc() is
 * used for installation and ->pte_hole() is called with depth 0.
 *
 * Returns 0, or the first non-zero value produced by a callback,
 * __p4d_alloc() or the lower-level walk.
 */
static int walk_pgd_range(unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	bool has_handler = ops->p4d_entry || ops->pud_entry || ops->pmd_entry ||
		ops->pte_entry;
	bool has_install = ops->install_pte;
	int err = 0;

	if (walk->pgd)
		pgd = walk->pgd + pgd_index(addr);
	else
		pgd = pgd_offset(walk->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd)) {
			if (has_install)
				err = __p4d_alloc(walk->mm, pgd, addr);
			else if (ops->pte_hole)
				err = ops->pte_hole(addr, next, 0, walk);
			if (err)
				break;
			/* Nothing below a hole unless we are installing. */
			if (!has_install)
				continue;
		}
		if (ops->pgd_entry) {
			err = ops->pgd_entry(pgd, addr, next, walk);
			if (err)
				break;
		}
		if (has_handler || has_install)
			err = walk_p4d_range(pgd, addr, next, walk);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	return err;
}

#ifdef CONFIG_HUGETLB_PAGE
/*
 * Return (@addr & huge_page_mask(@h)) + huge_page_size(@h), clamped so that
 * it never exceeds @end.
 */
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
	return boundary < end ? boundary : end;
}

/*
 * Walk [@addr, @end) of the hugetlb vma in walk->vma, one step per
 * hugetlb_entry_end() boundary, between hugetlb_vma_lock_read() and
 * hugetlb_vma_unlock_read().
 *
 * ->hugetlb_entry() is called when hugetlb_walk() returns an entry;
 * otherwise ->pte_hole() (if present) is called with depth -1. The caller
 * in this file only gets here when ->hugetlb_entry is set.
 *
 * Returns 0 or the first non-zero callback result.
 */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct hstate *h = hstate_vma(vma);
	unsigned long next;
	unsigned long hmask = huge_page_mask(h);
	unsigned long sz = huge_page_size(h);
	pte_t *pte;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	hugetlb_vma_lock_read(vma);
	do {
		next = hugetlb_entry_end(h, addr, end);
		pte = hugetlb_walk(vma, addr & hmask, sz);
		if (pte)
			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
		else if (ops->pte_hole)
			err = ops->pte_hole(addr, next, -1, walk);
		if (err)
			break;
	} while (addr = next, addr != end);
	hugetlb_vma_unlock_read(vma);

	return err;
}

#else /* CONFIG_HUGETLB_PAGE */
/* Without CONFIG_HUGETLB_PAGE there is nothing to walk: always returns 0. */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}

#endif /* CONFIG_HUGETLB_PAGE */

/*
 * Decide whether we really walk over the current vma on [@start, @end)
 * or skip it via the returned value. Return 0 if we do walk over the
 * current vma, and return 1 if we skip the vma. Negative values mean
 * error, where we abort the current walk.
 *
 * When ->test_walk() is provided its return value is passed through
 * unchanged.
 */
static int walk_page_test(unsigned long start, unsigned long end,
			struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	if (ops->test_walk)
		return ops->test_walk(start, end, walk);

	/*
	 * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP
	 * range, so we don't walk over it as we do for normal vmas. However,
	 * some callers are interested in handling hole range and they don't
	 * want to just ignore any single address range. Such users certainly
	 * define their ->pte_hole() callbacks, so let's delegate them to handle
	 * vma(VM_PFNMAP).
	 */
	if (vma->vm_flags & VM_PFNMAP) {
		int err = 1;
		if (ops->pte_hole)
			err = ops->pte_hole(start, end, -1, walk);
		/* A zero result from ->pte_hole() still means "skip this vma". */
		return err ? err : 1;
	}
	return 0;
}

/*
 * Walk [@start, @end) inside walk->vma.
 *
 * Rejects ->install_pte on hugetlb vmas with -EINVAL. ->pre_vma() runs
 * first and aborts the walk if it fails (->post_vma() is then not called).
 * Hugetlb vmas are walked only when ->hugetlb_entry is set; all other vmas
 * go through walk_pgd_range(). ->post_vma() runs afterwards regardless of
 * the walk result, and that result is returned.
 */
static int __walk_page_range(unsigned long start, unsigned long end,
			struct mm_walk *walk)
{
	int err = 0;
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;
	bool is_hugetlb = is_vm_hugetlb_page(vma);

	/* We do not support hugetlb PTE installation. */
	if (ops->install_pte && is_hugetlb)
		return -EINVAL;

	if (ops->pre_vma) {
		err = ops->pre_vma(start, end, walk);
		if (err)
			return err;
	}

	if (is_hugetlb) {
		if (ops->hugetlb_entry)
			err = walk_hugetlb_range(start, end, walk);
	} else
		err = walk_pgd_range(start, end, walk);

	if (ops->post_vma)
		ops->post_vma(walk);

	return err;
}

/*
 * Check the mmap lock state required by @walk_lock: mmap_assert_locked()
 * for PGWALK_RDLOCK, mmap_assert_write_locked() for every other mode.
 */
static inline void process_mm_walk_lock(struct mm_struct *mm,
					enum page_walk_lock walk_lock)
{
	if (walk_lock == PGWALK_RDLOCK)
		mmap_assert_locked(mm);
	else
		mmap_assert_write_locked(mm);
}

/*
 * Apply the per-vma part of @walk_lock: vma_start_write() for
 * PGWALK_WRLOCK, vma_assert_write_locked() for PGWALK_WRLOCK_VERIFY, and
 * nothing for PGWALK_RDLOCK. Compiles to an empty function without
 * CONFIG_PER_VMA_LOCK.
 */
static inline void process_vma_walk_lock(struct vm_area_struct *vma,
					 enum page_walk_lock walk_lock)
{
#ifdef CONFIG_PER_VMA_LOCK
	switch (walk_lock) {
	case PGWALK_WRLOCK:
		vma_start_write(vma);
		break;
	case PGWALK_WRLOCK_VERIFY:
		vma_assert_write_locked(vma);
		break;
	case PGWALK_RDLOCK:
		/* PGWALK_RDLOCK is handled by process_mm_walk_lock */
		break;
	}
#endif
}

/*
 * See the comment for walk_page_range(); this performs the heavy lifting of
 * the operation, but sets no restrictions on how the walk proceeds.
 *
 * We usually restrict the ability to install PTEs, but this functionality is
 * available to internal memory management code and provided in mm/internal.h.
*/
int walk_page_range_mm(struct mm_struct *mm, unsigned long start,
		unsigned long end, const struct mm_walk_ops *ops,
		void *private)
{
	int err = 0;
	unsigned long next;
	struct vm_area_struct *vma;
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= mm,
		.private	= private,
	};

	/* An empty or inverted range, or a missing mm, is a caller error. */
	if (start >= end)
		return -EINVAL;

	if (!walk.mm)
		return -EINVAL;

	process_mm_walk_lock(walk.mm, ops->walk_lock);

	vma = find_vma(walk.mm, start);
	do {
		if (!vma) { /* after the last vma */
			walk.vma = NULL;
			next = end;
			if (ops->pte_hole)
				err = ops->pte_hole(start, next, -1, &walk);
		} else if (start < vma->vm_start) { /* outside vma */
			walk.vma = NULL;
			next = min(end, vma->vm_start);
			if (ops->pte_hole)
				err = ops->pte_hole(start, next, -1, &walk);
		} else { /* inside vma */
			process_vma_walk_lock(vma, ops->walk_lock);
			walk.vma = vma;
			next = min(end, vma->vm_end);
			/*
			 * walk.vma keeps the current vma; the local is
			 * advanced now for the next iteration.
			 */
			vma = find_vma(mm, vma->vm_end);

			err = walk_page_test(start, next, &walk);
			if (err > 0) {
				/*
				 * positive return values are purely for
				 * controlling the pagewalk, so should never
				 * be passed to the callers.
				 */
				err = 0;
				/* Jumps to the loop condition: start = next. */
				continue;
			}
			if (err < 0)
				break;
			err = __walk_page_range(start, next, &walk);
		}
		if (err)
			break;
	} while (start = next, start < end);
	return err;
}

/*
 * Determine if the walk operations specified are permitted to be used for a
 * page table walk.
 *
 * This check is performed on all functions which are parameterised by walk
 * operations and exposed in include/linux/pagewalk.h.
 *
 * Internal memory management code can use the walk_page_range_mm() function to
 * be able to use all page walking operations.
 *
 * Returns true when @ops may be used, false when it sets ->install_pte.
 */
static bool check_ops_valid(const struct mm_walk_ops *ops)
{
	/*
	 * The installation of PTEs is solely under the control of memory
	 * management logic and subject to many subtle locking, security and
	 * cache considerations so we cannot permit other users to do so, and
	 * certainly not for exported symbols.
	 */
	if (ops->install_pte)
		return false;

	return true;
}

/**
 * walk_page_range - walk page table with caller specific callbacks
 * @mm:		mm_struct representing the target process of page table walk
 * @start:	start address of the virtual address range
 * @end:	end address of the virtual address range
 * @ops:	operation to call during the walk
 * @private:	private data for callbacks' usage
 *
 * Recursively walk the page table tree of the process represented by @mm
 * within the virtual address range [@start, @end). During walking, we can do
 * some caller-specific works for each entry, by setting up pmd_entry(),
 * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these
 * callbacks, the associated entries/pages are just ignored.
 * The return values of these callbacks are commonly defined like below:
 *
 *  - 0  : succeeded to handle the current entry, and if you don't reach the
 *         end address yet, continue to walk.
 *  - >0 : succeeded to handle the current entry, and return to the caller
 *         with caller specific value.
 *  - <0 : failed to handle the current entry, and return to the caller
 *         with error code.
 *
 * Before starting to walk page table, some callers want to check whether
 * they really want to walk over the current vma, typically by checking
 * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
 * purpose.
 *
 * If operations need to be staged before and committed after a vma is walked,
 * there are two callbacks, pre_vma() and post_vma(). Note that post_vma(),
 * since it is intended to handle commit-type operations, can't return any
 * errors.
 *
 * struct mm_walk keeps current values of some common data like vma and pmd,
 * which are useful for the access from callbacks. If you want to pass some
 * caller-specific data to callbacks, @private should be helpful.
 *
 * Locking:
 *   Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_lock,
 *   because these functions traverse the vma list and/or access vma data.
*/
int walk_page_range(struct mm_struct *mm, unsigned long start,
		unsigned long end, const struct mm_walk_ops *ops,
		void *private)
{
	/* Exported entry point: refuse ops that check_ops_valid() rejects. */
	if (!check_ops_valid(ops))
		return -EINVAL;

	return walk_page_range_mm(mm, start, end, ops, private);
}

/**
 * walk_page_range_novma - walk a range of pagetables not backed by a vma
 * @mm:		mm_struct representing the target process of page table walk
 * @start:	start address of the virtual address range
 * @end:	end address of the virtual address range
 * @ops:	operation to call during the walk
 * @pgd:	pgd to walk if different from mm->pgd
 * @private:	private data for callbacks' usage
 *
 * Similar to walk_page_range() but can walk any page tables even if they are
 * not backed by VMAs. Because 'unusual' entries may be walked this function
 * will also not lock the PTEs for the pte_entry() callback. This is useful for
 * walking the kernel pages tables or page tables for firmware.
 *
 * Note: Be careful when walking the kernel page tables: the caller may need
 * to take other effective approaches (mmap lock may be insufficient) to
 * prevent the intermediate kernel page tables belonging to the specified
 * address range from being freed (e.g. memory hot-remove).
 *
 * Return: -EINVAL if the range is empty, @mm is NULL or @ops is rejected by
 * check_ops_valid(); otherwise the result of walk_pgd_range().
 */
int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
			  unsigned long end, const struct mm_walk_ops *ops,
			  pgd_t *pgd,
			  void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= mm,
		.pgd		= pgd,
		.private	= private,
		.no_vma		= true
	};

	if (start >= end || !walk.mm)
		return -EINVAL;
	if (!check_ops_valid(ops))
		return -EINVAL;

	/*
	 * 1) For walking the user virtual address space:
	 *
	 * The mmap lock protects the page walker from changes to the page
	 * tables during the walk.  However a read lock is insufficient to
	 * protect those areas which don't have a VMA as munmap() detaches
	 * the VMAs before downgrading to a read lock and actually tearing
	 * down PTEs/page tables. In which case, the mmap write lock should
	 * be held.
	 *
	 * 2) For walking the kernel virtual address space:
	 *
	 * The kernel intermediate page tables are usually not freed, so
	 * the mmap read lock is sufficient. But there are some exceptions.
	 * E.g. memory hot-remove. In which case, the mmap lock is insufficient
	 * to prevent the intermediate kernel pages tables belonging to the
	 * specified address range from being freed. The caller should take
	 * other actions to prevent this race.
	 */
	if (mm == &init_mm)
		mmap_assert_locked(walk.mm);
	else
		mmap_assert_write_locked(walk.mm);

	return walk_pgd_range(start, end, &walk);
}

/*
 * walk_page_range_vma - walk [@start, @end) inside a single, known vma
 *
 * The range must be non-empty and lie entirely within
 * [@vma->vm_start, @vma->vm_end), @vma->vm_mm must be set and @ops must pass
 * check_ops_valid(); otherwise -EINVAL is returned. walk_page_test() is not
 * consulted on this path. Returns the result of __walk_page_range().
 */
int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
			unsigned long end, const struct mm_walk_ops *ops,
			void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= vma->vm_mm,
		.vma		= vma,
		.private	= private,
	};

	if (start >= end || !walk.mm)
		return -EINVAL;
	if (start < vma->vm_start || end > vma->vm_end)
		return -EINVAL;
	if (!check_ops_valid(ops))
		return -EINVAL;

	process_mm_walk_lock(walk.mm, ops->walk_lock);
	process_vma_walk_lock(vma, ops->walk_lock);
	return __walk_page_range(start, end, &walk);
}

/*
 * walk_page_vma - walk the whole of @vma, i.e. [vm_start, vm_end)
 *
 * Returns -EINVAL when @vma->vm_mm is NULL or @ops fails check_ops_valid();
 * otherwise the result of __walk_page_range(). walk_page_test() is not
 * consulted on this path.
 */
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
		void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= vma->vm_mm,
		.vma		= vma,
		.private	= private,
	};

	if (!walk.mm)
		return -EINVAL;
	if (!check_ops_valid(ops))
		return -EINVAL;

	process_mm_walk_lock(walk.mm, ops->walk_lock);
	process_vma_walk_lock(vma, ops->walk_lock);
	return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
}

/**
 * walk_page_mapping - walk all memory areas mapped into a struct address_space.
 * @mapping: Pointer to the struct address_space
 * @first_index: First page offset in the address_space
 * @nr: Number of incremental page offsets to cover
 * @ops:	operation to call during the walk
 * @private:	private data for callbacks' usage
 *
 * This function walks all memory areas mapped into a struct address_space.
* The walk is limited to only the given page-size index range, but if
 * the index boundaries cross a huge page-table entry, that entry will be
 * included.
 *
 * Also see walk_page_range() for additional information.
 *
 * Locking:
 *   This function can't require that the struct mm_struct::mmap_lock is held,
 *   since @mapping may be mapped by multiple processes. Instead
 *   @mapping->i_mmap_rwsem must be held. This might have implications in the
 *   callbacks, and it's up to the caller to ensure that the
 *   struct mm_struct::mmap_lock is not needed.
 *
 *   Also this means that a caller can't rely on the struct
 *   vm_area_struct::vm_flags to be constant across a call,
 *   except for immutable flags. Callers requiring this shouldn't use
 *   this function.
 *
 * Return: 0 on success, negative error code on failure, positive number on
 * caller defined premature termination.
 */
int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
		      pgoff_t nr, const struct mm_walk_ops *ops,
		      void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.private	= private,
	};
	struct vm_area_struct *vma;
	/* vma begin/end and clipped begin/end, all in page-offset units. */
	pgoff_t vba, vea, cba, cea;
	unsigned long start_addr, end_addr;
	int err = 0;

	if (!check_ops_valid(ops))
		return -EINVAL;

	lockdep_assert_held(&mapping->i_mmap_rwsem);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
				  first_index + nr - 1) {
		/* Clip to the vma */
		vba = vma->vm_pgoff;
		vea = vba + vma_pages(vma);
		cba = first_index;
		cba = max(cba, vba);
		cea = first_index + nr;
		cea = min(cea, vea);

		/* Convert the clipped page offsets to addresses in this vma. */
		start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
		end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
		if (start_addr >= end_addr)
			continue;

		walk.vma = vma;
		walk.mm = vma->vm_mm;

		/*
		 * NOTE(review): a positive ("skip this vma") result ends the
		 * whole loop here, whereas walk_page_range_mm() moves on to
		 * the next vma with "continue" - confirm this is intended.
		 */
		err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
		if (err > 0) {
			err = 0;
			break;
		} else if (err < 0)
			break;

		err = __walk_page_range(start_addr, end_addr, &walk);
		if (err)
			break;
	}

	return err;
}

/**
 * folio_walk_start - walk the page tables to a folio
 * @fw: filled with information on success.
 * @vma: the VMA.
* @addr: the virtual address to use for the page table walk.
 * @flags: flags modifying which folios to walk to.
 *
 * Walk the page tables using @addr in a given @vma to a mapped folio and
 * return the folio, making sure that the page table entry referenced by
 * @addr cannot change until folio_walk_end() was called.
 *
 * As default, this function returns only folios that are not special (e.g., not
 * the zeropage) and never returns folios that are supposed to be ignored by the
 * VM as documented by vm_normal_page(). If requested, zeropages will be
 * returned as well.
 *
 * As default, this function only considers present page table entries.
 * If requested, it will also consider migration entries.
 *
 * If this function returns NULL it might either indicate "there is nothing" or
 * "there is nothing suitable".
 *
 * On success, @fw is filled and the function returns the folio while the PTL
 * is still held and folio_walk_end() must be called to clean up,
 * releasing any held locks. The returned folio must *not* be used after the
 * call to folio_walk_end(), unless a short-term folio reference is taken before
 * that call.
 *
 * @fw->page will correspond to the page that is effectively referenced by
 * @addr. However, for migration entries and shared zeropages @fw->page is
 * set to NULL. Note that large folios might be mapped by multiple page table
 * entries, and this function will always only lookup a single entry as
 * specified by @addr, which might or might not cover more than a single page of
 * the returned folio.
 *
 * This function must *not* be used as a naive replacement for
 * get_user_pages() / pin_user_pages(), especially not to perform DMA or
 * to carelessly modify page content. This function may *only* be used to grab
 * short-term folio references, never to grab long-term folio references.
 *
 * Using the page table entry pointers in @fw for reading or modifying the
 * entry should be avoided where possible: however, there might be valid
 * use cases.
 *
 * WARNING: Modifying page table entries in hugetlb VMAs requires a lot of care.
 * For example, PMD page table sharing might require prior unsharing. Also,
 * logical hugetlb entries might span multiple physical page table entries,
 * which *must* be modified in a single operation (set_huge_pte_at(),
 * huge_ptep_set_*, ...). Note that the page table entry stored in @fw might
 * not correspond to the first physical entry of a logical hugetlb entry.
 *
 * The mmap lock must be held in read mode.
 *
 * Return: folio pointer on success, otherwise NULL.
 */
struct folio *folio_walk_start(struct folio_walk *fw,
		struct vm_area_struct *vma, unsigned long addr,
		folio_walk_flags_t flags)
{
	unsigned long entry_size;
	/* Cleared for zeropages and migration entries: fw->page stays NULL. */
	bool expose_page = true;
	struct page *page;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;
	pte_t *ptep, pte;
	spinlock_t *ptl;
	pgd_t *pgdp;
	p4d_t *p4dp;

	mmap_assert_locked(vma->vm_mm);
	vma_pgtable_walk_begin(vma);

	if (WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end))
		goto not_found;

	pgdp = pgd_offset(vma->vm_mm, addr);
	if (pgd_none_or_clear_bad(pgdp))
		goto not_found;

	p4dp = p4d_offset(pgdp, addr);
	if (p4d_none_or_clear_bad(p4dp))
		goto not_found;

	pudp = pud_offset(p4dp, addr);
	pud = pudp_get(pudp);
	if (pud_none(pud))
		goto not_found;
	if (IS_ENABLED(CONFIG_PGTABLE_HAS_HUGE_LEAVES) &&
	    (!pud_present(pud) || pud_leaf(pud))) {
		ptl = pud_lock(vma->vm_mm, pudp);
		/* Re-read the entry now that the lock is held. */
		pud = pudp_get(pudp);

		entry_size = PUD_SIZE;
		fw->level = FW_LEVEL_PUD;
		fw->pudp = pudp;
		fw->pud = pud;

		/*
		 * TODO: FW_MIGRATION support for PUD migration entries
		 * once there are relevant users.
		 */
		if (!pud_present(pud) || pud_devmap(pud) || pud_special(pud)) {
			spin_unlock(ptl);
			goto not_found;
		} else if (!pud_leaf(pud)) {
			/* Not a leaf after all: continue at the PMD level. */
			spin_unlock(ptl);
			goto pmd_table;
		}
		/*
		 * TODO: vm_normal_page_pud() will be handy once we want to
		 * support PUD mappings in VM_PFNMAP|VM_MIXEDMAP VMAs.
		 */
		page = pud_page(pud);
		goto found;
	}

pmd_table:
	VM_WARN_ON_ONCE(!pud_present(pud) || pud_leaf(pud));
	pmdp = pmd_offset(pudp, addr);
	pmd = pmdp_get_lockless(pmdp);
	if (pmd_none(pmd))
		goto not_found;
	if (IS_ENABLED(CONFIG_PGTABLE_HAS_HUGE_LEAVES) &&
	    (!pmd_present(pmd) || pmd_leaf(pmd))) {
		ptl = pmd_lock(vma->vm_mm, pmdp);
		/* Re-read the entry now that the lock is held. */
		pmd = pmdp_get(pmdp);

		entry_size = PMD_SIZE;
		fw->level = FW_LEVEL_PMD;
		fw->pmdp = pmdp;
		fw->pmd = pmd;

		if (pmd_none(pmd)) {
			spin_unlock(ptl);
			goto not_found;
		} else if (pmd_present(pmd) && !pmd_leaf(pmd)) {
			/* Not a leaf after all: continue at the PTE level. */
			spin_unlock(ptl);
			goto pte_table;
		} else if (pmd_present(pmd)) {
			page = vm_normal_page_pmd(vma, addr, pmd);
			if (page) {
				goto found;
			} else if ((flags & FW_ZEROPAGE) &&
				    is_huge_zero_pmd(pmd)) {
				page = pfn_to_page(pmd_pfn(pmd));
				expose_page = false;
				goto found;
			}
		} else if ((flags & FW_MIGRATION) &&
			   is_pmd_migration_entry(pmd)) {
			swp_entry_t entry = pmd_to_swp_entry(pmd);

			page = pfn_swap_entry_to_page(entry);
			expose_page = false;
			goto found;
		}
		/* Nothing suitable at this level. */
		spin_unlock(ptl);
		goto not_found;
	}

pte_table:
	VM_WARN_ON_ONCE(!pmd_present(pmd) || pmd_leaf(pmd));
	ptep = pte_offset_map_lock(vma->vm_mm, pmdp, addr, &ptl);
	if (!ptep)
		goto not_found;
	pte = ptep_get(ptep);

	entry_size = PAGE_SIZE;
	fw->level = FW_LEVEL_PTE;
	fw->ptep = ptep;
	fw->pte = pte;

	if (pte_present(pte)) {
		page = vm_normal_page(vma, addr, pte);
		if (page)
			goto found;
		if ((flags & FW_ZEROPAGE) &&
		    is_zero_pfn(pte_pfn(pte))) {
			page = pfn_to_page(pte_pfn(pte));
			expose_page = false;
			goto found;
		}
	} else if (!pte_none(pte)) {
		swp_entry_t entry = pte_to_swp_entry(pte);

		if ((flags & FW_MIGRATION) &&
		    is_migration_entry(entry)) {
			page = pfn_swap_entry_to_page(entry);
			expose_page = false;
			goto found;
		}
	}
	pte_unmap_unlock(ptep, ptl);
not_found:
	/* Failure: every page table lock taken above has been dropped. */
	vma_pgtable_walk_end(vma);
	return NULL;
found:
	/* Success: return with ptl still held; it is recorded in fw->ptl. */
	if (expose_page)
		/* Note: Offset from the mapped page, not the folio start. */
		fw->page = nth_page(page, (addr & (entry_size - 1)) >> PAGE_SHIFT);
	else
		fw->page = NULL;
	fw->ptl = ptl;
	return page_folio(page);
}
74 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 // SPDX-License-Identifier: GPL-2.0-only /* * Common code for control of lockd and nfsv4 grace periods. * * Transplanted from lockd code */ #include <linux/module.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/fs.h> #include <linux/filelock.h> static unsigned int grace_net_id; static DEFINE_SPINLOCK(grace_lock); /** * locks_start_grace * @net: net namespace that this lock manager belongs to * @lm: who this grace period is for * * A grace period is a period during which locks should not be given * out. Currently grace periods are only enforced by the two lock * managers (lockd and nfsd), using the locks_in_grace() function to * check when they are in a grace period. * * This function is called to start a grace period. */ void locks_start_grace(struct net *net, struct lock_manager *lm) { struct list_head *grace_list = net_generic(net, grace_net_id); spin_lock(&grace_lock); if (list_empty(&lm->list)) list_add(&lm->list, grace_list); else WARN(1, "double list_add attempt detected in net %x %s\n", net->ns.inum, (net == &init_net) ? "(init_net)" : ""); spin_unlock(&grace_lock); } EXPORT_SYMBOL_GPL(locks_start_grace); /** * locks_end_grace * @lm: who this grace period is for * * Call this function to state that the given lock manager is ready to * resume regular locking. The grace period will not end until all lock * managers that called locks_start_grace() also call locks_end_grace(). 
* Note that callers count on it being safe to call this more than once, * and the second call should be a no-op. */ void locks_end_grace(struct lock_manager *lm) { spin_lock(&grace_lock); list_del_init(&lm->list); spin_unlock(&grace_lock); } EXPORT_SYMBOL_GPL(locks_end_grace); static bool __state_in_grace(struct net *net, bool open) { struct list_head *grace_list = net_generic(net, grace_net_id); struct lock_manager *lm; if (!open) return !list_empty(grace_list); spin_lock(&grace_lock); list_for_each_entry(lm, grace_list, list) { if (lm->block_opens) { spin_unlock(&grace_lock); return true; } } spin_unlock(&grace_lock); return false; } /** * locks_in_grace * @net: network namespace * * Lock managers call this function to determine when it is OK for them * to answer ordinary lock requests, and when they should accept only * lock reclaims. */ bool locks_in_grace(struct net *net) { return __state_in_grace(net, false); } EXPORT_SYMBOL_GPL(locks_in_grace); bool opens_in_grace(struct net *net) { return __state_in_grace(net, true); } EXPORT_SYMBOL_GPL(opens_in_grace); static int __net_init grace_init_net(struct net *net) { struct list_head *grace_list = net_generic(net, grace_net_id); INIT_LIST_HEAD(grace_list); return 0; } static void __net_exit grace_exit_net(struct net *net) { struct list_head *grace_list = net_generic(net, grace_net_id); WARN_ONCE(!list_empty(grace_list), "net %x %s: grace_list is not empty\n", net->ns.inum, __func__); } static struct pernet_operations grace_net_ops = { .init = grace_init_net, .exit = grace_exit_net, .id = &grace_net_id, .size = sizeof(struct list_head), }; static int __init init_grace(void) { return register_pernet_subsys(&grace_net_ops); } static void __exit exit_grace(void) { unregister_pernet_subsys(&grace_net_ops); } MODULE_AUTHOR("Jeff Layton <jlayton@primarydata.com>"); MODULE_DESCRIPTION("NFS client and server infrastructure"); MODULE_LICENSE("GPL"); module_init(init_grace) module_exit(exit_grace)
67 67 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 #include <linux/export.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/vmalloc.h> /* Allocate an array of spinlocks to be accessed by a hash. Two arguments * indicate the number of elements to allocate in the array. max_size * gives the maximum number of elements to allocate. cpu_mult gives * the number of locks per CPU to allocate. The size is rounded up * to a power of 2 to be suitable as a hash table. */ int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask, size_t max_size, unsigned int cpu_mult, gfp_t gfp, const char *name, struct lock_class_key *key) { spinlock_t *tlocks = NULL; unsigned int i, size; #if defined(CONFIG_PROVE_LOCKING) unsigned int nr_pcpus = 2; #else unsigned int nr_pcpus = num_possible_cpus(); #endif if (cpu_mult) { nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL); size = min_t(unsigned int, nr_pcpus * cpu_mult, max_size); } else { size = max_size; } if (sizeof(spinlock_t) != 0) { tlocks = kvmalloc_array(size, sizeof(spinlock_t), gfp); if (!tlocks) return -ENOMEM; for (i = 0; i < size; i++) { spin_lock_init(&tlocks[i]); lockdep_init_map(&tlocks[i].dep_map, name, key, 0); } } *locks = tlocks; *locks_mask = size - 1; return 0; } EXPORT_SYMBOL(__alloc_bucket_spinlocks); void free_bucket_spinlocks(spinlock_t *locks) { kvfree(locks); } EXPORT_SYMBOL(free_bucket_spinlocks);
74 74 74 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2002, 2004 * Copyright (c) 2002 Intel Corp. * * This file is part of the SCTP kernel implementation * * Sysctl related interfaces for SCTP. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Mingqin Liu <liuming@us.ibm.com> * Jon Grimm <jgrimm@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com> * Ryan Layer <rmlayer@us.ibm.com> * Sridhar Samudrala <sri@us.ibm.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <net/sctp/structs.h> #include <net/sctp/sctp.h> #include <linux/sysctl.h> static int timer_max = 86400000; /* ms in one day */ static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = SCTP_SCOPE_POLICY_MAX; static int rwnd_scale_max = 16; static int rto_alpha_min = 0; static int rto_beta_min = 0; static int rto_alpha_max = 1000; static int rto_beta_max = 1000; static int pf_expose_max = SCTP_PF_EXPOSE_MAX; static int ps_retrans_max = SCTP_PS_RETRANS_MAX; static int udp_port_max = 65535; static unsigned long max_autoclose_min = 0; static unsigned long max_autoclose_max = (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX) ? 
UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ; static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_rto_min(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_rto_max(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_alpha_beta(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_auth(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_probe_interval(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table sctp_table[] = { { .procname = "sctp_mem", .data = &sysctl_sctp_mem, .maxlen = sizeof(sysctl_sctp_mem), .mode = 0644, .proc_handler = proc_doulongvec_minmax }, { .procname = "sctp_rmem", .data = &sysctl_sctp_rmem, .maxlen = sizeof(sysctl_sctp_rmem), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "sctp_wmem", .data = &sysctl_sctp_wmem, .maxlen = sizeof(sysctl_sctp_wmem), .mode = 0644, .proc_handler = proc_dointvec, }, }; /* The following index defines are used in sctp_sysctl_net_register(). * If you add new items to the sctp_net_table, please ensure that * the index values of these defines hold the same meaning indicated by * their macro names when they appear in sctp_net_table. 
*/ #define SCTP_RTO_MIN_IDX 0 #define SCTP_RTO_MAX_IDX 1 #define SCTP_PF_RETRANS_IDX 2 #define SCTP_PS_RETRANS_IDX 3 static struct ctl_table sctp_net_table[] = { [SCTP_RTO_MIN_IDX] = { .procname = "rto_min", .data = &init_net.sctp.rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_sctp_do_rto_min, .extra1 = SYSCTL_ONE, .extra2 = &init_net.sctp.rto_max }, [SCTP_RTO_MAX_IDX] = { .procname = "rto_max", .data = &init_net.sctp.rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_sctp_do_rto_max, .extra1 = &init_net.sctp.rto_min, .extra2 = &timer_max }, [SCTP_PF_RETRANS_IDX] = { .procname = "pf_retrans", .data = &init_net.sctp.pf_retrans, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &init_net.sctp.ps_retrans, }, [SCTP_PS_RETRANS_IDX] = { .procname = "ps_retrans", .data = &init_net.sctp.ps_retrans, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &init_net.sctp.pf_retrans, .extra2 = &ps_retrans_max, }, { .procname = "rto_initial", .data = &init_net.sctp.rto_initial, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = &timer_max }, { .procname = "rto_alpha_exp_divisor", .data = &init_net.sctp.rto_alpha, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_alpha_beta, .extra1 = &rto_alpha_min, .extra2 = &rto_alpha_max, }, { .procname = "rto_beta_exp_divisor", .data = &init_net.sctp.rto_beta, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_alpha_beta, .extra1 = &rto_beta_min, .extra2 = &rto_beta_max, }, { .procname = "max_burst", .data = &init_net.sctp.max_burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, { .procname = "cookie_preserve_enable", .data = &init_net.sctp.cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = 
proc_dointvec, }, { .procname = "cookie_hmac_alg", .data = &init_net.sctp.sctp_hmac_alg, .maxlen = 8, .mode = 0644, .proc_handler = proc_sctp_do_hmac_alg, }, { .procname = "valid_cookie_life", .data = &init_net.sctp.valid_cookie_life, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = &timer_max }, { .procname = "sack_timeout", .data = &init_net.sctp.sack_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, { .procname = "hb_interval", .data = &init_net.sctp.hb_interval, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = &timer_max }, { .procname = "association_max_retrans", .data = &init_net.sctp.max_retrans_association, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, { .procname = "path_max_retrans", .data = &init_net.sctp.max_retrans_path, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, { .procname = "max_init_retransmits", .data = &init_net.sctp.max_retrans_init, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, { .procname = "sndbuf_policy", .data = &init_net.sctp.sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "rcvbuf_policy", .data = &init_net.sctp.rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "default_auto_asconf", .data = &init_net.sctp.default_auto_asconf, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "addip_enable", .data = &init_net.sctp.addip_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "addip_noauth_enable", .data = 
&init_net.sctp.addip_noauth, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "prsctp_enable", .data = &init_net.sctp.prsctp_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "reconf_enable", .data = &init_net.sctp.reconf_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "auth_enable", .data = &init_net.sctp.auth_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_auth, }, { .procname = "intl_enable", .data = &init_net.sctp.intl_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ecn_enable", .data = &init_net.sctp.ecn_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "plpmtud_probe_interval", .data = &init_net.sctp.probe_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_probe_interval, }, { .procname = "udp_port", .data = &init_net.sctp.udp_port, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_udp_port, .extra1 = SYSCTL_ZERO, .extra2 = &udp_port_max, }, { .procname = "encap_port", .data = &init_net.sctp.encap_port, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &udp_port_max, }, { .procname = "addr_scope_policy", .data = &init_net.sctp.scope_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &addr_scope_max, }, { .procname = "rwnd_update_shift", .data = &init_net.sctp.rwnd_upd_shift, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = &rwnd_scale_max, }, { .procname = "max_autoclose", .data = &init_net.sctp.max_autoclose, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &proc_doulongvec_minmax, .extra1 = &max_autoclose_min, .extra2 = &max_autoclose_max, }, #ifdef CONFIG_NET_L3_MASTER_DEV { .procname = 
"l3mdev_accept", .data = &init_net.sctp.l3mdev_accept, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, #endif { .procname = "pf_enable", .data = &init_net.sctp.pf_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "pf_expose", .data = &init_net.sctp.pf_expose, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &pf_expose_max, }, }; static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; struct ctl_table tbl; bool changed = false; char *none = "none"; char tmp[8] = {0}; int ret; memset(&tbl, 0, sizeof(struct ctl_table)); if (write) { tbl.data = tmp; tbl.maxlen = sizeof(tmp); } else { tbl.data = net->sctp.sctp_hmac_alg ? : none; tbl.maxlen = strlen(tbl.data); } ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { #ifdef CONFIG_CRYPTO_MD5 if (!strncmp(tmp, "md5", 3)) { net->sctp.sctp_hmac_alg = "md5"; changed = true; } #endif #ifdef CONFIG_CRYPTO_SHA1 if (!strncmp(tmp, "sha1", 4)) { net->sctp.sctp_hmac_alg = "sha1"; changed = true; } #endif if (!strncmp(tmp, "none", 4)) { net->sctp.sctp_hmac_alg = NULL; changed = true; } if (!changed) ret = -EINVAL; } return ret; } static int proc_sctp_do_rto_min(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; unsigned int min = *(unsigned int *) ctl->extra1; unsigned int max = *(unsigned int *) ctl->extra2; struct ctl_table tbl; int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.rto_min; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { if (new_value > max || new_value < min) return -EINVAL; net->sctp.rto_min = new_value; 
} return ret; } static int proc_sctp_do_rto_max(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; unsigned int min = *(unsigned int *) ctl->extra1; unsigned int max = *(unsigned int *) ctl->extra2; struct ctl_table tbl; int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.rto_max; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { if (new_value > max || new_value < min) return -EINVAL; net->sctp.rto_max = new_value; } return ret; } static int proc_sctp_do_alpha_beta(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { if (write) pr_warn_once("Changing rto_alpha or rto_beta may lead to " "suboptimal rtt/srtt estimations!\n"); return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); } static int proc_sctp_do_auth(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; struct ctl_table tbl; int new_value, ret; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.auth_enable; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { struct sock *sk = net->sctp.ctl_sock; net->sctp.auth_enable = new_value; /* Update the value in the control socket */ lock_sock(sk); sctp_sk(sk)->ep->auth_enable = new_value; release_sock(sk); } return ret; } static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; unsigned int min = *(unsigned int *)ctl->extra1; unsigned int max = *(unsigned int *)ctl->extra2; struct ctl_table tbl; int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else 
tbl.data = &net->sctp.udp_port; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { struct sock *sk = net->sctp.ctl_sock; if (new_value > max || new_value < min) return -EINVAL; net->sctp.udp_port = new_value; sctp_udp_sock_stop(net); if (new_value) { ret = sctp_udp_sock_start(net); if (ret) net->sctp.udp_port = 0; } /* Update the value in the control socket */ lock_sock(sk); sctp_sk(sk)->udp_port = htons(net->sctp.udp_port); release_sock(sk); } return ret; } static int proc_sctp_do_probe_interval(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; struct ctl_table tbl; int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.probe_interval; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { if (new_value && new_value < SCTP_PROBE_TIMER_MIN) return -EINVAL; net->sctp.probe_interval = new_value; } return ret; } int sctp_sysctl_net_register(struct net *net) { size_t table_size = ARRAY_SIZE(sctp_net_table); struct ctl_table *table; int i; table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); if (!table) return -ENOMEM; for (i = 0; i < table_size; i++) table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; table[SCTP_RTO_MIN_IDX].extra2 = &net->sctp.rto_max; table[SCTP_RTO_MAX_IDX].extra1 = &net->sctp.rto_min; table[SCTP_PF_RETRANS_IDX].extra2 = &net->sctp.ps_retrans; table[SCTP_PS_RETRANS_IDX].extra1 = &net->sctp.pf_retrans; net->sctp.sysctl_header = register_net_sysctl_sz(net, "net/sctp", table, table_size); if (net->sctp.sysctl_header == NULL) { kfree(table); return -ENOMEM; } return 0; } void sctp_sysctl_net_unregister(struct net *net) { const struct ctl_table *table; table = net->sctp.sysctl_header->ctl_table_arg; unregister_net_sysctl_table(net->sctp.sysctl_header); kfree(table); } static struct 
ctl_table_header *sctp_sysctl_header; /* Sysctl registration. */ void sctp_sysctl_register(void) { sctp_sysctl_header = register_net_sysctl(&init_net, "net/sctp", sctp_table); } /* Sysctl deregistration. */ void sctp_sysctl_unregister(void) { unregister_net_sysctl_table(sctp_sysctl_header); }
4 4 11 7 1 1 9 9 9 1 1 1 1 2332 1 1937 139 4 7 7 899 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 
513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Generic nexthop implementation
 *
 * Copyright (c) 2017-19 Cumulus Networks
 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
 */

#ifndef __LINUX_NEXTHOP_H
#define __LINUX_NEXTHOP_H

#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/route.h>
#include <linux/types.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/netlink.h>

#define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK

struct nexthop;

/* Parameters describing a nexthop (or nexthop group) to be configured. */
struct nh_config {
	u32		nh_id;

	u8		nh_family;
	u8		nh_protocol;
	u8		nh_blackhole;
	u8		nh_fdb;
	u32		nh_flags;

	int		nh_ifindex;
	struct net_device	*dev;

	union {
		__be32		ipv4;
		struct in6_addr	ipv6;
	} gw;

	struct nlattr	*nh_grp;
	u16		nh_grp_type;
	u16		nh_grp_res_num_buckets;
	unsigned long	nh_grp_res_idle_timer;
	unsigned long	nh_grp_res_unbalanced_timer;
	bool		nh_grp_res_has_num_buckets;
	bool		nh_grp_res_has_idle_timer;
	bool		nh_grp_res_has_unbalanced_timer;

	bool		nh_hw_stats;

	struct nlattr	*nh_encap;
	u16		nh_encap_type;

	u32		nlflags;
	struct nl_info	nlinfo;
};

/*
 * Per-nexthop forwarding data for a non-group nexthop.  The anonymous union
 * overlays the common part with the IPv4 and IPv6 specific structures.
 */
struct nh_info {
	struct hlist_node	dev_hash;    /* entry on netns devhash */
	struct nexthop		*nh_parent;

	u8			family;
	bool			reject_nh;
	bool			fdb_nh;

	union {
		struct fib_nh_common	fib_nhc;
		struct fib_nh		fib_nh;
		struct fib6_nh		fib6_nh;
	};
};

/* One bucket of a resilient nexthop group table. */
struct nh_res_bucket {
	struct nh_grp_entry __rcu *nh_entry;
	atomic_long_t		used_time;
	unsigned long		migrated_time;
	bool			occupied;
	u8			nh_flags;
};

/* Bucket table of a resilient nexthop group. */
struct nh_res_table {
	struct net		*net;
	u32			nhg_id;
	struct delayed_work	upkeep_dw;

	/* List of NHGEs that have too few buckets ("uw" for underweight).
	 * Reclaimed buckets will be given to entries in this list.
	 */
	struct list_head	uw_nh_entries;
	unsigned long		unbalanced_since;

	u32			idle_timer;
	u32			unbalanced_timer;

	u16			num_nh_buckets;
	struct nh_res_bucket	nh_buckets[] __counted_by(num_nh_buckets);
};

/* Software packet counter for one group entry. */
struct nh_grp_entry_stats {
	u64_stats_t packets;
	struct u64_stats_sync syncp;
};

/*
 * One member of a nexthop group.  The union carries the state specific to
 * the hash-threshold ("hthr") or resilient ("res") group flavour.
 */
struct nh_grp_entry {
	struct nexthop	*nh;
	struct nh_grp_entry_stats __percpu	*stats;
	u16		weight;

	union {
		struct {
			atomic_t	upper_bound;
		} hthr;
		struct {
			/* Member on uw_nh_entries. */
			struct list_head	uw_nh_entry;

			u16			count_buckets;
			u16			wants_buckets;
		} res;
	};

	struct list_head nh_list;
	struct nexthop	*nh_parent;  /* nexthop of group with this entry */
	u64		packets_hw;
};

/* A nexthop group: flags plus a flexible array of num_nh entries. */
struct nh_group {
	struct nh_group		*spare; /* spare group for removals */
	u16			num_nh;
	bool			is_multipath;
	bool			hash_threshold;
	bool			resilient;
	bool			fdb_nh;
	bool			has_v4;
	bool			hw_stats;

	struct nh_res_table __rcu *res_table;
	struct nh_grp_entry	nh_entries[] __counted_by(num_nh);
};

/*
 * A nexthop object.  is_group selects which member of the trailing union
 * (nh_info or nh_grp) is valid; both are RCU-protected pointers.
 */
struct nexthop {
	struct rb_node		rb_node;    /* entry on netns rbtree */
	struct list_head	fi_list;    /* v4 entries using nh */
	struct list_head	f6i_list;   /* v6 entries using nh */
	struct list_head        fdb_list;   /* fdb entries using this nh */
	struct list_head	grp_list;   /* nh group entries using this nh */
	struct net		*net;

	u32			id;

	u8			protocol;   /* app managing this nh */
	u8			nh_flags;
	bool			is_group;

	refcount_t		refcnt;
	struct rcu_head		rcu;

	union {
		struct nh_info	__rcu *nh_info;
		struct nh_group __rcu *nh_grp;
	};
};

/* Events delivered on the nexthop notifier chain. */
enum nexthop_event_type {
	NEXTHOP_EVENT_DEL,
	NEXTHOP_EVENT_REPLACE,
	NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
	NEXTHOP_EVENT_BUCKET_REPLACE,
	NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
};

/* Selects which member of the union in struct nh_notifier_info is used. */
enum nh_notifier_info_type {
	NH_NOTIFIER_INFO_TYPE_SINGLE,
	NH_NOTIFIER_INFO_TYPE_GRP,
	NH_NOTIFIER_INFO_TYPE_RES_TABLE,
	NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
	NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS,
};

/* Notifier view of a single (non-group) nexthop. */
struct nh_notifier_single_info {
	struct net_device *dev;
	u8 gw_family;
	union {
		__be32 ipv4;
		struct in6_addr ipv6;
	};
	u32 id;
	u8 is_reject:1,
	   is_fdb:1,
	   has_encap:1;
};

/* Notifier view of one group member and its weight. */
struct nh_notifier_grp_entry_info {
	u16 weight;
	struct nh_notifier_single_info nh;
};

/* Notifier view of a nexthop group. */
struct nh_notifier_grp_info {
	u16 num_nh;
	bool is_fdb;
	bool hw_stats;
	struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);
};

/* Notifier payload describing the replacement of one resilient bucket. */
struct nh_notifier_res_bucket_info {
	u16 bucket_index;
	unsigned int idle_timer_ms;
	bool force;
	struct nh_notifier_single_info old_nh;
	struct nh_notifier_single_info new_nh;
};

/* Notifier view of a whole resilient table, one nexthop per bucket. */
struct nh_notifier_res_table_info {
	u16 num_nh_buckets;
	bool hw_stats;
	struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);
};

/* Hardware packet count for one group member. */
struct nh_notifier_grp_hw_stats_entry_info {
	u32 id;
	u64 packets;
};

/* Hardware statistics for all members of a group. */
struct nh_notifier_grp_hw_stats_info {
	u16 num_nh;
	bool hw_stats_used;
	struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh);
};

/* Argument passed to nexthop notifiers; @type selects the union member. */
struct nh_notifier_info {
	struct net *net;
	struct netlink_ext_ack *extack;
	u32 id;
	enum nh_notifier_info_type type;
	union {
		struct nh_notifier_single_info *nh;
		struct nh_notifier_grp_info *nh_grp;
		struct nh_notifier_res_table_info *nh_res_table;
		struct nh_notifier_res_bucket_info *nh_res_bucket;
		struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats;
	};
};

int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
			      struct netlink_ext_ack *extack);
int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
				 bool offload, bool trap);
void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
				     unsigned long *activity);
void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
				  unsigned int nh_idx,
				  u64 delta_packets);

/* caller is holding rcu or rtnl; no reference taken to nexthop */
struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
void nexthop_free_rcu(struct rcu_head *head);

/* Take a reference on @nh unless its refcount is already zero. */
static inline bool nexthop_get(struct nexthop *nh)
{
	return refcount_inc_not_zero(&nh->refcnt);
}

/* Drop a reference; the final put queues nexthop_free_rcu() for @nh. */
static inline void nexthop_put(struct nexthop *nh)
{
	if (refcount_dec_and_test(&nh->refcnt))
		call_rcu_hurry(&nh->rcu, nexthop_free_rcu);
}

/* Two nexthops compare equal only when they are the same object. */
static inline bool nexthop_cmp(const struct nexthop *nh1,
			       const struct nexthop *nh2)
{
	return nh1 == nh2;
}

/* Return the fdb_nh flag of the group or of the single nexthop. */
static inline bool nexthop_is_fdb(const struct nexthop *nh)
{
	if (nh->is_group) {
		const struct nh_group *nh_grp;

		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
		return nh_grp->fdb_nh;
	} else {
		const struct nh_info *nhi;

		nhi = rcu_dereference_rtnl(nh->nh_info);
		return nhi->fdb_nh;
	}
}

/* Return the group's has_v4 flag; always false for a non-group nexthop. */
static inline bool nexthop_has_v4(const struct nexthop *nh)
{
	if (nh->is_group) {
		struct nh_group *nh_grp;

		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
		return nh_grp->has_v4;
	}
	return false;
}

/* Return the group's is_multipath flag; false for a non-group nexthop. */
static inline bool nexthop_is_multipath(const struct nexthop *nh)
{
	if (nh->is_group) {
		struct nh_group *nh_grp;

		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
		return nh_grp->is_multipath;
	}
	return false;
}

struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);

/* Number of paths: num_nh for a multipath group, otherwise 1. */
static inline unsigned int nexthop_num_path(const struct nexthop *nh)
{
	unsigned int rc = 1;

	if (nh->is_group) {
		struct nh_group *nh_grp;

		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
		if (nh_grp->is_multipath)
			rc = nh_grp->num_nh;
	}

	return rc;
}

/* Return entry @nhsel of @nhg, or NULL when @nhsel is past the last entry. */
static inline
struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel)
{
	/* for_nexthops macros in fib_semantics.c grabs a pointer to
	 * the nexthop before checking nhsel
	 */
	if (nhsel >= nhg->num_nh)
		return NULL;

	return nhg->nh_entries[nhsel].nh;
}

/*
 * Call fib_add_nexthop() on @skb for every entry of the group behind @nh,
 * passing the entry's weight.  Returns 0, or -EMSGSIZE as soon as one call
 * fails.  @nh is dereferenced as a group without checking is_group.
 */
static inline
int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
			    u8 rt_family)
{
	struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
	int i;

	for (i = 0; i < nhg->num_nh; i++) {
		struct nexthop *nhe = nhg->nh_entries[i].nh;
		struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info);
		struct fib_nh_common *nhc = &nhi->fib_nhc;
		int weight = nhg->nh_entries[i].weight;

		if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0)
			return -EMSGSIZE;
	}

	return 0;
}

/* called with rcu lock */
static inline bool nexthop_is_blackhole(const struct nexthop *nh)
{
	const struct nh_info *nhi;

	if (nh->is_group) {
		struct nh_group *nh_grp;

		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
		/* A group with more than one entry is never a blackhole. */
		if (nh_grp->num_nh > 1)
			return false;

		nh = nh_grp->nh_entries[0].nh;
	}

	nhi = rcu_dereference_rtnl(nh->nh_info);
	return nhi->reject_nh;
}

/* Select a path of res->fi->nh by @hash and store its nhc in res->nhc. */
static inline void nexthop_path_fib_result(struct fib_result *res, int hash)
{
	struct nh_info *nhi;
	struct nexthop *nh;

	nh = nexthop_select_path(res->fi->nh, hash);
	nhi = rcu_dereference(nh->nh_info);
	res->nhc = &nhi->fib_nhc;
}

/* called with rcu read lock or rtnl held */
static inline
struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
{
	struct nh_info *nhi;

	/* nh_common must be first so fib_nhc aliases both union members. */
	BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
	BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);

	if (nh->is_group) {
		struct nh_group *nh_grp;

		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
		if (nh_grp->is_multipath) {
			nh = nexthop_mpath_select(nh_grp, nhsel);
			if (!nh)
				return NULL;
		}
	}

	nhi = rcu_dereference_rtnl(nh->nh_info);
	return &nhi->fib_nhc;
}

/* called from fib_table_lookup with rcu_lock */
static inline
struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh,
					     int fib_flags,
					     const struct flowi4 *flp,
					     int *nhsel)
{
	struct nh_info *nhi;

	if (nh->is_group) {
		struct nh_group *nhg = rcu_dereference(nh->nh_grp);
		int i;

		/* Return the first entry fib_lookup_good_nhc() accepts. */
		for (i = 0; i < nhg->num_nh; i++) {
			struct nexthop *nhe = nhg->nh_entries[i].nh;

			nhi = rcu_dereference(nhe->nh_info);
			if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
				*nhsel = i;
				return &nhi->fib_nhc;
			}
		}
	} else {
		nhi = rcu_dereference(nh->nh_info);
		if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
			*nhsel = 0;
			return &nhi->fib_nhc;
		}
	}

	return NULL;
}

/*
 * True when nhc_l3mdev_matches_dev() accepts @dev for @nh, or for any entry
 * of @nh when it is a group.
 */
static inline bool nexthop_uses_dev(const struct nexthop *nh,
				    const struct net_device *dev)
{
	struct nh_info *nhi;

	if (nh->is_group) {
		struct nh_group *nhg = rcu_dereference(nh->nh_grp);
		int i;

		for (i = 0; i < nhg->num_nh; i++) {
			struct nexthop *nhe = nhg->nh_entries[i].nh;

			nhi = rcu_dereference(nhe->nh_info);
			if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
				return true;
		}
	} else {
		nhi = rcu_dereference(nh->nh_info);
		if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
			return true;
	}

	return false;
}

/* Path count of @fi: from the nexthop object if set, else fi->fib_nhs. */
static inline unsigned int fib_info_num_path(const struct fib_info *fi)
{
	if (unlikely(fi->nh))
		return nexthop_num_path(fi->nh);

	return fi->fib_nhs;
}

int fib_check_nexthop(struct nexthop *nh, u8 scope,
		      struct netlink_ext_ack *extack);

/* nhc number @nhsel of @fi, via the nexthop object when one is attached. */
static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
{
	if (unlikely(fi->nh))
		return nexthop_fib_nhc(fi->nh, nhsel);

	return &fi->fib_nh[nhsel].nh_common;
}

/* only used when fib_nh is built into fib_info */
static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
{
	WARN_ON(fi->nh);

	return &fi->fib_nh[nhsel];
}

/*
 * IPv6 variants
 */
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
		       struct netlink_ext_ack *extack);

/* Caller should either hold rcu_read_lock(), or RTNL. */
static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
{
	struct nh_info *nhi;

	if (nh->is_group) {
		struct nh_group *nh_grp;

		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
		/* For a group, the first entry is the one examined. */
		nh = nexthop_mpath_select(nh_grp, 0);
		if (!nh)
			return NULL;
	}

	nhi = rcu_dereference_rtnl(nh->nh_info);
	if (nhi->family == AF_INET6)
		return &nhi->fib6_nh;

	return NULL;
}

/*
 * Device of the fib6_nh used by @f6i.
 * NOTE(review): nexthop_fib6_nh() can return NULL and the result is
 * dereferenced unchecked here - confirm callers only pass IPv6 nexthops.
 */
static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
{
	struct fib6_nh *fib6_nh;

	fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh;
	return fib6_nh->fib_nh_dev;
}

/*
 * Select a path of res->f6i->nh by @hash and store its fib6_nh in res->nh.
 * For a reject nexthop the result is additionally marked RTN_BLACKHOLE /
 * RTF_REJECT and res->nh comes from nexthop_fib6_nh().
 */
static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
{
	struct nexthop *nh = res->f6i->nh;
	struct nh_info *nhi;

	nh = nexthop_select_path(nh, hash);

	nhi = rcu_dereference_rtnl(nh->nh_info);
	if (nhi->reject_nh) {
		res->fib6_type = RTN_BLACKHOLE;
		res->fib6_flags |= RTF_REJECT;
		res->nh = nexthop_fib6_nh(nh);
	} else {
		res->nh = &nhi->fib6_nh;
	}
}

int nexthop_for_each_fib6_nh(struct nexthop *nh,
			     int (*cb)(struct fib6_nh *nh, void *arg),
			     void *arg);

/* Address family stored in @nh's nh_info (read without an is_group check). */
static inline int nexthop_get_family(struct nexthop *nh)
{
	struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);

	return nhi->family;
}

/* Common nexthop part of @nh's nh_info (read without an is_group check). */
static inline
struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
{
	struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);

	return &nhi->fib_nhc;
}

/* Select a path of @nh by @hash; NULL when no path could be selected. */
static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
							    int hash)
{
	struct nh_info *nhi;
	struct nexthop *nhp;

	nhp = nexthop_select_path(nh, hash);
	if (unlikely(!nhp))
		return NULL;
	nhi = rcu_dereference(nhp->nh_info);
	return &nhi->fib_nhc;
}
#endif
14 212 210 2 2 205 3 8 206 4 7 205 8 5 212 11 4 206 216 216 216 215 4 4 4 226 221 6 3 4 5 223 2 4 1 213 208 4 207 5 3 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 /* * Copyright (C) 2011-2013 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following 
conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <linux/errno.h> #include <linux/export.h> #include <linux/kernel.h> #include <drm/drm_mode.h> #include <drm/drm_print.h> #include <drm/drm_rect.h> /** * drm_rect_intersect - intersect two rectangles * @r1: first rectangle * @r2: second rectangle * * Calculate the intersection of rectangles @r1 and @r2. * @r1 will be overwritten with the intersection. * * RETURNS: * %true if rectangle @r1 is still visible after the operation, * %false otherwise. */ bool drm_rect_intersect(struct drm_rect *r1, const struct drm_rect *r2) { r1->x1 = max(r1->x1, r2->x1); r1->y1 = max(r1->y1, r2->y1); r1->x2 = min(r1->x2, r2->x2); r1->y2 = min(r1->y2, r2->y2); return drm_rect_visible(r1); } EXPORT_SYMBOL(drm_rect_intersect); static u32 clip_scaled(int src, int dst, int *clip) { u64 tmp; if (dst == 0) return 0; /* Only clip what we have. Keeps the result bounded. */ *clip = min(*clip, dst); tmp = mul_u32_u32(src, dst - *clip); /* * Round toward 1.0 when clipping so that we don't accidentally * change upscaling to downscaling or vice versa. 
*/ if (src < (dst << 16)) return DIV_ROUND_UP_ULL(tmp, dst); else return DIV_ROUND_DOWN_ULL(tmp, dst); } /** * drm_rect_clip_scaled - perform a scaled clip operation * @src: source window rectangle * @dst: destination window rectangle * @clip: clip rectangle * * Clip rectangle @dst by rectangle @clip. Clip rectangle @src by * the corresponding amounts, retaining the vertical and horizontal scaling * factors from @src to @dst. * * RETURNS: * %true if rectangle @dst is still visible after being clipped, * %false otherwise. */ bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst, const struct drm_rect *clip) { int diff; diff = clip->x1 - dst->x1; if (diff > 0) { u32 new_src_w = clip_scaled(drm_rect_width(src), drm_rect_width(dst), &diff); src->x1 = src->x2 - new_src_w; dst->x1 += diff; } diff = clip->y1 - dst->y1; if (diff > 0) { u32 new_src_h = clip_scaled(drm_rect_height(src), drm_rect_height(dst), &diff); src->y1 = src->y2 - new_src_h; dst->y1 += diff; } diff = dst->x2 - clip->x2; if (diff > 0) { u32 new_src_w = clip_scaled(drm_rect_width(src), drm_rect_width(dst), &diff); src->x2 = src->x1 + new_src_w; dst->x2 -= diff; } diff = dst->y2 - clip->y2; if (diff > 0) { u32 new_src_h = clip_scaled(drm_rect_height(src), drm_rect_height(dst), &diff); src->y2 = src->y1 + new_src_h; dst->y2 -= diff; } return drm_rect_visible(dst); } EXPORT_SYMBOL(drm_rect_clip_scaled); static int drm_calc_scale(int src, int dst) { int scale = 0; if (WARN_ON(src < 0 || dst < 0)) return -EINVAL; if (dst == 0) return 0; if (src > (dst << 16)) return DIV_ROUND_UP(src, dst); else scale = src / dst; return scale; } /** * drm_rect_calc_hscale - calculate the horizontal scaling factor * @src: source window rectangle * @dst: destination window rectangle * @min_hscale: minimum allowed horizontal scaling factor * @max_hscale: maximum allowed horizontal scaling factor * * Calculate the horizontal scaling factor as * (@src width) / (@dst width). 
* * If the scale is below 1 << 16, round down. If the scale is above * 1 << 16, round up. This will calculate the scale with the most * pessimistic limit calculation. * * RETURNS: * The horizontal scaling factor, or errno of out of limits. */ int drm_rect_calc_hscale(const struct drm_rect *src, const struct drm_rect *dst, int min_hscale, int max_hscale) { int src_w = drm_rect_width(src); int dst_w = drm_rect_width(dst); int hscale = drm_calc_scale(src_w, dst_w); if (hscale < 0 || dst_w == 0) return hscale; if (hscale < min_hscale || hscale > max_hscale) return -ERANGE; return hscale; } EXPORT_SYMBOL(drm_rect_calc_hscale); /** * drm_rect_calc_vscale - calculate the vertical scaling factor * @src: source window rectangle * @dst: destination window rectangle * @min_vscale: minimum allowed vertical scaling factor * @max_vscale: maximum allowed vertical scaling factor * * Calculate the vertical scaling factor as * (@src height) / (@dst height). * * If the scale is below 1 << 16, round down. If the scale is above * 1 << 16, round up. This will calculate the scale with the most * pessimistic limit calculation. * * RETURNS: * The vertical scaling factor, or errno of out of limits. 
*/ int drm_rect_calc_vscale(const struct drm_rect *src, const struct drm_rect *dst, int min_vscale, int max_vscale) { int src_h = drm_rect_height(src); int dst_h = drm_rect_height(dst); int vscale = drm_calc_scale(src_h, dst_h); if (vscale < 0 || dst_h == 0) return vscale; if (vscale < min_vscale || vscale > max_vscale) return -ERANGE; return vscale; } EXPORT_SYMBOL(drm_rect_calc_vscale); /** * drm_rect_debug_print - print the rectangle information * @prefix: prefix string * @r: rectangle to print * @fixed_point: rectangle is in 16.16 fixed point format */ void drm_rect_debug_print(const char *prefix, const struct drm_rect *r, bool fixed_point) { if (fixed_point) DRM_DEBUG_KMS("%s" DRM_RECT_FP_FMT "\n", prefix, DRM_RECT_FP_ARG(r)); else DRM_DEBUG_KMS("%s" DRM_RECT_FMT "\n", prefix, DRM_RECT_ARG(r)); } EXPORT_SYMBOL(drm_rect_debug_print); /** * drm_rect_rotate - Rotate the rectangle * @r: rectangle to be rotated * @width: Width of the coordinate space * @height: Height of the coordinate space * @rotation: Transformation to be applied * * Apply @rotation to the coordinates of rectangle @r. * * @width and @height combined with @rotation define * the location of the new origin. * * @width correcsponds to the horizontal and @height * to the vertical axis of the untransformed coordinate * space. 
*/ void drm_rect_rotate(struct drm_rect *r, int width, int height, unsigned int rotation) { struct drm_rect tmp; if (rotation & (DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y)) { tmp = *r; if (rotation & DRM_MODE_REFLECT_X) { r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; } if (rotation & DRM_MODE_REFLECT_Y) { r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; } } switch (rotation & DRM_MODE_ROTATE_MASK) { case DRM_MODE_ROTATE_0: break; case DRM_MODE_ROTATE_90: tmp = *r; r->x1 = tmp.y1; r->x2 = tmp.y2; r->y1 = width - tmp.x2; r->y2 = width - tmp.x1; break; case DRM_MODE_ROTATE_180: tmp = *r; r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; break; case DRM_MODE_ROTATE_270: tmp = *r; r->x1 = height - tmp.y2; r->x2 = height - tmp.y1; r->y1 = tmp.x1; r->y2 = tmp.x2; break; default: break; } } EXPORT_SYMBOL(drm_rect_rotate); /** * drm_rect_rotate_inv - Inverse rotate the rectangle * @r: rectangle to be rotated * @width: Width of the coordinate space * @height: Height of the coordinate space * @rotation: Transformation whose inverse is to be applied * * Apply the inverse of @rotation to the coordinates * of rectangle @r. * * @width and @height combined with @rotation define * the location of the new origin. * * @width correcsponds to the horizontal and @height * to the vertical axis of the original untransformed * coordinate space, so that you never have to flip * them when doing a rotatation and its inverse. * That is, if you do :: * * drm_rect_rotate(&r, width, height, rotation); * drm_rect_rotate_inv(&r, width, height, rotation); * * you will always get back the original rectangle. 
*/ void drm_rect_rotate_inv(struct drm_rect *r, int width, int height, unsigned int rotation) { struct drm_rect tmp; switch (rotation & DRM_MODE_ROTATE_MASK) { case DRM_MODE_ROTATE_0: break; case DRM_MODE_ROTATE_90: tmp = *r; r->x1 = width - tmp.y2; r->x2 = width - tmp.y1; r->y1 = tmp.x1; r->y2 = tmp.x2; break; case DRM_MODE_ROTATE_180: tmp = *r; r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; break; case DRM_MODE_ROTATE_270: tmp = *r; r->x1 = tmp.y1; r->x2 = tmp.y2; r->y1 = height - tmp.x2; r->y2 = height - tmp.x1; break; default: break; } if (rotation & (DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y)) { tmp = *r; if (rotation & DRM_MODE_REFLECT_X) { r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; } if (rotation & DRM_MODE_REFLECT_Y) { r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; } } } EXPORT_SYMBOL(drm_rect_rotate_inv);
12 1731 1730 133 1734 1728 1724 1922 1885 9 7 7 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_DST_METADATA_H
#define __NET_DST_METADATA_H 1

#include <linux/skbuff.h>
#include <net/ip_tunnels.h>
#include <net/macsec.h>
#include <net/dst.h>

/* Selects which member of metadata_dst.u is valid. */
enum metadata_type {
	METADATA_IP_TUNNEL,
	METADATA_HW_PORT_MUX,
	METADATA_MACSEC,
	METADATA_XFRM,
};

struct hw_port_info {
	struct net_device *lower_dev;
	u32 port_id;
};

struct macsec_info {
	sci_t sci;
};

struct xfrm_md_info {
	u32 if_id;
	int link;
	struct dst_entry *dst_orig;
};

/* A dst_entry followed by type-tagged metadata. */
struct metadata_dst {
	struct dst_entry		dst;
	enum metadata_type		type;
	union {
		struct ip_tunnel_info	tun_info;
		struct hw_port_info	port_info;
		struct macsec_info	macsec_info;
		struct xfrm_md_info	xfrm_info;
	} u;
};

/*
 * Return the skb's dst as a metadata_dst when it carries DST_METADATA,
 * otherwise NULL.
 */
static inline struct metadata_dst *skb_metadata_dst(const struct sk_buff *skb)
{
	struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb);

	if (md_dst && md_dst->dst.flags & DST_METADATA)
		return md_dst;

	return NULL;
}

/*
 * Return the IP tunnel info attached to @skb: from an IP-tunnel metadata
 * dst if present, else from an IP/IP6 lightweight-tunnel state on the dst,
 * else NULL.
 */
static inline struct ip_tunnel_info *
skb_tunnel_info(const struct sk_buff *skb)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);
	struct dst_entry *dst;

	if (md_dst && md_dst->type == METADATA_IP_TUNNEL)
		return &md_dst->u.tun_info;

	dst = skb_dst(skb);
	if (dst && dst->lwtstate &&
	    (dst->lwtstate->type == LWTUNNEL_ENCAP_IP ||
	     dst->lwtstate->type == LWTUNNEL_ENCAP_IP6))
		return lwt_tun_info(dst->lwtstate);

	return NULL;
}

/* View the lwtunnel state's data area as xfrm metadata. */
static inline struct xfrm_md_info *lwt_xfrm_info(struct lwtunnel_state *lwt)
{
	return (struct xfrm_md_info *)lwt->data;
}

/*
 * Return the xfrm metadata attached to @skb: from an XFRM metadata dst if
 * present, else from an XFRM lightweight-tunnel state on the dst, else NULL.
 */
static inline struct xfrm_md_info *skb_xfrm_md_info(const struct sk_buff *skb)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);
	struct dst_entry *dst;

	if (md_dst && md_dst->type == METADATA_XFRM)
		return &md_dst->u.xfrm_info;

	dst = skb_dst(skb);
	if (dst && dst->lwtstate &&
	    dst->lwtstate->type == LWTUNNEL_ENCAP_XFRM)
		return lwt_xfrm_info(dst->lwtstate);

	return NULL;
}

/* True when @skb has a dst that is not flagged DST_METADATA. */
static inline bool skb_valid_dst(const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	return dst && !(dst->flags & DST_METADATA);
}

/*
 * Compare the dsts of two skbs as metadata dsts.  Returns 0 when neither
 * skb has a dst or when the type-specific payloads are bytewise equal, and
 * non-zero otherwise (including when only one skb has a dst, the types
 * differ, or the type is unknown).
 */
static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
				       const struct sk_buff *skb_b)
{
	const struct metadata_dst *a, *b;

	if (!(skb_a->_skb_refdst | skb_b->_skb_refdst))
		return 0;

	a = (const struct metadata_dst *) skb_dst(skb_a);
	b = (const struct metadata_dst *) skb_dst(skb_b);

	/* !a != !b short-circuits before either pointer is dereferenced. */
	if (!a != !b || a->type != b->type)
		return 1;

	switch (a->type) {
	case METADATA_HW_PORT_MUX:
		return memcmp(&a->u.port_info, &b->u.port_info,
			      sizeof(a->u.port_info));
	case METADATA_IP_TUNNEL:
		/* The tunnel options trail the struct; compare them as well. */
		return memcmp(&a->u.tun_info, &b->u.tun_info,
			      sizeof(a->u.tun_info) +
					 a->u.tun_info.options_len);
	case METADATA_MACSEC:
		return memcmp(&a->u.macsec_info, &b->u.macsec_info,
			      sizeof(a->u.macsec_info));
	case METADATA_XFRM:
		return memcmp(&a->u.xfrm_info, &b->u.xfrm_info,
			      sizeof(a->u.xfrm_info));
	default:
		return 1;
	}
}

void metadata_dst_free(struct metadata_dst *);
struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type,
					gfp_t flags);
void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst);
struct metadata_dst __percpu *
metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags);

/*
 * Allocate (GFP_ATOMIC) an IP-tunnel metadata dst with room for @md_size
 * option bytes, with options_len and mode cleared.  Returns NULL on
 * allocation failure.
 */
static inline struct metadata_dst *tun_rx_dst(int md_size)
{
	struct metadata_dst *tun_dst;

	tun_dst = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC);
	if (!tun_dst)
		return NULL;

	tun_dst->u.tun_info.options_len = 0;
	tun_dst->u.tun_info.mode = 0;
	return tun_dst;
}

static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);
	int md_size;
	struct metadata_dst *new_md;

	if (!md_dst || md_dst->type != METADATA_IP_TUNNEL)
		return ERR_PTR(-EINVAL);

	md_size = md_dst->u.tun_info.options_len;
	new_md = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC);
	if (!new_md)
		return ERR_PTR(-ENOMEM);

	unsafe_memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
		      sizeof(struct ip_tunnel_info) + md_size,
		      /* metadata_dst_alloc() reserves room (md_size bytes) for
		       * options right after the ip_tunnel_info struct.
*/); #ifdef CONFIG_DST_CACHE /* Unclone the dst cache if there is one */ if (new_md->u.tun_info.dst_cache.cache) { int ret; ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); if (ret) { metadata_dst_free(new_md); return ERR_PTR(ret); } } #endif skb_dst_drop(skb); skb_dst_set(skb, &new_md->dst); return new_md; } static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb) { struct metadata_dst *dst; dst = tun_dst_unclone(skb); if (IS_ERR(dst)) return NULL; return &dst->u.tun_info; } static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, __be16 tp_dst, const unsigned long *flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; ip_tunnel_key_init(&tun_dst->u.tun_info.key, saddr, daddr, tos, ttl, 0, 0, tp_dst, tunnel_id, flags); return tun_dst; } static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, const unsigned long *flags, __be64 tunnel_id, int md_size) { const struct iphdr *iph = ip_hdr(skb); return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, 0, flags, tunnel_id, md_size); } static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 tos, __u8 ttl, __be16 tp_dst, __be32 label, const unsigned long *flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; struct ip_tunnel_info *info; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; info = &tun_dst->u.tun_info; info->mode = IP_TUNNEL_INFO_IPV6; ip_tunnel_flags_copy(info->key.tun_flags, flags); info->key.tun_id = tunnel_id; info->key.tp_src = 0; info->key.tp_dst = tp_dst; info->key.u.ipv6.src = *saddr; info->key.u.ipv6.dst = *daddr; info->key.tos = tos; info->key.ttl = ttl; info->key.label = label; return tun_dst; } static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, const unsigned long *flags, __be64 tunnel_id, int md_size) { const 
struct ipv6hdr *ip6h = ipv6_hdr(skb); return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, ipv6_get_dsfield(ip6h), ip6h->hop_limit, 0, ip6_flowlabel(ip6h), flags, tunnel_id, md_size); } #endif /* __NET_DST_METADATA_H */
51 4824 126 1036 48 776 1078 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 
520 521 522 523 524 525 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_FILELOCK_H #define _LINUX_FILELOCK_H #include <linux/fs.h> #define FL_POSIX 1 #define FL_FLOCK 2 #define FL_DELEG 4 /* NFSv4 delegation */ #define FL_ACCESS 8 /* not trying to lock, just looking */ #define FL_EXISTS 16 /* when unlocking, test for existence */ #define FL_LEASE 32 /* lease held on this file */ #define FL_CLOSE 64 /* unlock on close */ #define FL_SLEEP 128 /* A blocking lock */ #define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ #define FL_LAYOUT 2048 /* outstanding pNFS layout */ #define FL_RECLAIM 4096 /* reclaiming from a reboot server */ #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE) /* * Special return value from posix_lock_file() and vfs_lock_file() for * asynchronous locking. */ #define FILE_LOCK_DEFERRED 1 struct file_lock; struct file_lease; struct file_lock_operations { void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); }; struct lock_manager_operations { void *lm_mod_owner; fl_owner_t (*lm_get_owner)(fl_owner_t); void (*lm_put_owner)(fl_owner_t); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, int); bool (*lm_lock_expirable)(struct file_lock *cfl); void (*lm_expire_lock)(void); }; struct lease_manager_operations { bool (*lm_break)(struct file_lease *); int (*lm_change)(struct file_lease *, int, struct list_head *); void (*lm_setup)(struct file_lease *, void **); bool (*lm_breaker_owns_lease)(struct file_lease *); }; struct lock_manager { struct list_head list; /* * NFSv4 and up also want opens blocked during the grace period; * NLM doesn't care: */ bool block_opens; }; struct net; void locks_start_grace(struct net *, struct lock_manager *); void locks_end_grace(struct lock_manager *); bool locks_in_grace(struct net *); bool 
opens_in_grace(struct net *); /* * struct file_lock has a union that some filesystems use to track * their own private info. The NFS side of things is defined here: */ #include <linux/nfs_fs_i.h> /* * struct file_lock represents a generic "file lock". It's used to represent * POSIX byte range locks, BSD (flock) locks, and leases. It's important to * note that the same struct is used to represent both a request for a lock and * the lock itself, but the same object is never used for both. * * FIXME: should we create a separate "struct lock_request" to help distinguish * these two uses? * * The varous i_flctx lists are ordered by: * * 1) lock owner * 2) lock range start * 3) lock range end * * Obviously, the last two criteria only matter for POSIX locks. */ struct file_lock_core { struct file_lock_core *flc_blocker; /* The lock that is blocking us */ struct list_head flc_list; /* link into file_lock_context */ struct hlist_node flc_link; /* node in global lists */ struct list_head flc_blocked_requests; /* list of requests with * ->fl_blocker pointing here */ struct list_head flc_blocked_member; /* node in * ->fl_blocker->fl_blocked_requests */ fl_owner_t flc_owner; unsigned int flc_flags; unsigned char flc_type; pid_t flc_pid; int flc_link_cpu; /* what cpu's list is this on? 
*/ wait_queue_head_t flc_wait; struct file *flc_file; }; struct file_lock { struct file_lock_core c; loff_t fl_start; loff_t fl_end; const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ union { struct nfs_lock_info nfs_fl; struct nfs4_lock_info nfs4_fl; struct { struct list_head link; /* link in AFS vnode's pending_locks list */ int state; /* state of grant or error if -ve */ unsigned int debug_id; } afs; struct { struct inode *inode; } ceph; } fl_u; } __randomize_layout; struct file_lease { struct file_lock_core c; struct fasync_struct * fl_fasync; /* for lease break notifications */ /* for lease breaks: */ unsigned long fl_break_time; unsigned long fl_downgrade_time; const struct lease_manager_operations *fl_lmops; /* Callbacks for lease managers */ } __randomize_layout; struct file_lock_context { spinlock_t flc_lock; struct list_head flc_flock; struct list_head flc_posix; struct list_head flc_lease; }; #ifdef CONFIG_FILE_LOCKING int fcntl_getlk(struct file *, unsigned int, struct flock *); int fcntl_setlk(unsigned int, struct file *, unsigned int, struct flock *); #if BITS_PER_LONG == 32 int fcntl_getlk64(struct file *, unsigned int, struct flock64 *); int fcntl_setlk64(unsigned int, struct file *, unsigned int, struct flock64 *); #endif int fcntl_setlease(unsigned int fd, struct file *filp, int arg); int fcntl_getlease(struct file *filp); static inline bool lock_is_unlock(struct file_lock *fl) { return fl->c.flc_type == F_UNLCK; } static inline bool lock_is_read(struct file_lock *fl) { return fl->c.flc_type == F_RDLCK; } static inline bool lock_is_write(struct file_lock *fl) { return fl->c.flc_type == F_WRLCK; } static inline void locks_wake_up(struct file_lock *fl) { wake_up(&fl->c.flc_wait); } static inline bool locks_can_async_lock(const struct file_operations *fops) { return !fops->lock || fops->fop_flags & FOP_ASYNC_LOCK; } /* fs/locks.c */ void 
locks_free_lock_context(struct inode *inode); void locks_free_lock(struct file_lock *fl); void locks_init_lock(struct file_lock *); struct file_lock *locks_alloc_lock(void); void locks_copy_lock(struct file_lock *, struct file_lock *); void locks_copy_conflock(struct file_lock *, struct file_lock *); void locks_remove_posix(struct file *, fl_owner_t); void locks_remove_file(struct file *); void locks_release_private(struct file_lock *); void posix_test_lock(struct file *, struct file_lock *); int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); int locks_delete_block(struct file_lock *); int vfs_test_lock(struct file *, struct file_lock *); int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); int vfs_cancel_lock(struct file *filp, struct file_lock *fl); bool vfs_inode_has_locks(struct inode *inode); int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); void locks_init_lease(struct file_lease *); void locks_free_lease(struct file_lease *fl); struct file_lease *locks_alloc_lease(void); int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); void lease_get_mtime(struct inode *, struct timespec64 *time); int generic_setlease(struct file *, int, struct file_lease **, void **priv); int kernel_setlease(struct file *, int, struct file_lease **, void **); int vfs_setlease(struct file *, int, struct file_lease **, void **); int lease_modify(struct file_lease *, int, struct list_head *); struct notifier_block; int lease_register_notifier(struct notifier_block *); void lease_unregister_notifier(struct notifier_block *); struct files_struct; void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files); bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner); static inline struct file_lock_context * locks_inode_context(const struct inode *inode) { return smp_load_acquire(&inode->i_flctx); } #else /* !CONFIG_FILE_LOCKING */ static 
inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) { return -EINVAL; } static inline int fcntl_setlk(unsigned int fd, struct file *file, unsigned int cmd, struct flock __user *user) { return -EACCES; } #if BITS_PER_LONG == 32 static inline int fcntl_getlk64(struct file *file, unsigned int cmd, struct flock64 *user) { return -EINVAL; } static inline int fcntl_setlk64(unsigned int fd, struct file *file, unsigned int cmd, struct flock64 *user) { return -EACCES; } #endif static inline int fcntl_setlease(unsigned int fd, struct file *filp, int arg) { return -EINVAL; } static inline int fcntl_getlease(struct file *filp) { return F_UNLCK; } static inline bool lock_is_unlock(struct file_lock *fl) { return false; } static inline bool lock_is_read(struct file_lock *fl) { return false; } static inline bool lock_is_write(struct file_lock *fl) { return false; } static inline void locks_wake_up(struct file_lock *fl) { } static inline void locks_free_lock_context(struct inode *inode) { } static inline void locks_init_lock(struct file_lock *fl) { return; } static inline void locks_init_lease(struct file_lease *fl) { return; } static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { return; } static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) { return; } static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) { return; } static inline void locks_remove_file(struct file *filp) { return; } static inline void posix_test_lock(struct file *filp, struct file_lock *fl) { return; } static inline int posix_lock_file(struct file *filp, struct file_lock *fl, struct file_lock *conflock) { return -ENOLCK; } static inline int locks_delete_block(struct file_lock *waiter) { return -ENOENT; } static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) { return 0; } static inline int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock 
*conf) { return -ENOLCK; } static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) { return 0; } static inline bool vfs_inode_has_locks(struct inode *inode) { return false; } static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) { return -ENOLCK; } static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { return 0; } static inline void lease_get_mtime(struct inode *inode, struct timespec64 *time) { return; } static inline int generic_setlease(struct file *filp, int arg, struct file_lease **flp, void **priv) { return -EINVAL; } static inline int kernel_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv) { return -EINVAL; } static inline int vfs_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv) { return -EINVAL; } static inline int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose) { return -EINVAL; } struct files_struct; static inline void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) {} static inline bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner) { return false; } static inline struct file_lock_context * locks_inode_context(const struct inode *inode) { return NULL; } #endif /* !CONFIG_FILE_LOCKING */ /* for walking lists of file_locks linked by fl_list */ #define for_each_file_lock(_fl, _head) list_for_each_entry(_fl, _head, c.flc_list) static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { return locks_lock_inode_wait(file_inode(filp), fl); } #ifdef CONFIG_FILE_LOCKING static inline int break_lease(struct inode *inode, unsigned int mode) { struct file_lock_context *flctx; /* * Since this check is lockless, we must ensure that any refcounts * taken are done before checking i_flctx->flc_lease. Otherwise, we * could end up racing with tasks trying to set a new lease on this * file. 
*/ flctx = READ_ONCE(inode->i_flctx); if (!flctx) return 0; smp_mb(); if (!list_empty_careful(&flctx->flc_lease)) return __break_lease(inode, mode, FL_LEASE); return 0; } static inline int break_deleg(struct inode *inode, unsigned int mode) { struct file_lock_context *flctx; /* * Since this check is lockless, we must ensure that any refcounts * taken are done before checking i_flctx->flc_lease. Otherwise, we * could end up racing with tasks trying to set a new lease on this * file. */ flctx = READ_ONCE(inode->i_flctx); if (!flctx) return 0; smp_mb(); if (!list_empty_careful(&flctx->flc_lease)) return __break_lease(inode, mode, FL_DELEG); return 0; } static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) { int ret; ret = break_deleg(inode, O_WRONLY|O_NONBLOCK); if (ret == -EWOULDBLOCK && delegated_inode) { *delegated_inode = inode; ihold(inode); } return ret; } static inline int break_deleg_wait(struct inode **delegated_inode) { int ret; ret = break_deleg(*delegated_inode, O_WRONLY); iput(*delegated_inode); *delegated_inode = NULL; return ret; } static inline int break_layout(struct inode *inode, bool wait) { smp_mb(); if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) return __break_lease(inode, wait ? O_WRONLY : O_WRONLY | O_NONBLOCK, FL_LAYOUT); return 0; } #else /* !CONFIG_FILE_LOCKING */ static inline int break_lease(struct inode *inode, unsigned int mode) { return 0; } static inline int break_deleg(struct inode *inode, unsigned int mode) { return 0; } static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) { return 0; } static inline int break_deleg_wait(struct inode **delegated_inode) { BUG(); return 0; } static inline int break_layout(struct inode *inode, bool wait) { return 0; } #endif /* CONFIG_FILE_LOCKING */ #endif /* _LINUX_FILELOCK_H */
1010 85 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 /* SPDX-License-Identifier: GPL-2.0 * * Network memory * * Author: Mina Almasry <almasrymina@google.com> */ #ifndef _NET_NETMEM_H #define _NET_NETMEM_H #include <linux/mm.h> #include <net/net_debug.h> /* net_iov */ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers); /* We overload the LSB of the struct page pointer to indicate whether it's * a page or net_iov. */ #define NET_IOV 0x01UL struct net_iov { unsigned long __unused_padding; unsigned long pp_magic; struct page_pool *pp; struct dmabuf_genpool_chunk_owner *owner; unsigned long dma_addr; atomic_long_t pp_ref_count; }; /* These fields in struct page are used by the page_pool and net stack: * * struct { * unsigned long pp_magic; * struct page_pool *pp; * unsigned long _pp_mapping_pad; * unsigned long dma_addr; * atomic_long_t pp_ref_count; * }; * * We mirror the page_pool fields here so the page_pool can access these fields * without worrying whether the underlying fields belong to a page or net_iov. * * The non-net stack fields of struct page are private to the mm stack and must * never be mirrored to net_iov. 
*/ #define NET_IOV_ASSERT_OFFSET(pg, iov) \ static_assert(offsetof(struct page, pg) == \ offsetof(struct net_iov, iov)) NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic); NET_IOV_ASSERT_OFFSET(pp, pp); NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr); NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count); #undef NET_IOV_ASSERT_OFFSET /* netmem */ /** * typedef netmem_ref - a nonexistent type marking a reference to generic * network memory. * * A netmem_ref currently is always a reference to a struct page. This * abstraction is introduced so support for new memory types can be added. * * Use the supplied helpers to obtain the underlying memory pointer and fields. */ typedef unsigned long __bitwise netmem_ref; static inline bool netmem_is_net_iov(const netmem_ref netmem) { return (__force unsigned long)netmem & NET_IOV; } /* This conversion fails (returns NULL) if the netmem_ref is not struct page * backed. */ static inline struct page *netmem_to_page(netmem_ref netmem) { if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) return NULL; return (__force struct page *)netmem; } static inline struct net_iov *netmem_to_net_iov(netmem_ref netmem) { if (netmem_is_net_iov(netmem)) return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV); DEBUG_NET_WARN_ON_ONCE(true); return NULL; } static inline netmem_ref net_iov_to_netmem(struct net_iov *niov) { return (__force netmem_ref)((unsigned long)niov | NET_IOV); } static inline netmem_ref page_to_netmem(struct page *page) { return (__force netmem_ref)page; } static inline int netmem_ref_count(netmem_ref netmem) { /* The non-pp refcount of net_iov is always 1. On net_iov, we only * support pp refcounting which uses the pp_ref_count field. 
*/ if (netmem_is_net_iov(netmem)) return 1; return page_ref_count(netmem_to_page(netmem)); } static inline unsigned long netmem_pfn_trace(netmem_ref netmem) { if (netmem_is_net_iov(netmem)) return 0; return page_to_pfn(netmem_to_page(netmem)); } static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem) { return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV); } static inline struct page_pool *netmem_get_pp(netmem_ref netmem) { return __netmem_clear_lsb(netmem)->pp; } static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem) { return &__netmem_clear_lsb(netmem)->pp_ref_count; } static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid) { /* NUMA node preference only makes sense if we're allocating * system memory. Memory providers (which give us net_iovs) * choose for us. */ if (netmem_is_net_iov(netmem)) return true; return page_to_nid(netmem_to_page(netmem)) == pref_nid; } static inline netmem_ref netmem_compound_head(netmem_ref netmem) { /* niov are never compounded */ if (netmem_is_net_iov(netmem)) return netmem; return page_to_netmem(compound_head(netmem_to_page(netmem))); } static inline void *netmem_address(netmem_ref netmem) { if (netmem_is_net_iov(netmem)) return NULL; return page_address(netmem_to_page(netmem)); } static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) { return __netmem_clear_lsb(netmem)->dma_addr; } #endif /* _NET_NETMEM_H */
1 1 2 2 2 2 5 5 5 5 5 2 3 1 2 1 1 1 5 15 7 1 1 1 4 7 3 3 3 3 3 3 3 2 1 2 2 3 7 3 3 6 6 6 2 6 8 8 5 5 4 1 5 4 3 3 3 11 38 1 11 31 1 31 2 2 31 5 28 31 2 30 3 7 26 25 8 15 3 15 11 4 3 11 7 18 18 17 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 
478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 
978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 
1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 
1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 // SPDX-License-Identifier: GPL-2.0-or-later /****************************************************************************** * usbtouchscreen.c * Driver for USB Touchscreens, supporting those devices: * - eGalax Touchkit * includes eTurboTouch CT-410/510/700 * - 3M/Microtouch EX II series * - ITM * - PanJit TouchSet * - eTurboTouch * - Gunze AHL61 * - DMC TSC-10/25 * - IRTOUCHSYSTEMS/UNITOP * - IdealTEK URTC1000 * - General Touch * - GoTop Super_Q2/GogoPen/PenPower tablets * - JASTEC USB touch controller/DigiTech DTR-02U * - Zytronic capacitive touchscreen * - NEXIO/iNexio * - Elo TouchSystems 2700 IntelliTouch * - EasyTouch USB Dual/Multi touch controller from Data Modul * * Copyright (C) 2004-2007 by Daniel Ritz <daniel.ritz@gmx.ch> * Copyright (C) by Todd E. Johnson (mtouchusb.c) * * Driver is based on touchkitusb.c * - ITM parts are from itmtouch.c * - 3M parts are from mtouchusb.c * - PanJit parts are from an unmerged driver by Lanslott Gish * - DMC TSC 10/25 are from Holger Schurig, with ideas from an unmerged * driver from Marius Vollmer * *****************************************************************************/ //#define DEBUG #include <linux/kernel.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/usb/input.h> #include <linux/hid.h> #include <linux/mutex.h> static bool swap_xy; module_param(swap_xy, bool, 0644); MODULE_PARM_DESC(swap_xy, "If set X and Y axes are swapped."); static bool hwcalib_xy; module_param(hwcalib_xy, bool, 0644); MODULE_PARM_DESC(hwcalib_xy, "If set hw-calibrated X/Y are used if available"); /* device specifc data/functions */ struct usbtouch_usb; struct usbtouch_device_info { int min_xc, max_xc; int min_yc, max_yc; int min_press, max_press; int rept_size; /* * Always service the USB devices irq not just when the input device is * open. 
This is useful when devices have a watchdog which prevents us * from periodically polling the device. Leave this unset unless your * touchscreen device requires it, as it does consume more of the USB * bandwidth. */
	bool irq_always;

	/*
	 * used to get the packet len. possible return values:
	 * > 0: packet len
	 * = 0: skip one byte
	 * < 0: -return value more bytes needed
	 */
	int (*get_pkt_len) (unsigned char *pkt, int len);

	/*
	 * Decodes one packet into the x/y/touch/press fields of the device.
	 * The generic packet handler reports an input event only when this
	 * returns non-zero.
	 */
	int (*read_data) (struct usbtouch_usb *usbtouch, unsigned char *pkt);

	/*
	 * Optional hooks. The implementations in this file use alloc to set
	 * up usbtouch->priv, init to send set-up requests to the device and
	 * exit to release what alloc created. The code that invokes them is
	 * not in this part of the file.
	 */
	int (*alloc) (struct usbtouch_usb *usbtouch);
	int (*init) (struct usbtouch_usb *usbtouch);
	void (*exit) (struct usbtouch_usb *usbtouch);
};

/* a usbtouch device */
struct usbtouch_usb {
	/* receive buffer that the URB completion handler passes to process_pkt() */
	unsigned char *data;
	dma_addr_t data_dma;
	int data_size;
	/* stash for an incomplete packet between two URBs (multi-packet protocols) */
	unsigned char *buffer;
	/* number of bytes currently held in buffer */
	int buf_len;
	struct urb *irq;
	struct usb_interface *interface;
	struct input_dev *input;
	/* protocol description (limits, report size, hooks) */
	const struct usbtouch_device_info *type;
	struct mutex pm_mutex;  /* serialize access to open/suspend */
	bool is_open;
	char name[128];
	char phys[64];
	/* protocol-private data, owned by the alloc/exit hooks */
	void *priv;

	/* last decoded state, filled in by the read_data hook */
	int x, y;
	int touch, press;

	/* called by the URB completion handler with the received bytes */
	void (*process_pkt)(struct usbtouch_usb *usbtouch, unsigned char *pkt, int len);
};


/*****************************************************************************
 * e2i Part
 */

#ifdef CONFIG_TOUCHSCREEN_USB_E2I
/*
 * Send the control request that the debug message calls E2I_RESET.
 *
 * Returns the result of usb_control_msg() unchanged.
 */
static int e2i_init(struct usbtouch_usb *usbtouch)
{
	int ret;
	struct usb_device *udev = interface_to_usbdev(usbtouch->interface);

	ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
			      0x01, 0x02, 0x0000, 0x0081,
			      NULL, 0, USB_CTRL_SET_TIMEOUT);

	dev_dbg(&usbtouch->interface->dev,
		"%s - usb_control_msg - E2I_RESET - bytes|err: %d\n",
		__func__, ret);
	return ret;
}

/*
 * Decode one report: three 16-bit values, high byte first. The first value
 * minus 0xA000 is the pressure; the screen counts as touched while that
 * difference is positive. Always returns 1.
 */
static int e2i_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	int tmp = (pkt[0] << 8) | pkt[1];

	dev->x = (pkt[2] << 8) | pkt[3];
	dev->y = (pkt[4] << 8) | pkt[5];

	tmp = tmp - 0xA000;
	dev->touch = (tmp > 0);
	/* pressure is clamped to 0 when not touched */
	dev->press = (tmp > 0 ?
tmp : 0);

	return 1;
}

/* e2i: 6-byte reports, coordinates in 0..0x7fff */
static const struct usbtouch_device_info e2i_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x7fff,
	.min_yc		= 0x0,
	.max_yc		= 0x7fff,
	.rept_size	= 6,
	.init		= e2i_init,
	.read_data	= e2i_read_data,
};
#endif


/*****************************************************************************
 * eGalax part
 */

#ifdef CONFIG_TOUCHSCREEN_USB_EGALAX

/* this protocol provides get_pkt_len(), so the multi-packet parser is needed */
#ifndef MULTI_PACKET
#define MULTI_PACKET
#endif

/* packet type is the first byte with bit 0 masked off */
#define EGALAX_PKT_TYPE_MASK		0xFE
#define EGALAX_PKT_TYPE_REPT		0x80
#define EGALAX_PKT_TYPE_DIAG		0x0A

/*
 * Send the 3-byte "check active" diagnostic packet, retrying up to three
 * times while the request fails with -EPIPE.
 *
 * Returns 0 on success, -ENOMEM if the transfer buffer cannot be allocated,
 * or the negative error from usb_control_msg().
 */
static int egalax_init(struct usbtouch_usb *usbtouch)
{
	struct usb_device *udev = interface_to_usbdev(usbtouch->interface);
	int ret, i;

	/*
	 * An eGalax diagnostic packet kicks the device into using the right
	 * protocol.  We send a "check active" packet.  The response will be
	 * read later and ignored.
	 */

	/* buf is released automatically on every return path (__free(kfree)) */
	u8 *buf __free(kfree) = kmalloc(3, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	buf[0] = EGALAX_PKT_TYPE_DIAG;
	buf[1] = 1;	/* length */
	buf[2] = 'A';	/* command - check active */

	for (i = 0; i < 3; i++) {
		ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
				      0,
				      USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
				      0, 0, buf, 3,
				      USB_CTRL_SET_TIMEOUT);
		/* only -EPIPE is retried; success or any other error ends the loop */
		if (ret != -EPIPE)
			break;
	}

	/* a non-negative result (bytes transferred) is reported as 0 */
	return ret < 0 ?
ret : 0;
}

/*
 * Decode a report packet. Packets of any other type are ignored (returns 0).
 * Y is taken from bytes 1-2 and X from bytes 3-4; bit 0 of byte 0 is the
 * touch state.
 */
static int egalax_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	if ((pkt[0] & EGALAX_PKT_TYPE_MASK) != EGALAX_PKT_TYPE_REPT)
		return 0;

	dev->x = ((pkt[3] & 0x0F) << 7) | (pkt[4] & 0x7F);
	dev->y = ((pkt[1] & 0x0F) << 7) | (pkt[2] & 0x7F);
	dev->touch = pkt[0] & 0x01;

	return 1;
}

/*
 * Packet length for the multi-packet parser:
 *   report packet -> 5
 *   diag packet   -> buf[1] + 2, or -1 while the length byte is missing
 *   anything else -> 0 (skip one byte)
 */
static int egalax_get_pkt_len(unsigned char *buf, int len)
{
	switch (buf[0] & EGALAX_PKT_TYPE_MASK) {
	case EGALAX_PKT_TYPE_REPT:
		return 5;

	case EGALAX_PKT_TYPE_DIAG:
		if (len < 2)
			return -1;

		return buf[1] + 2;
	}

	return 0;
}

/* eGalax: coordinates in 0..0x07ff, variable-length packets */
static const struct usbtouch_device_info egalax_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x07ff,
	.min_yc		= 0x0,
	.max_yc		= 0x07ff,
	.rept_size	= 16,
	.get_pkt_len	= egalax_get_pkt_len,
	.read_data	= egalax_read_data,
	.init		= egalax_init,
};
#endif

/*****************************************************************************
 * EasyTouch part
 */

#ifdef CONFIG_TOUCHSCREEN_USB_EASYTOUCH

/* this protocol provides get_pkt_len(), so the multi-packet parser is needed */
#ifndef MULTI_PACKET
#define MULTI_PACKET
#endif

/* packet type is the first byte with bit 0 masked off */
#define ETOUCH_PKT_TYPE_MASK		0xFE
#define ETOUCH_PKT_TYPE_REPT		0x80
#define ETOUCH_PKT_TYPE_REPT2		0xB0
#define ETOUCH_PKT_TYPE_DIAG		0x0A

/*
 * Decode a report packet of either report type. Other packet types are
 * ignored (returns 0). X is taken from bytes 1-2 and Y from bytes 3-4;
 * bit 0 of byte 0 is the touch state.
 */
static int etouch_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	if ((pkt[0] & ETOUCH_PKT_TYPE_MASK) != ETOUCH_PKT_TYPE_REPT &&
		(pkt[0] & ETOUCH_PKT_TYPE_MASK) != ETOUCH_PKT_TYPE_REPT2)
		return 0;

	dev->x = ((pkt[1] & 0x1F) << 7) | (pkt[2] & 0x7F);
	dev->y = ((pkt[3] & 0x1F) << 7) | (pkt[4] & 0x7F);
	dev->touch = pkt[0] & 0x01;

	return 1;
}

/*
 * Packet length for the multi-packet parser:
 *   either report type -> 5
 *   diag packet        -> buf[1] + 2, or -1 while the length byte is missing
 *   anything else      -> 0 (skip one byte)
 */
static int etouch_get_pkt_len(unsigned char *buf, int len)
{
	switch (buf[0] & ETOUCH_PKT_TYPE_MASK) {
	case ETOUCH_PKT_TYPE_REPT:
	case ETOUCH_PKT_TYPE_REPT2:
		return 5;

	case ETOUCH_PKT_TYPE_DIAG:
		if (len < 2)
			return -1;

		return buf[1] + 2;
	}

	return 0;
}

/* EasyTouch: coordinates in 0..0x07ff, variable-length packets, no init hook */
static const struct usbtouch_device_info etouch_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x07ff,
	.min_yc		= 0x0,
	.max_yc		= 0x07ff,
	.rept_size	= 16,
	.get_pkt_len	= etouch_get_pkt_len,
	.read_data	= etouch_read_data,
};
#endif

/*****************************************************************************
 * PanJit
Part */
#ifdef CONFIG_TOUCHSCREEN_USB_PANJIT
/*
 * Decode one report: 12-bit X in bytes 1-2 and 12-bit Y in bytes 3-4, low
 * byte first; bit 0 of byte 0 is the touch state. Always returns 1.
 */
static int panjit_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	dev->x = ((pkt[2] & 0x0F) << 8) | pkt[1];
	dev->y = ((pkt[4] & 0x0F) << 8) | pkt[3];
	dev->touch = pkt[0] & 0x01;

	return 1;
}

/* PanJit: 8-byte reports, coordinates in 0..0x0fff */
static const struct usbtouch_device_info panjit_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x0fff,
	.min_yc		= 0x0,
	.max_yc		= 0x0fff,
	.rept_size	= 8,
	.read_data	= panjit_read_data,
};
#endif


/*****************************************************************************
 * 3M/Microtouch Part
 */
#ifdef CONFIG_TOUCHSCREEN_USB_3M

/* vendor control requests */
#define MTOUCHUSB_ASYNC_REPORT          1
#define MTOUCHUSB_RESET                 7
#define MTOUCHUSB_REQ_CTRLLR_ID         10

/* size of the reply to MTOUCHUSB_REQ_CTRLLR_ID */
#define MTOUCHUSB_REQ_CTRLLR_ID_LEN	16

/*
 * Decode one report. With the hwcalib_xy module parameter set, the values in
 * bytes 3-6 are used and Y is inverted (0xffff - y); otherwise the values in
 * bytes 7-10 are used as they are. Bit 0x40 of byte 2 is the touch state.
 * Always returns 1.
 */
static int mtouch_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	if (hwcalib_xy) {
		dev->x = (pkt[4] << 8) | pkt[3];
		dev->y = 0xffff - ((pkt[6] << 8) | pkt[5]);
	} else {
		dev->x = (pkt[8] << 8) | pkt[7];
		dev->y = (pkt[10] << 8) | pkt[9];
	}
	dev->touch = (pkt[2] & 0x40) ? 1 : 0;

	return 1;
}

/* firmware revision read from the controller during init */
struct mtouch_priv {
	u8 fw_rev_major;
	u8 fw_rev_minor;
};

/*
 * Read the controller ID block and store the firmware revision (bytes 3 and
 * 4 of the reply) in the private data.
 *
 * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, the
 * negative error from usb_control_msg(), or -EIO on a short reply.
 */
static int mtouch_get_fw_revision(struct usbtouch_usb *usbtouch)
{
	struct usb_device *udev = interface_to_usbdev(usbtouch->interface);
	struct mtouch_priv *priv = usbtouch->priv;
	int ret;

	/* buf is released automatically on every return path (__free(kfree)) */
	u8 *buf __free(kfree) = kzalloc(MTOUCHUSB_REQ_CTRLLR_ID_LEN, GFP_NOIO);
	if (!buf)
		return -ENOMEM;

	ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
			      MTOUCHUSB_REQ_CTRLLR_ID,
			      USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
			      0, 0, buf, MTOUCHUSB_REQ_CTRLLR_ID_LEN,
			      USB_CTRL_SET_TIMEOUT);
	/* anything but a full-length reply is treated as a failure */
	if (ret != MTOUCHUSB_REQ_CTRLLR_ID_LEN) {
		dev_warn(&usbtouch->interface->dev,
			 "Failed to read FW rev: %d\n", ret);
		return ret < 0 ?
ret : -EIO;
	}

	priv->fw_rev_major = buf[3];
	priv->fw_rev_minor = buf[4];

	return 0;
}

/*
 * Allocate the private data and attach it to usbtouch->priv.
 * The memory is not zeroed here; mtouch_get_fw_revision() fills in both
 * fields. Returns 0 or -ENOMEM.
 */
static int mtouch_alloc(struct usbtouch_usb *usbtouch)
{
	struct mtouch_priv *priv;

	priv = kmalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	usbtouch->priv = priv;
	return 0;
}

/*
 * Bring the controller up: read the firmware revision, send a reset, wait
 * 150 ms, then enable asynchronous reports (up to three attempts).
 *
 * Returns 0 on success or a negative error code.
 */
static int mtouch_init(struct usbtouch_usb *usbtouch)
{
	int ret, i;
	struct usb_device *udev = interface_to_usbdev(usbtouch->interface);

	ret = mtouch_get_fw_revision(usbtouch);
	if (ret)
		return ret;

	ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
			      MTOUCHUSB_RESET,
			      USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
			      1, 0, NULL, 0, USB_CTRL_SET_TIMEOUT);
	dev_dbg(&usbtouch->interface->dev,
		"%s - usb_control_msg - MTOUCHUSB_RESET - bytes|err: %d\n",
		__func__, ret);
	if (ret < 0)
		return ret;
	msleep(150);

	/*
	 * Only -EPIPE is retried; any other error is returned at once.
	 * NOTE(review): if all three attempts fail with -EPIPE the loop simply
	 * ends and the function still returns 0 - confirm this is intended.
	 */
	for (i = 0; i < 3; i++) {
		ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
				      MTOUCHUSB_ASYNC_REPORT,
				      USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
				      1, 1, NULL, 0, USB_CTRL_SET_TIMEOUT);
		dev_dbg(&usbtouch->interface->dev,
			"%s - usb_control_msg - MTOUCHUSB_ASYNC_REPORT - bytes|err: %d\n",
			__func__, ret);
		if (ret >= 0)
			break;
		if (ret != -EPIPE)
			return ret;
	}

	/* Default min/max xy are the raw values, override if using hw-calib */
	if (hwcalib_xy) {
		input_set_abs_params(usbtouch->input, ABS_X, 0, 0xffff, 0, 0);
		input_set_abs_params(usbtouch->input, ABS_Y, 0, 0xffff, 0, 0);
	}

	return 0;
}

/* Free the private data allocated by mtouch_alloc(). */
static void mtouch_exit(struct usbtouch_usb *usbtouch)
{
	struct mtouch_priv *priv = usbtouch->priv;

	kfree(priv);
}

/*
 * 3M/Microtouch: 11-byte reports, raw coordinates in 0..0x4000.
 * Its address is also what mtouch_group_visible() compares against.
 */
static struct usbtouch_device_info mtouch_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x4000,
	.min_yc		= 0x0,
	.max_yc		= 0x4000,
	.rept_size	= 11,
	.read_data	= mtouch_read_data,
	.alloc		= mtouch_alloc,
	.init		= mtouch_init,
	.exit		= mtouch_exit,
};

/*
 * sysfs "firmware_rev" show handler: prints the stored revision as
 * "<major>.<minor>" in hexadecimal.
 */
static ssize_t mtouch_firmware_rev_show(struct device *dev,
				struct device_attribute *attr, char *output)
{
	struct usb_interface *intf = to_usb_interface(dev);
	struct usbtouch_usb *usbtouch = usb_get_intfdata(intf);
	struct mtouch_priv *priv = usbtouch->priv;

	return
sysfs_emit(output, "%1x.%1x\n",
			  priv->fw_rev_major, priv->fw_rev_minor);
}
/* read-only (0444) attribute backed by mtouch_firmware_rev_show() */
static DEVICE_ATTR(firmware_rev, 0444, mtouch_firmware_rev_show, NULL);

static struct attribute *mtouch_attrs[] = {
	&dev_attr_firmware_rev.attr,
	NULL
};

/*
 * The attribute group is shown only for interfaces whose protocol
 * description is mtouch_dev_info.
 */
static bool mtouch_group_visible(struct kobject *kobj)
{
	struct device *dev = kobj_to_dev(kobj);
	struct usb_interface *intf = to_usb_interface(dev);
	struct usbtouch_usb *usbtouch = usb_get_intfdata(intf);

	return usbtouch->type == &mtouch_dev_info;
}

DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(mtouch);

static const struct attribute_group mtouch_attr_group = {
	.is_visible = SYSFS_GROUP_VISIBLE(mtouch),
	.attrs = mtouch_attrs,
};

#endif


/*****************************************************************************
 * ITM Part
 */
#ifdef CONFIG_TOUCHSCREEN_USB_ITM
/*
 * Decode one ITM report.
 *
 * Returns 1 when an event should be reported: either the screen is touched,
 * or this is the first untouched report after a touch. Returns 0 for further
 * untouched reports.
 */
static int itm_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	int touch;
	/*
	 * ITM devices report invalid x/y data if not touched.
	 * if the screen was touched before but is not touched any more
	 * report touch as 0 with the last valid x/y data once. then stop
	 * reporting data until touched again.
*/
	/* pressure is updated on every report, touched or not */
	dev->press = ((pkt[2] & 0x01) << 7) | (pkt[5] & 0x7F);

	/* bit 0x20 of byte 7 is inverted: a cleared bit means touched */
	touch = ~pkt[7] & 0x20;
	if (!touch) {
		if (dev->touch) {
			/* release: report once, keeping the last x/y */
			dev->touch = 0;
			return 1;
		}

		return 0;
	}

	dev->x = ((pkt[0] & 0x1F) << 7) | (pkt[3] & 0x7F);
	dev->y = ((pkt[1] & 0x1F) << 7) | (pkt[4] & 0x7F);
	dev->touch = touch;

	return 1;
}

/* ITM: 8-byte reports, coordinates in 0..0x0fff, pressure in 0..0xff */
static const struct usbtouch_device_info itm_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x0fff,
	.min_yc		= 0x0,
	.max_yc		= 0x0fff,
	.max_press	= 0xff,
	.rept_size	= 8,
	.read_data	= itm_read_data,
};
#endif


/*****************************************************************************
 * eTurboTouch part
 */
#ifdef CONFIG_TOUCHSCREEN_USB_ETURBO
/* this protocol provides get_pkt_len(), so the multi-packet parser is needed */
#ifndef MULTI_PACKET
#define MULTI_PACKET
#endif
/*
 * Decode one report. Packets without the sync bit (0x80) in byte 0 are
 * ignored (returns 0). The two low bits of byte 0 select how far the 14-bit
 * raw values are shifted right (6 minus that value). Y is taken from bytes
 * 1-2, X from bytes 3-4, and bit 0x10 of byte 0 is the touch state.
 */
static int eturbo_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	unsigned int shift;

	/* packets should start with sync */
	if (!(pkt[0] & 0x80))
		return 0;

	shift = (6 - (pkt[0] & 0x03));
	dev->x = ((pkt[3] << 7) | pkt[4]) >> shift;
	dev->y = ((pkt[1] << 7) | pkt[2]) >> shift;
	dev->touch = (pkt[0] & 0x10) ? 1 : 0;

	return 1;
}

/*
 * Packet length for the multi-packet parser: 5 for packets with the sync
 * bit, 3 for packets starting with 0x01, 0 (skip one byte) otherwise.
 */
static int eturbo_get_pkt_len(unsigned char *buf, int len)
{
	if (buf[0] & 0x80)
		return 5;
	if (buf[0] == 0x01)
		return 3;
	return 0;
}

/* eTurboTouch: coordinates in 0..0x07ff, variable-length packets */
static const struct usbtouch_device_info eturbo_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x07ff,
	.min_yc		= 0x0,
	.max_yc		= 0x07ff,
	.rept_size	= 8,
	.get_pkt_len	= eturbo_get_pkt_len,
	.read_data	= eturbo_read_data,
};
#endif


/*****************************************************************************
 * Gunze part
 */
#ifdef CONFIG_TOUCHSCREEN_USB_GUNZE
/*
 * Decode one 4-byte report. A report is accepted only if byte 0 has bit 0x80
 * set and bytes 1-3 have it clear; anything else is ignored (returns 0).
 * Bit 0x20 of byte 0 is the touch state (stored unshifted, so 0 or 0x20).
 */
static int gunze_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	if (!(pkt[0] & 0x80) || ((pkt[1] | pkt[2] | pkt[3]) & 0x80))
		return 0;

	dev->x = ((pkt[0] & 0x1F) << 7) | (pkt[2] & 0x7F);
	dev->y = ((pkt[1] & 0x1F) << 7) | (pkt[3] & 0x7F);
	dev->touch = pkt[0] & 0x20;

	return 1;
}

/* Gunze: 4-byte reports, coordinates in 0..0x0fff */
static const struct usbtouch_device_info gunze_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x0fff,
	.min_yc		= 0x0,
	.max_yc		= 0x0fff,
	.rept_size	= 4,
	.read_data	= gunze_read_data,
};
#endif
/*****************************************************************************
 * DMC TSC-10/25 Part
 *
 * Documentation about the controller and its protocol can be found at
 *   http://www.dmccoltd.com/files/controler/tsc10usb_pi_e.pdf
 *   http://www.dmccoltd.com/files/controler/tsc25_usb_e.pdf
 */
#ifdef CONFIG_TOUCHSCREEN_USB_DMC_TSC10

/* supported data rates. currently using 150 (see dmc_tsc10_init) */
#define TSC10_RATE_POINT	0x50
#define TSC10_RATE_30		0x40
#define TSC10_RATE_50		0x41
#define TSC10_RATE_80		0x42
#define TSC10_RATE_100		0x43
#define TSC10_RATE_130		0x44
#define TSC10_RATE_150		0x45

/* commands */
#define TSC10_CMD_RESET		0x55
#define TSC10_CMD_RATE		0x05
#define TSC10_CMD_DATA1		0x01

/*
 * Reset the controller, select the 150 rate and start data output.
 *
 * The reset and rate commands are vendor IN requests with a 2-byte reply;
 * the reply is checked against the byte values the code accepts (0x06, or
 * for the rate command also 0x15 followed by 0x01).
 *
 * Returns -ENOMEM, -ENODEV on an unexpected reply, a negative USB error, or
 * the result of the final usb_control_msg() call.
 */
static int dmc_tsc10_init(struct usbtouch_usb *usbtouch)
{
	struct usb_device *dev = interface_to_usbdev(usbtouch->interface);
	int ret;

	/* buf is released automatically on every return path (__free(kfree)) */
	u8 *buf __free(kfree) = kmalloc(2, GFP_NOIO);
	if (!buf)
		return -ENOMEM;

	/* reset */
	/* preset to 0xFF so that bytes the device did not write cannot match */
	buf[0] = buf[1] = 0xFF;
	ret = usb_control_msg(dev, usb_rcvctrlpipe (dev, 0),
			      TSC10_CMD_RESET,
			      USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
			      0, 0, buf, 2, USB_CTRL_SET_TIMEOUT);
	if (ret < 0)
		return ret;
	if (buf[0] != 0x06)
		return -ENODEV;

	/* TSC-25 data sheet specifies a delay after the RESET command */
	msleep(150);

	/* set coordinate output rate */
	buf[0] = buf[1] = 0xFF;
	ret = usb_control_msg(dev, usb_rcvctrlpipe (dev, 0),
			      TSC10_CMD_RATE,
			      USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
			      TSC10_RATE_150, 0, buf, 2, USB_CTRL_SET_TIMEOUT);
	if (ret < 0)
		return ret;
	if (buf[0] != 0x06 && (buf[0] != 0x15 || buf[1] != 0x01))
		return -ENODEV;

	/* start sending data */
	return usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
			       TSC10_CMD_DATA1,
			       USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
			       0, 0, NULL, 0, USB_CTRL_SET_TIMEOUT);
}

/*
 * Decode one report: 10-bit X in bytes 1-2 and 10-bit Y in bytes 3-4, low
 * byte first; bit 0 of byte 0 is the touch state. Always returns 1.
 */
static int dmc_tsc10_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	dev->x = ((pkt[2] & 0x03) << 8) | pkt[1];
	dev->y = ((pkt[4] & 0x03) << 8) | pkt[3];
	dev->touch = pkt[0] & 0x01;

	return 1;
}

/* DMC TSC-10/25: 5-byte reports, coordinates in 0..0x03ff */
static const struct usbtouch_device_info dmc_tsc10_dev_info = {
.min_xc		= 0x0,
	.max_xc		= 0x03ff,
	.min_yc		= 0x0,
	.max_yc		= 0x03ff,
	.rept_size	= 5,
	.init		= dmc_tsc10_init,
	.read_data	= dmc_tsc10_read_data,
};
#endif


/*****************************************************************************
 * IRTOUCH Part
 */
#ifdef CONFIG_TOUCHSCREEN_USB_IRTOUCH
/*
 * Decode one report: 16-bit X in bytes 2-3 and 16-bit Y in bytes 4-5, low
 * byte first; touched when either of the two low bits of byte 1 is set.
 * Always returns 1. Shared by both IRTOUCH device descriptions.
 */
static int irtouch_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	dev->x = (pkt[3] << 8) | pkt[2];
	dev->y = (pkt[5] << 8) | pkt[4];
	dev->touch = (pkt[1] & 0x03) ? 1 : 0;

	return 1;
}

/* IRTOUCH: 8-byte reports, coordinates in 0..0x0fff */
static const struct usbtouch_device_info irtouch_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x0fff,
	.min_yc		= 0x0,
	.max_yc		= 0x0fff,
	.rept_size	= 8,
	.read_data	= irtouch_read_data,
};

/* IRTOUCH high resolution: same decoder, coordinates in 0..0x7fff */
static const struct usbtouch_device_info irtouch_hires_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x7fff,
	.min_yc		= 0x0,
	.max_yc		= 0x7fff,
	.rept_size	= 8,
	.read_data	= irtouch_read_data,
};
#endif

/*****************************************************************************
 * ET&T TC5UH/TC4UM part
 */
#ifdef CONFIG_TOUCHSCREEN_USB_ETT_TC45USB
/*
 * Decode one report: 12-bit X in bytes 1-2 and 12-bit Y in bytes 3-4, low
 * byte first; bit 0 of byte 0 is the touch state. Always returns 1.
 */
static int tc45usb_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	dev->x = ((pkt[2] & 0x0F) << 8) | pkt[1];
	dev->y = ((pkt[4] & 0x0F) << 8) | pkt[3];
	dev->touch = pkt[0] & 0x01;

	return 1;
}

/* ET&T TC5UH/TC4UM: 5-byte reports, coordinates in 0..0x0fff */
static const struct usbtouch_device_info tc45usb_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x0fff,
	.min_yc		= 0x0,
	.max_yc		= 0x0fff,
	.rept_size	= 5,
	.read_data	= tc45usb_read_data,
};
#endif

/*****************************************************************************
 * IdealTEK URTC1000 Part
 */
#ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK
/* this protocol provides get_pkt_len(), so the multi-packet parser is needed */
#ifndef MULTI_PACKET
#define MULTI_PACKET
#endif
/*
 * Packet length for the multi-packet parser: 5 for packets with bit 0x80 set
 * in the first byte; for packets starting with 0x01, all of the remaining
 * input (len); 0 (skip one byte) otherwise.
 */
static int idealtek_get_pkt_len(unsigned char *buf, int len)
{
	if (buf[0] & 0x80)
		return 5;
	if (buf[0] == 0x01)
		return len;
	return 0;
}

/*
 * Decode one report. Bits 0x98 of byte 0 select the layout: 0x88 is the
 * IdealTEK layout, 0x98 the MT emulation layout, which has the two bytes of
 * each coordinate swapped. Anything else is ignored (returns 0). Bit 0x40 of
 * byte 0 is the touch state.
 */
static int idealtek_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	switch (pkt[0] & 0x98) {
	case 0x88:
		/* touch data in IdealTEK mode */
		dev->x = (pkt[1] << 5) | (pkt[2] >> 2);
		dev->y = (pkt[3] << 5) | (pkt[4] >> 2);
		dev->touch = (pkt[0] & 0x40) ?
1 : 0;
		return 1;

	case 0x98:
		/* touch data in MT emulation mode */
		dev->x = (pkt[2] << 5) | (pkt[1] >> 2);
		dev->y = (pkt[4] << 5) | (pkt[3] >> 2);
		dev->touch = (pkt[0] & 0x40) ? 1 : 0;
		return 1;

	default:
		return 0;
	}
}

/* IdealTEK URTC1000: coordinates in 0..0x0fff, variable-length packets */
static const struct usbtouch_device_info idealtek_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x0fff,
	.min_yc		= 0x0,
	.max_yc		= 0x0fff,
	.rept_size	= 8,
	.get_pkt_len	= idealtek_get_pkt_len,
	.read_data	= idealtek_read_data,
};
#endif

/*****************************************************************************
 * General Touch Part
 */
#ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH
/*
 * Decode one report: 16-bit X in bytes 1-2 and 16-bit Y in bytes 3-4, low
 * byte first; byte 5 is the pressure and bit 0 of byte 0 the touch state.
 * Always returns 1.
 *
 * NOTE(review): the pressure is stored, but general_touch_dev_info sets no
 * max_press and the generic handler only reports ABS_PRESSURE when max_press
 * is non-zero, so this value does not appear to reach user space.
 */
static int general_touch_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	dev->x = (pkt[2] << 8) | pkt[1];
	dev->y = (pkt[4] << 8) | pkt[3];
	dev->press = pkt[5] & 0xff;
	dev->touch = pkt[0] & 0x01;

	return 1;
}

/* General Touch: 7-byte reports, coordinates in 0..0x7fff */
static const struct usbtouch_device_info general_touch_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x7fff,
	.min_yc		= 0x0,
	.max_yc		= 0x7fff,
	.rept_size	= 7,
	.read_data	= general_touch_read_data,
};
#endif

/*****************************************************************************
 * GoTop Part
 */
#ifdef CONFIG_TOUCHSCREEN_USB_GOTOP
/*
 * Decode one 4-byte report. Byte 1 carries the high bits of both axes
 * (mask 0x38 for X, mask 0x07 for Y); bytes 2 and 3 carry the low bits of X
 * and Y. Bit 0 of byte 0 is the touch state. Always returns 1.
 */
static int gotop_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	dev->x = ((pkt[1] & 0x38) << 4) | pkt[2];
	dev->y = ((pkt[1] & 0x07) << 7) | pkt[3];
	dev->touch = pkt[0] & 0x01;

	return 1;
}

/* GoTop: 4-byte reports, coordinates in 0..0x03ff */
static const struct usbtouch_device_info gotop_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x03ff,
	.min_yc		= 0x0,
	.max_yc		= 0x03ff,
	.rept_size	= 4,
	.read_data	= gotop_read_data,
};
#endif

/*****************************************************************************
 * JASTEC Part
 */
#ifdef CONFIG_TOUCHSCREEN_USB_JASTEC
/*
 * Decode one 4-byte report: each axis is 12 bits, built from two 6-bit
 * halves (bytes 0 and 2 for X, bytes 1 and 3 for Y). Bit 0x40 of byte 0 is
 * the touch state. Always returns 1.
 */
static int jastec_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	dev->x = ((pkt[0] & 0x3f) << 6) | (pkt[2] & 0x3f);
	dev->y = ((pkt[1] & 0x3f) << 6) | (pkt[3] & 0x3f);
	dev->touch = (pkt[0] & 0x40) >> 6;

	return 1;
}

/* JASTEC: 4-byte reports */
static const struct usbtouch_device_info jastec_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x0fff,
	.min_yc		= 0x0,
	.max_yc		=
0x0fff,
	.rept_size	= 4,
	.read_data	= jastec_read_data,
};
#endif

/*****************************************************************************
 * Zytronic Part
 */
#ifdef CONFIG_TOUCHSCREEN_USB_ZYTRONIC
/*
 * Decode one packet, selected by its first byte:
 *   0x3A  command response - logged only
 *   0xC0  touch down       - position stored, touch = 1
 *   0x80  touch up         - position stored, touch = 0
 * Returns 1 for down/up packets and 0 for everything else.
 */
static int zytronic_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	struct usb_interface *intf = dev->interface;

	switch (pkt[0]) {
	case 0x3A: /* command response */
		dev_dbg(&intf->dev, "%s: Command response %d\n", __func__, pkt[1]);
		break;

	case 0xC0: /* down */
		dev->x = (pkt[1] & 0x7f) | ((pkt[2] & 0x07) << 7);
		dev->y = (pkt[3] & 0x7f) | ((pkt[4] & 0x07) << 7);
		dev->touch = 1;
		dev_dbg(&intf->dev, "%s: down %d,%d\n", __func__, dev->x, dev->y);
		return 1;

	case 0x80: /* up */
		dev->x = (pkt[1] & 0x7f) | ((pkt[2] & 0x07) << 7);
		dev->y = (pkt[3] & 0x7f) | ((pkt[4] & 0x07) << 7);
		dev->touch = 0;
		dev_dbg(&intf->dev, "%s: up %d,%d\n", __func__, dev->x, dev->y);
		return 1;

	default:
		dev_dbg(&intf->dev, "%s: Unknown return %d\n", __func__, pkt[0]);
		break;
	}

	return 0;
}

/* Zytronic: 5-byte reports, coordinates in 0..0x03ff, URB serviced always */
static const struct usbtouch_device_info zytronic_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x03ff,
	.min_yc		= 0x0,
	.max_yc		= 0x03ff,
	.rept_size	= 5,
	.read_data	= zytronic_read_data,
	.irq_always     = true,
};
#endif

/*****************************************************************************
 * NEXIO Part
 */
#ifdef CONFIG_TOUCHSCREEN_USB_NEXIO

/* passed as the timeout argument of usb_bulk_msg() */
#define NEXIO_TIMEOUT	5000
/* size of the scratch buffer used during init */
#define NEXIO_BUFSIZE	1024
/* sensor values above this start a touched span, values below end it */
#define NEXIO_THRESHOLD	50

/* URB and payload used to acknowledge touch packets */
struct nexio_priv {
	struct urb *ack;
	unsigned char *ack_buf;
};

/* on-the-wire layout of a touch packet; multi-byte fields are big endian */
struct nexio_touch_packet {
	/*
	 * Bit 0 is the touch state; the decoder only accepts packets with the
	 * top three bits (0xe0) set. So 0xe1 = touch and presumably
	 * 0xe0 = release (the original comment gave 0xe1 for both).
	 */
	u8	flags;
	__be16	data_len;	/* total bytes of touch data */
	__be16	x_len;		/* bytes for X axis */
	__be16	y_len;		/* bytes for Y axis */
	u8	data[];
} __attribute__ ((packed));

static unsigned char nexio_ack_pkt[2] = { 0xaa, 0x02 };
static unsigned char nexio_init_pkt[4] = { 0x82, 0x04, 0x0a, 0x0f };

/* completion handler of the ACK URB; intentionally does nothing */
static void nexio_ack_complete(struct urb *urb)
{
}

/*
 * Allocate the private data, a heap copy of the ACK payload and the ACK URB.
 * Returns 0 on success or -ENOMEM, undoing earlier allocations on failure.
 */
static int nexio_alloc(struct usbtouch_usb *usbtouch)
{
	struct nexio_priv *priv;
	int ret = -ENOMEM;

	priv =
kmalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		goto out_buf;

	usbtouch->priv = priv;

	/* the ACK payload is duplicated onto the heap for use as URB buffer */
	priv->ack_buf = kmemdup(nexio_ack_pkt, sizeof(nexio_ack_pkt),
				GFP_KERNEL);
	if (!priv->ack_buf)
		goto err_priv;

	priv->ack = usb_alloc_urb(0, GFP_KERNEL);
	if (!priv->ack) {
		dev_dbg(&usbtouch->interface->dev,
			"%s - usb_alloc_urb failed: usbtouch->ack\n", __func__);
		goto err_ack_buf;
	}

	return 0;

	/*
	 * Error unwinding, in reverse order of allocation.
	 * NOTE(review): usbtouch->priv still points at priv after it has been
	 * freed below - confirm no caller touches it after a failed alloc.
	 */
err_ack_buf:
	kfree(priv->ack_buf);
err_priv:
	kfree(priv);
out_buf:
	return ret;
}

/*
 * Initialise a NEXIO device over its bulk endpoints:
 *  - locate the first IN and the first OUT endpoint of the interface,
 *  - perform two reads whose data is discarded,
 *  - send the init packet,
 *  - read up to three replies, picking out firmware version and device name,
 *  - log both and prepare (but do not submit) the ACK URB.
 *
 * Returns 0 on success, -ENXIO if an endpoint is missing, -ENOMEM, or a
 * negative error from usb_bulk_msg() for the first two steps. Failed reply
 * reads are skipped rather than treated as errors.
 */
static int nexio_init(struct usbtouch_usb *usbtouch)
{
	struct usb_device *dev = interface_to_usbdev(usbtouch->interface);
	struct usb_host_interface *interface = usbtouch->interface->cur_altsetting;
	struct nexio_priv *priv = usbtouch->priv;
	int ret = -ENOMEM;
	int actual_len, i;
	char *firmware_ver = NULL, *device_name = NULL;
	int input_ep = 0, output_ep = 0;

	/* find first input and output endpoint */
	for (i = 0; i < interface->desc.bNumEndpoints; i++) {
		if (!input_ep &&
		    usb_endpoint_dir_in(&interface->endpoint[i].desc))
			input_ep = interface->endpoint[i].desc.bEndpointAddress;
		if (!output_ep &&
		    usb_endpoint_dir_out(&interface->endpoint[i].desc))
			output_ep = interface->endpoint[i].desc.bEndpointAddress;
	}
	if (!input_ep || !output_ep)
		return -ENXIO;

	/* buf is released automatically on every return path (__free(kfree)) */
	u8 *buf __free(kfree) = kmalloc(NEXIO_BUFSIZE, GFP_NOIO);
	if (!buf)
		return -ENOMEM;

	/* two empty reads */
	for (i = 0; i < 2; i++) {
		ret = usb_bulk_msg(dev, usb_rcvbulkpipe(dev, input_ep),
				   buf, NEXIO_BUFSIZE, &actual_len,
				   NEXIO_TIMEOUT);
		if (ret < 0)
			return ret;
	}

	/* send init command */
	memcpy(buf, nexio_init_pkt, sizeof(nexio_init_pkt));
	ret = usb_bulk_msg(dev, usb_sndbulkpipe(dev, output_ep),
			   buf, sizeof(nexio_init_pkt), &actual_len,
			   NEXIO_TIMEOUT);
	if (ret < 0)
		return ret;

	/* read replies */
	for (i = 0; i < 3; i++) {
		/* zeroing first keeps the strings copied below terminated */
		memset(buf, 0, NEXIO_BUFSIZE);
		ret = usb_bulk_msg(dev, usb_rcvbulkpipe(dev, input_ep),
				   buf, NEXIO_BUFSIZE, &actual_len,
				   NEXIO_TIMEOUT);
		/* byte 1 of a reply must equal the number of bytes received */
		if (ret < 0 || actual_len < 1 || buf[1] != actual_len)
			continue;
		switch (buf[0]) {
		case 0x83:	/* firmware version */
			if
(!firmware_ver)
				firmware_ver = kstrdup(&buf[2], GFP_NOIO);
			break;
		case 0x84:	/* device name */
			if (!device_name)
				device_name = kstrdup(&buf[2], GFP_NOIO);
			break;
		}
	}

	/* either string is still NULL here if its reply never arrived */
	printk(KERN_INFO "Nexio device: %s, firmware version: %s\n",
	       device_name, firmware_ver);

	kfree(firmware_ver);
	kfree(device_name);

	/* prepare the ACK URB; it is submitted from nexio_read_data() */
	usb_fill_bulk_urb(priv->ack, dev, usb_sndbulkpipe(dev, output_ep),
			  priv->ack_buf, sizeof(nexio_ack_pkt),
			  nexio_ack_complete, usbtouch);

	return 0;
}

/* Stop the ACK URB and free everything nexio_alloc() created. */
static void nexio_exit(struct usbtouch_usb *usbtouch)
{
	struct nexio_priv *priv = usbtouch->priv;

	usb_kill_urb(priv->ack);
	usb_free_urb(priv->ack);
	kfree(priv->ack_buf);
	kfree(priv);
}

/*
 * Decode one NEXIO touch packet.
 *
 * The packet carries one byte per sensor element, first x_len bytes for the
 * X axis and then the Y axis bytes up to data_len. The function submits the
 * ACK URB, sets the axis ranges the first time through (while the ABS_X
 * maximum is still 0), and then looks for the first span of elements above
 * NEXIO_THRESHOLD on each axis.
 *
 * Returns 1 when a touch position was stored in usbtouch->x/y/touch, and 0
 * for non-touch packets or when no span is found.
 *
 * NOTE(review): data_len, x_len and y_len come from the packet header and
 * nothing in this function checks them against the size of the buffer behind
 * pkt before packet->data[] is indexed with them - confirm that the bound is
 * guaranteed elsewhere.
 */
static int nexio_read_data(struct usbtouch_usb *usbtouch, unsigned char *pkt)
{
	struct device *dev = &usbtouch->interface->dev;
	struct nexio_touch_packet *packet = (void *) pkt;
	struct nexio_priv *priv = usbtouch->priv;
	unsigned int data_len = be16_to_cpu(packet->data_len);
	unsigned int x_len = be16_to_cpu(packet->x_len);
	unsigned int y_len = be16_to_cpu(packet->y_len);
	int x, y, begin_x, begin_y, end_x, end_y, w, h, ret;

	/* got touch data? */
	if ((pkt[0] & 0xe0) != 0xe0)
		return 0;

	/* length fix-ups; the source gives no rationale for these constants */
	if (data_len > 0xff)
		data_len -= 0x100;
	if (x_len > 0xff)
		x_len -= 0x80;

	/* send ACK */
	/* a failed submission is only logged; decoding continues */
	ret = usb_submit_urb(priv->ack, GFP_ATOMIC);
	if (ret)
		dev_warn(dev, "Failed to submit ACK URB: %d\n", ret);

	/* axis ranges are derived from the first packet: twice the element count */
	if (!input_abs_get_max(usbtouch->input, ABS_X)) {
		input_set_abs_params(usbtouch->input, ABS_X,
				     0, 2 * x_len, 0, 0);
		input_set_abs_params(usbtouch->input, ABS_Y,
				     0, 2 * y_len, 0, 0);
	}
	/*
	 * The device reports state of IR sensors on X and Y axes.
	 * Each byte represents "darkness" percentage (0-100) of one element.
	 * 17" touchscreen reports only 64 x 52 bytes so the resolution is low.
	 * This also means that there's a limited multi-touch capability but
	 * it's disabled (and untested) here as there's no X driver for that.
*/
	begin_x = end_x = begin_y = end_y = -1;
	/* outer scan: find a span of X elements above the threshold */
	for (x = 0; x < x_len; x++) {
		if (begin_x == -1 && packet->data[x] > NEXIO_THRESHOLD) {
			begin_x = x;
			continue;
		}
		if (end_x == -1 && begin_x != -1 && packet->data[x] < NEXIO_THRESHOLD) {
			end_x = x - 1;
			/* inner scan: the Y elements follow the X elements in data[] */
			for (y = x_len; y < data_len; y++) {
				if (begin_y == -1 && packet->data[y] > NEXIO_THRESHOLD) {
					begin_y = y - x_len;
					continue;
				}
				if (end_y == -1 &&
				    begin_y != -1 && packet->data[y] < NEXIO_THRESHOLD) {
					end_y = y - 1 - x_len;
					w = end_x - begin_x;
					h = end_y - begin_y;
#if 0
					/* multi-touch */
					/*
					 * NOTE(review): TOUCH_MINOR below uses
					 * min(x,h) while TOUCH_MAJOR uses
					 * max(w,h); presumably min(w,h) was
					 * meant. The code is compiled out.
					 */
					input_report_abs(usbtouch->input,
						    ABS_MT_TOUCH_MAJOR, max(w,h));
					input_report_abs(usbtouch->input,
						    ABS_MT_TOUCH_MINOR, min(x,h));
					input_report_abs(usbtouch->input,
						    ABS_MT_POSITION_X, 2*begin_x+w);
					input_report_abs(usbtouch->input,
						    ABS_MT_POSITION_Y, 2*begin_y+h);
					input_report_abs(usbtouch->input,
						    ABS_MT_ORIENTATION, w > h);
					input_mt_sync(usbtouch->input);
#endif
					/* single touch */
					/* 2 * begin + extent: span centre in doubled units */
					usbtouch->x = 2 * begin_x + w;
					usbtouch->y = 2 * begin_y + h;
					usbtouch->touch = packet->flags & 0x01;
					begin_y = end_y = -1;
					/* only the first span found is reported */
					return 1;
				}
			}
			/* no Y span for this X span: look for the next X span */
			begin_x = end_x = -1;
		}

	}
	return 0;
}

/*
 * NEXIO: 1024-byte reports, URB serviced always. No coordinate limits are
 * given here; nexio_read_data() sets the axis ranges from the first packet.
 */
static const struct usbtouch_device_info nexio_dev_info = {
	.rept_size	= 1024,
	.irq_always	= true,
	.read_data	= nexio_read_data,
	.alloc		= nexio_alloc,
	.init		= nexio_init,
	.exit		= nexio_exit,
};
#endif


/*****************************************************************************
 * ELO part
 */

#ifdef CONFIG_TOUCHSCREEN_USB_ELO

/*
 * Decode one report: 16-bit X in bytes 2-3 and 16-bit Y in bytes 4-5, low
 * byte first. Byte 6 is the pressure; any non-zero pressure counts as a
 * touch. Always returns 1.
 */
static int elo_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
{
	dev->x = (pkt[3] << 8) | pkt[2];
	dev->y = (pkt[5] << 8) | pkt[4];
	dev->touch = pkt[6] > 0;
	dev->press = pkt[6];

	return 1;
}

/* ELO: 8-byte reports, coordinates in 0..0x0fff, pressure in 0..0xff */
static const struct usbtouch_device_info elo_dev_info = {
	.min_xc		= 0x0,
	.max_xc		= 0x0fff,
	.min_yc		= 0x0,
	.max_yc		= 0x0fff,
	.max_press	= 0xff,
	.rept_size	= 8,
	.read_data	= elo_read_data,
};
#endif


/*****************************************************************************
 * Generic Part
 */
/*
 * Handle one complete packet: let the protocol's read_data hook decode it
 * and, if the hook returns non-zero, report BTN_TOUCH, ABS_X/ABS_Y (swapped
 * when the swap_xy module parameter is set) and, for protocols with a
 * non-zero max_press, ABS_PRESSURE, followed by a sync. The len argument is
 * not used.
 */
static void usbtouch_process_pkt(struct usbtouch_usb *usbtouch,
                                 unsigned char *pkt,
int len) { const struct usbtouch_device_info *type = usbtouch->type; if (!type->read_data(usbtouch, pkt)) return; input_report_key(usbtouch->input, BTN_TOUCH, usbtouch->touch); if (swap_xy) { input_report_abs(usbtouch->input, ABS_X, usbtouch->y); input_report_abs(usbtouch->input, ABS_Y, usbtouch->x); } else { input_report_abs(usbtouch->input, ABS_X, usbtouch->x); input_report_abs(usbtouch->input, ABS_Y, usbtouch->y); } if (type->max_press) input_report_abs(usbtouch->input, ABS_PRESSURE, usbtouch->press); input_sync(usbtouch->input); } #ifdef MULTI_PACKET static void usbtouch_process_multi(struct usbtouch_usb *usbtouch, unsigned char *pkt, int len) { unsigned char *buffer; int pkt_len, pos, buf_len, tmp; /* process buffer */ if (unlikely(usbtouch->buf_len)) { /* try to get size */ pkt_len = usbtouch->type->get_pkt_len( usbtouch->buffer, usbtouch->buf_len); /* drop? */ if (unlikely(!pkt_len)) goto out_flush_buf; /* need to append -pkt_len bytes before able to get size */ if (unlikely(pkt_len < 0)) { int append = -pkt_len; if (unlikely(append > len)) append = len; if (usbtouch->buf_len + append >= usbtouch->type->rept_size) goto out_flush_buf; memcpy(usbtouch->buffer + usbtouch->buf_len, pkt, append); usbtouch->buf_len += append; pkt_len = usbtouch->type->get_pkt_len( usbtouch->buffer, usbtouch->buf_len); if (pkt_len < 0) return; } /* append */ tmp = pkt_len - usbtouch->buf_len; if (usbtouch->buf_len + tmp >= usbtouch->type->rept_size) goto out_flush_buf; memcpy(usbtouch->buffer + usbtouch->buf_len, pkt, tmp); usbtouch_process_pkt(usbtouch, usbtouch->buffer, pkt_len); buffer = pkt + tmp; buf_len = len - tmp; } else { buffer = pkt; buf_len = len; } /* loop over the received packet, process */ pos = 0; while (pos < buf_len) { /* get packet len */ pkt_len = usbtouch->type->get_pkt_len(buffer + pos, buf_len - pos); /* unknown packet: skip one byte */ if (unlikely(!pkt_len)) { pos++; continue; } /* full packet: process */ if (likely((pkt_len > 0) && (pkt_len <= buf_len - 
pos))) { usbtouch_process_pkt(usbtouch, buffer + pos, pkt_len); } else { /* incomplete packet: save in buffer */ memcpy(usbtouch->buffer, buffer + pos, buf_len - pos); usbtouch->buf_len = buf_len - pos; return; } pos += pkt_len; } out_flush_buf: usbtouch->buf_len = 0; return; } #else static void usbtouch_process_multi(struct usbtouch_usb *usbtouch, unsigned char *pkt, int len) { dev_WARN_ONCE(&usbtouch->interface->dev, 1, "Protocol has ->get_pkt_len() without #define MULTI_PACKET"); } #endif static void usbtouch_irq(struct urb *urb) { struct usbtouch_usb *usbtouch = urb->context; struct device *dev = &usbtouch->interface->dev; int retval; switch (urb->status) { case 0: /* success */ break; case -ETIME: /* this urb is timing out */ dev_dbg(dev, "%s - urb timed out - was the device unplugged?\n", __func__); return; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -EPIPE: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, urb->status); return; default: dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, urb->status); goto exit; } usbtouch->process_pkt(usbtouch, usbtouch->data, urb->actual_length); exit: usb_mark_last_busy(interface_to_usbdev(usbtouch->interface)); retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(dev, "%s - usb_submit_urb failed with result: %d\n", __func__, retval); } static int usbtouch_start_io(struct usbtouch_usb *usbtouch) { guard(mutex)(&usbtouch->pm_mutex); if (!usbtouch->type->irq_always) if (usb_submit_urb(usbtouch->irq, GFP_KERNEL)) return -EIO; usbtouch->interface->needs_remote_wakeup = 1; usbtouch->is_open = true; return 0; } static int usbtouch_open(struct input_dev *input) { struct usbtouch_usb *usbtouch = input_get_drvdata(input); int r; usbtouch->irq->dev = interface_to_usbdev(usbtouch->interface); r = usb_autopm_get_interface(usbtouch->interface) ? 
-EIO : 0; if (r) return r; r = usbtouch_start_io(usbtouch); usb_autopm_put_interface(usbtouch->interface); return r; } static void usbtouch_close(struct input_dev *input) { struct usbtouch_usb *usbtouch = input_get_drvdata(input); int r; scoped_guard(mutex, &usbtouch->pm_mutex) { if (!usbtouch->type->irq_always) usb_kill_urb(usbtouch->irq); usbtouch->is_open = false; } r = usb_autopm_get_interface(usbtouch->interface); usbtouch->interface->needs_remote_wakeup = 0; if (!r) usb_autopm_put_interface(usbtouch->interface); } static int usbtouch_suspend(struct usb_interface *intf, pm_message_t message) { struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); usb_kill_urb(usbtouch->irq); return 0; } static int usbtouch_resume(struct usb_interface *intf) { struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); guard(mutex)(&usbtouch->pm_mutex); if (usbtouch->is_open || usbtouch->type->irq_always) return usb_submit_urb(usbtouch->irq, GFP_NOIO); return 0; } static int usbtouch_reset_resume(struct usb_interface *intf) { struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); int err; /* reinit the device */ if (usbtouch->type->init) { err = usbtouch->type->init(usbtouch); if (err) { dev_dbg(&intf->dev, "%s - type->init() failed, err: %d\n", __func__, err); return err; } } /* restart IO if needed */ guard(mutex)(&usbtouch->pm_mutex); if (usbtouch->is_open) return usb_submit_urb(usbtouch->irq, GFP_NOIO); return 0; } static void usbtouch_free_buffers(struct usb_device *udev, struct usbtouch_usb *usbtouch) { usb_free_coherent(udev, usbtouch->data_size, usbtouch->data, usbtouch->data_dma); kfree(usbtouch->buffer); } static struct usb_endpoint_descriptor * usbtouch_get_input_endpoint(struct usb_host_interface *interface) { int i; for (i = 0; i < interface->desc.bNumEndpoints; i++) if (usb_endpoint_dir_in(&interface->endpoint[i].desc)) return &interface->endpoint[i].desc; return NULL; } static int usbtouch_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct 
usbtouch_usb *usbtouch; struct input_dev *input_dev; struct usb_endpoint_descriptor *endpoint; struct usb_device *udev = interface_to_usbdev(intf); const struct usbtouch_device_info *type; int err = -ENOMEM; /* some devices are ignored */ type = (const struct usbtouch_device_info *)id->driver_info; if (!type) return -ENODEV; endpoint = usbtouch_get_input_endpoint(intf->cur_altsetting); if (!endpoint) return -ENXIO; usbtouch = kzalloc(sizeof(*usbtouch), GFP_KERNEL); input_dev = input_allocate_device(); if (!usbtouch || !input_dev) goto out_free; mutex_init(&usbtouch->pm_mutex); usbtouch->type = type; usbtouch->data_size = type->rept_size; if (type->get_pkt_len) { /* * When dealing with variable-length packets we should * not request more than wMaxPacketSize bytes at once * as we do not know if there is more data coming or * we filled exactly wMaxPacketSize bytes and there is * nothing else. */ usbtouch->data_size = min(usbtouch->data_size, usb_endpoint_maxp(endpoint)); } usbtouch->data = usb_alloc_coherent(udev, usbtouch->data_size, GFP_KERNEL, &usbtouch->data_dma); if (!usbtouch->data) goto out_free; if (type->get_pkt_len) { usbtouch->buffer = kmalloc(type->rept_size, GFP_KERNEL); if (!usbtouch->buffer) goto out_free_buffers; usbtouch->process_pkt = usbtouch_process_multi; } else { usbtouch->process_pkt = usbtouch_process_pkt; } usbtouch->irq = usb_alloc_urb(0, GFP_KERNEL); if (!usbtouch->irq) { dev_dbg(&intf->dev, "%s - usb_alloc_urb failed: usbtouch->irq\n", __func__); goto out_free_buffers; } usbtouch->interface = intf; usbtouch->input = input_dev; if (udev->manufacturer) strscpy(usbtouch->name, udev->manufacturer, sizeof(usbtouch->name)); if (udev->product) { if (udev->manufacturer) strlcat(usbtouch->name, " ", sizeof(usbtouch->name)); strlcat(usbtouch->name, udev->product, sizeof(usbtouch->name)); } if (!strlen(usbtouch->name)) snprintf(usbtouch->name, sizeof(usbtouch->name), "USB Touchscreen %04x:%04x", le16_to_cpu(udev->descriptor.idVendor), 
le16_to_cpu(udev->descriptor.idProduct)); usb_make_path(udev, usbtouch->phys, sizeof(usbtouch->phys)); strlcat(usbtouch->phys, "/input0", sizeof(usbtouch->phys)); input_dev->name = usbtouch->name; input_dev->phys = usbtouch->phys; usb_to_input_id(udev, &input_dev->id); input_dev->dev.parent = &intf->dev; input_set_drvdata(input_dev, usbtouch); input_dev->open = usbtouch_open; input_dev->close = usbtouch_close; input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); input_set_abs_params(input_dev, ABS_X, type->min_xc, type->max_xc, 0, 0); input_set_abs_params(input_dev, ABS_Y, type->min_yc, type->max_yc, 0, 0); if (type->max_press) input_set_abs_params(input_dev, ABS_PRESSURE, type->min_press, type->max_press, 0, 0); if (usb_endpoint_type(endpoint) == USB_ENDPOINT_XFER_INT) usb_fill_int_urb(usbtouch->irq, udev, usb_rcvintpipe(udev, endpoint->bEndpointAddress), usbtouch->data, usbtouch->data_size, usbtouch_irq, usbtouch, endpoint->bInterval); else usb_fill_bulk_urb(usbtouch->irq, udev, usb_rcvbulkpipe(udev, endpoint->bEndpointAddress), usbtouch->data, usbtouch->data_size, usbtouch_irq, usbtouch); usbtouch->irq->dev = udev; usbtouch->irq->transfer_dma = usbtouch->data_dma; usbtouch->irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; /* device specific allocations */ if (type->alloc) { err = type->alloc(usbtouch); if (err) { dev_dbg(&intf->dev, "%s - type->alloc() failed, err: %d\n", __func__, err); goto out_free_urb; } } /* device specific initialisation*/ if (type->init) { err = type->init(usbtouch); if (err) { dev_dbg(&intf->dev, "%s - type->init() failed, err: %d\n", __func__, err); goto out_do_exit; } } err = input_register_device(usbtouch->input); if (err) { dev_dbg(&intf->dev, "%s - input_register_device failed, err: %d\n", __func__, err); goto out_do_exit; } usb_set_intfdata(intf, usbtouch); if (usbtouch->type->irq_always) { /* this can't fail */ usb_autopm_get_interface(intf); err = 
usb_submit_urb(usbtouch->irq, GFP_KERNEL); if (err) { usb_autopm_put_interface(intf); dev_err(&intf->dev, "%s - usb_submit_urb failed with result: %d\n", __func__, err); goto out_unregister_input; } } return 0; out_unregister_input: input_unregister_device(input_dev); input_dev = NULL; out_do_exit: if (type->exit) type->exit(usbtouch); out_free_urb: usb_free_urb(usbtouch->irq); out_free_buffers: usbtouch_free_buffers(udev, usbtouch); out_free: input_free_device(input_dev); kfree(usbtouch); return err; } static void usbtouch_disconnect(struct usb_interface *intf) { struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); if (!usbtouch) return; dev_dbg(&intf->dev, "%s - usbtouch is initialized, cleaning up\n", __func__); usb_set_intfdata(intf, NULL); /* this will stop IO via close */ input_unregister_device(usbtouch->input); usb_free_urb(usbtouch->irq); if (usbtouch->type->exit) usbtouch->type->exit(usbtouch); usbtouch_free_buffers(interface_to_usbdev(intf), usbtouch); kfree(usbtouch); } static const struct attribute_group *usbtouch_groups[] = { #ifdef CONFIG_TOUCHSCREEN_USB_3M &mtouch_attr_group, #endif NULL }; static const struct usb_device_id usbtouch_devices[] = { #ifdef CONFIG_TOUCHSCREEN_USB_EGALAX /* ignore the HID capable devices, handled by usbhid */ { USB_DEVICE_INTERFACE_CLASS(0x0eef, 0x0001, USB_INTERFACE_CLASS_HID), .driver_info = 0 }, { USB_DEVICE_INTERFACE_CLASS(0x0eef, 0x0002, USB_INTERFACE_CLASS_HID), .driver_info = 0 }, /* normal device IDs */ { USB_DEVICE(0x3823, 0x0001), .driver_info = (kernel_ulong_t)&egalax_dev_info }, { USB_DEVICE(0x3823, 0x0002), .driver_info = (kernel_ulong_t)&egalax_dev_info }, { USB_DEVICE(0x0123, 0x0001), .driver_info = (kernel_ulong_t)&egalax_dev_info }, { USB_DEVICE(0x0eef, 0x0001), .driver_info = (kernel_ulong_t)&egalax_dev_info }, { USB_DEVICE(0x0eef, 0x0002), .driver_info = (kernel_ulong_t)&egalax_dev_info }, { USB_DEVICE(0x1234, 0x0001), .driver_info = (kernel_ulong_t)&egalax_dev_info }, { USB_DEVICE(0x1234, 0x0002), 
.driver_info = (kernel_ulong_t)&egalax_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_PANJIT { USB_DEVICE(0x134c, 0x0001), .driver_info = (kernel_ulong_t)&panjit_dev_info }, { USB_DEVICE(0x134c, 0x0002), .driver_info = (kernel_ulong_t)&panjit_dev_info }, { USB_DEVICE(0x134c, 0x0003), .driver_info = (kernel_ulong_t)&panjit_dev_info }, { USB_DEVICE(0x134c, 0x0004), .driver_info = (kernel_ulong_t)&panjit_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_3M { USB_DEVICE(0x0596, 0x0001), .driver_info = (kernel_ulong_t)&mtouch_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ITM { USB_DEVICE(0x0403, 0xf9e9), .driver_info = (kernel_ulong_t)&itm_dev_info }, { USB_DEVICE(0x16e3, 0xf9e9), .driver_info = (kernel_ulong_t)&itm_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ETURBO { USB_DEVICE(0x1234, 0x5678), .driver_info = (kernel_ulong_t)&eturbo_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GUNZE { USB_DEVICE(0x0637, 0x0001), .driver_info = (kernel_ulong_t)&gunze_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_DMC_TSC10 { USB_DEVICE(0x0afa, 0x03e8), .driver_info = (kernel_ulong_t)&dmc_tsc10_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_IRTOUCH { USB_DEVICE(0x255e, 0x0001), .driver_info = (kernel_ulong_t)&irtouch_dev_info }, { USB_DEVICE(0x595a, 0x0001), .driver_info = (kernel_ulong_t)&irtouch_dev_info }, { USB_DEVICE(0x6615, 0x0001), .driver_info = (kernel_ulong_t)&irtouch_dev_info }, { USB_DEVICE(0x6615, 0x0012), .driver_info = (kernel_ulong_t)&irtouch_hires_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK { USB_DEVICE(0x1391, 0x1000), .driver_info = (kernel_ulong_t)&idealtek_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH { USB_DEVICE(0x0dfc, 0x0001), .driver_info = (kernel_ulong_t)&general_touch_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GOTOP { USB_DEVICE(0x08f2, 0x007f), .driver_info = (kernel_ulong_t)&gotop_dev_info }, { USB_DEVICE(0x08f2, 0x00ce), .driver_info = (kernel_ulong_t)&gotop_dev_info }, { USB_DEVICE(0x08f2, 0x00f4), 
.driver_info = (kernel_ulong_t)&gotop_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_JASTEC { USB_DEVICE(0x0f92, 0x0001), .driver_info = (kernel_ulong_t)&jastec_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_E2I { USB_DEVICE(0x1ac7, 0x0001), .driver_info = (kernel_ulong_t)&e2i_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ZYTRONIC { USB_DEVICE(0x14c8, 0x0003), .driver_info = (kernel_ulong_t)&zytronic_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ETT_TC45USB /* TC5UH */ { USB_DEVICE(0x0664, 0x0309), .driver_info = (kernel_ulong_t)&tc45usb_dev_info }, /* TC4UM */ { USB_DEVICE(0x0664, 0x0306), .driver_info = (kernel_ulong_t)&tc45usb_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_NEXIO /* data interface only */ { USB_DEVICE_AND_INTERFACE_INFO(0x10f0, 0x2002, 0x0a, 0x00, 0x00), .driver_info = (kernel_ulong_t)&nexio_dev_info }, { USB_DEVICE_AND_INTERFACE_INFO(0x1870, 0x0001, 0x0a, 0x00, 0x00), .driver_info = (kernel_ulong_t)&nexio_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ELO { USB_DEVICE(0x04e7, 0x0020), .driver_info = (kernel_ulong_t)&elo_dev_info }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_EASYTOUCH { USB_DEVICE(0x7374, 0x0001), .driver_info = (kernel_ulong_t)&etouch_dev_info }, #endif { } }; MODULE_DEVICE_TABLE(usb, usbtouch_devices); static struct usb_driver usbtouch_driver = { .name = "usbtouchscreen", .probe = usbtouch_probe, .disconnect = usbtouch_disconnect, .suspend = usbtouch_suspend, .resume = usbtouch_resume, .reset_resume = usbtouch_reset_resume, .id_table = usbtouch_devices, .dev_groups = usbtouch_groups, .supports_autosuspend = 1, }; module_usb_driver(usbtouch_driver); MODULE_AUTHOR("Daniel Ritz <daniel.ritz@gmx.ch>"); MODULE_DESCRIPTION("USB Touchscreen Driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("touchkitusb"); MODULE_ALIAS("itmtouch"); MODULE_ALIAS("mtouchusb");
1 1 1 1 5 2 3 1 3 2 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 // SPDX-License-Identifier: GPL-2.0-or-later /* * Infrared Toy and IR Droid RC core driver * * Copyright (C) 2020 Sean Young <sean@mess.org> * * http://dangerousprototypes.com/docs/USB_IR_Toy:_Sampling_mode * * This driver is based on the lirc driver which can be found here: * https://sourceforge.net/p/lirc/git/ci/master/tree/plugins/irtoy.c * Copyright (C) 2011 Peter Kooiman <pkooiman@gmail.com> */ #include <linux/unaligned.h> #include <linux/completion.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/slab.h> #include <linux/usb/input.h> #include <media/rc-core.h> static const u8 COMMAND_VERSION[] = { 'v' }; // End transmit and repeat reset command so we exit sump mode static const u8 COMMAND_RESET[] = { 0xff, 0xff, 0, 0, 0, 0, 0 }; static const u8 COMMAND_SMODE_ENTER[] = { 's' }; static const u8 COMMAND_SMODE_EXIT[] = { 0 }; static const u8 COMMAND_TXSTART[] = { 0x26, 0x24, 0x25, 0x03 }; #define REPLY_XMITCOUNT 't' #define REPLY_XMITSUCCESS 'C' #define REPLY_VERSION 'V' #define REPLY_SAMPLEMODEPROTO 'S' #define TIMEOUT 500 #define LEN_XMITRES 3 #define LEN_VERSION 4 #define LEN_SAMPLEMODEPROTO 3 #define MIN_FW_VERSION 20 #define UNIT_US 21 #define MAX_TIMEOUT_US (UNIT_US * U16_MAX) #define MAX_PACKET 64 enum state { STATE_IRDATA, STATE_COMMAND_NO_RESP, STATE_COMMAND, STATE_TX, }; struct irtoy { struct device *dev; struct usb_device *usbdev; struct rc_dev *rc; struct urb *urb_in, *urb_out; u8 *in; u8 *out; struct completion command_done; bool pulse; enum state state; void *tx_buf; uint tx_len; uint emitted; uint hw_version; uint sw_version; uint proto_version; char phys[64]; }; static void irtoy_response(struct irtoy *irtoy, u32 len) { switch (irtoy->state) { case STATE_COMMAND: if (len == LEN_VERSION && irtoy->in[0] == REPLY_VERSION) { uint version; 
irtoy->in[LEN_VERSION] = 0; if (kstrtouint(irtoy->in + 1, 10, &version)) { dev_err(irtoy->dev, "invalid version %*phN. Please make sure you are using firmware v20 or higher", LEN_VERSION, irtoy->in); break; } dev_dbg(irtoy->dev, "version %s\n", irtoy->in); irtoy->hw_version = version / 100; irtoy->sw_version = version % 100; irtoy->state = STATE_IRDATA; complete(&irtoy->command_done); } else if (len == LEN_SAMPLEMODEPROTO && irtoy->in[0] == REPLY_SAMPLEMODEPROTO) { uint version; irtoy->in[LEN_SAMPLEMODEPROTO] = 0; if (kstrtouint(irtoy->in + 1, 10, &version)) { dev_err(irtoy->dev, "invalid sample mode response %*phN", LEN_SAMPLEMODEPROTO, irtoy->in); return; } dev_dbg(irtoy->dev, "protocol %s\n", irtoy->in); irtoy->proto_version = version; irtoy->state = STATE_IRDATA; complete(&irtoy->command_done); } else { dev_err(irtoy->dev, "unexpected response to command: %*phN\n", len, irtoy->in); } break; case STATE_COMMAND_NO_RESP: case STATE_IRDATA: { struct ir_raw_event rawir = { .pulse = irtoy->pulse }; __be16 *in = (__be16 *)irtoy->in; int i; for (i = 0; i < len / sizeof(__be16); i++) { u16 v = be16_to_cpu(in[i]); if (v == 0xffff) { rawir.pulse = false; } else { rawir.duration = v * UNIT_US; ir_raw_event_store_with_timeout(irtoy->rc, &rawir); } rawir.pulse = !rawir.pulse; } irtoy->pulse = rawir.pulse; ir_raw_event_handle(irtoy->rc); break; } case STATE_TX: if (irtoy->tx_len == 0) { if (len == LEN_XMITRES && irtoy->in[0] == REPLY_XMITCOUNT) { u16 emitted = get_unaligned_be16(irtoy->in + 1); dev_dbg(irtoy->dev, "emitted:%u\n", emitted); irtoy->emitted = emitted; } else if (len == 1 && irtoy->in[0] == REPLY_XMITSUCCESS) { irtoy->state = STATE_IRDATA; complete(&irtoy->command_done); } } else { // send next part of tx buffer uint space = irtoy->in[0]; uint buf_len; int err; if (len != 1 || space > MAX_PACKET || space == 0) { dev_dbg(irtoy->dev, "packet length expected: %*phN\n", len, irtoy->in); break; } buf_len = min(space, irtoy->tx_len); dev_dbg(irtoy->dev, "remaining:%u 
sending:%u\n", irtoy->tx_len, buf_len); memcpy(irtoy->out, irtoy->tx_buf, buf_len); irtoy->urb_out->transfer_buffer_length = buf_len; err = usb_submit_urb(irtoy->urb_out, GFP_ATOMIC); if (err != 0) { dev_err(irtoy->dev, "fail to submit tx buf urb: %d\n", err); irtoy->state = STATE_IRDATA; complete(&irtoy->command_done); break; } irtoy->tx_buf += buf_len; irtoy->tx_len -= buf_len; } break; } } static void irtoy_out_callback(struct urb *urb) { struct irtoy *irtoy = urb->context; if (urb->status == 0) { if (irtoy->state == STATE_COMMAND_NO_RESP) complete(&irtoy->command_done); } else { dev_warn(irtoy->dev, "out urb status: %d\n", urb->status); } } static void irtoy_in_callback(struct urb *urb) { struct irtoy *irtoy = urb->context; int ret; switch (urb->status) { case 0: irtoy_response(irtoy, urb->actual_length); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -EPROTO: case -EPIPE: usb_unlink_urb(urb); return; default: dev_dbg(irtoy->dev, "in urb status: %d\n", urb->status); } ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret && ret != -ENODEV) dev_warn(irtoy->dev, "failed to resubmit urb: %d\n", ret); } static int irtoy_command(struct irtoy *irtoy, const u8 *cmd, int cmd_len, enum state state) { int err; init_completion(&irtoy->command_done); irtoy->state = state; memcpy(irtoy->out, cmd, cmd_len); irtoy->urb_out->transfer_buffer_length = cmd_len; err = usb_submit_urb(irtoy->urb_out, GFP_KERNEL); if (err != 0) return err; if (!wait_for_completion_timeout(&irtoy->command_done, msecs_to_jiffies(TIMEOUT))) { usb_kill_urb(irtoy->urb_out); return -ETIMEDOUT; } return 0; } static int irtoy_setup(struct irtoy *irtoy) { int err; err = irtoy_command(irtoy, COMMAND_RESET, sizeof(COMMAND_RESET), STATE_COMMAND_NO_RESP); if (err != 0) { dev_err(irtoy->dev, "could not write reset command: %d\n", err); return err; } usleep_range(50, 50); // get version err = irtoy_command(irtoy, COMMAND_VERSION, sizeof(COMMAND_VERSION), STATE_COMMAND); if (err) { dev_err(irtoy->dev, "could 
not write version command: %d\n", err); return err; } // enter sample mode err = irtoy_command(irtoy, COMMAND_SMODE_ENTER, sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND); if (err) dev_err(irtoy->dev, "could not write sample command: %d\n", err); return err; } /* * When sending IR, it is imperative that we send the IR data as quickly * as possible to the device, so it does not run out of IR data and * introduce gaps. Allocate the buffer here, and then feed the data from * the urb callback handler. */ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) { struct irtoy *irtoy = rc->priv; unsigned int i, size; __be16 *buf; int err; size = sizeof(u16) * (count + 1); buf = kmalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; for (i = 0; i < count; i++) { u16 v = DIV_ROUND_CLOSEST(txbuf[i], UNIT_US); if (!v) v = 1; buf[i] = cpu_to_be16(v); } buf[count] = cpu_to_be16(0xffff); irtoy->tx_buf = buf; irtoy->tx_len = size; irtoy->emitted = 0; // There is an issue where if the unit is receiving IR while the // first TXSTART command is sent, the device might end up hanging // with its led on. It does not respond to any command when this // happens. To work around this, re-enter sample mode. 
err = irtoy_command(irtoy, COMMAND_SMODE_EXIT, sizeof(COMMAND_SMODE_EXIT), STATE_COMMAND_NO_RESP); if (err) { dev_err(irtoy->dev, "exit sample mode: %d\n", err); kfree(buf); return err; } err = irtoy_command(irtoy, COMMAND_SMODE_ENTER, sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND); if (err) { dev_err(irtoy->dev, "enter sample mode: %d\n", err); kfree(buf); return err; } err = irtoy_command(irtoy, COMMAND_TXSTART, sizeof(COMMAND_TXSTART), STATE_TX); kfree(buf); if (err) { dev_err(irtoy->dev, "failed to send tx start command: %d\n", err); // not sure what state the device is in, reset it irtoy_setup(irtoy); return err; } if (size != irtoy->emitted) { dev_err(irtoy->dev, "expected %u emitted, got %u\n", size, irtoy->emitted); // not sure what state the device is in, reset it irtoy_setup(irtoy); return -EINVAL; } return count; } static int irtoy_tx_carrier(struct rc_dev *rc, uint32_t carrier) { struct irtoy *irtoy = rc->priv; u8 buf[3]; int err; if (carrier < 11800) return -EINVAL; buf[0] = 0x06; buf[1] = DIV_ROUND_CLOSEST(48000000, 16 * carrier) - 1; buf[2] = 0; err = irtoy_command(irtoy, buf, sizeof(buf), STATE_COMMAND_NO_RESP); if (err) dev_err(irtoy->dev, "could not write carrier command: %d\n", err); return err; } static int irtoy_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_host_interface *idesc = intf->cur_altsetting; struct usb_device *usbdev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *ep_in = NULL; struct usb_endpoint_descriptor *ep_out = NULL; struct usb_endpoint_descriptor *ep = NULL; struct irtoy *irtoy; struct rc_dev *rc; struct urb *urb; int i, pipe, err = -ENOMEM; for (i = 0; i < idesc->desc.bNumEndpoints; i++) { ep = &idesc->endpoint[i].desc; if (!ep_in && usb_endpoint_is_bulk_in(ep) && usb_endpoint_maxp(ep) == MAX_PACKET) ep_in = ep; if (!ep_out && usb_endpoint_is_bulk_out(ep) && usb_endpoint_maxp(ep) == MAX_PACKET) ep_out = ep; } if (!ep_in || !ep_out) { dev_err(&intf->dev, "required endpoints not 
found\n"); return -ENODEV; } irtoy = kzalloc(sizeof(*irtoy), GFP_KERNEL); if (!irtoy) return -ENOMEM; irtoy->in = kmalloc(MAX_PACKET, GFP_KERNEL); if (!irtoy->in) goto free_irtoy; irtoy->out = kmalloc(MAX_PACKET, GFP_KERNEL); if (!irtoy->out) goto free_irtoy; rc = rc_allocate_device(RC_DRIVER_IR_RAW); if (!rc) goto free_irtoy; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) goto free_rcdev; pipe = usb_rcvbulkpipe(usbdev, ep_in->bEndpointAddress); usb_fill_bulk_urb(urb, usbdev, pipe, irtoy->in, MAX_PACKET, irtoy_in_callback, irtoy); irtoy->urb_in = urb; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) goto free_rcdev; pipe = usb_sndbulkpipe(usbdev, ep_out->bEndpointAddress); usb_fill_bulk_urb(urb, usbdev, pipe, irtoy->out, MAX_PACKET, irtoy_out_callback, irtoy); irtoy->dev = &intf->dev; irtoy->usbdev = usbdev; irtoy->rc = rc; irtoy->urb_out = urb; irtoy->pulse = true; err = usb_submit_urb(irtoy->urb_in, GFP_KERNEL); if (err != 0) { dev_err(irtoy->dev, "fail to submit in urb: %d\n", err); goto free_rcdev; } err = irtoy_setup(irtoy); if (err) goto free_rcdev; dev_info(irtoy->dev, "version: hardware %u, firmware %u.%u, protocol %u", irtoy->hw_version, irtoy->sw_version / 10, irtoy->sw_version % 10, irtoy->proto_version); if (irtoy->sw_version < MIN_FW_VERSION) { dev_err(irtoy->dev, "need firmware V%02u or higher", MIN_FW_VERSION); err = -ENODEV; goto free_rcdev; } usb_make_path(usbdev, irtoy->phys, sizeof(irtoy->phys)); rc->device_name = "Infrared Toy"; rc->driver_name = KBUILD_MODNAME; rc->input_phys = irtoy->phys; usb_to_input_id(usbdev, &rc->input_id); rc->dev.parent = &intf->dev; rc->priv = irtoy; rc->tx_ir = irtoy_tx; rc->s_tx_carrier = irtoy_tx_carrier; rc->allowed_protocols = RC_PROTO_BIT_ALL_IR_DECODER; rc->map_name = RC_MAP_RC6_MCE; rc->rx_resolution = UNIT_US; rc->timeout = IR_DEFAULT_TIMEOUT; /* * end of transmission is detected by absence of a usb packet * with more pulse/spaces. 
However, each usb packet sent can * contain 32 pulse/spaces, which can be quite lengthy, so there * can be a delay between usb packets. For example with nec there is a * 17ms gap between packets. * * So, make timeout a largish minimum which works with most protocols. */ rc->min_timeout = MS_TO_US(40); rc->max_timeout = MAX_TIMEOUT_US; err = rc_register_device(rc); if (err) goto free_rcdev; usb_set_intfdata(intf, irtoy); return 0; free_rcdev: usb_kill_urb(irtoy->urb_out); usb_free_urb(irtoy->urb_out); usb_kill_urb(irtoy->urb_in); usb_free_urb(irtoy->urb_in); rc_free_device(rc); free_irtoy: kfree(irtoy->in); kfree(irtoy->out); kfree(irtoy); return err; } static void irtoy_disconnect(struct usb_interface *intf) { struct irtoy *ir = usb_get_intfdata(intf); rc_unregister_device(ir->rc); usb_set_intfdata(intf, NULL); usb_kill_urb(ir->urb_out); usb_free_urb(ir->urb_out); usb_kill_urb(ir->urb_in); usb_free_urb(ir->urb_in); kfree(ir->in); kfree(ir->out); kfree(ir); } static const struct usb_device_id irtoy_table[] = { { USB_DEVICE_INTERFACE_CLASS(0x04d8, 0xfd08, USB_CLASS_CDC_DATA) }, { USB_DEVICE_INTERFACE_CLASS(0x04d8, 0xf58b, USB_CLASS_CDC_DATA) }, { } }; static struct usb_driver irtoy_driver = { .name = KBUILD_MODNAME, .probe = irtoy_probe, .disconnect = irtoy_disconnect, .id_table = irtoy_table, }; module_usb_driver(irtoy_driver); MODULE_AUTHOR("Sean Young <sean@mess.org>"); MODULE_DESCRIPTION("Infrared Toy and IR Droid driver"); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(usb, irtoy_table);
29 9 9 7 7 9 9 8 8 5 5 3 4 4 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 // SPDX-License-Identifier: GPL-2.0-or-later /* * Glue Code for 3-way parallel assembler optimized version of Twofish * * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> */ #include <asm/cpu_device_id.h> #include <crypto/algapi.h> #include <crypto/twofish.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> #include "twofish.h" #include "ecb_cbc_helpers.h" EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { return twofish_setkey(&tfm->base, key, keylen); } static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) { __twofish_enc_blk_3way(ctx, dst, src, false); } void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src) { u8 buf[2][TF_BLOCK_SIZE]; const u8 *s = src; if (dst == src) s = memcpy(buf, src, sizeof(buf)); twofish_dec_blk_3way(ctx, dst, src); crypto_xor(dst + TF_BLOCK_SIZE, s, sizeof(buf)); } EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); static int ecb_encrypt(struct skcipher_request *req) { ECB_WALK_START(req, TF_BLOCK_SIZE, -1); ECB_BLOCK(3, twofish_enc_blk_3way); ECB_BLOCK(1, twofish_enc_blk); ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { ECB_WALK_START(req, TF_BLOCK_SIZE, -1); ECB_BLOCK(3, 
twofish_dec_blk_3way); ECB_BLOCK(1, twofish_dec_blk); ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { CBC_WALK_START(req, TF_BLOCK_SIZE, -1); CBC_ENC_BLOCK(twofish_enc_blk); CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { CBC_WALK_START(req, TF_BLOCK_SIZE, -1); CBC_DEC_BLOCK(3, twofish_dec_blk_cbc_3way); CBC_DEC_BLOCK(1, twofish_dec_blk); CBC_WALK_END(); } static struct skcipher_alg tf_skciphers[] = { { .base.cra_name = "ecb(twofish)", .base.cra_driver_name = "ecb-twofish-3way", .base.cra_priority = 300, .base.cra_blocksize = TF_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct twofish_ctx), .base.cra_module = THIS_MODULE, .min_keysize = TF_MIN_KEY_SIZE, .max_keysize = TF_MAX_KEY_SIZE, .setkey = twofish_setkey_skcipher, .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, }, { .base.cra_name = "cbc(twofish)", .base.cra_driver_name = "cbc-twofish-3way", .base.cra_priority = 300, .base.cra_blocksize = TF_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct twofish_ctx), .base.cra_module = THIS_MODULE, .min_keysize = TF_MIN_KEY_SIZE, .max_keysize = TF_MAX_KEY_SIZE, .ivsize = TF_BLOCK_SIZE, .setkey = twofish_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, }, }; static bool is_blacklisted_cpu(void) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return false; switch (boot_cpu_data.x86_vfm) { case INTEL_ATOM_BONNELL: case INTEL_ATOM_BONNELL_MID: case INTEL_ATOM_SALTWELL: /* * On Atom, twofish-3way is slower than original assembler * implementation. Twofish-3way trades off some performance in * storing blocks in 64bit registers to allow three blocks to * be processed parallel. Parallel operation then allows gaining * more performance than was trade off, on out-of-order CPUs. * However Atom does not benefit from this parallelism and * should be blacklisted. 
*/ return true; } if (boot_cpu_data.x86 == 0x0f) { /* * On Pentium 4, twofish-3way is slower than original assembler * implementation because excessive uses of 64bit rotate and * left-shifts (which are really slow on P4) needed to store and * handle 128bit block in two 64bit registers. */ return true; } return false; } static int force; module_param(force, int, 0); MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init twofish_3way_init(void) { if (!force && is_blacklisted_cpu()) { printk(KERN_INFO "twofish-x86_64-3way: performance on this CPU " "would be suboptimal: disabling " "twofish-x86_64-3way.\n"); return -ENODEV; } return crypto_register_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); } static void __exit twofish_3way_fini(void) { crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); } module_init(twofish_3way_init); module_exit(twofish_3way_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); MODULE_ALIAS_CRYPTO("twofish"); MODULE_ALIAS_CRYPTO("twofish-asm");
2 2 2 2 29 8 23 2 3 22 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 // SPDX-License-Identifier: GPL-2.0 /* * Implement the manual drop-all-pagecache function */ #include <linux/pagemap.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/fs.h> #include <linux/writeback.h> #include <linux/sysctl.h> #include <linux/gfp.h> #include <linux/swap.h> #include "internal.h" /* A global variable is a bit ugly, but it keeps the code simple */ int sysctl_drop_caches; static void drop_pagecache_sb(struct super_block *sb, void *unused) { struct inode *inode, *toput_inode = NULL; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); /* * We must skip inodes in unusual state. We may also skip * inodes without pages but we deliberately won't in case * we need to reschedule to avoid softlockups. 
*/ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || (mapping_empty(inode->i_mapping) && !need_resched())) { spin_unlock(&inode->i_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); iput(toput_inode); } int drop_caches_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { int ret; ret = proc_dointvec_minmax(table, write, buffer, length, ppos); if (ret) return ret; if (write) { static int stfu; if (sysctl_drop_caches & 1) { lru_add_drain_all(); iterate_supers(drop_pagecache_sb, NULL); count_vm_event(DROP_PAGECACHE); } if (sysctl_drop_caches & 2) { drop_slab(); count_vm_event(DROP_SLAB); } if (!stfu) { pr_info("%s (%d): drop_caches: %d\n", current->comm, task_pid_nr(current), sysctl_drop_caches); } stfu |= sysctl_drop_caches & 4; } return 0; }
1 1 41 127 29 82 14 94 16 11 11 29 28 27 2 17 11 16 53 176 103 103 104 6 97 104 106 106 121 126 18 51 51 51 42 30 30 27 27 27 77 19 51 13 29 27 40 40 27 51 2 49 49 49 49 3 51 51 51 49 2 2 12 12 12 40 40 40 5 40 48 48 2 48 3 40 40 40 4 40 5 40 2 40 38 1 40 40 1 4 61 2 3 11 9 31 29 30 25 9 12 48 9 9 9 9 12 9 29 29 100 91 92 51 21 1 48 8 8 1 1 1 11 9 29 3 5 11 9 1 8 2 14 29 47 6 11 31 5 31 29 31 31 19 5 2 12 14 8 4 2 8 2 11 7 1 6 13 13 13 10 4 4 41 5 5 5 100 99 5 8 3 47 1 31 62 63 55 62 10 53 2 1 1 1 25 1 2 22 1 2 1 1 2 7 5 3 4 15 77 7 19 19 4 1 1 5 2 2 21 2 3 3 2 2 2 2 1 4 10 3 1 20 18 18 4 13 13 135 32 8 77 30 1 4 4 2 4 9 4 12 12 1 75 3 76 77 3 77 77 116 116 92 9 67 5 1 2 2 42 2 41 10 33 50 51 37 21 10 3 4 1 2 2 1 3 2 6 7 84 50 60 1 21 51 3 6 48 7 3 4 48 5 5 18 7 13 1 4 2 32 33 6 12 12 12 3 10 10 2 12 12 11 3 10 3 3 1 12 12 85 83 11 5 2 99 53 5 95 16 15 10 7 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 
309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 
809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 
1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 
1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 
2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 
2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 
2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2009 Red Hat, Inc. * Copyright (C) 2006 Rusty Russell IBM Corporation * * Author: Michael S. Tsirkin <mst@redhat.com> * * Inspiration, some code, and most witty comments come from * Documentation/virtual/lguest/lguest.c, by Rusty Russell * * Generic code for virtio server in host kernel. */ #include <linux/eventfd.h> #include <linux/vhost.h> #include <linux/uio.h> #include <linux/mm.h> #include <linux/miscdevice.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/file.h> #include <linux/highmem.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/kthread.h> #include <linux/module.h> #include <linux/sort.h> #include <linux/sched/mm.h> #include <linux/sched/signal.h> #include <linux/sched/vhost_task.h> #include <linux/interval_tree_generic.h> #include <linux/nospec.h> #include <linux/kcov.h> #include "vhost.h" static ushort max_mem_regions = 64; module_param(max_mem_regions, ushort, 0444); MODULE_PARM_DESC(max_mem_regions, "Maximum number of memory regions in memory map. (default: 64)"); static int max_iotlb_entries = 2048; module_param(max_iotlb_entries, int, 0444); MODULE_PARM_DESC(max_iotlb_entries, "Maximum number of iotlb entries. 
(default: 2048)"); enum { VHOST_MEMORY_F_LOG = 0x1, }; #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { vq->user_be = !virtio_legacy_is_little_endian(); } static void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq) { vq->user_be = true; } static void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq) { vq->user_be = false; } static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) { struct vhost_vring_state s; if (vq->private_data) return -EBUSY; if (copy_from_user(&s, argp, sizeof(s))) return -EFAULT; if (s.num != VHOST_VRING_LITTLE_ENDIAN && s.num != VHOST_VRING_BIG_ENDIAN) return -EINVAL; if (s.num == VHOST_VRING_BIG_ENDIAN) vhost_enable_cross_endian_big(vq); else vhost_enable_cross_endian_little(vq); return 0; } static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, int __user *argp) { struct vhost_vring_state s = { .index = idx, .num = vq->user_be }; if (copy_to_user(argp, &s, sizeof(s))) return -EFAULT; return 0; } static void vhost_init_is_le(struct vhost_virtqueue *vq) { /* Note for legacy virtio: user_be is initialized at reset time * according to the host endianness. If userspace does not set an * explicit endianness, the default behavior is native endian, as * expected by legacy virtio. 
*/ vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; } #else static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { } static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) { return -ENOIOCTLCMD; } static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, int __user *argp) { return -ENOIOCTLCMD; } static void vhost_init_is_le(struct vhost_virtqueue *vq) { vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || virtio_legacy_is_little_endian(); } #endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ static void vhost_reset_is_le(struct vhost_virtqueue *vq) { vhost_init_is_le(vq); } struct vhost_flush_struct { struct vhost_work work; struct completion wait_event; }; static void vhost_flush_work(struct vhost_work *work) { struct vhost_flush_struct *s; s = container_of(work, struct vhost_flush_struct, work); complete(&s->wait_event); } static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { struct vhost_poll *poll; poll = container_of(pt, struct vhost_poll, table); poll->wqh = wqh; add_wait_queue(wqh, &poll->wait); } static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait); struct vhost_work *work = &poll->work; if (!(key_to_poll(key) & poll->mask)) return 0; if (!poll->dev->use_worker) work->fn(work); else vhost_poll_queue(poll); return 0; } void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) { clear_bit(VHOST_WORK_QUEUED, &work->flags); work->fn = fn; } EXPORT_SYMBOL_GPL(vhost_work_init); /* Init poll structure */ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, __poll_t mask, struct vhost_dev *dev, struct vhost_virtqueue *vq) { init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); init_poll_funcptr(&poll->table, vhost_poll_func); poll->mask = mask; poll->dev = dev; poll->wqh = NULL; poll->vq = vq; vhost_work_init(&poll->work, 
fn); } EXPORT_SYMBOL_GPL(vhost_poll_init); /* Start polling a file. We add ourselves to file's wait queue. The caller must * keep a reference to a file until after vhost_poll_stop is called. */ int vhost_poll_start(struct vhost_poll *poll, struct file *file) { __poll_t mask; if (poll->wqh) return 0; mask = vfs_poll(file, &poll->table); if (mask) vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask)); if (mask & EPOLLERR) { vhost_poll_stop(poll); return -EINVAL; } return 0; } EXPORT_SYMBOL_GPL(vhost_poll_start); /* Stop polling a file. After this function returns, it becomes safe to drop the * file reference. You must also flush afterwards. */ void vhost_poll_stop(struct vhost_poll *poll) { if (poll->wqh) { remove_wait_queue(poll->wqh, &poll->wait); poll->wqh = NULL; } } EXPORT_SYMBOL_GPL(vhost_poll_stop); static void vhost_worker_queue(struct vhost_worker *worker, struct vhost_work *work) { if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { /* We can only add the work to the list after we're * sure it was not in the list. * test_and_set_bit() implies a memory barrier. */ llist_add(&work->node, &worker->work_list); vhost_task_wake(worker->vtsk); } } bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work) { struct vhost_worker *worker; bool queued = false; rcu_read_lock(); worker = rcu_dereference(vq->worker); if (worker) { queued = true; vhost_worker_queue(worker, work); } rcu_read_unlock(); return queued; } EXPORT_SYMBOL_GPL(vhost_vq_work_queue); /** * __vhost_worker_flush - flush a worker * @worker: worker to flush * * The worker's flush_mutex must be held. */ static void __vhost_worker_flush(struct vhost_worker *worker) { struct vhost_flush_struct flush; if (!worker->attachment_cnt || worker->killed) return; init_completion(&flush.wait_event); vhost_work_init(&flush.work, vhost_flush_work); vhost_worker_queue(worker, &flush.work); /* * Drop mutex in case our worker is killed and it needs to take the * mutex to force cleanup. 
*/ mutex_unlock(&worker->mutex); wait_for_completion(&flush.wait_event); mutex_lock(&worker->mutex); } static void vhost_worker_flush(struct vhost_worker *worker) { mutex_lock(&worker->mutex); __vhost_worker_flush(worker); mutex_unlock(&worker->mutex); } void vhost_dev_flush(struct vhost_dev *dev) { struct vhost_worker *worker; unsigned long i; xa_for_each(&dev->worker_xa, i, worker) vhost_worker_flush(worker); } EXPORT_SYMBOL_GPL(vhost_dev_flush); /* A lockless hint for busy polling code to exit the loop */ bool vhost_vq_has_work(struct vhost_virtqueue *vq) { struct vhost_worker *worker; bool has_work = false; rcu_read_lock(); worker = rcu_dereference(vq->worker); if (worker && !llist_empty(&worker->work_list)) has_work = true; rcu_read_unlock(); return has_work; } EXPORT_SYMBOL_GPL(vhost_vq_has_work); void vhost_poll_queue(struct vhost_poll *poll) { vhost_vq_work_queue(poll->vq, &poll->work); } EXPORT_SYMBOL_GPL(vhost_poll_queue); static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq) { int j; for (j = 0; j < VHOST_NUM_ADDRS; j++) vq->meta_iotlb[j] = NULL; } static void vhost_vq_meta_reset(struct vhost_dev *d) { int i; for (i = 0; i < d->nvqs; ++i) __vhost_vq_meta_reset(d->vqs[i]); } static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx) { call_ctx->ctx = NULL; memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer)); } bool vhost_vq_is_setup(struct vhost_virtqueue *vq) { return vq->avail && vq->desc && vq->used && vhost_vq_access_ok(vq); } EXPORT_SYMBOL_GPL(vhost_vq_is_setup); static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { vq->num = 1; vq->desc = NULL; vq->avail = NULL; vq->used = NULL; vq->last_avail_idx = 0; vq->avail_idx = 0; vq->last_used_idx = 0; vq->signalled_used = 0; vq->signalled_used_valid = false; vq->used_flags = 0; vq->log_used = false; vq->log_addr = -1ull; vq->private_data = NULL; vq->acked_features = 0; vq->acked_backend_features = 0; vq->log_base = NULL; vq->error_ctx = NULL; 
vq->kick = NULL; vq->log_ctx = NULL; vhost_disable_cross_endian(vq); vhost_reset_is_le(vq); vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; rcu_assign_pointer(vq->worker, NULL); vhost_vring_call_reset(&vq->call_ctx); __vhost_vq_meta_reset(vq); } static bool vhost_run_work_list(void *data) { struct vhost_worker *worker = data; struct vhost_work *work, *work_next; struct llist_node *node; node = llist_del_all(&worker->work_list); if (node) { __set_current_state(TASK_RUNNING); node = llist_reverse_order(node); /* make sure flag is seen after deletion */ smp_wmb(); llist_for_each_entry_safe(work, work_next, node, node) { clear_bit(VHOST_WORK_QUEUED, &work->flags); kcov_remote_start_common(worker->kcov_handle); work->fn(work); kcov_remote_stop(); cond_resched(); } } return !!node; } static void vhost_worker_killed(void *data) { struct vhost_worker *worker = data; struct vhost_dev *dev = worker->dev; struct vhost_virtqueue *vq; int i, attach_cnt = 0; mutex_lock(&worker->mutex); worker->killed = true; for (i = 0; i < dev->nvqs; i++) { vq = dev->vqs[i]; mutex_lock(&vq->mutex); if (worker == rcu_dereference_check(vq->worker, lockdep_is_held(&vq->mutex))) { rcu_assign_pointer(vq->worker, NULL); attach_cnt++; } mutex_unlock(&vq->mutex); } worker->attachment_cnt -= attach_cnt; if (attach_cnt) synchronize_rcu(); /* * Finish vhost_worker_flush calls and any other works that snuck in * before the synchronize_rcu. */ vhost_run_work_list(worker); mutex_unlock(&worker->mutex); } static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) { kfree(vq->indirect); vq->indirect = NULL; kfree(vq->log); vq->log = NULL; kfree(vq->heads); vq->heads = NULL; } /* Helper to allocate iovec buffers for all vqs. 
*/
static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
{
	struct vhost_virtqueue *vq;
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		vq = dev->vqs[i];
		vq->indirect = kmalloc_array(UIO_MAXIOV,
					     sizeof(*vq->indirect),
					     GFP_KERNEL);
		vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log),
					GFP_KERNEL);
		vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
					  GFP_KERNEL);
		if (!vq->indirect || !vq->log || !vq->heads)
			goto err_nomem;
	}
	return 0;

err_nomem:
	/* Unwind queue i (which may be partially allocated) and all earlier ones. */
	for (; i >= 0; --i)
		vhost_vq_free_iovecs(dev->vqs[i]);
	return -ENOMEM;
}

/* Release the per-virtqueue iovec buffers of every virtqueue of @dev. */
static void vhost_dev_free_iovecs(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i)
		vhost_vq_free_iovecs(dev->vqs[i]);
}

/*
 * Return true, after re-queueing the vq's poll work, once @pkts reaches
 * dev->weight or @total_len reaches dev->byte_weight.  A byte_weight of 0
 * disables the byte limit.
 */
bool vhost_exceeds_weight(struct vhost_virtqueue *vq,
			  int pkts, int total_len)
{
	struct vhost_dev *dev = vq->dev;

	if ((dev->byte_weight && total_len >= dev->byte_weight) ||
	    pkts >= dev->weight) {
		vhost_poll_queue(&vq->poll);
		return true;
	}

	return false;
}
EXPORT_SYMBOL_GPL(vhost_exceeds_weight);

/*
 * Size in bytes of the avail ring with @num entries; 2 extra bytes are
 * added when VIRTIO_RING_F_EVENT_IDX has been negotiated.
 */
static size_t vhost_get_avail_size(struct vhost_virtqueue *vq,
				   unsigned int num)
{
	size_t event __maybe_unused =
	       vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;

	return size_add(struct_size(vq->avail, ring, num), event);
}

/*
 * Size in bytes of the used ring with @num entries; 2 extra bytes are
 * added when VIRTIO_RING_F_EVENT_IDX has been negotiated.
 */
static size_t vhost_get_used_size(struct vhost_virtqueue *vq,
				  unsigned int num)
{
	size_t event __maybe_unused =
	       vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;

	return size_add(struct_size(vq->used, ring, num), event);
}

/* Size in bytes of a descriptor table with @num entries. */
static size_t vhost_get_desc_size(struct vhost_virtqueue *vq,
				  unsigned int num)
{
	return sizeof(*vq->desc) * num;
}

/*
 * Initialise @dev and each of its @nvqs virtqueues.  No per-vq buffers are
 * allocated here: log/indirect/heads are set to NULL and each vq is reset.
 * A poll handler is only set up for virtqueues that provide handle_kick.
 */
void vhost_dev_init(struct vhost_dev *dev,
		    struct vhost_virtqueue **vqs, int nvqs,
		    int iov_limit, int weight, int byte_weight,
		    bool use_worker,
		    int (*msg_handler)(struct vhost_dev *dev, u32 asid,
				       struct vhost_iotlb_msg *msg))
{
	struct vhost_virtqueue *vq;
	int i;

	dev->vqs = vqs;
	dev->nvqs = nvqs;
	mutex_init(&dev->mutex);
	dev->log_ctx = NULL;
	dev->umem = NULL;
	dev->iotlb = NULL;
	dev->mm = NULL;
	dev->iov_limit = iov_limit;
	dev->weight = weight;
	dev->byte_weight = byte_weight;
	dev->use_worker = use_worker;
	dev->msg_handler = msg_handler;
	init_waitqueue_head(&dev->wait);
	INIT_LIST_HEAD(&dev->read_list);
	INIT_LIST_HEAD(&dev->pending_list);
	spin_lock_init(&dev->iotlb_lock);
	xa_init_flags(&dev->worker_xa, XA_FLAGS_ALLOC);

	for (i = 0; i < dev->nvqs; ++i) {
		vq = dev->vqs[i];
		vq->log = NULL;
		vq->indirect = NULL;
		vq->heads = NULL;
		vq->dev = dev;
		mutex_init(&vq->mutex);
		vhost_vq_reset(dev, vq);
		if (vq->handle_kick)
			vhost_poll_init(&vq->poll, vq->handle_kick,
					EPOLLIN, dev, vq);
	}
}
EXPORT_SYMBOL_GPL(vhost_dev_init);

/* Caller should have device mutex */
long vhost_dev_check_owner(struct vhost_dev *dev)
{
	/* Are you the owner? If not, I don't think you mean to do that */
	return dev->mm == current->mm ? 0 : -EPERM;
}
EXPORT_SYMBOL_GPL(vhost_dev_check_owner);

/* Caller should have device mutex */
bool vhost_dev_has_owner(struct vhost_dev *dev)
{
	/* A device is owned exactly when an mm has been attached to it. */
	return dev->mm;
}
EXPORT_SYMBOL_GPL(vhost_dev_has_owner);

/* Record the calling task's mm in dev->mm and take a reference on it. */
static void vhost_attach_mm(struct vhost_dev *dev)
{
	/* No owner, become one */
	if (dev->use_worker) {
		dev->mm = get_task_mm(current);
	} else {
		/* vDPA device does not use worker thread, so there's
		 * no need to hold the address space for mm. This helps
		 * to avoid deadlock in the case of mmap() which may
		 * hold the refcnt of the file and depends on release
		 * method to remove vma.
		 */
		dev->mm = current->mm;
		mmgrab(dev->mm);
	}
}

/*
 * Drop the reference taken by vhost_attach_mm() (mmput() pairs with
 * get_task_mm(), mmdrop() with mmgrab()) and clear dev->mm.  No-op when
 * the device has no mm attached.
 */
static void vhost_detach_mm(struct vhost_dev *dev)
{
	if (!dev->mm)
		return;

	if (dev->use_worker)
		mmput(dev->mm);
	else
		mmdrop(dev->mm);

	dev->mm = NULL;
}

/*
 * Remove @worker from the device's worker xarray, stop its task and free
 * it.  A NULL @worker is ignored; a non-empty work list triggers a warning.
 */
static void vhost_worker_destroy(struct vhost_dev *dev,
				 struct vhost_worker *worker)
{
	if (!worker)
		return;

	WARN_ON(!llist_empty(&worker->work_list));
	xa_erase(&dev->worker_xa, worker->id);
	vhost_task_stop(worker->vtsk);
	kfree(worker);
}

/*
 * Detach every virtqueue from its worker and destroy all workers still
 * registered in dev->worker_xa.  Does nothing for devices without workers.
 */
static void vhost_workers_free(struct vhost_dev *dev)
{
	struct vhost_worker *worker;
	unsigned long i;

	if (!dev->use_worker)
		return;

	for (i = 0; i < dev->nvqs; i++)
		rcu_assign_pointer(dev->vqs[i]->worker, NULL);
	/*
	 * Free the default worker we created and cleanup workers userspace
	 * created but couldn't clean up (it forgot or crashed).
	 */
	xa_for_each(&dev->worker_xa, i, worker)
		vhost_worker_destroy(dev, worker);
	xa_destroy(&dev->worker_xa);
}

/*
 * Allocate a worker, create and start its vhost task (named
 * "vhost-<pid of caller>") and register it in dev->worker_xa.
 * Returns the new worker, or NULL on any failure.
 */
static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
{
	struct vhost_worker *worker;
	struct vhost_task *vtsk;
	char name[TASK_COMM_LEN];
	int ret;
	u32 id;

	worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
	if (!worker)
		return NULL;

	worker->dev = dev;
	snprintf(name, sizeof(name), "vhost-%d", current->pid);

	vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
				 worker, name);
	if (!vtsk)
		goto free_worker;

	mutex_init(&worker->mutex);
	init_llist_head(&worker->work_list);
	worker->kcov_handle = kcov_common_handle();
	worker->vtsk = vtsk;

	vhost_task_start(vtsk);

	ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
	if (ret < 0)
		goto stop_worker;
	worker->id = id;

	return worker;

stop_worker:
	vhost_task_stop(vtsk);
free_worker:
	kfree(worker);
	return NULL;
}

/* Caller must have device mutex */
static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
				     struct vhost_worker *worker)
{
	struct vhost_worker *old_worker;

	mutex_lock(&worker->mutex);
	/* Never attach a vq to a worker that has already been killed. */
	if (worker->killed) {
		mutex_unlock(&worker->mutex);
		return;
	}

	mutex_lock(&vq->mutex);

	old_worker = rcu_dereference_check(vq->worker,
					   lockdep_is_held(&vq->mutex));
	rcu_assign_pointer(vq->worker, worker);
	worker->attachment_cnt++;

	/* First attachment for this vq: there is no old worker to drain. */
	if (!old_worker) {
		mutex_unlock(&vq->mutex);
		mutex_unlock(&worker->mutex);
		return;
	}
	mutex_unlock(&vq->mutex);
	mutex_unlock(&worker->mutex);

	/*
	 * Take the worker mutex to make sure we see the work queued from
	 * device wide flushes which doesn't use RCU for execution.
	 */
	mutex_lock(&old_worker->mutex);
	if (old_worker->killed) {
		mutex_unlock(&old_worker->mutex);
		return;
	}

	/*
	 * We don't want to call synchronize_rcu for every vq during setup
	 * because it will slow down VM startup. If we haven't done
	 * VHOST_SET_VRING_KICK and not done the driver specific
	 * SET_ENDPOINT/RUNNING then we can skip the sync since there will
	 * not be any works queued for scsi and net.
	 */
	mutex_lock(&vq->mutex);
	if (!vhost_vq_get_backend(vq) && !vq->kick) {
		mutex_unlock(&vq->mutex);

		old_worker->attachment_cnt--;
		mutex_unlock(&old_worker->mutex);
		/*
		 * vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID.
		 * Warn if it adds support for multiple workers but forgets to
		 * handle the early queueing case.
		 */
		WARN_ON(!old_worker->attachment_cnt &&
			!llist_empty(&old_worker->work_list));
		return;
	}
	mutex_unlock(&vq->mutex);

	/* Make sure new vq queue/flush/poll calls see the new worker */
	synchronize_rcu();
	/* Make sure whatever was queued gets run */
	__vhost_worker_flush(old_worker);
	old_worker->attachment_cnt--;
	mutex_unlock(&old_worker->mutex);
}

/* Caller must have device mutex */
static int vhost_vq_attach_worker(struct vhost_virtqueue *vq,
				  struct vhost_vring_worker *info)
{
	unsigned long index = info->worker_id;
	struct vhost_dev *dev = vq->dev;
	struct vhost_worker *worker;

	if (!dev->use_worker)
		return -EINVAL;

	/*
	 * xa_find() may return an entry at a higher index, so the id is
	 * compared against the requested one before the worker is used.
	 */
	worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT);
	if (!worker || worker->id != info->worker_id)
		return -ENODEV;

	__vhost_vq_attach_worker(vq, worker);
	return 0;
}

/* Caller must have device mutex */
static int vhost_new_worker(struct vhost_dev *dev,
			    struct vhost_worker_state *info)
{
	struct vhost_worker *worker;

	worker = vhost_worker_create(dev);
	if (!worker)
		return -ENOMEM;

	/* Report the id of the new worker back through @info. */
	info->worker_id = worker->id;
	return 0;
}

/* Caller must have device mutex */
static int vhost_free_worker(struct vhost_dev *dev,
			     struct vhost_worker_state *info)
{
	unsigned long index = info->worker_id;
	struct vhost_worker *worker;

	worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT);
	if (!worker || worker->id != info->worker_id)
		return -ENODEV;

	mutex_lock(&worker->mutex);
	/* Refuse to free a worker that is still attached or was killed. */
	if (worker->attachment_cnt || worker->killed) {
		mutex_unlock(&worker->mutex);
		return -EBUSY;
	}
	/*
	 * A flush might have raced and snuck in before attachment_cnt was set
	 * to zero. Make sure flushes are flushed from the queue before
	 * freeing.
	 */
	__vhost_worker_flush(worker);
	mutex_unlock(&worker->mutex);

	vhost_worker_destroy(dev, worker);
	return 0;
}

/*
 * Read a u32 virtqueue index from the start of @argp and look it up.
 * On success stores the virtqueue in *@vq and the index in *@id and
 * returns 0; returns the get_user() error, or -ENOBUFS when the index is
 * not below dev->nvqs.
 */
static int vhost_get_vq_from_user(struct vhost_dev *dev, void __user *argp,
				  struct vhost_virtqueue **vq, u32 *id)
{
	u32 __user *idxp = argp;
	u32 idx;
	long r;

	r = get_user(idx, idxp);
	if (r < 0)
		return r;

	if (idx >= dev->nvqs)
		return -ENOBUFS;

	/* Clamp the index under speculation before using it as an array index. */
	idx = array_index_nospec(idx, dev->nvqs);

	*vq = dev->vqs[idx];
	*id = idx;
	return 0;
}

/* Caller must have device mutex */
long vhost_worker_ioctl(struct vhost_dev *dev, unsigned int ioctl,
			void __user *argp)
{
	struct vhost_vring_worker ring_worker;
	struct vhost_worker_state state;
	struct vhost_worker *worker;
	struct vhost_virtqueue *vq;
	long ret;
	u32 idx;

	if (!dev->use_worker)
		return -EINVAL;

	if (!vhost_dev_has_owner(dev))
		return -EINVAL;

	ret = vhost_dev_check_owner(dev);
	if (ret)
		return ret;

	/* First pass: device-wide commands complete here, vring ones fall through. */
	switch (ioctl) {
	/* dev worker ioctls */
	case VHOST_NEW_WORKER:
		ret = vhost_new_worker(dev, &state);
		if (!ret && copy_to_user(argp, &state, sizeof(state)))
			ret = -EFAULT;
		return ret;
	case VHOST_FREE_WORKER:
		if (copy_from_user(&state, argp, sizeof(state)))
			return -EFAULT;
		return vhost_free_worker(dev, &state);
	/* vring worker ioctls */
	case VHOST_ATTACH_VRING_WORKER:
	case VHOST_GET_VRING_WORKER:
		break;
	default:
		return -ENOIOCTLCMD;
	}

	ret = vhost_get_vq_from_user(dev, argp, &vq, &idx);
	if (ret)
		return ret;

	/* Second pass: per-virtqueue commands. */
	switch (ioctl) {
	case VHOST_ATTACH_VRING_WORKER:
		if (copy_from_user(&ring_worker, argp, sizeof(ring_worker))) {
			ret = -EFAULT;
			break;
		}

		ret = vhost_vq_attach_worker(vq, &ring_worker);
		break;
	case VHOST_GET_VRING_WORKER:
		worker = rcu_dereference_check(vq->worker,
					       lockdep_is_held(&dev->mutex));
		if (!worker) {
			ret = -EINVAL;
			break;
		}

		ring_worker.index = idx;
		ring_worker.worker_id = worker->id;

		if (copy_to_user(argp, &ring_worker, sizeof(ring_worker)))
			ret = -EFAULT;
		break;
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vhost_worker_ioctl);

/* Caller should have device mutex */
long vhost_dev_set_owner(struct vhost_dev *dev)
{
	struct vhost_worker *worker;
	int err, i;

	/* Is there an owner already? */
	if (vhost_dev_has_owner(dev)) {
		err = -EBUSY;
		goto err_mm;
	}

	vhost_attach_mm(dev);

	err = vhost_dev_alloc_iovecs(dev);
	if (err)
		goto err_iovecs;

	if (dev->use_worker) {
		/*
		 * This should be done last, because vsock can queue work
		 * before VHOST_SET_OWNER so it simplifies the failure path
		 * below since we don't have to worry about vsock queueing
		 * while we free the worker.
		 */
		worker = vhost_worker_create(dev);
		if (!worker) {
			err = -ENOMEM;
			goto err_worker;
		}

		/* The single default worker serves every virtqueue. */
		for (i = 0; i < dev->nvqs; i++)
			__vhost_vq_attach_worker(dev->vqs[i], worker);
	}

	return 0;

err_worker:
	vhost_dev_free_iovecs(dev);
err_iovecs:
	vhost_detach_mm(dev);
err_mm:
	return err;
}
EXPORT_SYMBOL_GPL(vhost_dev_set_owner);

/* Allocate an IOTLB limited to max_iotlb_entries with the RETIRE flag set. */
static struct vhost_iotlb *iotlb_alloc(void)
{
	return vhost_iotlb_alloc(max_iotlb_entries,
				 VHOST_IOTLB_FLAG_RETIRE);
}

/*
 * Allocate the IOTLB that is later handed to vhost_dev_reset_owner().
 * Returns whatever iotlb_alloc() returns.
 */
struct vhost_iotlb *vhost_dev_reset_owner_prepare(void)
{
	return iotlb_alloc();
}
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);

/* Caller should have device mutex */
void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem)
{
	int i;

	vhost_dev_cleanup(dev);

	dev->umem = umem;
	/* We don't need VQ locks below since vhost_dev_cleanup makes sure
	 * VQs aren't running.
	 */
	for (i = 0; i < dev->nvqs; ++i)
		dev->vqs[i]->umem = umem;
}
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner);

/*
 * Stop polling on every virtqueue that has both a kick file and a kick
 * handler, then flush the device.
 */
void vhost_dev_stop(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick)
			vhost_poll_stop(&dev->vqs[i]->poll);
	}

	vhost_dev_flush(dev);
}
EXPORT_SYMBOL_GPL(vhost_dev_stop);

/* Free every message node on the read and pending lists, under iotlb_lock. */
void vhost_clear_msg(struct vhost_dev *dev)
{
	struct vhost_msg_node *node, *n;

	spin_lock(&dev->iotlb_lock);

	list_for_each_entry_safe(node, n, &dev->read_list, node) {
		list_del(&node->node);
		kfree(node);
	}

	list_for_each_entry_safe(node, n, &dev->pending_list, node) {
		list_del(&node->node);
		kfree(node);
	}

	spin_unlock(&dev->iotlb_lock);
}
EXPORT_SYMBOL_GPL(vhost_clear_msg);

/*
 * Tear down everything attached to @dev: per-vq eventfd/kick references,
 * iovec buffers, log context, umem and iotlb tables, queued messages,
 * workers and finally the owner mm.
 */
void vhost_dev_cleanup(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		if (dev->vqs[i]->error_ctx)
			eventfd_ctx_put(dev->vqs[i]->error_ctx);
		if (dev->vqs[i]->kick)
			fput(dev->vqs[i]->kick);
		if (dev->vqs[i]->call_ctx.ctx)
			eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx);
		vhost_vq_reset(dev, dev->vqs[i]);
	}
	vhost_dev_free_iovecs(dev);
	if (dev->log_ctx)
		eventfd_ctx_put(dev->log_ctx);
	dev->log_ctx = NULL;
	/* No one will access memory at this point */
	vhost_iotlb_free(dev->umem);
	dev->umem = NULL;
	vhost_iotlb_free(dev->iotlb);
	dev->iotlb = NULL;
	vhost_clear_msg(dev);
	wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
	vhost_workers_free(dev);
	vhost_detach_mm(dev);
}
EXPORT_SYMBOL_GPL(vhost_dev_cleanup);

/*
 * Check that the part of the dirty log at @log_base covering @sz bytes
 * starting at @addr is accessible.  Each log byte covers
 * 8 * VHOST_PAGE_SIZE bytes.
 */
static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
{
	u64 a = addr / VHOST_PAGE_SIZE / 8;

	/* Make sure 64 bit math will not overflow. */
	if (a > ULONG_MAX - (unsigned long)log_base ||
	    a + (unsigned long)log_base > ULONG_MAX)
		return false;

	return access_ok(log_base + a,
			 (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
}

/* Make sure 64 bit math will not overflow.
*/ static bool vhost_overflow(u64 uaddr, u64 size) { if (uaddr > ULONG_MAX || size > ULONG_MAX) return true; if (!size) return false; return uaddr > ULONG_MAX - size + 1; } /* Caller should have vq mutex and device mutex. */ static bool vq_memory_access_ok(void __user *log_base, struct vhost_iotlb *umem, int log_all) { struct vhost_iotlb_map *map; if (!umem) return false; list_for_each_entry(map, &umem->list, link) { unsigned long a = map->addr; if (vhost_overflow(map->addr, map->size)) return false; if (!access_ok((void __user *)a, map->size)) return false; else if (log_all && !log_access_ok(log_base, map->start, map->size)) return false; } return true; } static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq, u64 addr, unsigned int size, int type) { const struct vhost_iotlb_map *map = vq->meta_iotlb[type]; if (!map) return NULL; return (void __user *)(uintptr_t)(map->addr + addr - map->start); } /* Can we switch to this memory table? */ /* Caller should have device mutex but not vq mutex */ static bool memory_access_ok(struct vhost_dev *d, struct vhost_iotlb *umem, int log_all) { int i; for (i = 0; i < d->nvqs; ++i) { bool ok; bool log; mutex_lock(&d->vqs[i]->mutex); log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL); /* If ring is inactive, will check when it's enabled. */ if (d->vqs[i]->private_data) ok = vq_memory_access_ok(d->vqs[i]->log_base, umem, log); else ok = true; mutex_unlock(&d->vqs[i]->mutex); if (!ok) return false; } return true; } static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, struct iovec iov[], int iov_size, int access); static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to, const void *from, unsigned size) { int ret; if (!vq->iotlb) return __copy_to_user(to, from, size); else { /* This function should be called after iotlb * prefetch, which means we're sure that all vq * could be access through iotlb. So -EAGAIN should * not happen in this case. 
*/ struct iov_iter t; void __user *uaddr = vhost_vq_meta_fetch(vq, (u64)(uintptr_t)to, size, VHOST_ADDR_USED); if (uaddr) return __copy_to_user(uaddr, from, size); ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_WO); if (ret < 0) goto out; iov_iter_init(&t, ITER_DEST, vq->iotlb_iov, ret, size); ret = copy_to_iter(from, size, &t); if (ret == size) ret = 0; } out: return ret; } static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, void __user *from, unsigned size) { int ret; if (!vq->iotlb) return __copy_from_user(to, from, size); else { /* This function should be called after iotlb * prefetch, which means we're sure that vq * could be access through iotlb. So -EAGAIN should * not happen in this case. */ void __user *uaddr = vhost_vq_meta_fetch(vq, (u64)(uintptr_t)from, size, VHOST_ADDR_DESC); struct iov_iter f; if (uaddr) return __copy_from_user(to, uaddr, size); ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_RO); if (ret < 0) { vq_err(vq, "IOTLB translation failure: uaddr " "%p size 0x%llx\n", from, (unsigned long long) size); goto out; } iov_iter_init(&f, ITER_SOURCE, vq->iotlb_iov, ret, size); ret = copy_from_iter(to, size, &f); if (ret == size) ret = 0; } out: return ret; } static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq, void __user *addr, unsigned int size, int type) { int ret; ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_RO); if (ret < 0) { vq_err(vq, "IOTLB translation failure: uaddr " "%p size 0x%llx\n", addr, (unsigned long long) size); return NULL; } if (ret != 1 || vq->iotlb_iov[0].iov_len != size) { vq_err(vq, "Non atomic userspace memory access: uaddr " "%p size 0x%llx\n", addr, (unsigned long long) size); return NULL; } return vq->iotlb_iov[0].iov_base; } /* This function should be called after iotlb * prefetch, which means we're sure 
that vq
 * could be accessed through iotlb. So -EAGAIN should
 * not happen in this case.
 */
static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
					    void __user *addr, unsigned int size,
					    int type)
{
	/* Fast path: use the mapping cached for this ring type, if any. */
	void __user *uaddr = vhost_vq_meta_fetch(vq,
			     (u64)(uintptr_t)addr, size, type);
	if (uaddr)
		return uaddr;

	return __vhost_get_user_slow(vq, addr, size, type);
}

/*
 * Store @x at user pointer @ptr.  When the vq has an IOTLB, @ptr is first
 * translated as a used-ring address; a failed translation yields -EFAULT.
 */
#define vhost_put_user(vq, x, ptr)		\
({ \
	int ret; \
	if (!vq->iotlb) { \
		ret = __put_user(x, ptr); \
	} else { \
		__typeof__(ptr) to = \
			(__typeof__(ptr)) __vhost_get_user(vq, ptr,	\
					  sizeof(*ptr), VHOST_ADDR_USED); \
		if (to != NULL) \
			ret = __put_user(x, to); \
		else \
			ret = -EFAULT;	\
	} \
	ret; \
})

/* Write vq->avail_idx to the location returned by vhost_avail_event(). */
static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
{
	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
			      vhost_avail_event(vq));
}

/* Copy @count used elements from @head into the used ring at slot @idx. */
static inline int vhost_put_used(struct vhost_virtqueue *vq,
				 struct vring_used_elem *head, int idx,
				 int count)
{
	return vhost_copy_to_user(vq, vq->used->ring + idx, head,
				  count * sizeof(*head));
}

/* Write vq->used_flags to the used ring's flags field. */
static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)

{
	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
			      &vq->used->flags);
}

/* Write vq->last_used_idx to the used ring's idx field. */
static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)

{
	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
			      &vq->used->idx);
}

/*
 * Load @x from user pointer @ptr.  When the vq has an IOTLB, @ptr is first
 * translated as an address of ring @type; a failed translation yields
 * -EFAULT.
 */
#define vhost_get_user(vq, x, ptr, type)		\
({ \
	int ret; \
	if (!vq->iotlb) { \
		ret = __get_user(x, ptr); \
	} else { \
		__typeof__(ptr) from = \
			(__typeof__(ptr)) __vhost_get_user(vq, ptr, \
							   sizeof(*ptr), \
							   type); \
		if (from != NULL) \
			ret = __get_user(x, from); \
		else \
			ret = -EFAULT; \
	} \
	ret; \
})

#define vhost_get_avail(vq, x, ptr) \
	vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL)

#define vhost_get_used(vq, x, ptr) \
	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)

/*
 * Take every virtqueue mutex of @d in index order; the index is passed as
 * the lockdep subclass.
 */
static void vhost_dev_lock_vqs(struct vhost_dev *d)
{
	int i = 0;
	for (i = 0; i < d->nvqs; ++i)
		mutex_lock_nested(&d->vqs[i]->mutex, i);
}

/* Release every virtqueue mutex of @d. */
static void vhost_dev_unlock_vqs(struct vhost_dev *d)
{
	int i = 0;
	for (i = 0; i < d->nvqs; ++i)
		mutex_unlock(&d->vqs[i]->mutex);
}

/*
 * Refresh vq->avail_idx from the avail ring.
 * Returns 1 when new entries are available, 0 when there are none, or a
 * negative errno when the index cannot be read or moved by more than
 * vq->num entries.
 */
static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq)
{
	__virtio16 idx;
	int r;

	r = vhost_get_avail(vq, idx, &vq->avail->idx);
	if (unlikely(r < 0)) {
		vq_err(vq, "Failed to access available index at %p (%d)\n",
		       &vq->avail->idx, r);
		return r;
	}

	/* Check it isn't doing very strange thing with available indexes */
	vq->avail_idx = vhost16_to_cpu(vq, idx);
	if (unlikely((u16)(vq->avail_idx - vq->last_avail_idx) > vq->num)) {
		vq_err(vq, "Invalid available index change from %u to %u",
		       vq->last_avail_idx, vq->avail_idx);
		return -EINVAL;
	}

	/* We're done if there is nothing new */
	if (vq->avail_idx == vq->last_avail_idx)
		return 0;

	/*
	 * We updated vq->avail_idx so we need a memory barrier between
	 * the index read above and the caller reading avail ring entries.
	 */
	smp_rmb();
	return 1;
}

/*
 * Read the avail ring entry for @idx into *@head.  The slot is
 * idx & (vq->num - 1).
 */
static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
				       __virtio16 *head, int idx)
{
	return vhost_get_avail(vq, *head,
			       &vq->avail->ring[idx & (vq->num - 1)]);
}

/* Read the avail ring's flags field into *@flags. */
static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
					__virtio16 *flags)
{
	return vhost_get_avail(vq, *flags, &vq->avail->flags);
}

/* Read the value at vhost_used_event() into *@event. */
static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
				       __virtio16 *event)
{
	return vhost_get_avail(vq, *event, vhost_used_event(vq));
}

/* Read the used ring's idx field into *@idx. */
static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
				     __virtio16 *idx)
{
	return vhost_get_used(vq, *idx, &vq->used->idx);
}

/* Copy descriptor number @idx from the descriptor table into *@desc. */
static inline int vhost_get_desc(struct vhost_virtqueue *vq,
				 struct vring_desc *desc, int idx)
{
	return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
}

/*
 * For every pending IOTLB miss whose iova lies inside the range described
 * by @msg, re-queue the owning virtqueue's poll work and drop the pending
 * node.
 */
static void vhost_iotlb_notify_vq(struct vhost_dev *d,
				  struct vhost_iotlb_msg *msg)
{
	struct vhost_msg_node *node, *n;

	spin_lock(&d->iotlb_lock);

	list_for_each_entry_safe(node, n, &d->pending_list, node) {
		struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb;
		if (msg->iova <= vq_msg->iova &&
		    msg->iova + msg->size - 1 >= vq_msg->iova &&
		    vq_msg->type == VHOST_IOTLB_MISS) {
			vhost_poll_queue(&node->vq->poll);
			list_del(&node->node);
			kfree(node);
		}
	}

	spin_unlock(&d->iotlb_lock);
}

/*
 * Check that the range [@uaddr, @uaddr + @size) does not overflow and
 * passes access_ok() for each access mode requested in @access.
 */
static bool umem_access_ok(u64 uaddr, u64 size, int access)
{
	unsigned long a = uaddr;

	/* Make sure 64 bit math will not overflow. */
	if (vhost_overflow(uaddr, size))
		return false;

	if ((access & VHOST_ACCESS_RO) &&
	    !access_ok((void __user *)a, size))
		return false;
	if ((access & VHOST_ACCESS_WO) &&
	    !access_ok((void __user *)a, size))
		return false;
	return true;
}

/*
 * Default handler for IOTLB update/invalidate messages.  Only @asid 0 is
 * accepted.  Runs with the device mutex and all virtqueue mutexes held.
 */
static int vhost_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
				   struct vhost_iotlb_msg *msg)
{
	int ret = 0;

	if (asid != 0)
		return -EINVAL;

	mutex_lock(&dev->mutex);
	vhost_dev_lock_vqs(dev);
	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		if (!dev->iotlb) {
			ret = -EFAULT;
			break;
		}
		if (!umem_access_ok(msg->uaddr, msg->size, msg->perm)) {
			ret = -EFAULT;
			break;
		}
		/* Cached ring mappings are dropped before the table changes. */
		vhost_vq_meta_reset(dev);
		if (vhost_iotlb_add_range(dev->iotlb, msg->iova,
					  msg->iova + msg->size - 1,
					  msg->uaddr, msg->perm)) {
			ret = -ENOMEM;
			break;
		}
		vhost_iotlb_notify_vq(dev, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		if (!dev->iotlb) {
			ret = -EFAULT;
			break;
		}
		vhost_vq_meta_reset(dev);
		vhost_iotlb_del_range(dev->iotlb, msg->iova,
				      msg->iova + msg->size - 1);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	vhost_dev_unlock_vqs(dev);
	mutex_unlock(&dev->mutex);

	return ret;
}

/*
 * Parse one IOTLB message (V1 or V2 layout) from @from and hand it to the
 * device's msg_handler, or to vhost_process_iotlb_msg() when none is set.
 * Returns the size of the consumed message structure on success, -EINVAL
 * for malformed input and -EFAULT for any handler failure.
 */
ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
			     struct iov_iter *from)
{
	struct vhost_iotlb_msg msg;
	size_t offset;
	int type, ret;
	u32 asid = 0;

	ret = copy_from_iter(&type, sizeof(type), from);
	if (ret != sizeof(type)) {
		ret = -EINVAL;
		goto done;
	}

	switch (type) {
	case VHOST_IOTLB_MSG:
		/* There may be a hole after type for V1 message type,
		 * so skip it here.
		 */
		offset = offsetof(struct vhost_msg, iotlb) - sizeof(int);
		break;
	case VHOST_IOTLB_MSG_V2:
		if (vhost_backend_has_feature(dev->vqs[0],
					      VHOST_BACKEND_F_IOTLB_ASID)) {
			ret = copy_from_iter(&asid, sizeof(asid), from);
			if (ret != sizeof(asid)) {
				ret = -EINVAL;
				goto done;
			}
			offset = 0;
		} else
			offset = sizeof(__u32);
		break;
	default:
		ret = -EINVAL;
		goto done;
	}

	iov_iter_advance(from, offset);
	ret = copy_from_iter(&msg, sizeof(msg), from);
	if (ret != sizeof(msg)) {
		ret = -EINVAL;
		goto done;
	}

	/* An update that maps nothing is rejected. */
	if (msg.type == VHOST_IOTLB_UPDATE && msg.size == 0) {
		ret = -EINVAL;
		goto done;
	}

	if (dev->msg_handler)
		ret = dev->msg_handler(dev, asid, &msg);
	else
		ret = vhost_process_iotlb_msg(dev, asid, &msg);
	/* Every handler error is reported to the writer as -EFAULT. */
	if (ret) {
		ret = -EFAULT;
		goto done;
	}

	ret = (type == VHOST_IOTLB_MSG) ? sizeof(struct vhost_msg) :
	      sizeof(struct vhost_msg_v2);
done:
	return ret;
}
EXPORT_SYMBOL(vhost_chr_write_iter);

/* Poll helper: the device is readable while its read list is non-empty. */
__poll_t vhost_chr_poll(struct file *file, struct vhost_dev *dev,
			    poll_table *wait)
{
	__poll_t mask = 0;

	poll_wait(file, &dev->wait, wait);

	if (!list_empty(&dev->read_list))
		mask |= EPOLLIN | EPOLLRDNORM;

	return mask;
}
EXPORT_SYMBOL(vhost_chr_poll);

/*
 * Dequeue one message from the read list and copy it to @to.  Unless
 * @noblock is set the caller sleeps until a message arrives, a signal is
 * pending or the device has no IOTLB.  IOTLB miss messages that were
 * copied completely are moved to the pending list; all other messages are
 * freed after the copy.
 */
ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,
			    int noblock)
{
	DEFINE_WAIT(wait);
	struct vhost_msg_node *node;
	ssize_t ret = 0;
	unsigned size = sizeof(struct vhost_msg);

	/* A buffer too small for a message reads as 0 bytes. */
	if (iov_iter_count(to) < size)
		return 0;

	while (1) {
		if (!noblock)
			prepare_to_wait(&dev->wait, &wait,
					TASK_INTERRUPTIBLE);

		node = vhost_dequeue_msg(dev, &dev->read_list);
		if (node)
			break;
		if (noblock) {
			ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		if (!dev->iotlb) {
			ret = -EBADFD;
			break;
		}

		schedule();
	}

	if (!noblock)
		finish_wait(&dev->wait, &wait);

	if (node) {
		struct vhost_iotlb_msg *msg;
		void *start = &node->msg;

		switch (node->msg.type) {
		case VHOST_IOTLB_MSG:
			size = sizeof(node->msg);
			msg = &node->msg.iotlb;
			break;
		case VHOST_IOTLB_MSG_V2:
			size = sizeof(node->msg_v2);
			msg = &node->msg_v2.iotlb;
			break;
		default:
			BUG();
			break;
		}

		ret = copy_to_iter(start, size, to);
		if (ret != size || msg->type != VHOST_IOTLB_MISS) {
			kfree(node);
			return ret;
		}
		vhost_enqueue_msg(dev, &dev->pending_list, node);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vhost_chr_read_iter);

/*
 * Queue an IOTLB miss message for @iova with permission @access on the
 * device read list.  The V2 layout is used when the backend negotiated
 * VHOST_BACKEND_F_IOTLB_MSG_V2.  Returns 0 or -ENOMEM.
 */
static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access)
{
	struct vhost_dev *dev = vq->dev;
	struct vhost_msg_node *node;
	struct vhost_iotlb_msg *msg;
	bool v2 = vhost_backend_has_feature(vq, VHOST_BACKEND_F_IOTLB_MSG_V2);

	node = vhost_new_msg(vq, v2 ? VHOST_IOTLB_MSG_V2 : VHOST_IOTLB_MSG);
	if (!node)
		return -ENOMEM;

	if (v2) {
		node->msg_v2.type = VHOST_IOTLB_MSG_V2;
		msg = &node->msg_v2.iotlb;
	} else {
		msg = &node->msg.iotlb;
	}

	msg->type = VHOST_IOTLB_MISS;
	msg->iova = iova;
	msg->perm = access;

	vhost_enqueue_msg(dev, &dev->read_list, node);

	return 0;
}

/*
 * Check that the descriptor table, avail ring and used ring of a ring
 * with @num entries pass access_ok().
 */
static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
			 vring_desc_t __user *desc,
			 vring_avail_t __user *avail,
			 vring_used_t __user *used)

{
	/* If an IOTLB device is present, the vring addresses are
	 * GIOVAs. Access validation occurs at prefetch time. */
	if (vq->iotlb)
		return true;

	return access_ok(desc, vhost_get_desc_size(vq, num)) &&
	       access_ok(avail, vhost_get_avail_size(vq, num)) &&
	       access_ok(used, vhost_get_used_size(vq, num));
}

/*
 * Cache @map as the mapping for ring @type, provided it grants the access
 * that ring needs (write for the used ring, read for the others).
 */
static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
				 const struct vhost_iotlb_map *map,
				 int type)
{
	int access = (type == VHOST_ADDR_USED) ?
		     VHOST_ACCESS_WO : VHOST_ACCESS_RO;

	if (likely(map->perm & access))
		vq->meta_iotlb[type] = map;
}

/*
 * Check that [@addr, @addr + @len) is fully covered by IOTLB mappings that
 * allow @access.  A hole triggers an IOTLB miss message and returns false.
 * When the first mapping covers the whole range it is cached for @type.
 */
static bool iotlb_access_ok(struct vhost_virtqueue *vq,
			    int access, u64 addr, u64 len, int type)
{
	const struct vhost_iotlb_map *map;
	struct vhost_iotlb *umem = vq->iotlb;
	u64 s = 0, size, orig_addr = addr, last = addr + len - 1;

	/* A cached mapping for this ring type means it was validated before. */
	if (vhost_vq_meta_fetch(vq, addr, len, type))
		return true;

	while (len > s) {
		map = vhost_iotlb_itree_first(umem, addr, last);
		if (map == NULL || map->start > addr) {
			vhost_iotlb_miss(vq, addr, access);
			return false;
		} else if (!(map->perm & access)) {
			/* Report the possible access violation by
			 * request another translation from userspace.
			 */
			return false;
		}

		size = map->size - addr + map->start;

		if (orig_addr == addr && size >= len)
			vhost_vq_meta_update(vq, map, type);

		s += size;
		addr += size;
	}

	return true;
}

/*
 * Validate the IOTLB mappings of the descriptor table, avail ring and
 * used ring.  Returns 1 without checking when the vq has no IOTLB,
 * otherwise non-zero only if all three rings are accessible.
 */
int vq_meta_prefetch(struct vhost_virtqueue *vq)
{
	unsigned int num = vq->num;

	if (!vq->iotlb)
		return 1;

	return iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->desc,
			       vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) &&
	       iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->avail,
			       vhost_get_avail_size(vq, num),
			       VHOST_ADDR_AVAIL) &&
	       iotlb_access_ok(vq, VHOST_MAP_WO, (u64)(uintptr_t)vq->used,
			       vhost_get_used_size(vq, num), VHOST_ADDR_USED);
}
EXPORT_SYMBOL_GPL(vq_meta_prefetch);

/* Can we log writes? */
/* Caller should have device mutex but not vq mutex */
bool vhost_log_access_ok(struct vhost_dev *dev)
{
	return memory_access_ok(dev, dev->umem, 1);
}
EXPORT_SYMBOL_GPL(vhost_log_access_ok);

/*
 * Check that the dirty log can record writes to the used ring located at
 * @log_addr.  Passes trivially when @log_used is false.
 */
static bool vq_log_used_access_ok(struct vhost_virtqueue *vq,
				  void __user *log_base,
				  bool log_used,
				  u64 log_addr)
{
	/* If an IOTLB device is present, log_addr is a GIOVA that
	 * will never be logged by log_used(). */
	if (vq->iotlb)
		return true;

	return !log_used || log_access_ok(log_base, log_addr,
					  vhost_get_used_size(vq, vq->num));
}

/* Verify access for write logging.
*/
/* Caller should have vq mutex and device mutex */
static bool vq_log_access_ok(struct vhost_virtqueue *vq,
			     void __user *log_base)
{
	return vq_memory_access_ok(log_base, vq->umem,
				   vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
		vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr);
}

/* Can we start vq? */
/* Caller should have vq mutex and device mutex */
bool vhost_vq_access_ok(struct vhost_virtqueue *vq)
{
	if (!vq_log_access_ok(vq, vq->log_base))
		return false;

	return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
}
EXPORT_SYMBOL_GPL(vhost_vq_access_ok);

/*
 * VHOST_SET_MEM_TABLE: copy the memory table from userspace, build a new
 * umem from its regions, validate it against the active virtqueues and
 * install it on the device and on every virtqueue.
 * Returns 0, -EFAULT, -EOPNOTSUPP (non-zero padding), -E2BIG (more than
 * max_mem_regions regions) or -ENOMEM.
 */
static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
{
	struct vhost_memory mem, *newmem;
	struct vhost_memory_region *region;
	struct vhost_iotlb *newumem, *oldumem;
	unsigned long size = offsetof(struct vhost_memory, regions);
	int i;

	/* Read only the fixed header first to learn the region count. */
	if (copy_from_user(&mem, m, size))
		return -EFAULT;
	if (mem.padding)
		return -EOPNOTSUPP;
	if (mem.nregions > max_mem_regions)
		return -E2BIG;
	newmem = kvzalloc(struct_size(newmem, regions, mem.nregions),
			GFP_KERNEL);
	if (!newmem)
		return -ENOMEM;

	memcpy(newmem, &mem, size);
	if (copy_from_user(newmem->regions, m->regions,
			   flex_array_size(newmem, regions, mem.nregions))) {
		kvfree(newmem);
		return -EFAULT;
	}

	newumem = iotlb_alloc();
	if (!newumem) {
		kvfree(newmem);
		return -ENOMEM;
	}

	for (region = newmem->regions;
	     region < newmem->regions + mem.nregions;
	     region++) {
		if (vhost_iotlb_add_range(newumem,
					  region->guest_phys_addr,
					  region->guest_phys_addr +
					  region->memory_size - 1,
					  region->userspace_addr,
					  VHOST_MAP_RW))
			goto err;
	}

	if (!memory_access_ok(d, newumem, 0))
		goto err;

	oldumem = d->umem;
	d->umem = newumem;

	/* All memory accesses are done under some VQ mutex. */
	for (i = 0; i < d->nvqs; ++i) {
		mutex_lock(&d->vqs[i]->mutex);
		d->vqs[i]->umem = newumem;
		mutex_unlock(&d->vqs[i]->mutex);
	}

	kvfree(newmem);
	vhost_iotlb_free(oldumem);
	return 0;

err:
	vhost_iotlb_free(newumem);
	kvfree(newmem);
	return -EFAULT;
}

/*
 * VHOST_SET_VRING_NUM: set the ring size.  The size must be a non-zero
 * power of two no larger than 0xffff.
 */
static long vhost_vring_set_num(struct vhost_dev *d,
				struct vhost_virtqueue *vq,
				void __user *argp)
{
	struct vhost_vring_state s;

	/* Resizing ring with an active backend?
	 * You don't want to do that. */
	if (vq->private_data)
		return -EBUSY;

	if (copy_from_user(&s, argp, sizeof s))
		return -EFAULT;

	if (!s.num || s.num > 0xffff || (s.num & (s.num - 1)))
		return -EINVAL;
	vq->num = s.num;

	return 0;
}

/*
 * VHOST_SET_VRING_ADDR: validate and record the user addresses of the
 * descriptor table, avail ring, used ring and used-ring log address.
 */
static long vhost_vring_set_addr(struct vhost_dev *d,
				 struct vhost_virtqueue *vq,
				 void __user *argp)
{
	struct vhost_vring_addr a;

	if (copy_from_user(&a, argp, sizeof a))
		return -EFAULT;
	/* VHOST_VRING_F_LOG is the only flag accepted. */
	if (a.flags & ~(0x1 << VHOST_VRING_F_LOG))
		return -EOPNOTSUPP;

	/* For 32bit, verify that the top 32bits of the user
	   data are set to zero. */
	if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr ||
	    (u64)(unsigned long)a.used_user_addr != a.used_user_addr ||
	    (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr)
		return -EFAULT;

	/* Make sure it's safe to cast pointers to vring types. */
	BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE);
	BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE);
	if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) ||
	    (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) ||
	    (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1)))
		return -EINVAL;

	/* We only verify access here if backend is configured.
	 * If it is not, we don't as size might not have been setup.
	 * We will verify when backend is configured. */
	if (vq->private_data) {
		if (!vq_access_ok(vq, vq->num,
			(void __user *)(unsigned long)a.desc_user_addr,
			(void __user *)(unsigned long)a.avail_user_addr,
			(void __user *)(unsigned long)a.used_user_addr))
			return -EINVAL;

		/* Also validate log access for used ring if enabled. */
		if (!vq_log_used_access_ok(vq, vq->log_base,
				a.flags & (0x1 << VHOST_VRING_F_LOG),
				a.log_guest_addr))
			return -EINVAL;
	}

	vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));
	vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
	vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
	vq->log_addr = a.log_guest_addr;
	vq->used = (void __user *)(unsigned long)a.used_user_addr;

	return 0;
}

/*
 * Run VHOST_SET_VRING_NUM or VHOST_SET_VRING_ADDR with the vq mutex held.
 * Any other @ioctl value is a caller bug.
 */
static long vhost_vring_set_num_addr(struct vhost_dev *d,
				     struct vhost_virtqueue *vq,
				     unsigned int ioctl,
				     void __user *argp)
{
	long r;

	mutex_lock(&vq->mutex);

	switch (ioctl) {
	case VHOST_SET_VRING_NUM:
		r = vhost_vring_set_num(d, vq, argp);
		break;
	case VHOST_SET_VRING_ADDR:
		r = vhost_vring_set_addr(d, vq, argp);
		break;
	default:
		BUG();
	}

	mutex_unlock(&vq->mutex);

	return r;
}

/*
 * Handle the per-virtqueue ioctls.  The virtqueue index is read from the
 * start of @argp.  Returns 0 or a negative errno; unknown commands yield
 * -ENOIOCTLCMD.
 */
long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
{
	struct file *eventfp, *filep = NULL;
	bool pollstart = false, pollstop = false;
	struct eventfd_ctx *ctx = NULL;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	struct vhost_vring_file f;
	u32 idx;
	long r;

	r = vhost_get_vq_from_user(d, argp, &vq, &idx);
	if (r < 0)
		return r;

	if (ioctl == VHOST_SET_VRING_NUM ||
	    ioctl == VHOST_SET_VRING_ADDR) {
		return vhost_vring_set_num_addr(d, vq, ioctl, argp);
	}

	mutex_lock(&vq->mutex);

	switch (ioctl) {
	case VHOST_SET_VRING_BASE:
		/* Moving base with an active backend?
		 * You don't want to do that. */
		if (vq->private_data) {
			r = -EBUSY;
			break;
		}
		if (copy_from_user(&s, argp, sizeof s)) {
			r = -EFAULT;
			break;
		}
		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
			/* Packed ring: low 16 bits avail index, high 16 bits used index. */
			vq->last_avail_idx = s.num & 0xffff;
			vq->last_used_idx = (s.num >> 16) & 0xffff;
		} else {
			if (s.num > 0xffff) {
				r = -EINVAL;
				break;
			}
			vq->last_avail_idx = s.num;
		}
		/* Forget the cached index value. */
		vq->avail_idx = vq->last_avail_idx;
		break;
	case VHOST_GET_VRING_BASE:
		s.index = idx;
		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
			s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16);
		else
			s.num = vq->last_avail_idx;
		if (copy_to_user(argp, &s, sizeof s))
			r = -EFAULT;
		break;
	case VHOST_SET_VRING_KICK:
		if (copy_from_user(&f, argp, sizeof f)) {
			r = -EFAULT;
			break;
		}
		eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd);
		if (IS_ERR(eventfp)) {
			r = PTR_ERR(eventfp);
			break;
		}
		/*
		 * filep ends up holding the file whose reference is dropped
		 * below: the previous kick file when it is replaced, or the
		 * new reference when the same file is set again.
		 */
		if (eventfp != vq->kick) {
			pollstop = (filep = vq->kick) != NULL;
			pollstart = (vq->kick = eventfp) != NULL;
		} else
			filep = eventfp;
		break;
	case VHOST_SET_VRING_CALL:
		if (copy_from_user(&f, argp, sizeof f)) {
			r = -EFAULT;
			break;
		}
		ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd);
		if (IS_ERR(ctx)) {
			r = PTR_ERR(ctx);
			break;
		}

		/* After the swap, ctx holds the previous context, put below. */
		swap(ctx, vq->call_ctx.ctx);
		break;
	case VHOST_SET_VRING_ERR:
		if (copy_from_user(&f, argp, sizeof f)) {
			r = -EFAULT;
			break;
		}
		ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd);
		if (IS_ERR(ctx)) {
			r = PTR_ERR(ctx);
			break;
		}
		/* After the swap, ctx holds the previous context, put below. */
		swap(ctx, vq->error_ctx);
		break;
	case VHOST_SET_VRING_ENDIAN:
		r = vhost_set_vring_endian(vq, argp);
		break;
	case VHOST_GET_VRING_ENDIAN:
		r = vhost_get_vring_endian(vq, idx, argp);
		break;
	case VHOST_SET_VRING_BUSYLOOP_TIMEOUT:
		if (copy_from_user(&s, argp, sizeof(s))) {
			r = -EFAULT;
			break;
		}
		vq->busyloop_timeout = s.num;
		break;
	case VHOST_GET_VRING_BUSYLOOP_TIMEOUT:
		s.index = idx;
		s.num = vq->busyloop_timeout;
		if (copy_to_user(argp, &s, sizeof(s)))
			r = -EFAULT;
		break;
	default:
		r = -ENOIOCTLCMD;
	}

	if (pollstop && vq->handle_kick)
		vhost_poll_stop(&vq->poll);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);
	if (filep)
		fput(filep);

	if (pollstart && vq->handle_kick)
		r = vhost_poll_start(&vq->poll, vq->kick);

	mutex_unlock(&vq->mutex);

	/* The flush is issued only after the vq mutex has been released. */
	if (pollstop && vq->handle_kick)
		vhost_dev_flush(vq->poll.dev);
	return r;
}
EXPORT_SYMBOL_GPL(vhost_vring_ioctl);

/*
 * Allocate a fresh device IOTLB, install it on @d and on every virtqueue
 * (resetting each vq's cached ring mappings) and free the previous one.
 * Returns 0 or -ENOMEM.
 */
int vhost_init_device_iotlb(struct vhost_dev *d)
{
	struct vhost_iotlb *niotlb, *oiotlb;
	int i;

	niotlb = iotlb_alloc();
	if (!niotlb)
		return -ENOMEM;

	oiotlb = d->iotlb;
	d->iotlb = niotlb;

	for (i = 0; i < d->nvqs; ++i) {
		struct vhost_virtqueue *vq = d->vqs[i];

		mutex_lock(&vq->mutex);
		vq->iotlb = niotlb;
		__vhost_vq_meta_reset(vq);
		mutex_unlock(&vq->mutex);
	}

	vhost_iotlb_free(oiotlb);

	return 0;
}
EXPORT_SYMBOL_GPL(vhost_init_device_iotlb);

/* Caller must have device mutex */
long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
{
	struct eventfd_ctx *ctx;
	u64 p;
	long r;
	int i, fd;

	/* If you are not the owner, you can become one */
	if (ioctl == VHOST_SET_OWNER) {
		r = vhost_dev_set_owner(d);
		goto done;
	}

	/* You must be the owner to do anything else */
	r = vhost_dev_check_owner(d);
	if (r)
		goto done;

	switch (ioctl) {
	case VHOST_SET_MEM_TABLE:
		r = vhost_set_memory(d, argp);
		break;
	case VHOST_SET_LOG_BASE:
		if (copy_from_user(&p, argp, sizeof p)) {
			r = -EFAULT;
			break;
		}
		/* Reject addresses that do not fit an unsigned long. */
		if ((u64)(unsigned long)p != p) {
			r = -EFAULT;
			break;
		}
		for (i = 0; i < d->nvqs; ++i) {
			struct vhost_virtqueue *vq;
			void __user *base = (void __user *)(unsigned long)p;
			vq = d->vqs[i];
			mutex_lock(&vq->mutex);
			/* If ring is inactive, will check when it's enabled. */
			if (vq->private_data && !vq_log_access_ok(vq, base))
				r = -EFAULT;
			else
				vq->log_base = base;
			mutex_unlock(&vq->mutex);
		}
		break;
	case VHOST_SET_LOG_FD:
		r = get_user(fd, (int __user *)argp);
		if (r < 0)
			break;
		ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
		if (IS_ERR(ctx)) {
			r = PTR_ERR(ctx);
			break;
		}
		/* After the swap, ctx holds the previous log context, put below. */
		swap(ctx, d->log_ctx);
		for (i = 0; i < d->nvqs; ++i) {
			mutex_lock(&d->vqs[i]->mutex);
			d->vqs[i]->log_ctx = d->log_ctx;
			mutex_unlock(&d->vqs[i]->mutex);
		}
		if (ctx)
			eventfd_ctx_put(ctx);
		break;
	default:
		r = -ENOIOCTLCMD;
		break;
	}
done:
	return r;
}
EXPORT_SYMBOL_GPL(vhost_dev_ioctl);

/* TODO: This is really inefficient.  We need something like get_user()
 * (instruction directly accesses the data, with an exception table entry
 * returning -EFAULT). See Documentation/arch/x86/exception-tables.rst.
*/ static int set_bit_to_user(int nr, void __user *addr) { unsigned long log = (unsigned long)addr; struct page *page; void *base; int bit = nr + (log % PAGE_SIZE) * 8; int r; r = pin_user_pages_fast(log, 1, FOLL_WRITE, &page); if (r < 0) return r; BUG_ON(r != 1); base = kmap_atomic(page); set_bit(bit, base); kunmap_atomic(base); unpin_user_pages_dirty_lock(&page, 1, true); return 0; } static int log_write(void __user *log_base, u64 write_address, u64 write_length) { u64 write_page = write_address / VHOST_PAGE_SIZE; int r; if (!write_length) return 0; write_length += write_address % VHOST_PAGE_SIZE; for (;;) { u64 base = (u64)(unsigned long)log_base; u64 log = base + write_page / 8; int bit = write_page % 8; if ((u64)(unsigned long)log != log) return -EFAULT; r = set_bit_to_user(bit, (void __user *)(unsigned long)log); if (r < 0) return r; if (write_length <= VHOST_PAGE_SIZE) break; write_length -= VHOST_PAGE_SIZE; write_page += 1; } return r; } static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) { struct vhost_iotlb *umem = vq->umem; struct vhost_iotlb_map *u; u64 start, end, l, min; int r; bool hit = false; while (len) { min = len; /* More than one GPAs can be mapped into a single HVA. So * iterate all possible umems here to be safe. 
*/ list_for_each_entry(u, &umem->list, link) { if (u->addr > hva - 1 + len || u->addr - 1 + u->size < hva) continue; start = max(u->addr, hva); end = min(u->addr - 1 + u->size, hva - 1 + len); l = end - start + 1; r = log_write(vq->log_base, u->start + start - u->addr, l); if (r < 0) return r; hit = true; min = min(l, min); } if (!hit) return -EFAULT; len -= min; hva += min; } return 0; } static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) { struct iovec *iov = vq->log_iov; int i, ret; if (!vq->iotlb) return log_write(vq->log_base, vq->log_addr + used_offset, len); ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, len, iov, 64, VHOST_ACCESS_WO); if (ret < 0) return ret; for (i = 0; i < ret; i++) { ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base, iov[i].iov_len); if (ret) return ret; } return 0; } int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, unsigned int log_num, u64 len, struct iovec *iov, int count) { int i, r; /* Make sure data written is seen before log. */ smp_wmb(); if (vq->iotlb) { for (i = 0; i < count; i++) { r = log_write_hva(vq, (uintptr_t)iov[i].iov_base, iov[i].iov_len); if (r < 0) return r; } return 0; } for (i = 0; i < log_num; ++i) { u64 l = min(log[i].len, len); r = log_write(vq->log_base, log[i].addr, l); if (r < 0) return r; len -= l; if (!len) { if (vq->log_ctx) eventfd_signal(vq->log_ctx); return 0; } } /* Length written exceeds what we have stored. This is a bug. */ BUG(); return 0; } EXPORT_SYMBOL_GPL(vhost_log_write); static int vhost_update_used_flags(struct vhost_virtqueue *vq) { void __user *used; if (vhost_put_used_flags(vq)) return -EFAULT; if (unlikely(vq->log_used)) { /* Make sure the flag is seen before log. */ smp_wmb(); /* Log used flag write. 
*/ used = &vq->used->flags; log_used(vq, (used - (void __user *)vq->used), sizeof vq->used->flags); if (vq->log_ctx) eventfd_signal(vq->log_ctx); } return 0; } static int vhost_update_avail_event(struct vhost_virtqueue *vq) { if (vhost_put_avail_event(vq)) return -EFAULT; if (unlikely(vq->log_used)) { void __user *used; /* Make sure the event is seen before log. */ smp_wmb(); /* Log avail event write */ used = vhost_avail_event(vq); log_used(vq, (used - (void __user *)vq->used), sizeof *vhost_avail_event(vq)); if (vq->log_ctx) eventfd_signal(vq->log_ctx); } return 0; } int vhost_vq_init_access(struct vhost_virtqueue *vq) { __virtio16 last_used_idx; int r; bool is_le = vq->is_le; if (!vq->private_data) return 0; vhost_init_is_le(vq); r = vhost_update_used_flags(vq); if (r) goto err; vq->signalled_used_valid = false; if (!vq->iotlb && !access_ok(&vq->used->idx, sizeof vq->used->idx)) { r = -EFAULT; goto err; } r = vhost_get_used_idx(vq, &last_used_idx); if (r) { vq_err(vq, "Can't access used idx at %p\n", &vq->used->idx); goto err; } vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx); return 0; err: vq->is_le = is_le; return r; } EXPORT_SYMBOL_GPL(vhost_vq_init_access); static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, struct iovec iov[], int iov_size, int access) { const struct vhost_iotlb_map *map; struct vhost_dev *dev = vq->dev; struct vhost_iotlb *umem = dev->iotlb ? 
dev->iotlb : dev->umem; struct iovec *_iov; u64 s = 0, last = addr + len - 1; int ret = 0; while ((u64)len > s) { u64 size; if (unlikely(ret >= iov_size)) { ret = -ENOBUFS; break; } map = vhost_iotlb_itree_first(umem, addr, last); if (map == NULL || map->start > addr) { if (umem != dev->iotlb) { ret = -EFAULT; break; } ret = -EAGAIN; break; } else if (!(map->perm & access)) { ret = -EPERM; break; } _iov = iov + ret; size = map->size - addr + map->start; _iov->iov_len = min((u64)len - s, size); _iov->iov_base = (void __user *)(unsigned long) (map->addr + addr - map->start); s += size; addr += size; ++ret; } if (ret == -EAGAIN) vhost_iotlb_miss(vq, addr, access); return ret; } /* Each buffer in the virtqueues is actually a chain of descriptors. This * function returns the next descriptor in the chain, * or -1U if we're at the end. */ static unsigned next_desc(struct vhost_virtqueue *vq, struct vring_desc *desc) { unsigned int next; /* If this descriptor says it doesn't chain, we're done. */ if (!(desc->flags & cpu_to_vhost16(vq, VRING_DESC_F_NEXT))) return -1U; /* Check they're not leading us off end of descriptors. 
*/ next = vhost16_to_cpu(vq, READ_ONCE(desc->next)); return next; } static int get_indirect(struct vhost_virtqueue *vq, struct iovec iov[], unsigned int iov_size, unsigned int *out_num, unsigned int *in_num, struct vhost_log *log, unsigned int *log_num, struct vring_desc *indirect) { struct vring_desc desc; unsigned int i = 0, count, found = 0; u32 len = vhost32_to_cpu(vq, indirect->len); struct iov_iter from; int ret, access; /* Sanity check */ if (unlikely(len % sizeof desc)) { vq_err(vq, "Invalid length in indirect descriptor: " "len 0x%llx not multiple of 0x%zx\n", (unsigned long long)len, sizeof desc); return -EINVAL; } ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect, UIO_MAXIOV, VHOST_ACCESS_RO); if (unlikely(ret < 0)) { if (ret != -EAGAIN) vq_err(vq, "Translation failure %d in indirect.\n", ret); return ret; } iov_iter_init(&from, ITER_SOURCE, vq->indirect, ret, len); count = len / sizeof desc; /* Buffers are chained via a 16 bit next field, so * we can have at most 2^16 of these. 
*/ if (unlikely(count > USHRT_MAX + 1)) { vq_err(vq, "Indirect buffer length too big: %d\n", indirect->len); return -E2BIG; } do { unsigned iov_count = *in_num + *out_num; if (unlikely(++found > count)) { vq_err(vq, "Loop detected: last one at %u " "indirect size %u\n", i, count); return -EINVAL; } if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) { vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); return -EINVAL; } if (unlikely(desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT))) { vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n", i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); return -EINVAL; } if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) access = VHOST_ACCESS_WO; else access = VHOST_ACCESS_RO; ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), vhost32_to_cpu(vq, desc.len), iov + iov_count, iov_size - iov_count, access); if (unlikely(ret < 0)) { if (ret != -EAGAIN) vq_err(vq, "Translation failure %d indirect idx %d\n", ret, i); return ret; } /* If this is an input descriptor, increment that count. */ if (access == VHOST_ACCESS_WO) { *in_num += ret; if (unlikely(log && ret)) { log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); log[*log_num].len = vhost32_to_cpu(vq, desc.len); ++*log_num; } } else { /* If it's an output descriptor, they're all supposed * to come before any input descriptors. */ if (unlikely(*in_num)) { vq_err(vq, "Indirect descriptor " "has out after in: idx %d\n", i); return -EINVAL; } *out_num += ret; } } while ((i = next_desc(vq, &desc)) != -1); return 0; } /* This looks in the virtqueue and for the first available buffer, and converts * it to an iovec for convenient access. Since descriptors consist of some * number of output then some number of input descriptors, it's actually two * iovecs, but we pack them into one and note how many of each there were. 
* * This function returns the descriptor number found, or vq->num (which is * never a valid descriptor number) if none was found. A negative code is * returned on error. */
/*
 * On success *out_num and *in_num hold the number of readable and writable
 * iovec entries stored in @iov; when @log is non-NULL, *log_num holds the
 * number of writable descriptors recorded in @log.  last_avail_idx is only
 * advanced on success.
 */
int vhost_get_vq_desc(struct vhost_virtqueue *vq,
		      struct iovec iov[], unsigned int iov_size,
		      unsigned int *out_num, unsigned int *in_num,
		      struct vhost_log *log, unsigned int *log_num)
{
	struct vring_desc desc;
	unsigned int i, head, found = 0;
	u16 last_avail_idx = vq->last_avail_idx;
	__virtio16 ring_head;
	int ret, access;

	/* Refresh the cached avail index only when the cache looks empty. */
	if (vq->avail_idx == vq->last_avail_idx) {
		ret = vhost_get_avail_idx(vq);
		if (unlikely(ret < 0))
			return ret;

		if (!ret)
			return vq->num;
	}

	/* Grab the next descriptor number they're advertising, and increment
	 * the index we've seen. */
	if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
		vq_err(vq, "Failed to read head: idx %d address %p\n",
		       last_avail_idx,
		       &vq->avail->ring[last_avail_idx % vq->num]);
		return -EFAULT;
	}

	head = vhost16_to_cpu(vq, ring_head);

	/* If their number is silly, that's an error. */
	if (unlikely(head >= vq->num)) {
		vq_err(vq, "Guest says index %u > %u is available",
		       head, vq->num);
		return -EINVAL;
	}

	/* When we start there are none of either input nor output. */
	*out_num = *in_num = 0;
	if (unlikely(log))
		*log_num = 0;

	i = head;
	do {
		unsigned iov_count = *in_num + *out_num;
		if (unlikely(i >= vq->num)) {
			vq_err(vq, "Desc index is %u > %u, head = %u",
			       i, vq->num, head);
			return -EINVAL;
		}
		/* A chain longer than the ring must contain a cycle. */
		if (unlikely(++found > vq->num)) {
			vq_err(vq, "Loop detected: last one at %u "
			       "vq size %u head %u\n",
			       i, vq->num, head);
			return -EINVAL;
		}
		ret = vhost_get_desc(vq, &desc, i);
		if (unlikely(ret)) {
			vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
			       i, vq->desc + i);
			return -EFAULT;
		}
		if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT)) {
			ret = get_indirect(vq, iov, iov_size,
					   out_num, in_num,
					   log, log_num, &desc);
			if (unlikely(ret < 0)) {
				if (ret != -EAGAIN)
					vq_err(vq, "Failure detected "
						"in indirect descriptor at idx %d\n", i);
				return ret;
			}
			/* Jumps to the loop condition, i.e. to next_desc(). */
			continue;
		}

		if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE))
			access = VHOST_ACCESS_WO;
		else
			access = VHOST_ACCESS_RO;
		ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr),
				     vhost32_to_cpu(vq, desc.len), iov + iov_count,
				     iov_size - iov_count, access);
		if (unlikely(ret < 0)) {
			if (ret != -EAGAIN)
				vq_err(vq, "Translation failure %d descriptor idx %d\n",
					ret, i);
			return ret;
		}
		if (access == VHOST_ACCESS_WO) {
			/* If this is an input descriptor,
			 * increment that count. */
			*in_num += ret;
			if (unlikely(log && ret)) {
				log[*log_num].addr = vhost64_to_cpu(vq, desc.addr);
				log[*log_num].len = vhost32_to_cpu(vq, desc.len);
				++*log_num;
			}
		} else {
			/* If it's an output descriptor, they're all supposed
			 * to come before any input descriptors. */
			if (unlikely(*in_num)) {
				vq_err(vq, "Descriptor has out after in: "
				       "idx %d\n", i);
				return -EINVAL;
			}
			*out_num += ret;
		}
	} while ((i = next_desc(vq, &desc)) != -1);

	/* On success, increment avail index. */
	vq->last_avail_idx++;

	/* Assume notifications from guest are disabled at this point,
	 * if they aren't we would need to update avail_event index. */
	BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
	return head;
}
EXPORT_SYMBOL_GPL(vhost_get_vq_desc);

/* Reverse the effect of vhost_get_vq_desc. Useful for error handling.
 * @n is the number of buffers to give back; last_avail_idx is simply
 * moved back by that amount. */
void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
{
	vq->last_avail_idx -= n;
}
EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);

/* After we've used one of their buffers, we tell them about it.  We'll then
 * want to notify the guest, using eventfd.
 *
 * Single-buffer convenience wrapper around vhost_add_used_n(): @head and
 * @len are converted to the ring's byte order and submitted as one element.
 * Returns whatever vhost_add_used_n() returns. */
int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
{
	struct vring_used_elem heads = {
		cpu_to_vhost32(vq, head),
		cpu_to_vhost32(vq, len)
	};

	return vhost_add_used_n(vq, &heads, 1);
}
EXPORT_SYMBOL_GPL(vhost_add_used);

/*
 * Write @count used elements at the current used-ring position and advance
 * last_used_idx.  The ring slot is computed once with a mask and no wrap
 * handling is done here, so the caller has to make sure the elements fit
 * before the end of the ring.
 *
 * Returns 0 on success or -EFAULT if the elements cannot be written, in
 * which case last_used_idx is left unchanged.
 */
static int __vhost_add_used_n(struct vhost_virtqueue *vq,
			    struct vring_used_elem *heads,
			    unsigned count)
{
	vring_used_elem_t __user *used;
	u16 old, new;
	int start;

	/* NOTE(review): the mask assumes vq->num is a power of two - confirm. */
	start = vq->last_used_idx & (vq->num - 1);
	used = vq->used->ring + start;
	if (vhost_put_used(vq, heads, start, count)) {
		vq_err(vq, "Failed to write used");
		return -EFAULT;
	}
	if (unlikely(vq->log_used)) {
		/* Make sure data is seen before log. */
		smp_wmb();
		/* Log used ring entry write. */
		log_used(vq, ((void __user *)used - (void __user *)vq->used),
			 count * sizeof *used);
	}
	old = vq->last_used_idx;
	new = (vq->last_used_idx += count);
	/* If the driver never bothers to signal in a very long while,
	 * used index might wrap around. If that happens, invalidate
	 * signalled_used index we stored. TODO: make sure driver
	 * signals at least once in 2^16 and remove this. */
	if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
		vq->signalled_used_valid = false;
	return 0;
}

/* After we've used one of their buffers, we tell them about it. We'll then * want to notify the guest, using eventfd. 
*/ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) { int start, n, r; start = vq->last_used_idx & (vq->num - 1); n = vq->num - start; if (n < count) { r = __vhost_add_used_n(vq, heads, n); if (r < 0) return r; heads += n; count -= n; } r = __vhost_add_used_n(vq, heads, count); /* Make sure buffer is written before we update index. */ smp_wmb(); if (vhost_put_used_idx(vq)) { vq_err(vq, "Failed to increment used idx"); return -EFAULT; } if (unlikely(vq->log_used)) { /* Make sure used idx is seen before log. */ smp_wmb(); /* Log used index update. */ log_used(vq, offsetof(struct vring_used, idx), sizeof vq->used->idx); if (vq->log_ctx) eventfd_signal(vq->log_ctx); } return r; } EXPORT_SYMBOL_GPL(vhost_add_used_n); static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { __u16 old, new; __virtio16 event; bool v; /* Flush out used index updates. This is paired * with the barrier that the Guest executes when enabling * interrupts. */ smp_mb(); if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) && unlikely(vq->avail_idx == vq->last_avail_idx)) return true; if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { __virtio16 flags; if (vhost_get_avail_flags(vq, &flags)) { vq_err(vq, "Failed to get flags"); return true; } return !(flags & cpu_to_vhost16(vq, VRING_AVAIL_F_NO_INTERRUPT)); } old = vq->signalled_used; v = vq->signalled_used_valid; new = vq->signalled_used = vq->last_used_idx; vq->signalled_used_valid = true; if (unlikely(!v)) return true; if (vhost_get_used_event(vq, &event)) { vq_err(vq, "Failed to get used event idx"); return true; } return vring_need_event(vhost16_to_cpu(vq, event), new, old); } /* This actually signals the guest, using eventfd. */ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) { /* Signal the Guest tell them we used something up. 
*/ if (vq->call_ctx.ctx && vhost_notify(dev, vq)) eventfd_signal(vq->call_ctx.ctx); } EXPORT_SYMBOL_GPL(vhost_signal); /* And here's the combo meal deal. Supersize me! */ void vhost_add_used_and_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq, unsigned int head, int len) { vhost_add_used(vq, head, len); vhost_signal(dev, vq); } EXPORT_SYMBOL_GPL(vhost_add_used_and_signal); /* multi-buffer version of vhost_add_used_and_signal */ void vhost_add_used_and_signal_n(struct vhost_dev *dev, struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) { vhost_add_used_n(vq, heads, count); vhost_signal(dev, vq); } EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); /* return true if we're sure that avaiable ring is empty */ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; if (vq->avail_idx != vq->last_avail_idx) return false; r = vhost_get_avail_idx(vq); /* Note: we treat error as non-empty here */ return r == 0; } EXPORT_SYMBOL_GPL(vhost_vq_avail_empty); /* OK, now we need to know about added descriptors. */ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) return false; vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { r = vhost_update_used_flags(vq); if (r) { vq_err(vq, "Failed to enable notification at %p: %d\n", &vq->used->flags, r); return false; } } else { r = vhost_update_avail_event(vq); if (r) { vq_err(vq, "Failed to update avail event index at %p: %d\n", vhost_avail_event(vq), r); return false; } } /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ smp_mb(); r = vhost_get_avail_idx(vq); /* Note: we treat error as empty here */ if (unlikely(r < 0)) return false; return r; } EXPORT_SYMBOL_GPL(vhost_enable_notify); /* We don't need to be notified again. 
*/ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; if (vq->used_flags & VRING_USED_F_NO_NOTIFY) return; vq->used_flags |= VRING_USED_F_NO_NOTIFY; if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { r = vhost_update_used_flags(vq); if (r) vq_err(vq, "Failed to disable notification at %p: %d\n", &vq->used->flags, r); } } EXPORT_SYMBOL_GPL(vhost_disable_notify); /* Create a new message. */ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) { /* Make sure all padding within the structure is initialized. */ struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; node->vq = vq; node->msg.type = type; return node; } EXPORT_SYMBOL_GPL(vhost_new_msg); void vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head, struct vhost_msg_node *node) { spin_lock(&dev->iotlb_lock); list_add_tail(&node->node, head); spin_unlock(&dev->iotlb_lock); wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); } EXPORT_SYMBOL_GPL(vhost_enqueue_msg); struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, struct list_head *head) { struct vhost_msg_node *node = NULL; spin_lock(&dev->iotlb_lock); if (!list_empty(head)) { node = list_first_entry(head, struct vhost_msg_node, node); list_del(&node->node); } spin_unlock(&dev->iotlb_lock); return node; } EXPORT_SYMBOL_GPL(vhost_dequeue_msg); void vhost_set_backend_features(struct vhost_dev *dev, u64 features) { struct vhost_virtqueue *vq; int i; mutex_lock(&dev->mutex); for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; mutex_lock(&vq->mutex); vq->acked_backend_features = features; mutex_unlock(&vq->mutex); } mutex_unlock(&dev->mutex); } EXPORT_SYMBOL_GPL(vhost_set_backend_features); static int __init vhost_init(void) { return 0; } static void __exit vhost_exit(void) { } module_init(vhost_init); module_exit(vhost_exit); MODULE_VERSION("0.0.1"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Michael S. 
Tsirkin"); MODULE_DESCRIPTION("Host kernel accelerator for virtio");
62 61 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 #ifndef __LINUX_ERSPAN_H #define __LINUX_ERSPAN_H /* * GRE header for ERSPAN type I encapsulation (4 octets [34:37]) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |0|0|0|0|0|00000|000000000|00000| Protocol Type for ERSPAN | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * The Type I ERSPAN frame format is based on the barebones IP + GRE * encapsulation (as described above) on top of the raw mirrored frame. * There is no extra ERSPAN header. 
* * * GRE header for ERSPAN type II and III encapsulation (8 octets [34:41]) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |0|0|0|1|0|00000|000000000|00000| Protocol Type for ERSPAN | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Sequence Number (increments per packet per session) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Note that in the above GRE header [RFC1701] out of the C, R, K, S, * s, Recur, Flags, Version fields only S (bit 03) is set to 1. The * other fields are set to zero, so only a sequence number follows. * * ERSPAN Version 1 (Type II) header (8 octets [42:49]) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Ver | VLAN | COS | En|T| Session ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Reserved | Index | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * * ERSPAN Version 2 (Type III) header (12 octets [42:53]) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Ver | VLAN | COS |BSO|T| Session ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Timestamp | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | SGT |P| FT | Hw ID |D|Gra|O| * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Platform Specific SubHeader (8 octets, optional) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Platf ID | Platform Specific Info | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Platform Specific Info | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * GRE proto ERSPAN type I/II = 0x88BE, type III = 0x22EB */ #include <linux/ip.h> 
#include <linux/ipv6.h> #include <linux/skbuff.h> #include <uapi/linux/erspan.h> #define ERSPAN_VERSION 0x1 /* ERSPAN type II */ #define VER_MASK 0xf000 #define VLAN_MASK 0x0fff #define COS_MASK 0xe000 #define EN_MASK 0x1800 #define T_MASK 0x0400 #define ID_MASK 0x03ff #define INDEX_MASK 0xfffff #define ERSPAN_VERSION2 0x2 /* ERSPAN type III*/ #define BSO_MASK EN_MASK #define SGT_MASK 0xffff0000 #define P_MASK 0x8000 #define FT_MASK 0x7c00 #define HWID_MASK 0x03f0 #define DIR_MASK 0x0008 #define GRA_MASK 0x0006 #define O_MASK 0x0001 #define HWID_OFFSET 4 #define DIR_OFFSET 3 enum erspan_encap_type { ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */ ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */ ERSPAN_ENCAP_8021Q = 0x2, /* originally 802.1Q encapsulated */ ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag preserved in frame */ }; #define ERSPAN_V1_MDSIZE 4 #define ERSPAN_V2_MDSIZE 8 struct erspan_base_hdr { #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 vlan_upper:4, ver:4; __u8 vlan:8; __u8 session_id_upper:2, t:1, en:2, cos:3; __u8 session_id:8; #elif defined(__BIG_ENDIAN_BITFIELD) __u8 ver: 4, vlan_upper:4; __u8 vlan:8; __u8 cos:3, en:2, t:1, session_id_upper:2; __u8 session_id:8; #else #error "Please fix <asm/byteorder.h>" #endif }; static inline void set_session_id(struct erspan_base_hdr *ershdr, u16 id) { ershdr->session_id = id & 0xff; ershdr->session_id_upper = (id >> 8) & 0x3; } static inline u16 get_session_id(const struct erspan_base_hdr *ershdr) { return (ershdr->session_id_upper << 8) + ershdr->session_id; } static inline void set_vlan(struct erspan_base_hdr *ershdr, u16 vlan) { ershdr->vlan = vlan & 0xff; ershdr->vlan_upper = (vlan >> 8) & 0xf; } static inline u16 get_vlan(const struct erspan_base_hdr *ershdr) { return (ershdr->vlan_upper << 8) + ershdr->vlan; } static inline void set_hwid(struct erspan_md2 *md2, u8 hwid) { md2->hwid = hwid & 0xf; md2->hwid_upper = (hwid >> 4) & 0x3; } static inline u8 get_hwid(const struct erspan_md2 *md2) { 
return (md2->hwid_upper << 4) + md2->hwid; } static inline int erspan_hdr_len(int version) { if (version == 0) return 0; return sizeof(struct erspan_base_hdr) + (version == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE); } static inline u8 tos_to_cos(u8 tos) { u8 dscp, cos; dscp = tos >> 2; cos = dscp >> 3; return cos; } static inline void erspan_build_header(struct sk_buff *skb, u32 id, u32 index, bool truncate, bool is_ipv4) { struct ethhdr *eth = (struct ethhdr *)skb->data; enum erspan_encap_type enc_type; struct erspan_base_hdr *ershdr; struct qtag_prefix { __be16 eth_type; __be16 tci; } *qp; u16 vlan_tci = 0; u8 tos; __be32 *idx; tos = is_ipv4 ? ip_hdr(skb)->tos : (ipv6_hdr(skb)->priority << 4) + (ipv6_hdr(skb)->flow_lbl[0] >> 4); enc_type = ERSPAN_ENCAP_NOVLAN; /* If mirrored packet has vlan tag, extract tci and * preserve vlan header in the mirrored frame. */ if (eth->h_proto == htons(ETH_P_8021Q)) { qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); vlan_tci = ntohs(qp->tci); enc_type = ERSPAN_ENCAP_INFRAME; } skb_push(skb, sizeof(*ershdr) + ERSPAN_V1_MDSIZE); ershdr = (struct erspan_base_hdr *)skb->data; memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V1_MDSIZE); /* Build base header */ ershdr->ver = ERSPAN_VERSION; ershdr->cos = tos_to_cos(tos); ershdr->en = enc_type; ershdr->t = truncate; set_vlan(ershdr, vlan_tci); set_session_id(ershdr, id); /* Build metadata */ idx = (__be32 *)(ershdr + 1); *idx = htonl(index & INDEX_MASK); } /* ERSPAN GRA: timestamp granularity * 00b --> granularity = 100 microseconds * 01b --> granularity = 100 nanoseconds * 10b --> granularity = IEEE 1588 * Here we only support 100 microseconds. */ static inline __be32 erspan_get_timestamp(void) { u64 h_usecs; ktime_t kt; kt = ktime_get_real(); h_usecs = ktime_divns(kt, 100 * NSEC_PER_USEC); /* ERSPAN base header only has 32-bit, * so it wraps around 4 days. 
*/ return htonl((u32)h_usecs); } /* ERSPAN BSO (Bad/Short/Oversized), see RFC1757 * 00b --> Good frame with no error, or unknown integrity * 01b --> Payload is a Short Frame * 10b --> Payload is an Oversized Frame * 11b --> Payload is a Bad Frame with CRC or Alignment Error */ enum erspan_bso { BSO_NOERROR = 0x0, BSO_SHORT = 0x1, BSO_OVERSIZED = 0x2, BSO_BAD = 0x3, }; static inline u8 erspan_detect_bso(struct sk_buff *skb) { /* BSO_BAD is not handled because the frame CRC * or alignment error information is in FCS. */ if (skb->len < ETH_ZLEN) return BSO_SHORT; if (skb->len > ETH_FRAME_LEN) return BSO_OVERSIZED; return BSO_NOERROR; } static inline void erspan_build_header_v2(struct sk_buff *skb, u32 id, u8 direction, u16 hwid, bool truncate, bool is_ipv4) { struct ethhdr *eth = (struct ethhdr *)skb->data; struct erspan_base_hdr *ershdr; struct erspan_md2 *md2; struct qtag_prefix { __be16 eth_type; __be16 tci; } *qp; u16 vlan_tci = 0; u8 gra = 0; /* 100 usec */ u8 bso = 0; /* Bad/Short/Oversized */ u8 sgt = 0; u8 tos; tos = is_ipv4 ? ip_hdr(skb)->tos : (ipv6_hdr(skb)->priority << 4) + (ipv6_hdr(skb)->flow_lbl[0] >> 4); /* Unlike v1, v2 does not have En field, * so only extract vlan tci field. */ if (eth->h_proto == htons(ETH_P_8021Q)) { qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); vlan_tci = ntohs(qp->tci); } bso = erspan_detect_bso(skb); skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); ershdr = (struct erspan_base_hdr *)skb->data; memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); /* Build base header */ ershdr->ver = ERSPAN_VERSION2; ershdr->cos = tos_to_cos(tos); ershdr->en = bso; ershdr->t = truncate; set_vlan(ershdr, vlan_tci); set_session_id(ershdr, id); /* Build metadata */ md2 = (struct erspan_md2 *)(ershdr + 1); md2->timestamp = erspan_get_timestamp(); md2->sgt = htons(sgt); md2->p = 1; md2->ft = 0; md2->dir = direction; md2->gra = gra; md2->o = 0; set_hwid(md2, hwid); } #endif
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 
1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 
1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 
1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 // SPDX-License-Identifier: GPL-2.0 /* * NVMe over Fabrics RDMA target. * Copyright (c) 2015-2016 HGST, a Western Digital Company. 
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/atomic.h>
#include <linux/blk-integrity.h>
#include <linux/ctype.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/nvme.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/wait.h>
#include <linux/inet.h>
#include <linux/unaligned.h>

#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/rw.h>
#include <rdma/ib_cm.h>

#include <linux/nvme-rdma.h>
#include "nvmet.h"

/*
 * We allow at least 1 page, up to 4 SGEs, and up to 16KB of inline data
 */
#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
#define NVMET_RDMA_MAX_INLINE_SGE		4
#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int, SZ_16K, PAGE_SIZE)

/* Assume mpsmin == device_page_size == 4KB */
#define NVMET_RDMA_MAX_MDTS			8
#define NVMET_RDMA_MAX_METADATA_MDTS		5

#define NVMET_RDMA_BACKLOG 128

/*
 * Tag value stored in a response that nvmet_rdma_get_rsp() allocated on its
 * own instead of taking it from the per-queue rsps[] array.
 */
#define NVMET_RDMA_DISCRETE_RSP_TAG		-1

struct nvmet_rdma_srq;

/*
 * Receive-side context for one RECV work request.
 *
 * sge[0] maps the NVMe command buffer (nvme_cmd); sge[1..] map the optional
 * inline data pages that are also described by inline_sg[].  A command is
 * posted either to a shared receive queue (nsrq set) or to the queue pair of
 * its owning queue; see nvmet_rdma_post_recv().
 */
struct nvmet_rdma_cmd {
	struct ib_sge		sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
	struct ib_cqe		cqe;
	struct ib_recv_wr	wr;
	struct scatterlist	inline_sg[NVMET_RDMA_MAX_INLINE_SGE];
	struct nvme_command     *nvme_cmd;
	struct nvmet_rdma_queue	*queue;
	struct nvmet_rdma_srq   *nsrq;
};

/* Bits for nvmet_rdma_rsp.flags. */
enum {
	/* Data came in-capsule; set by nvmet_rdma_map_sgl_inline(). */
	NVMET_RDMA_REQ_INLINE_DATA	= (1 << 0),
};

/*
 * Per-command response context.
 *
 * send_sge/send_cqe/send_wr describe the SEND that carries the NVMe
 * completion entry (req.cqe).  read_cqe and write_cqe carry the completion
 * handlers for the RDMA READ and RDMA WRITE phases driven through rw.
 */
struct nvmet_rdma_rsp {
	struct ib_sge		send_sge;
	struct ib_cqe		send_cqe;
	struct ib_send_wr	send_wr;

	struct nvmet_rdma_cmd	*cmd;
	struct nvmet_rdma_queue	*queue;

	struct ib_cqe		read_cqe;
	struct ib_cqe		write_cqe;
	struct rdma_rw_ctx	rw;

	struct nvmet_req	req;

	/* NOTE(review): not referenced by any code visible in this chunk. */
	bool			allocated;
	/* Work requests accounted to the rw context, on top of the SEND. */
	u8			n_rdma;
	u32			flags;
	/* Non-zero: the response is sent with IB_WR_SEND_WITH_INV. */
	u32			invalidate_rkey;

	/* Links into queue->rsp_wait_list or queue->rsp_wr_wait_list. */
	struct list_head	wait_list;
	/* Index in queue->rsps[], or NVMET_RDMA_DISCRETE_RSP_TAG. */
	int			tag;
};

/* Lifecycle state of a queue; read in nvmet_rdma_recv_done(). */
enum nvmet_rdma_queue_state {
	NVMET_RDMA_Q_CONNECTING,
	NVMET_RDMA_Q_LIVE,
	NVMET_RDMA_Q_DISCONNECTING,
};

/* One NVMe-oF queue backed by an RDMA connection (cm_id / qp / cq). */
struct nvmet_rdma_queue {
	struct rdma_cm_id	*cm_id;
	struct ib_qp		*qp;
	struct nvmet_port	*port;
	struct ib_cq		*cq;
	/* Send-queue credits; see nvmet_rdma_execute_command(). */
	atomic_t		sq_wr_avail;
	struct nvmet_rdma_device *dev;
	struct nvmet_rdma_srq   *nsrq;
	spinlock_t		state_lock;
	enum nvmet_rdma_queue_state state;
struct nvmet_cq		nvme_cq;
	struct nvmet_sq		nvme_sq;

	/* Preallocated responses and the bitmap handing out their tags. */
	struct nvmet_rdma_rsp	*rsps;
	struct sbitmap		rsp_tags;
	struct nvmet_rdma_cmd	*cmds;

	struct work_struct	release_work;
	/* Responses received while the queue was still CONNECTING. */
	struct list_head	rsp_wait_list;
	/* Responses waiting for send-queue credits; see rsp_wr_wait_lock. */
	struct list_head	rsp_wr_wait_list;
	spinlock_t		rsp_wr_wait_lock;

	int			idx;
	int			host_qid;
	int			comp_vector;
	int			recv_queue_size;
	int			send_queue_size;

	struct list_head	queue_list;
};

/* RDMA listener state attached to one nvmet port. */
struct nvmet_rdma_port {
	struct nvmet_port	*nport;
	struct sockaddr_storage addr;
	struct rdma_cm_id	*cm_id;
	struct delayed_work	repair_work;
};

/* One shared receive queue and the commands posted to it. */
struct nvmet_rdma_srq {
	struct ib_srq            *srq;
	struct nvmet_rdma_cmd    *cmds;
	struct nvmet_rdma_device *ndev;
};

/*
 * Per IB device state, reference counted through ref and linked into
 * device_list via entry.
 */
struct nvmet_rdma_device {
	struct ib_device	*device;
	struct ib_pd		*pd;
	/* NULL when SRQs are not in use; see nvmet_rdma_destroy_srqs(). */
	struct nvmet_rdma_srq	**srqs;
	int			srq_count;
	size_t			srq_size;
	struct kref		ref;
	struct list_head	entry;
	/* Inline data budget in bytes and in pages (SGEs). */
	int			inline_data_size;
	int			inline_page_count;
};

static bool nvmet_rdma_use_srq;
module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
MODULE_PARM_DESC(use_srq, "Use shared receive queue.");

static int srq_size_set(const char *val, const struct kernel_param *kp);
static const struct kernel_param_ops srq_size_ops = {
	.set = srq_size_set,
	.get = param_get_int,
};

static int nvmet_rdma_srq_size = 1024;
module_param_cb(srq_size, &srq_size_ops, &nvmet_rdma_srq_size, 0644);
MODULE_PARM_DESC(srq_size, "set Shared Receive Queue (SRQ) size, should >= 256 (default: 1024)");

static DEFINE_IDA(nvmet_rdma_queue_ida);
static LIST_HEAD(nvmet_rdma_queue_list);
static DEFINE_MUTEX(nvmet_rdma_queue_mutex);

/* All known nvmet_rdma_device instances; guarded by device_list_mutex. */
static LIST_HEAD(device_list);
static DEFINE_MUTEX(device_list_mutex);

static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp);
static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_qp_event(struct ib_event *event, void
*priv);
static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
				struct nvmet_rdma_rsp *r);
static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
				struct nvmet_rdma_rsp *r,
				int tag);

static const struct nvmet_fabrics_ops nvmet_rdma_ops;

/*
 * Setter for the srq_size module parameter.
 *
 * Parses @val as a base-10 int and returns -EINVAL if parsing fails or the
 * value is below 256; otherwise the value is stored through param_set_int().
 */
static int srq_size_set(const char *val, const struct kernel_param *kp)
{
	int n = 0, ret;

	ret = kstrtoint(val, 10, &n);
	if (ret != 0 || n < 256)
		return -EINVAL;

	return param_set_int(val, kp);
}

/*
 * For @len >= 1, the number of PAGE_SIZE pages needed to hold @len bytes
 * (one plus the page index of the last byte).
 */
static int num_pages(int len)
{
	return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT);
}

/*
 * True when the command is a write with a non-zero transfer length whose
 * data did not arrive inline, i.e. it still has to be fetched.
 */
static inline bool nvmet_rdma_need_data_in(struct nvmet_rdma_rsp *rsp)
{
	return nvme_is_write(rsp->req.cmd) &&
		rsp->req.transfer_len &&
		!(rsp->flags & NVMET_RDMA_REQ_INLINE_DATA);
}

/*
 * True when the command is not a write, has a non-zero transfer length,
 * completed with a zero status and did not use inline data.
 */
static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
{
	return !nvme_is_write(rsp->req.cmd) &&
		rsp->req.transfer_len &&
		!rsp->req.cqe->status &&
		!(rsp->flags & NVMET_RDMA_REQ_INLINE_DATA);
}

/*
 * Get a response context for @queue.
 *
 * Prefers a free slot of the preallocated queue->rsps[] array, selected
 * through the rsp_tags bitmap.  When no tag is available a response is
 * allocated and initialised on the spot and tagged
 * NVMET_RDMA_DISCRETE_RSP_TAG so that nvmet_rdma_put_rsp() frees it.
 *
 * Returns NULL if the fallback allocation or its setup fails.
 */
static inline struct nvmet_rdma_rsp *
nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
{
	struct nvmet_rdma_rsp *rsp = NULL;
	int tag;

	tag = sbitmap_get(&queue->rsp_tags);
	if (tag >= 0)
		rsp = &queue->rsps[tag];

	if (unlikely(!rsp)) {
		int ret;

		rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
		if (unlikely(!rsp))
			return NULL;
		ret = nvmet_rdma_alloc_rsp(queue->dev, rsp,
				NVMET_RDMA_DISCRETE_RSP_TAG);
		if (unlikely(ret)) {
			kfree(rsp);
			return NULL;
		}
	}

	return rsp;
}

/*
 * Return a response obtained from nvmet_rdma_get_rsp(): discrete responses
 * are torn down and freed, array responses just release their tag.
 */
static inline void
nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
{
	if (unlikely(rsp->tag == NVMET_RDMA_DISCRETE_RSP_TAG)) {
		nvmet_rdma_free_rsp(rsp->queue->dev, rsp);
		kfree(rsp);
		return;
	}

	sbitmap_clear_bit(&rsp->queue->rsp_tags, rsp->tag);
}

/*
 * Unmap and free the inline data pages of @c.  Does nothing when the device
 * has no inline data budget.  Slots with a zero SGE length are not unmapped
 * and slots without a page are not freed.
 */
static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev,
				struct nvmet_rdma_cmd *c)
{
	struct scatterlist *sg;
	struct ib_sge *sge;
	int i;

	if (!ndev->inline_data_size)
		return;

	sg = c->inline_sg;
	sge = &c->sge[1];

	for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) {
		if (sge->length)
ib_dma_unmap_page(ndev->device, sge->addr,
					sge->length, DMA_FROM_DEVICE);
		if (sg_page(sg))
			__free_page(sg_page(sg));
	}
}

/*
 * Allocate and DMA-map the inline data pages of @c.
 *
 * Page i is stored in c->inline_sg[i] and described by c->sge[i + 1]
 * (sge[0] is reserved for the NVMe command).  SGE lengths are capped at
 * PAGE_SIZE and consume the device's inline_data_size budget in order.
 *
 * Returns 0 on success (also when inline data is disabled) or -ENOMEM after
 * releasing whatever had been set up.
 *
 * NOTE(review): pages are mapped with PAGE_SIZE but unmapped with
 * sge->length; these differ if inline_data_size is not a multiple of
 * PAGE_SIZE - confirm this is intended.
 */
static int nvmet_rdma_alloc_inline_pages(struct nvmet_rdma_device *ndev,
				struct nvmet_rdma_cmd *c)
{
	struct scatterlist *sg;
	struct ib_sge *sge;
	struct page *pg;
	int len;
	int i;

	if (!ndev->inline_data_size)
		return 0;

	sg = c->inline_sg;
	sg_init_table(sg, ndev->inline_page_count);
	sge = &c->sge[1];
	len = ndev->inline_data_size;

	for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) {
		pg = alloc_page(GFP_KERNEL);
		if (!pg)
			goto out_err;
		sg_assign_page(sg, pg);
		sge->addr = ib_dma_map_page(ndev->device,
			pg, 0, PAGE_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(ndev->device, sge->addr))
			goto out_err;
		sge->length = min_t(int, len, PAGE_SIZE);
		sge->lkey = ndev->pd->local_dma_lkey;
		len -= sge->length;
	}

	return 0;
out_err:
	/*
	 * Walk back from the slot that failed.  sge->length is only set after
	 * a successful mapping, so it doubles as the "is mapped" marker.
	 */
	for (; i >= 0; i--, sg--, sge--) {
		if (sge->length)
			ib_dma_unmap_page(ndev->device, sge->addr,
					sge->length, DMA_FROM_DEVICE);
		if (sg_page(sg))
			__free_page(sg_page(sg));
	}
	return -ENOMEM;
}

/*
 * Set up one receive command: allocate and map the NVMe command buffer into
 * sge[0], allocate inline pages unless @admin is set, and fill in the RECV
 * work request.  Returns 0 or -ENOMEM.
 */
static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
			struct nvmet_rdma_cmd *c, bool admin)
{
	/* NVMe command / RDMA RECV */
	c->nvme_cmd = kmalloc(sizeof(*c->nvme_cmd), GFP_KERNEL);
	if (!c->nvme_cmd)
		goto out;

	c->sge[0].addr = ib_dma_map_single(ndev->device, c->nvme_cmd,
			sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(ndev->device, c->sge[0].addr))
		goto out_free_cmd;

	c->sge[0].length = sizeof(*c->nvme_cmd);
	c->sge[0].lkey = ndev->pd->local_dma_lkey;

	if (!admin && nvmet_rdma_alloc_inline_pages(ndev, c))
		goto out_unmap_cmd;

	c->cqe.done = nvmet_rdma_recv_done;

	c->wr.wr_cqe = &c->cqe;
	c->wr.sg_list = c->sge;
	c->wr.num_sge = admin ?
1 : ndev->inline_page_count + 1; return 0; out_unmap_cmd: ib_dma_unmap_single(ndev->device, c->sge[0].addr, sizeof(*c->nvme_cmd), DMA_FROM_DEVICE); out_free_cmd: kfree(c->nvme_cmd); out: return -ENOMEM; } static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev, struct nvmet_rdma_cmd *c, bool admin) { if (!admin) nvmet_rdma_free_inline_pages(ndev, c); ib_dma_unmap_single(ndev->device, c->sge[0].addr, sizeof(*c->nvme_cmd), DMA_FROM_DEVICE); kfree(c->nvme_cmd); } static struct nvmet_rdma_cmd * nvmet_rdma_alloc_cmds(struct nvmet_rdma_device *ndev, int nr_cmds, bool admin) { struct nvmet_rdma_cmd *cmds; int ret = -EINVAL, i; cmds = kcalloc(nr_cmds, sizeof(struct nvmet_rdma_cmd), GFP_KERNEL); if (!cmds) goto out; for (i = 0; i < nr_cmds; i++) { ret = nvmet_rdma_alloc_cmd(ndev, cmds + i, admin); if (ret) goto out_free; } return cmds; out_free: while (--i >= 0) nvmet_rdma_free_cmd(ndev, cmds + i, admin); kfree(cmds); out: return ERR_PTR(ret); } static void nvmet_rdma_free_cmds(struct nvmet_rdma_device *ndev, struct nvmet_rdma_cmd *cmds, int nr_cmds, bool admin) { int i; for (i = 0; i < nr_cmds; i++) nvmet_rdma_free_cmd(ndev, cmds + i, admin); kfree(cmds); } static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, struct nvmet_rdma_rsp *r, int tag) { /* NVMe CQE / RDMA SEND */ r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL); if (!r->req.cqe) goto out; r->send_sge.addr = ib_dma_map_single(ndev->device, r->req.cqe, sizeof(*r->req.cqe), DMA_TO_DEVICE); if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) goto out_free_rsp; if (ib_dma_pci_p2p_dma_supported(ndev->device)) r->req.p2p_client = &ndev->device->dev; r->send_sge.length = sizeof(*r->req.cqe); r->send_sge.lkey = ndev->pd->local_dma_lkey; r->send_cqe.done = nvmet_rdma_send_done; r->send_wr.wr_cqe = &r->send_cqe; r->send_wr.sg_list = &r->send_sge; r->send_wr.num_sge = 1; r->send_wr.send_flags = IB_SEND_SIGNALED; /* Data In / RDMA READ */ r->read_cqe.done = nvmet_rdma_read_data_done; /* Data Out / 
RDMA WRITE */
	r->write_cqe.done = nvmet_rdma_write_data_done;
	r->tag = tag;

	return 0;

out_free_rsp:
	kfree(r->req.cqe);
out:
	return -ENOMEM;
}

/* Undo nvmet_rdma_alloc_rsp(): unmap and free the completion entry. */
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_rsp *r)
{
	ib_dma_unmap_single(ndev->device, r->send_sge.addr,
				sizeof(*r->req.cqe), DMA_TO_DEVICE);
	kfree(r->req.cqe);
}

/*
 * Allocate the per-queue response pool: a tag bitmap plus an array of
 * 2 * recv_queue_size responses, each tagged with its array index.
 * Returns 0 or a negative errno after undoing partial work.
 */
static int
nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
{
	struct nvmet_rdma_device *ndev = queue->dev;
	int nr_rsps = queue->recv_queue_size * 2;
	int ret = -ENOMEM, i;

	if (sbitmap_init_node(&queue->rsp_tags, nr_rsps, -1, GFP_KERNEL,
			NUMA_NO_NODE, false, true))
		goto out;

	queue->rsps = kcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp),
			GFP_KERNEL);
	if (!queue->rsps)
		goto out_free_sbitmap;

	for (i = 0; i < nr_rsps; i++) {
		struct nvmet_rdma_rsp *rsp = &queue->rsps[i];

		ret = nvmet_rdma_alloc_rsp(ndev, rsp, i);
		if (ret)
			goto out_free;
	}

	return 0;

out_free:
	/* rsps[i] failed and cleaned up after itself; undo 0..i-1. */
	while (--i >= 0)
		nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
	kfree(queue->rsps);
out_free_sbitmap:
	sbitmap_free(&queue->rsp_tags);
out:
	return ret;
}

/* Release everything set up by nvmet_rdma_alloc_rsps(). */
static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
{
	struct nvmet_rdma_device *ndev = queue->dev;
	int i, nr_rsps = queue->recv_queue_size * 2;

	for (i = 0; i < nr_rsps; i++)
		nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
	kfree(queue->rsps);
	sbitmap_free(&queue->rsp_tags);
}

/*
 * (Re)post the RECV of @cmd, on its SRQ when cmd->nsrq is set and on the
 * queue pair of cmd->queue otherwise.  The command buffer is synced for the
 * device first.  Returns the posting result; failures are also logged.
 */
static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_cmd *cmd)
{
	int ret;

	ib_dma_sync_single_for_device(ndev->device,
		cmd->sge[0].addr, cmd->sge[0].length,
		DMA_FROM_DEVICE);

	if (cmd->nsrq)
		ret = ib_post_srq_recv(cmd->nsrq->srq, &cmd->wr, NULL);
	else
		ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);

	if (unlikely(ret))
		pr_err("post_recv cmd failed\n");

	return ret;
}

/*
 * Retry the commands parked on queue->rsp_wr_wait_list, oldest first.  The
 * list lock is dropped while a command executes.  Processing stops at the
 * first command that still does not fit; it is put back at the list head.
 */
static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
{
	spin_lock(&queue->rsp_wr_wait_lock);
	while (!list_empty(&queue->rsp_wr_wait_list)) {
		struct nvmet_rdma_rsp *rsp;
		bool ret;

		rsp = list_entry(queue->rsp_wr_wait_list.next,
				struct
nvmet_rdma_rsp, wait_list);
		list_del(&rsp->wait_list);

		spin_unlock(&queue->rsp_wr_wait_lock);
		ret = nvmet_rdma_execute_command(rsp);
		spin_lock(&queue->rsp_wr_wait_lock);

		if (!ret) {
			/* Still no room: keep it first in line and stop. */
			list_add(&rsp->wait_list, &queue->rsp_wr_wait_list);
			break;
		}
	}
	spin_unlock(&queue->rsp_wr_wait_lock);
}

/*
 * Query the signature status of @sig_mr and translate it to an NVMe status.
 *
 * Returns 0 when no signature error is flagged, NVME_SC_INVALID_PI when the
 * status cannot be read, and the guard/reftag/apptag check status for the
 * matching error type.  An error type outside those three cases is logged
 * but leaves the returned status at 0.
 */
static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr)
{
	struct ib_mr_status mr_status;
	int ret;
	u16 status = 0;

	ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
	if (ret) {
		pr_err("ib_check_mr_status failed, ret %d\n", ret);
		return NVME_SC_INVALID_PI;
	}

	if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
		switch (mr_status.sig_err.err_type) {
		case IB_SIG_BAD_GUARD:
			status = NVME_SC_GUARD_CHECK;
			break;
		case IB_SIG_BAD_REFTAG:
			status = NVME_SC_REFTAG_CHECK;
			break;
		case IB_SIG_BAD_APPTAG:
			status = NVME_SC_APPTAG_CHECK;
			break;
		}
		pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
		       mr_status.sig_err.err_type,
		       mr_status.sig_err.expected,
		       mr_status.sig_err.actual);
	}

	return status;
}

/*
 * Fill one T10-DIF signature domain from the read/write command @cmd:
 * CRC guard, protection interval from the block integrity profile, and the
 * reference/application tag values carried in the command.  Reference tag
 * remapping follows the PRCHK_REF bit of @control; the reference tag escape
 * is enabled for PI type 3.
 */
static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi,
		struct nvme_command *cmd, struct ib_sig_domain *domain,
		u16 control, u8 pi_type)
{
	domain->sig_type = IB_SIG_TYPE_T10_DIF;
	domain->sig.dif.bg_type = IB_T10DIF_CRC;
	domain->sig.dif.pi_interval = 1 << bi->interval_exp;
	domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
	if (control & NVME_RW_PRINFO_PRCHK_REF)
		domain->sig.dif.ref_remap = true;

	domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.lbat);
	domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.lbatm);
	domain->sig.dif.app_escape = true;
	if (pi_type == NVME_NS_DPS_PI_TYPE3)
		domain->sig.dif.ref_escape = true;
}

/*
 * Build the signature attributes for @req from the control field of its
 * read/write command.  @sig_attrs is zeroed first.  When PRACT is set the
 * function also rewrites the command's control field and grows
 * req->transfer_len by the metadata length.
 */
static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req,
				     struct ib_sig_attrs *sig_attrs)
{
	struct nvme_command *cmd = req->cmd;
	u16 control = le16_to_cpu(cmd->rw.control);
	u8 pi_type = req->ns->pi_type;
	struct blk_integrity *bi;

	bi = bdev_get_integrity(req->ns->bdev);

	memset(sig_attrs, 0, sizeof(*sig_attrs));

	if (control & NVME_RW_PRINFO_PRACT) {
/* for WRITE_INSERT/READ_STRIP no wire domain */
		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
		nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
					  pi_type);
		/* Clear the PRACT bit since HCA will generate/verify the PI */
		control &= ~NVME_RW_PRINFO_PRACT;
		cmd->rw.control = cpu_to_le16(control);
		/* PI is added by the HW */
		req->transfer_len += req->metadata_len;
	} else {
		/* for WRITE_PASS/READ_PASS both wire/memory domains exist */
		nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
					  pi_type);
		nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
					  pi_type);
	}

	/* Translate the requested PRCHK bits into signature check flags. */
	if (control & NVME_RW_PRINFO_PRCHK_REF)
		sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG;
	if (control & NVME_RW_PRINFO_PRCHK_GUARD)
		sig_attrs->check_mask |= IB_SIG_CHECK_GUARD;
	if (control & NVME_RW_PRINFO_PRCHK_APP)
		sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG;
}

/*
 * Initialise rsp->rw for a transfer between the request's scatterlist and
 * the remote buffer (@addr, @key).  The signature variant is used when the
 * request carries metadata; @sig_attrs is only consulted in that case.
 *
 * Returns the value of the underlying init call; the caller treats a
 * negative value as failure and adds any other value to rsp->n_rdma.
 */
static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key,
				  struct ib_sig_attrs *sig_attrs)
{
	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
	struct nvmet_req *req = &rsp->req;
	int ret;

	if (req->metadata_len)
		ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp,
			cm_id->port_num, req->sg, req->sg_cnt,
			req->metadata_sg, req->metadata_sg_cnt, sig_attrs,
			addr, key, nvmet_data_dir(req));
	else
		ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
				       req->sg, req->sg_cnt, 0, addr, key,
				       nvmet_data_dir(req));

	return ret;
}

/*
 * Tear down rsp->rw with the destroy call matching the init variant chosen
 * by nvmet_rdma_rw_ctx_init() (signature variant iff metadata is present).
 */
static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp)
{
	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
	struct nvmet_req *req = &rsp->req;

	if (req->metadata_len)
		rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp,
			cm_id->port_num, req->sg, req->sg_cnt,
			req->metadata_sg, req->metadata_sg_cnt,
			nvmet_data_dir(req));
	else
		rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num,
				    req->sg, req->sg_cnt, nvmet_data_dir(req));
}

/*
 * Finish with a response: give its send-queue credits back (one for the
 * SEND plus n_rdma), destroy a still-active rw context, free the request's
 * scatterlists unless they are the command's inline ones, let waiting
 * commands retry, and finally return the response to its pool.
 */
static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
{
	struct nvmet_rdma_queue *queue = rsp->queue;

	atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);

	if
(rsp->n_rdma)
		nvmet_rdma_rw_ctx_destroy(rsp);

	/* inline_sg belongs to the receive command and is not freed here. */
	if (rsp->req.sg != rsp->cmd->inline_sg)
		nvmet_req_free_sgls(&rsp->req);

	/* Credits were just returned, so parked commands may fit now. */
	if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
		nvmet_rdma_process_wr_wait_list(queue);

	nvmet_rdma_put_rsp(rsp);
}

/*
 * React to a failed completion: raise a controller fatal error when the
 * queue already has a controller, otherwise just disconnect the queue.
 */
static void nvmet_rdma_error_comp(struct nvmet_rdma_queue *queue)
{
	if (queue->nvme_sq.ctrl) {
		nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl);
	} else {
		/*
		 * we didn't setup the controller yet in case
		 * of admin connect error, just disconnect and
		 * cleanup the queue
		 */
		nvmet_rdma_queue_disconnect(queue);
	}
}

/*
 * Completion handler for the response SEND.  The response is released in
 * every case; statuses other than success and flush are logged and escalated
 * through nvmet_rdma_error_comp().
 */
static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct nvmet_rdma_rsp *rsp =
		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
	struct nvmet_rdma_queue *queue = wc->qp->qp_context;

	nvmet_rdma_release_rsp(rsp);

	if (unlikely(wc->status != IB_WC_SUCCESS &&
		     wc->status != IB_WC_WR_FLUSH_ERR)) {
		pr_err("SEND for CQE 0x%p failed with status %s (%d).\n",
			wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
		nvmet_rdma_error_comp(queue);
	}
}

/*
 * Send the NVMe completion for @req.
 *
 * The SEND invalidates the remote key when one was recorded.  If data still
 * has to go out, the rw context's work requests are posted first: without
 * metadata the SEND is chained behind them, with metadata only the RDMA
 * WRITE is posted here (completing on write_cqe).  The receive command is
 * reposted before the send is issued.  A failed post releases the response.
 */
static void nvmet_rdma_queue_response(struct nvmet_req *req)
{
	struct nvmet_rdma_rsp *rsp =
		container_of(req, struct nvmet_rdma_rsp, req);
	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
	struct ib_send_wr *first_wr;

	if (rsp->invalidate_rkey) {
		rsp->send_wr.opcode = IB_WR_SEND_WITH_INV;
		rsp->send_wr.ex.invalidate_rkey = rsp->invalidate_rkey;
	} else {
		rsp->send_wr.opcode = IB_WR_SEND;
	}

	if (nvmet_rdma_need_data_out(rsp)) {
		if (rsp->req.metadata_len)
			first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
					cm_id->port_num, &rsp->write_cqe, NULL);
		else
			first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
					cm_id->port_num, NULL, &rsp->send_wr);
	} else {
		first_wr = &rsp->send_wr;
	}

	nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);

	ib_dma_sync_single_for_device(rsp->queue->dev->device,
		rsp->send_sge.addr, rsp->send_sge.length,
		DMA_TO_DEVICE);

	if (unlikely(ib_post_send(cm_id->qp, first_wr, NULL))) {
		pr_err("sending cmd response failed\n");
		nvmet_rdma_release_rsp(rsp);
	}
}

/*
 * Completion handler for the RDMA READ that fetches write data: returns the
 * rw credits, and on success executes the request (or completes it with a
 * PI error when metadata verification fails).
 */
static void
nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct nvmet_rdma_rsp *rsp =
		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe);
	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
	u16 status = 0;

	/*
	 * The READ work requests are done: hand their credits back now and
	 * zero n_rdma so nvmet_rdma_release_rsp() only accounts for the SEND.
	 */
	WARN_ON(rsp->n_rdma <= 0);
	atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
	rsp->n_rdma = 0;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		/* n_rdma is 0 now, so the rw context is destroyed here. */
		nvmet_rdma_rw_ctx_destroy(rsp);
		nvmet_req_uninit(&rsp->req);
		nvmet_rdma_release_rsp(rsp);
		if (wc->status != IB_WC_WR_FLUSH_ERR) {
			pr_info("RDMA READ for CQE 0x%p failed with status %s (%d).\n",
				wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
			nvmet_rdma_error_comp(queue);
		}
		return;
	}

	/* The signature MR must be checked before the context goes away. */
	if (rsp->req.metadata_len)
		status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
	nvmet_rdma_rw_ctx_destroy(rsp);

	if (unlikely(status))
		nvmet_req_complete(&rsp->req, status);
	else
		rsp->req.execute(&rsp->req);
}

/*
 * Completion handler for an RDMA WRITE posted with write_cqe, which
 * nvmet_rdma_queue_response() only does for requests carrying metadata.
 * Returns the rw credits, folds a signature error into the completion
 * status and then posts the response SEND.  Does nothing when block
 * integrity support is compiled out.
 */
static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct nvmet_rdma_rsp *rsp =
		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
	u16 status;

	if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
		return;

	WARN_ON(rsp->n_rdma <= 0);
	atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
	rsp->n_rdma = 0;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		nvmet_rdma_rw_ctx_destroy(rsp);
		nvmet_req_uninit(&rsp->req);
		nvmet_rdma_release_rsp(rsp);
		if (wc->status != IB_WC_WR_FLUSH_ERR) {
			pr_info("RDMA WRITE for CQE failed with status %s (%d).\n",
				ib_wc_status_msg(wc->status), wc->status);
			nvmet_rdma_error_comp(queue);
		}
		return;
	}

	/*
	 * Upon RDMA completion check the signature status
	 * - if succeeded send good NVMe response
	 * - if failed send bad NVMe response with appropriate error
	 */
	status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
	if (unlikely(status))
		rsp->req.cqe->status = cpu_to_le16(status << 1);
	nvmet_rdma_rw_ctx_destroy(rsp);

	if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) {
		pr_err("sending cmd response
failed\n"); nvmet_rdma_release_rsp(rsp); } } static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len, u64 off) { int sg_count = num_pages(len); struct scatterlist *sg; int i; sg = rsp->cmd->inline_sg; for (i = 0; i < sg_count; i++, sg++) { if (i < sg_count - 1) sg_unmark_end(sg); else sg_mark_end(sg); sg->offset = off; sg->length = min_t(int, len, PAGE_SIZE - off); len -= sg->length; if (!i) off = 0; } rsp->req.sg = rsp->cmd->inline_sg; rsp->req.sg_cnt = sg_count; } static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp) { struct nvme_sgl_desc *sgl = &rsp->req.cmd->common.dptr.sgl; u64 off = le64_to_cpu(sgl->addr); u32 len = le32_to_cpu(sgl->length); if (!nvme_is_write(rsp->req.cmd)) { rsp->req.error_loc = offsetof(struct nvme_common_command, opcode); return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; } if (off + len > rsp->queue->dev->inline_data_size) { pr_err("invalid inline data offset!\n"); return NVME_SC_SGL_INVALID_OFFSET | NVME_STATUS_DNR; } /* no data command? */ if (!len) return 0; nvmet_rdma_use_inline_sg(rsp, len, off); rsp->flags |= NVMET_RDMA_REQ_INLINE_DATA; rsp->req.transfer_len += len; return 0; } static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, struct nvme_keyed_sgl_desc *sgl, bool invalidate) { u64 addr = le64_to_cpu(sgl->addr); u32 key = get_unaligned_le32(sgl->key); struct ib_sig_attrs sig_attrs; int ret; rsp->req.transfer_len = get_unaligned_le24(sgl->length); /* no data command? 
*/
	if (!rsp->req.transfer_len)
		return 0;

	/* sig_attrs is only filled in, and only consumed, with metadata. */
	if (rsp->req.metadata_len)
		nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs);

	ret = nvmet_req_alloc_sgls(&rsp->req);
	if (unlikely(ret < 0))
		goto error_out;

	ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs);
	if (unlikely(ret < 0))
		goto error_out;
	/* A non-negative result is added to the send-queue credit demand. */
	rsp->n_rdma += ret;

	if (invalidate)
		rsp->invalidate_rkey = key;

	return 0;

error_out:
	rsp->req.transfer_len = 0;
	return NVME_SC_INTERNAL;
}

/*
 * Dispatch on the SGL descriptor type of the command.
 *
 * The high nibble of sgl->type selects the descriptor format and the low
 * nibble the sub type: a data descriptor with offset sub type is inline
 * data; a keyed data descriptor with address sub type (optionally combined
 * with the invalidate flag) is a remote buffer.  Anything else is rejected
 * with error_loc pointing at the data pointer field.
 *
 * Returns 0 or an NVMe status code.
 */
static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
{
	struct nvme_keyed_sgl_desc *sgl = &rsp->req.cmd->common.dptr.ksgl;

	switch (sgl->type >> 4) {
	case NVME_SGL_FMT_DATA_DESC:
		switch (sgl->type & 0xf) {
		case NVME_SGL_FMT_OFFSET:
			return nvmet_rdma_map_sgl_inline(rsp);
		default:
			pr_err("invalid SGL subtype: %#x\n", sgl->type);
			rsp->req.error_loc =
				offsetof(struct nvme_common_command, dptr);
			return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
		}
	case NVME_KEY_SGL_FMT_DATA_DESC:
		switch (sgl->type & 0xf) {
		case NVME_SGL_FMT_ADDRESS | NVME_SGL_FMT_INVALIDATE:
			return nvmet_rdma_map_sgl_keyed(rsp, sgl, true);
		case NVME_SGL_FMT_ADDRESS:
			return nvmet_rdma_map_sgl_keyed(rsp, sgl, false);
		default:
			pr_err("invalid SGL subtype: %#x\n", sgl->type);
			rsp->req.error_loc =
				offsetof(struct nvme_common_command, dptr);
			return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
		}
	default:
		pr_err("invalid SGL type: %#x\n", sgl->type);
		rsp->req.error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_TYPE | NVME_STATUS_DNR;
	}
}

/*
 * Try to start a mapped command.
 *
 * Reserves 1 + n_rdma send-queue credits.  If that would drive the credit
 * counter negative the reservation is undone and false is returned so the
 * caller can park the command.  Otherwise the command either starts its
 * RDMA READ (data still has to come in) or is executed directly, and true
 * is returned.
 *
 * NOTE(review): the debug message dereferences queue->nvme_sq.ctrl, which
 * nvmet_rdma_error_comp() treats as possibly NULL - confirm it cannot be
 * NULL on this path.
 */
static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
{
	struct nvmet_rdma_queue *queue = rsp->queue;

	if (unlikely(atomic_sub_return(1 + rsp->n_rdma,
			&queue->sq_wr_avail) < 0)) {
		pr_debug("IB send queue full (needed %d): queue %u cntlid %u\n",
				1 + rsp->n_rdma, queue->idx,
				queue->nvme_sq.ctrl->cntlid);
		atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
		return false;
	}

	if (nvmet_rdma_need_data_in(rsp)) {
		if (rdma_rw_ctx_post(&rsp->rw, queue->qp,
				queue->cm_id->port_num, &rsp->read_cqe, NULL))
nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
	} else {
		rsp->req.execute(&rsp->req);
	}

	return true;
}

/*
 * Process one received command on a live queue: sync the command and
 * completion buffers for CPU access, initialise the request, map its SGL
 * and start it.  A command that cannot get send-queue credits is appended
 * to queue->rsp_wr_wait_list; a mapping error completes the request with
 * that status.
 */
static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
		struct nvmet_rdma_rsp *cmd)
{
	u16 status;

	ib_dma_sync_single_for_cpu(queue->dev->device,
		cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length,
		DMA_FROM_DEVICE);
	ib_dma_sync_single_for_cpu(queue->dev->device,
		cmd->send_sge.addr, cmd->send_sge.length,
		DMA_TO_DEVICE);

	/* NOTE(review): presumably nvmet_req_init() completes the request itself on failure - confirm. */
	if (!nvmet_req_init(&cmd->req, &queue->nvme_cq,
			&queue->nvme_sq, &nvmet_rdma_ops))
		return;

	status = nvmet_rdma_map_sgl(cmd);
	if (status)
		goto out_err;

	if (unlikely(!nvmet_rdma_execute_command(cmd))) {
		spin_lock(&queue->rsp_wr_wait_lock);
		list_add_tail(&cmd->wait_list, &queue->rsp_wr_wait_list);
		spin_unlock(&queue->rsp_wr_wait_lock);
	}

	return;

out_err:
	nvmet_req_complete(&cmd->req, status);
}

/*
 * Completion handler for a RECV.
 *
 * Failed completions (other than flushes) and capsules shorter than an NVMe
 * command escalate through nvmet_rdma_error_comp().  Otherwise a response
 * context is attached to the command.  On a live queue the command is
 * handled right away; while the queue is still connecting the response is
 * queued on rsp_wait_list, and in any other state it is dropped.
 */
static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct nvmet_rdma_cmd *cmd =
		container_of(wc->wr_cqe, struct nvmet_rdma_cmd, cqe);
	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
	struct nvmet_rdma_rsp *rsp;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		if (wc->status != IB_WC_WR_FLUSH_ERR) {
			pr_err("RECV for CQE 0x%p failed with status %s (%d)\n",
				wc->wr_cqe, ib_wc_status_msg(wc->status),
				wc->status);
			nvmet_rdma_error_comp(queue);
		}
		return;
	}

	if (unlikely(wc->byte_len < sizeof(struct nvme_command))) {
		pr_err("Ctrl Fatal Error: capsule size less than 64 bytes\n");
		nvmet_rdma_error_comp(queue);
		return;
	}

	cmd->queue = queue;
	rsp = nvmet_rdma_get_rsp(queue);
	if (unlikely(!rsp)) {
		/*
		 * we get here only under memory pressure,
		 * silently drop and have the host retry
		 * as we can't even fail it.
*/
		nvmet_rdma_post_recv(queue->dev, cmd);
		return;
	}
	/* Reset the per-command fields of the (possibly reused) response. */
	rsp->queue = queue;
	rsp->cmd = cmd;
	rsp->flags = 0;
	rsp->req.cmd = cmd->nvme_cmd;
	rsp->req.port = queue->port;
	rsp->n_rdma = 0;
	rsp->invalidate_rkey = 0;

	if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
		unsigned long flags;

		/* Re-check the state under the lock before deciding. */
		spin_lock_irqsave(&queue->state_lock, flags);
		if (queue->state == NVMET_RDMA_Q_CONNECTING)
			list_add_tail(&rsp->wait_list, &queue->rsp_wait_list);
		else
			nvmet_rdma_put_rsp(rsp);
		spin_unlock_irqrestore(&queue->state_lock, flags);
		return;
	}

	nvmet_rdma_handle_command(queue, rsp);
}

/* Free the commands of @nsrq, destroy its SRQ and free @nsrq itself. */
static void nvmet_rdma_destroy_srq(struct nvmet_rdma_srq *nsrq)
{
	nvmet_rdma_free_cmds(nsrq->ndev, nsrq->cmds, nsrq->ndev->srq_size,
			     false);
	ib_destroy_srq(nsrq->srq);

	kfree(nsrq);
}

/* Destroy all SRQs of @ndev; a no-op when none were set up. */
static void nvmet_rdma_destroy_srqs(struct nvmet_rdma_device *ndev)
{
	int i;

	if (!ndev->srqs)
		return;

	for (i = 0; i < ndev->srq_count; i++)
		nvmet_rdma_destroy_srq(ndev->srqs[i]);

	kfree(ndev->srqs);
}

/*
 * Create one shared receive queue for @ndev with srq_size work requests of
 * up to 1 + inline_page_count SGEs, allocate its commands and post all of
 * them.  Returns the new SRQ wrapper or an ERR_PTR().
 */
static struct nvmet_rdma_srq *
nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
{
	struct ib_srq_init_attr srq_attr = { NULL, };
	size_t srq_size = ndev->srq_size;
	struct nvmet_rdma_srq *nsrq;
	struct ib_srq *srq;
	int ret, i;

	nsrq = kzalloc(sizeof(*nsrq), GFP_KERNEL);
	if (!nsrq)
		return ERR_PTR(-ENOMEM);

	srq_attr.attr.max_wr = srq_size;
	srq_attr.attr.max_sge = 1 + ndev->inline_page_count;
	srq_attr.attr.srq_limit = 0;
	srq_attr.srq_type = IB_SRQT_BASIC;
	srq = ib_create_srq(ndev->pd, &srq_attr);
	if (IS_ERR(srq)) {
		ret = PTR_ERR(srq);
		goto out_free;
	}

	nsrq->cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false);
	if (IS_ERR(nsrq->cmds)) {
		ret = PTR_ERR(nsrq->cmds);
		goto out_destroy_srq;
	}

	nsrq->srq = srq;
	nsrq->ndev = ndev;

	for (i = 0; i < srq_size; i++) {
		/* Setting nsrq makes nvmet_rdma_post_recv() use the SRQ. */
		nsrq->cmds[i].nsrq = nsrq;
		ret = nvmet_rdma_post_recv(ndev, &nsrq->cmds[i]);
		if (ret)
			goto out_free_cmds;
	}

	return nsrq;

out_free_cmds:
	nvmet_rdma_free_cmds(ndev, nsrq->cmds, srq_size, false);
out_destroy_srq:
	ib_destroy_srq(srq);
out_free:
	kfree(nsrq);
	return ERR_PTR(ret);
}

/*
 * Set up the shared receive queues of a device.  Returns 0 without creating
 * any when the device reports no SRQ support, 0 on success, or a negative
 * errno after destroying the SRQs created so far.
 */
static int
nvmet_rdma_init_srqs(struct nvmet_rdma_device *ndev) { int i, ret; if (!ndev->device->attrs.max_srq_wr || !ndev->device->attrs.max_srq) { /* * If SRQs aren't supported we just go ahead and use normal * non-shared receive queues. */ pr_info("SRQ requested but not supported.\n"); return 0; } ndev->srq_size = min(ndev->device->attrs.max_srq_wr, nvmet_rdma_srq_size); ndev->srq_count = min(ndev->device->num_comp_vectors, ndev->device->attrs.max_srq); ndev->srqs = kcalloc(ndev->srq_count, sizeof(*ndev->srqs), GFP_KERNEL); if (!ndev->srqs) return -ENOMEM; for (i = 0; i < ndev->srq_count; i++) { ndev->srqs[i] = nvmet_rdma_init_srq(ndev); if (IS_ERR(ndev->srqs[i])) { ret = PTR_ERR(ndev->srqs[i]); goto err_srq; } } return 0; err_srq: while (--i >= 0) nvmet_rdma_destroy_srq(ndev->srqs[i]); kfree(ndev->srqs); return ret; } static void nvmet_rdma_free_dev(struct kref *ref) { struct nvmet_rdma_device *ndev = container_of(ref, struct nvmet_rdma_device, ref); mutex_lock(&device_list_mutex); list_del(&ndev->entry); mutex_unlock(&device_list_mutex); nvmet_rdma_destroy_srqs(ndev); ib_dealloc_pd(ndev->pd); kfree(ndev); } static struct nvmet_rdma_device * nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) { struct nvmet_rdma_port *port = cm_id->context; struct nvmet_port *nport = port->nport; struct nvmet_rdma_device *ndev; int inline_page_count; int inline_sge_count; int ret; mutex_lock(&device_list_mutex); list_for_each_entry(ndev, &device_list, entry) { if (ndev->device->node_guid == cm_id->device->node_guid && kref_get_unless_zero(&ndev->ref)) goto out_unlock; } ndev = kzalloc(sizeof(*ndev), GFP_KERNEL); if (!ndev) goto out_err; inline_page_count = num_pages(nport->inline_data_size); inline_sge_count = max(cm_id->device->attrs.max_sge_rd, cm_id->device->attrs.max_recv_sge) - 1; if (inline_page_count > inline_sge_count) { pr_warn("inline_data_size %d cannot be supported by device %s. 
Reducing to %lu.\n", nport->inline_data_size, cm_id->device->name, inline_sge_count * PAGE_SIZE); nport->inline_data_size = inline_sge_count * PAGE_SIZE; inline_page_count = inline_sge_count; } ndev->inline_data_size = nport->inline_data_size; ndev->inline_page_count = inline_page_count; if (nport->pi_enable && !(cm_id->device->attrs.kernel_cap_flags & IBK_INTEGRITY_HANDOVER)) { pr_warn("T10-PI is not supported by device %s. Disabling it\n", cm_id->device->name); nport->pi_enable = false; } ndev->device = cm_id->device; kref_init(&ndev->ref); ndev->pd = ib_alloc_pd(ndev->device, 0); if (IS_ERR(ndev->pd)) goto out_free_dev; if (nvmet_rdma_use_srq) { ret = nvmet_rdma_init_srqs(ndev); if (ret) goto out_free_pd; } list_add(&ndev->entry, &device_list); out_unlock: mutex_unlock(&device_list_mutex); pr_debug("added %s.\n", ndev->device->name); return ndev; out_free_pd: ib_dealloc_pd(ndev->pd); out_free_dev: kfree(ndev); out_err: mutex_unlock(&device_list_mutex); return NULL; } static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) { struct ib_qp_init_attr qp_attr = { }; struct nvmet_rdma_device *ndev = queue->dev; int nr_cqe, ret, i, factor; /* * Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND. 
*/ nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size; queue->cq = ib_cq_pool_get(ndev->device, nr_cqe + 1, queue->comp_vector, IB_POLL_WORKQUEUE); if (IS_ERR(queue->cq)) { ret = PTR_ERR(queue->cq); pr_err("failed to create CQ cqe= %d ret= %d\n", nr_cqe + 1, ret); goto out; } qp_attr.qp_context = queue; qp_attr.event_handler = nvmet_rdma_qp_event; qp_attr.send_cq = queue->cq; qp_attr.recv_cq = queue->cq; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; /* +1 for drain */ qp_attr.cap.max_send_wr = queue->send_queue_size + 1; factor = rdma_rw_mr_factor(ndev->device, queue->cm_id->port_num, 1 << NVMET_RDMA_MAX_MDTS); qp_attr.cap.max_rdma_ctxs = queue->send_queue_size * factor; qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd, ndev->device->attrs.max_send_sge); if (queue->nsrq) { qp_attr.srq = queue->nsrq->srq; } else { /* +1 for drain */ qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size; qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count; } if (queue->port->pi_enable && queue->host_qid) qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr); if (ret) { pr_err("failed to create_qp ret= %d\n", ret); goto err_destroy_cq; } queue->qp = queue->cm_id->qp; atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr); pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n", __func__, queue->cq->cqe, qp_attr.cap.max_send_sge, qp_attr.cap.max_send_wr, queue->cm_id); if (!queue->nsrq) { for (i = 0; i < queue->recv_queue_size; i++) { queue->cmds[i].queue = queue; ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]); if (ret) goto err_destroy_qp; } } out: return ret; err_destroy_qp: rdma_destroy_qp(queue->cm_id); err_destroy_cq: ib_cq_pool_put(queue->cq, nr_cqe + 1); goto out; } static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue) { ib_drain_qp(queue->qp); if (queue->cm_id) rdma_destroy_id(queue->cm_id); ib_destroy_qp(queue->qp); ib_cq_pool_put(queue->cq, 
queue->recv_queue_size + 2 * queue->send_queue_size + 1); } static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue) { pr_debug("freeing queue %d\n", queue->idx); nvmet_sq_destroy(&queue->nvme_sq); nvmet_rdma_destroy_queue_ib(queue); if (!queue->nsrq) { nvmet_rdma_free_cmds(queue->dev, queue->cmds, queue->recv_queue_size, !queue->host_qid); } nvmet_rdma_free_rsps(queue); ida_free(&nvmet_rdma_queue_ida, queue->idx); kfree(queue); } static void nvmet_rdma_release_queue_work(struct work_struct *w) { struct nvmet_rdma_queue *queue = container_of(w, struct nvmet_rdma_queue, release_work); struct nvmet_rdma_device *dev = queue->dev; nvmet_rdma_free_queue(queue); kref_put(&dev->ref, nvmet_rdma_free_dev); } static int nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn, struct nvmet_rdma_queue *queue) { struct nvme_rdma_cm_req *req; req = (struct nvme_rdma_cm_req *)conn->private_data; if (!req || conn->private_data_len == 0) return NVME_RDMA_CM_INVALID_LEN; if (le16_to_cpu(req->recfmt) != NVME_RDMA_CM_FMT_1_0) return NVME_RDMA_CM_INVALID_RECFMT; queue->host_qid = le16_to_cpu(req->qid); /* * req->hsqsize corresponds to our recv queue size plus 1 * req->hrqsize corresponds to our send queue size */ queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1; queue->send_queue_size = le16_to_cpu(req->hrqsize); if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH) return NVME_RDMA_CM_INVALID_HSQSIZE; /* XXX: Should we enforce some kind of max for IO queues? 
*/ return 0; } static int nvmet_rdma_cm_reject(struct rdma_cm_id *cm_id, enum nvme_rdma_cm_status status) { struct nvme_rdma_cm_rej rej; pr_debug("rejecting connect request: status %d (%s)\n", status, nvme_rdma_cm_msg(status)); rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0); rej.sts = cpu_to_le16(status); return rdma_reject(cm_id, (void *)&rej, sizeof(rej), IB_CM_REJ_CONSUMER_DEFINED); } static struct nvmet_rdma_queue * nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { struct nvmet_rdma_port *port = cm_id->context; struct nvmet_rdma_queue *queue; int ret; queue = kzalloc(sizeof(*queue), GFP_KERNEL); if (!queue) { ret = NVME_RDMA_CM_NO_RSC; goto out_reject; } ret = nvmet_sq_init(&queue->nvme_sq); if (ret) { ret = NVME_RDMA_CM_NO_RSC; goto out_free_queue; } ret = nvmet_rdma_parse_cm_connect_req(&event->param.conn, queue); if (ret) goto out_destroy_sq; /* * Schedules the actual release because calling rdma_destroy_id from * inside a CM callback would trigger a deadlock. (great API design..) */ INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work); queue->dev = ndev; queue->cm_id = cm_id; queue->port = port->nport; spin_lock_init(&queue->state_lock); queue->state = NVMET_RDMA_Q_CONNECTING; INIT_LIST_HEAD(&queue->rsp_wait_list); INIT_LIST_HEAD(&queue->rsp_wr_wait_list); spin_lock_init(&queue->rsp_wr_wait_lock); INIT_LIST_HEAD(&queue->queue_list); queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL); if (queue->idx < 0) { ret = NVME_RDMA_CM_NO_RSC; goto out_destroy_sq; } /* * Spread the io queues across completion vectors, * but still keep all admin queues on vector 0. */ queue->comp_vector = !queue->host_qid ? 
0 : queue->idx % ndev->device->num_comp_vectors; ret = nvmet_rdma_alloc_rsps(queue); if (ret) { ret = NVME_RDMA_CM_NO_RSC; goto out_ida_remove; } if (ndev->srqs) { queue->nsrq = ndev->srqs[queue->comp_vector % ndev->srq_count]; } else { queue->cmds = nvmet_rdma_alloc_cmds(ndev, queue->recv_queue_size, !queue->host_qid); if (IS_ERR(queue->cmds)) { ret = NVME_RDMA_CM_NO_RSC; goto out_free_responses; } } ret = nvmet_rdma_create_queue_ib(queue); if (ret) { pr_err("%s: creating RDMA queue failed (%d).\n", __func__, ret); ret = NVME_RDMA_CM_NO_RSC; goto out_free_cmds; } return queue; out_free_cmds: if (!queue->nsrq) { nvmet_rdma_free_cmds(queue->dev, queue->cmds, queue->recv_queue_size, !queue->host_qid); } out_free_responses: nvmet_rdma_free_rsps(queue); out_ida_remove: ida_free(&nvmet_rdma_queue_ida, queue->idx); out_destroy_sq: nvmet_sq_destroy(&queue->nvme_sq); out_free_queue: kfree(queue); out_reject: nvmet_rdma_cm_reject(cm_id, ret); return NULL; } static void nvmet_rdma_qp_event(struct ib_event *event, void *priv) { struct nvmet_rdma_queue *queue = priv; switch (event->event) { case IB_EVENT_COMM_EST: rdma_notify(queue->cm_id, event->event); break; case IB_EVENT_QP_LAST_WQE_REACHED: pr_debug("received last WQE reached event for queue=0x%p\n", queue); break; default: pr_err("received IB QP event: %s (%d)\n", ib_event_msg(event->event), event->event); break; } } static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id, struct nvmet_rdma_queue *queue, struct rdma_conn_param *p) { struct rdma_conn_param param = { }; struct nvme_rdma_cm_rep priv = { }; int ret = -ENOMEM; param.rnr_retry_count = 7; param.flow_control = 1; param.initiator_depth = min_t(u8, p->initiator_depth, queue->dev->device->attrs.max_qp_init_rd_atom); param.private_data = &priv; param.private_data_len = sizeof(priv); priv.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0); priv.crqsize = cpu_to_le16(queue->recv_queue_size); ret = rdma_accept(cm_id, &param); if (ret) pr_err("rdma_accept failed (error code = 
%d)\n", ret); return ret; } static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { struct nvmet_rdma_device *ndev; struct nvmet_rdma_queue *queue; int ret = -EINVAL; ndev = nvmet_rdma_find_get_device(cm_id); if (!ndev) { nvmet_rdma_cm_reject(cm_id, NVME_RDMA_CM_NO_RSC); return -ECONNREFUSED; } queue = nvmet_rdma_alloc_queue(ndev, cm_id, event); if (!queue) { ret = -ENOMEM; goto put_device; } if (queue->host_qid == 0) { struct nvmet_rdma_queue *q; int pending = 0; /* Check for pending controller teardown */ mutex_lock(&nvmet_rdma_queue_mutex); list_for_each_entry(q, &nvmet_rdma_queue_list, queue_list) { if (q->nvme_sq.ctrl == queue->nvme_sq.ctrl && q->state == NVMET_RDMA_Q_DISCONNECTING) pending++; } mutex_unlock(&nvmet_rdma_queue_mutex); if (pending > NVMET_RDMA_BACKLOG) return NVME_SC_CONNECT_CTRL_BUSY; } ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); if (ret) { /* * Don't destroy the cm_id in free path, as we implicitly * destroy the cm_id here with non-zero ret code. 
*/ queue->cm_id = NULL; goto free_queue; } mutex_lock(&nvmet_rdma_queue_mutex); list_add_tail(&queue->queue_list, &nvmet_rdma_queue_list); mutex_unlock(&nvmet_rdma_queue_mutex); return 0; free_queue: nvmet_rdma_free_queue(queue); put_device: kref_put(&ndev->ref, nvmet_rdma_free_dev); return ret; } static void nvmet_rdma_queue_established(struct nvmet_rdma_queue *queue) { unsigned long flags; spin_lock_irqsave(&queue->state_lock, flags); if (queue->state != NVMET_RDMA_Q_CONNECTING) { pr_warn("trying to establish a connected queue\n"); goto out_unlock; } queue->state = NVMET_RDMA_Q_LIVE; while (!list_empty(&queue->rsp_wait_list)) { struct nvmet_rdma_rsp *cmd; cmd = list_first_entry(&queue->rsp_wait_list, struct nvmet_rdma_rsp, wait_list); list_del(&cmd->wait_list); spin_unlock_irqrestore(&queue->state_lock, flags); nvmet_rdma_handle_command(queue, cmd); spin_lock_irqsave(&queue->state_lock, flags); } out_unlock: spin_unlock_irqrestore(&queue->state_lock, flags); } static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) { bool disconnect = false; unsigned long flags; pr_debug("cm_id= %p queue->state= %d\n", queue->cm_id, queue->state); spin_lock_irqsave(&queue->state_lock, flags); switch (queue->state) { case NVMET_RDMA_Q_CONNECTING: while (!list_empty(&queue->rsp_wait_list)) { struct nvmet_rdma_rsp *rsp; rsp = list_first_entry(&queue->rsp_wait_list, struct nvmet_rdma_rsp, wait_list); list_del(&rsp->wait_list); nvmet_rdma_put_rsp(rsp); } fallthrough; case NVMET_RDMA_Q_LIVE: queue->state = NVMET_RDMA_Q_DISCONNECTING; disconnect = true; break; case NVMET_RDMA_Q_DISCONNECTING: break; } spin_unlock_irqrestore(&queue->state_lock, flags); if (disconnect) { rdma_disconnect(queue->cm_id); queue_work(nvmet_wq, &queue->release_work); } } static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) { bool disconnect = false; mutex_lock(&nvmet_rdma_queue_mutex); if (!list_empty(&queue->queue_list)) { list_del_init(&queue->queue_list); disconnect = 
true; } mutex_unlock(&nvmet_rdma_queue_mutex); if (disconnect) __nvmet_rdma_queue_disconnect(queue); } static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, struct nvmet_rdma_queue *queue) { WARN_ON_ONCE(queue->state != NVMET_RDMA_Q_CONNECTING); mutex_lock(&nvmet_rdma_queue_mutex); if (!list_empty(&queue->queue_list)) list_del_init(&queue->queue_list); mutex_unlock(&nvmet_rdma_queue_mutex); pr_err("failed to connect queue %d\n", queue->idx); queue_work(nvmet_wq, &queue->release_work); } /** * nvmet_rdma_device_removal() - Handle RDMA device removal * @cm_id: rdma_cm id, used for nvmet port * @queue: nvmet rdma queue (cm id qp_context) * * DEVICE_REMOVAL event notifies us that the RDMA device is about * to unplug. Note that this event can be generated on a normal * queue cm_id and/or a device bound listener cm_id (where in this * case queue will be null). * * We registered an ib_client to handle device removal for queues, * so we only need to handle the listening port cm_ids. In this case * we nullify the priv to prevent double cm_id destruction and destroying * the cm_id implicitely by returning a non-zero rc to the callout. */ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, struct nvmet_rdma_queue *queue) { struct nvmet_rdma_port *port; if (queue) { /* * This is a queue cm_id. we have registered * an ib_client to handle queues removal * so don't interfear and just return. */ return 0; } port = cm_id->context; /* * This is a listener cm_id. Make sure that * future remove_port won't invoke a double * cm_id destroy. use atomic xchg to make sure * we don't compete with remove_port. */ if (xchg(&port->cm_id, NULL) != cm_id) return 0; /* * We need to return 1 so that the core will destroy * it's own ID. What a great API design.. 
*/ return 1; } static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { struct nvmet_rdma_queue *queue = NULL; int ret = 0; if (cm_id->qp) queue = cm_id->qp->qp_context; pr_debug("%s (%d): status %d id %p\n", rdma_event_msg(event->event), event->event, event->status, cm_id); switch (event->event) { case RDMA_CM_EVENT_CONNECT_REQUEST: ret = nvmet_rdma_queue_connect(cm_id, event); break; case RDMA_CM_EVENT_ESTABLISHED: nvmet_rdma_queue_established(queue); break; case RDMA_CM_EVENT_ADDR_CHANGE: if (!queue) { struct nvmet_rdma_port *port = cm_id->context; queue_delayed_work(nvmet_wq, &port->repair_work, 0); break; } fallthrough; case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_TIMEWAIT_EXIT: nvmet_rdma_queue_disconnect(queue); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: ret = nvmet_rdma_device_removal(cm_id, queue); break; case RDMA_CM_EVENT_REJECTED: pr_debug("Connection rejected: %s\n", rdma_reject_msg(cm_id, event->status)); fallthrough; case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_CONNECT_ERROR: nvmet_rdma_queue_connect_fail(cm_id, queue); break; default: pr_err("received unrecognized RDMA CM event %d\n", event->event); break; } return ret; } static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl) { struct nvmet_rdma_queue *queue, *n; mutex_lock(&nvmet_rdma_queue_mutex); list_for_each_entry_safe(queue, n, &nvmet_rdma_queue_list, queue_list) { if (queue->nvme_sq.ctrl != ctrl) continue; list_del_init(&queue->queue_list); __nvmet_rdma_queue_disconnect(queue); } mutex_unlock(&nvmet_rdma_queue_mutex); } static void nvmet_rdma_destroy_port_queues(struct nvmet_rdma_port *port) { struct nvmet_rdma_queue *queue, *tmp; struct nvmet_port *nport = port->nport; mutex_lock(&nvmet_rdma_queue_mutex); list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list, queue_list) { if (queue->port != nport) continue; list_del_init(&queue->queue_list); __nvmet_rdma_queue_disconnect(queue); } mutex_unlock(&nvmet_rdma_queue_mutex); } static void 
nvmet_rdma_disable_port(struct nvmet_rdma_port *port) { struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL); if (cm_id) rdma_destroy_id(cm_id); /* * Destroy the remaining queues, which are not belong to any * controller yet. Do it here after the RDMA-CM was destroyed * guarantees that no new queue will be created. */ nvmet_rdma_destroy_port_queues(port); } static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) { struct sockaddr *addr = (struct sockaddr *)&port->addr; struct rdma_cm_id *cm_id; int ret; cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) { pr_err("CM ID creation failed\n"); return PTR_ERR(cm_id); } /* * Allow both IPv4 and IPv6 sockets to bind a single port * at the same time. */ ret = rdma_set_afonly(cm_id, 1); if (ret) { pr_err("rdma_set_afonly failed (%d)\n", ret); goto out_destroy_id; } ret = rdma_bind_addr(cm_id, addr); if (ret) { pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret); goto out_destroy_id; } ret = rdma_listen(cm_id, NVMET_RDMA_BACKLOG); if (ret) { pr_err("listening to %pISpcs failed (%d)\n", addr, ret); goto out_destroy_id; } port->cm_id = cm_id; return 0; out_destroy_id: rdma_destroy_id(cm_id); return ret; } static void nvmet_rdma_repair_port_work(struct work_struct *w) { struct nvmet_rdma_port *port = container_of(to_delayed_work(w), struct nvmet_rdma_port, repair_work); int ret; nvmet_rdma_disable_port(port); ret = nvmet_rdma_enable_port(port); if (ret) queue_delayed_work(nvmet_wq, &port->repair_work, 5 * HZ); } static int nvmet_rdma_add_port(struct nvmet_port *nport) { struct nvmet_rdma_port *port; __kernel_sa_family_t af; int ret; port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) return -ENOMEM; nport->priv = port; port->nport = nport; INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work); switch (nport->disc_addr.adrfam) { case NVMF_ADDR_FAMILY_IP4: af = AF_INET; break; case NVMF_ADDR_FAMILY_IP6: af = AF_INET6; break; default: 
pr_err("address family %d not supported\n", nport->disc_addr.adrfam); ret = -EINVAL; goto out_free_port; } if (nport->inline_data_size < 0) { nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; } else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { pr_warn("inline_data_size %u is too large, reducing to %u\n", nport->inline_data_size, NVMET_RDMA_MAX_INLINE_DATA_SIZE); nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; } if (nport->max_queue_size < 0) { nport->max_queue_size = NVME_RDMA_DEFAULT_QUEUE_SIZE; } else if (nport->max_queue_size > NVME_RDMA_MAX_QUEUE_SIZE) { pr_warn("max_queue_size %u is too large, reducing to %u\n", nport->max_queue_size, NVME_RDMA_MAX_QUEUE_SIZE); nport->max_queue_size = NVME_RDMA_MAX_QUEUE_SIZE; } ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr, nport->disc_addr.trsvcid, &port->addr); if (ret) { pr_err("malformed ip/port passed: %s:%s\n", nport->disc_addr.traddr, nport->disc_addr.trsvcid); goto out_free_port; } ret = nvmet_rdma_enable_port(port); if (ret) goto out_free_port; pr_info("enabling port %d (%pISpcs)\n", le16_to_cpu(nport->disc_addr.portid), (struct sockaddr *)&port->addr); return 0; out_free_port: kfree(port); return ret; } static void nvmet_rdma_remove_port(struct nvmet_port *nport) { struct nvmet_rdma_port *port = nport->priv; cancel_delayed_work_sync(&port->repair_work); nvmet_rdma_disable_port(port); kfree(port); } static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, struct nvmet_port *nport, char *traddr) { struct nvmet_rdma_port *port = nport->priv; struct rdma_cm_id *cm_id = port->cm_id; if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) { struct nvmet_rdma_rsp *rsp = container_of(req, struct nvmet_rdma_rsp, req); struct rdma_cm_id *req_cm_id = rsp->queue->cm_id; struct sockaddr *addr = (void *)&req_cm_id->route.addr.src_addr; sprintf(traddr, "%pISc", addr); } else { memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE); } } static 
ssize_t nvmet_rdma_host_port_addr(struct nvmet_ctrl *ctrl, char *traddr, size_t traddr_len) { struct nvmet_sq *nvme_sq = ctrl->sqs[0]; struct nvmet_rdma_queue *queue = container_of(nvme_sq, struct nvmet_rdma_queue, nvme_sq); return snprintf(traddr, traddr_len, "%pISc", (struct sockaddr *)&queue->cm_id->route.addr.dst_addr); } static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl) { if (ctrl->pi_support) return NVMET_RDMA_MAX_METADATA_MDTS; return NVMET_RDMA_MAX_MDTS; } static u16 nvmet_rdma_get_max_queue_size(const struct nvmet_ctrl *ctrl) { if (ctrl->pi_support) return NVME_RDMA_MAX_METADATA_QUEUE_SIZE; return NVME_RDMA_MAX_QUEUE_SIZE; } static const struct nvmet_fabrics_ops nvmet_rdma_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_RDMA, .msdbd = 1, .flags = NVMF_KEYED_SGLS | NVMF_METADATA_SUPPORTED, .add_port = nvmet_rdma_add_port, .remove_port = nvmet_rdma_remove_port, .queue_response = nvmet_rdma_queue_response, .delete_ctrl = nvmet_rdma_delete_ctrl, .disc_traddr = nvmet_rdma_disc_port_addr, .host_traddr = nvmet_rdma_host_port_addr, .get_mdts = nvmet_rdma_get_mdts, .get_max_queue_size = nvmet_rdma_get_max_queue_size, }; static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data) { struct nvmet_rdma_queue *queue, *tmp; struct nvmet_rdma_device *ndev; bool found = false; mutex_lock(&device_list_mutex); list_for_each_entry(ndev, &device_list, entry) { if (ndev->device == ib_device) { found = true; break; } } mutex_unlock(&device_list_mutex); if (!found) return; /* * IB Device that is used by nvmet controllers is being removed, * delete all queues using this device. 
*/ mutex_lock(&nvmet_rdma_queue_mutex); list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list, queue_list) { if (queue->dev->device != ib_device) continue; pr_info("Removing queue %d\n", queue->idx); list_del_init(&queue->queue_list); __nvmet_rdma_queue_disconnect(queue); } mutex_unlock(&nvmet_rdma_queue_mutex); flush_workqueue(nvmet_wq); } static struct ib_client nvmet_rdma_ib_client = { .name = "nvmet_rdma", .remove = nvmet_rdma_remove_one }; static int __init nvmet_rdma_init(void) { int ret; ret = ib_register_client(&nvmet_rdma_ib_client); if (ret) return ret; ret = nvmet_register_transport(&nvmet_rdma_ops); if (ret) goto err_ib_client; return 0; err_ib_client: ib_unregister_client(&nvmet_rdma_ib_client); return ret; } static void __exit nvmet_rdma_exit(void) { nvmet_unregister_transport(&nvmet_rdma_ops); ib_unregister_client(&nvmet_rdma_ib_client); WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list)); ida_destroy(&nvmet_rdma_queue_ida); } module_init(nvmet_rdma_init); module_exit(nvmet_rdma_exit); MODULE_DESCRIPTION("NVMe target RDMA transport driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("nvmet-transport-1"); /* 1 == NVMF_TRTYPE_RDMA */
1 1 1 3 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 // SPDX-License-Identifier: GPL-2.0 /* Driver for ETAS GmbH ES58X USB CAN(-FD) Bus Interfaces. * * File es58x_devlink.c: report the product information using devlink. * * Copyright (c) 2022 Vincent Mailhol <mailhol.vincent@wanadoo.fr> */ #include <linux/ctype.h> #include <linux/device.h> #include <linux/usb.h> #include <net/devlink.h> #include "es58x_core.h" /* USB descriptor index containing the product information string. */ #define ES58X_PROD_INFO_IDX 6 /** * es58x_parse_sw_version() - Extract boot loader or firmware version. * @es58x_dev: ES58X device. * @prod_info: USB custom string returned by the device. * @prefix: Select which information should be parsed. Set it to "FW" * to parse the firmware version or to "BL" to parse the * bootloader version. * * The @prod_info string contains the firmware and the bootloader * version number all prefixed by a magic string and concatenated with * other numbers. 
Depending on the device, the firmware (bootloader) * format is either "FW_Vxx.xx.xx" ("BL_Vxx.xx.xx") or "FW:xx.xx.xx" * ("BL:xx.xx.xx") where 'x' represents a digit. @prod_info must * contains the common part of those prefixes: "FW" or "BL". * * Parse @prod_info and store the version number in * &es58x_dev.firmware_version or &es58x_dev.bootloader_version * according to @prefix value. * * Return: zero on success, -EINVAL if @prefix contains an invalid * value and -EBADMSG if @prod_info could not be parsed. */ static int es58x_parse_sw_version(struct es58x_device *es58x_dev, const char *prod_info, const char *prefix) { struct es58x_sw_version *version; int major, minor, revision; if (!strcmp(prefix, "FW")) version = &es58x_dev->firmware_version; else if (!strcmp(prefix, "BL")) version = &es58x_dev->bootloader_version; else return -EINVAL; /* Go to prefix */ prod_info = strstr(prod_info, prefix); if (!prod_info) return -EBADMSG; /* Go to beginning of the version number */ while (!isdigit(*prod_info)) { prod_info++; if (!*prod_info) return -EBADMSG; } if (sscanf(prod_info, "%2u.%2u.%2u", &major, &minor, &revision) != 3) return -EBADMSG; version->major = major; version->minor = minor; version->revision = revision; return 0; } /** * es58x_parse_hw_rev() - Extract hardware revision number. * @es58x_dev: ES58X device. * @prod_info: USB custom string returned by the device. * * @prod_info contains the hardware revision prefixed by a magic * string and conquenated together with other numbers. Depending on * the device, the hardware revision format is either * "HW_VER:axxx/xxx" or "HR:axxx/xxx" where 'a' represents a letter * and 'x' a digit. * * Parse @prod_info and store the hardware revision number in * &es58x_dev.hardware_revision. * * Return: zero on success, -EBADMSG if @prod_info could not be * parsed. 
*/ static int es58x_parse_hw_rev(struct es58x_device *es58x_dev, const char *prod_info) { char letter; int major, minor; /* The only occurrence of 'H' is in the hardware revision prefix. */ prod_info = strchr(prod_info, 'H'); if (!prod_info) return -EBADMSG; /* Go to beginning of the hardware revision */ prod_info = strchr(prod_info, ':'); if (!prod_info) return -EBADMSG; prod_info++; if (sscanf(prod_info, "%c%3u/%3u", &letter, &major, &minor) != 3) return -EBADMSG; es58x_dev->hardware_revision.letter = letter; es58x_dev->hardware_revision.major = major; es58x_dev->hardware_revision.minor = minor; return 0; } /** * es58x_parse_product_info() - Parse the ES58x product information * string. * @es58x_dev: ES58X device. * * Retrieve the product information string and parse it to extract the * firmware version, the bootloader version and the hardware * revision. * * If the function fails, set the version or revision to an invalid * value and emit an informal message. Continue probing because the * product information is not critical for the driver to operate. 
*/ void es58x_parse_product_info(struct es58x_device *es58x_dev) { static const struct es58x_sw_version sw_version_not_set = { .major = -1, .minor = -1, .revision = -1, }; static const struct es58x_hw_revision hw_revision_not_set = { .letter = '\0', .major = -1, .minor = -1, }; char *prod_info; es58x_dev->firmware_version = sw_version_not_set; es58x_dev->bootloader_version = sw_version_not_set; es58x_dev->hardware_revision = hw_revision_not_set; prod_info = usb_cache_string(es58x_dev->udev, ES58X_PROD_INFO_IDX); if (!prod_info) { dev_warn(es58x_dev->dev, "could not retrieve the product info string\n"); return; } if (es58x_parse_sw_version(es58x_dev, prod_info, "FW") || es58x_parse_sw_version(es58x_dev, prod_info, "BL") || es58x_parse_hw_rev(es58x_dev, prod_info)) dev_info(es58x_dev->dev, "could not parse product info: '%s'\n", prod_info); kfree(prod_info); } /** * es58x_sw_version_is_valid() - Check if the version is a valid number. * @sw_ver: Version number of either the firmware or the bootloader. * * If any of the software version sub-numbers do not fit on two * digits, the version is invalid, most probably because the product * string could not be parsed. * * Return: @true if the software version is valid, @false otherwise. */ static inline bool es58x_sw_version_is_valid(struct es58x_sw_version *sw_ver) { return sw_ver->major < 100 && sw_ver->minor < 100 && sw_ver->revision < 100; } /** * es58x_hw_revision_is_valid() - Check if the revision is a valid number. * @hw_rev: Revision number of the hardware. * * If &es58x_hw_revision.letter is not a alphanumeric character or if * any of the hardware revision sub-numbers do not fit on three * digits, the revision is invalid, most probably because the product * string could not be parsed. * * Return: @true if the hardware revision is valid, @false otherwise. 
*/ static inline bool es58x_hw_revision_is_valid(struct es58x_hw_revision *hw_rev) { return isalnum(hw_rev->letter) && hw_rev->major < 1000 && hw_rev->minor < 1000; } /** * es58x_devlink_info_get() - Report the product information. * @devlink: Devlink. * @req: skb wrapper where to put requested information. * @extack: Unused. * * Report the firmware version, the bootloader version, the hardware * revision and the serial number through netlink. * * Return: zero on success, errno when any error occurs. */ static int es58x_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack) { struct es58x_device *es58x_dev = devlink_priv(devlink); struct es58x_sw_version *fw_ver = &es58x_dev->firmware_version; struct es58x_sw_version *bl_ver = &es58x_dev->bootloader_version; struct es58x_hw_revision *hw_rev = &es58x_dev->hardware_revision; char buf[MAX(sizeof("xx.xx.xx"), sizeof("axxx/xxx"))]; int ret = 0; if (es58x_sw_version_is_valid(fw_ver)) { snprintf(buf, sizeof(buf), "%02u.%02u.%02u", fw_ver->major, fw_ver->minor, fw_ver->revision); ret = devlink_info_version_running_put(req, DEVLINK_INFO_VERSION_GENERIC_FW, buf); if (ret) return ret; } if (es58x_sw_version_is_valid(bl_ver)) { snprintf(buf, sizeof(buf), "%02u.%02u.%02u", bl_ver->major, bl_ver->minor, bl_ver->revision); ret = devlink_info_version_running_put(req, DEVLINK_INFO_VERSION_GENERIC_FW_BOOTLOADER, buf); if (ret) return ret; } if (es58x_hw_revision_is_valid(hw_rev)) { snprintf(buf, sizeof(buf), "%c%03u/%03u", hw_rev->letter, hw_rev->major, hw_rev->minor); ret = devlink_info_version_fixed_put(req, DEVLINK_INFO_VERSION_GENERIC_BOARD_REV, buf); if (ret) return ret; } return devlink_info_serial_number_put(req, es58x_dev->udev->serial); } const struct devlink_ops es58x_dl_ops = { .info_get = es58x_devlink_info_get, };
343 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SUSPEND_H #define _LINUX_SUSPEND_H #include <linux/swap.h> #include <linux/notifier.h> #include <linux/init.h> #include <linux/pm.h> #include <linux/mm.h> #include <linux/freezer.h> #include <asm/errno.h> #ifdef CONFIG_VT extern void pm_set_vt_switch(int); #else static inline void pm_set_vt_switch(int do_switch) { } #endif #ifdef CONFIG_VT_CONSOLE_SLEEP extern void pm_prepare_console(void); extern void pm_restore_console(void); #else static inline void pm_prepare_console(void) { } static inline void pm_restore_console(void) { } #endif typedef int __bitwise suspend_state_t; #define PM_SUSPEND_ON ((__force suspend_state_t) 0) #define PM_SUSPEND_TO_IDLE ((__force suspend_state_t) 1) #define PM_SUSPEND_STANDBY ((__force suspend_state_t) 2) #define PM_SUSPEND_MEM ((__force suspend_state_t) 3) #define PM_SUSPEND_MIN PM_SUSPEND_TO_IDLE #define PM_SUSPEND_MAX ((__force suspend_state_t) 4) /** * struct platform_suspend_ops - Callbacks for managing platform dependent * system sleep states. * * @valid: Callback to determine if given system sleep state is supported by * the platform. * Valid (ie. supported) states are advertised in /sys/power/state. Note * that it still may be impossible to enter given system sleep state if the * conditions aren't right. * There is the %suspend_valid_only_mem function available that can be * assigned to this if the platform only supports mem sleep. * * @begin: Initialise a transition to given system sleep state. * @begin() is executed right prior to suspending devices. The information * conveyed to the platform code by @begin() should be disregarded by it as * soon as @end() is executed. If @begin() fails (ie. 
returns nonzero), * @prepare(), @enter() and @finish() will not be called by the PM core. * This callback is optional. However, if it is implemented, the argument * passed to @enter() is redundant and should be ignored. * * @prepare: Prepare the platform for entering the system sleep state indicated * by @begin(). * @prepare() is called right after devices have been suspended (ie. the * appropriate .suspend() method has been executed for each device) and * before device drivers' late suspend callbacks are executed. It returns * 0 on success or a negative error code otherwise, in which case the * system cannot enter the desired sleep state (@prepare_late(), @enter(), * and @wake() will not be called in that case). * * @prepare_late: Finish preparing the platform for entering the system sleep * state indicated by @begin(). * @prepare_late is called before disabling nonboot CPUs and after * device drivers' late suspend callbacks have been executed. It returns * 0 on success or a negative error code otherwise, in which case the * system cannot enter the desired sleep state (@enter() will not be * executed). * * @enter: Enter the system sleep state indicated by @begin() or represented by * the argument if @begin() is not implemented. * This callback is mandatory. It returns 0 on success or a negative * error code otherwise, in which case the system cannot enter the desired * sleep state. * * @wake: Called when the system has just left a sleep state, right after * the nonboot CPUs have been enabled and before device drivers' early * resume callbacks are executed. * This callback is optional, but should be implemented by the platforms * that implement @prepare_late(). If implemented, it is always called * after @prepare_late and @enter(), even if one of them fails. * * @finish: Finish wake-up of the platform. * @finish is called right prior to calling device drivers' regular suspend * callbacks. 
* This callback is optional, but should be implemented by the platforms * that implement @prepare(). If implemented, it is always called after * @enter() and @wake(), even if any of them fails. It is executed after * a failing @prepare. * * @suspend_again: Returns whether the system should suspend again (true) or * not (false). If the platform wants to poll sensors or execute some * code during suspended without invoking userspace and most of devices, * suspend_again callback is the place assuming that periodic-wakeup or * alarm-wakeup is already setup. This allows to execute some codes while * being kept suspended in the view of userland and devices. * * @end: Called by the PM core right after resuming devices, to indicate to * the platform that the system has returned to the working state or * the transition to the sleep state has been aborted. * This callback is optional, but should be implemented by the platforms * that implement @begin(). Accordingly, platforms implementing @begin() * should also provide a @end() which cleans up transitions aborted before * @enter(). * * @recover: Recover the platform from a suspend failure. * Called by the PM core if the suspending of devices fails. * This callback is optional and should only be implemented by platforms * which require special recovery actions in that situation. 
*/ struct platform_suspend_ops { int (*valid)(suspend_state_t state); int (*begin)(suspend_state_t state); int (*prepare)(void); int (*prepare_late)(void); int (*enter)(suspend_state_t state); void (*wake)(void); void (*finish)(void); bool (*suspend_again)(void); void (*end)(void); void (*recover)(void); }; struct platform_s2idle_ops { int (*begin)(void); int (*prepare)(void); int (*prepare_late)(void); void (*check)(void); bool (*wake)(void); void (*restore_early)(void); void (*restore)(void); void (*end)(void); }; #ifdef CONFIG_SUSPEND extern suspend_state_t pm_suspend_target_state; extern suspend_state_t mem_sleep_current; extern suspend_state_t mem_sleep_default; /** * suspend_set_ops - set platform dependent suspend operations * @ops: The new suspend operations to set. */ extern void suspend_set_ops(const struct platform_suspend_ops *ops); extern int suspend_valid_only_mem(suspend_state_t state); extern unsigned int pm_suspend_global_flags; #define PM_SUSPEND_FLAG_FW_SUSPEND BIT(0) #define PM_SUSPEND_FLAG_FW_RESUME BIT(1) #define PM_SUSPEND_FLAG_NO_PLATFORM BIT(2) static inline void pm_suspend_clear_flags(void) { pm_suspend_global_flags = 0; } static inline void pm_set_suspend_via_firmware(void) { pm_suspend_global_flags |= PM_SUSPEND_FLAG_FW_SUSPEND; } static inline void pm_set_resume_via_firmware(void) { pm_suspend_global_flags |= PM_SUSPEND_FLAG_FW_RESUME; } static inline void pm_set_suspend_no_platform(void) { pm_suspend_global_flags |= PM_SUSPEND_FLAG_NO_PLATFORM; } /** * pm_suspend_via_firmware - Check if platform firmware will suspend the system. * * To be called during system-wide power management transitions to sleep states * or during the subsequent system-wide transitions back to the working state. 
* * Return 'true' if the platform firmware is going to be invoked at the end of * the system-wide power management transition (to a sleep state) in progress in * order to complete it, or if the platform firmware has been invoked in order * to complete the last (or preceding) transition of the system to a sleep * state. * * This matters if the caller needs or wants to carry out some special actions * depending on whether or not control will be passed to the platform firmware * subsequently (for example, the device may need to be reset before letting the * platform firmware manipulate it, which is not necessary when the platform * firmware is not going to be invoked) or when such special actions may have * been carried out during the preceding transition of the system to a sleep * state (as they may need to be taken into account). */ static inline bool pm_suspend_via_firmware(void) { return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_SUSPEND); } /** * pm_resume_via_firmware - Check if platform firmware has woken up the system. * * To be called during system-wide power management transitions from sleep * states. * * Return 'true' if the platform firmware has passed control to the kernel at * the beginning of the system-wide power management transition in progress, so * the event that woke up the system from sleep has been handled by the platform * firmware. */ static inline bool pm_resume_via_firmware(void) { return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_RESUME); } /** * pm_suspend_no_platform - Check if platform may change device power states. * * To be called during system-wide power management transitions to sleep states * or during the subsequent system-wide transitions back to the working state. 
* * Return 'true' if the power states of devices remain under full control of the * kernel throughout the system-wide suspend and resume cycle in progress (that * is, if a device is put into a certain power state during suspend, it can be * expected to remain in that state during resume). */ static inline bool pm_suspend_no_platform(void) { return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_NO_PLATFORM); } /* Suspend-to-idle state machnine. */ enum s2idle_states { S2IDLE_STATE_NONE, /* Not suspended/suspending. */ S2IDLE_STATE_ENTER, /* Enter suspend-to-idle. */ S2IDLE_STATE_WAKE, /* Wake up from suspend-to-idle. */ }; extern enum s2idle_states __read_mostly s2idle_state; static inline bool idle_should_enter_s2idle(void) { return unlikely(s2idle_state == S2IDLE_STATE_ENTER); } extern bool pm_suspend_default_s2idle(void); extern void __init pm_states_init(void); extern void s2idle_set_ops(const struct platform_s2idle_ops *ops); extern void s2idle_wake(void); /** * arch_suspend_disable_irqs - disable IRQs for suspend * * Disables IRQs (in the default case). This is a weak symbol in the common * code and thus allows architectures to override it if more needs to be * done. Not called for suspend to disk. */ extern void arch_suspend_disable_irqs(void); /** * arch_suspend_enable_irqs - enable IRQs after suspend * * Enables IRQs (in the default case). This is a weak symbol in the common * code and thus allows architectures to override it if more needs to be * done. Not called for suspend to disk. 
*/ extern void arch_suspend_enable_irqs(void); extern int pm_suspend(suspend_state_t state); extern bool sync_on_suspend_enabled; #else /* !CONFIG_SUSPEND */ #define suspend_valid_only_mem NULL #define pm_suspend_target_state (PM_SUSPEND_ON) static inline void pm_suspend_clear_flags(void) {} static inline void pm_set_suspend_via_firmware(void) {} static inline void pm_set_resume_via_firmware(void) {} static inline bool pm_suspend_via_firmware(void) { return false; } static inline bool pm_resume_via_firmware(void) { return false; } static inline bool pm_suspend_no_platform(void) { return false; } static inline bool pm_suspend_default_s2idle(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } static inline bool sync_on_suspend_enabled(void) { return true; } static inline bool idle_should_enter_s2idle(void) { return false; } static inline void __init pm_states_init(void) {} static inline void s2idle_set_ops(const struct platform_s2idle_ops *ops) {} static inline void s2idle_wake(void) {} #endif /* !CONFIG_SUSPEND */ /* struct pbe is used for creating lists of pages that should be restored * atomically during the resume from disk, because the page frames they have * occupied before the suspend are in use. */ struct pbe { void *address; /* address of the copy */ void *orig_address; /* original address of a page */ struct pbe *next; }; /** * struct platform_hibernation_ops - hibernation platform support * * The methods in this structure allow a platform to carry out special * operations required by it during a hibernation transition. * * All the methods below, except for @recover(), must be implemented. * * @begin: Tell the platform driver that we're starting hibernation. * Called right after shrinking memory and before freezing devices. 
* * @end: Called by the PM core right after resuming devices, to indicate to * the platform that the system has returned to the working state. * * @pre_snapshot: Prepare the platform for creating the hibernation image. * Called right after devices have been frozen and before the nonboot * CPUs are disabled (runs with IRQs on). * * @finish: Restore the previous state of the platform after the hibernation * image has been created *or* put the platform into the normal operation * mode after the hibernation (the same method is executed in both cases). * Called right after the nonboot CPUs have been enabled and before * thawing devices (runs with IRQs on). * * @prepare: Prepare the platform for entering the low power state. * Called right after the hibernation image has been saved and before * devices are prepared for entering the low power state. * * @enter: Put the system into the low power state after the hibernation image * has been saved to disk. * Called after the nonboot CPUs have been disabled and all of the low * level devices have been shut down (runs with IRQs off). * * @leave: Perform the first stage of the cleanup after the system sleep state * indicated by @set_target() has been left. * Called right after the control has been passed from the boot kernel to * the image kernel, before the nonboot CPUs are enabled and before devices * are resumed. Executed with interrupts disabled. * * @pre_restore: Prepare system for the restoration from a hibernation image. * Called right after devices have been frozen and before the nonboot * CPUs are disabled (runs with IRQs on). * * @restore_cleanup: Clean up after a failing image restoration. * Called right after the nonboot CPUs have been enabled and before * thawing devices (runs with IRQs on). * * @recover: Recover the platform from a failure to suspend devices. * Called by the PM core if the suspending of devices during hibernation * fails. 
This callback is optional and should only be implemented by * platforms which require special recovery actions in that situation. */ struct platform_hibernation_ops { int (*begin)(pm_message_t stage); void (*end)(void); int (*pre_snapshot)(void); void (*finish)(void); int (*prepare)(void); int (*enter)(void); void (*leave)(void); int (*pre_restore)(void); void (*restore_cleanup)(void); void (*recover)(void); }; #ifdef CONFIG_HIBERNATION /* kernel/power/snapshot.c */ extern void register_nosave_region(unsigned long b, unsigned long e); extern int swsusp_page_is_forbidden(struct page *); extern void swsusp_set_page_free(struct page *); extern void swsusp_unset_page_free(struct page *); extern unsigned long get_safe_page(gfp_t gfp_mask); extern asmlinkage int swsusp_arch_suspend(void); extern asmlinkage int swsusp_arch_resume(void); extern u32 swsusp_hardware_signature; extern void hibernation_set_ops(const struct platform_hibernation_ops *ops); extern int hibernate(void); extern bool system_entering_hibernation(void); extern bool hibernation_available(void); asmlinkage int swsusp_save(void); extern struct pbe *restore_pblist; int pfn_is_nosave(unsigned long pfn); int hibernate_quiet_exec(int (*func)(void *data), void *data); int hibernate_resume_nonboot_cpu_disable(void); int arch_hibernation_header_save(void *addr, unsigned int max_size); int arch_hibernation_header_restore(void *addr); #else /* CONFIG_HIBERNATION */ static inline void register_nosave_region(unsigned long b, unsigned long e) {} static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} static inline void swsusp_unset_page_free(struct page *p) {} static inline void hibernation_set_ops(const struct platform_hibernation_ops *ops) {} static inline int hibernate(void) { return -ENOSYS; } static inline bool system_entering_hibernation(void) { return false; } static inline bool hibernation_available(void) { return false; } static 
inline int hibernate_quiet_exec(int (*func)(void *data), void *data) { return -ENOTSUPP; } #endif /* CONFIG_HIBERNATION */ int arch_resume_nosmt(void); #ifdef CONFIG_HIBERNATION_SNAPSHOT_DEV int is_hibernate_resume_dev(dev_t dev); #else static inline int is_hibernate_resume_dev(dev_t dev) { return 0; } #endif /* Hibernation and suspend events */ #define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ #define PM_POST_HIBERNATION 0x0002 /* Hibernation finished */ #define PM_SUSPEND_PREPARE 0x0003 /* Going to suspend the system */ #define PM_POST_SUSPEND 0x0004 /* Suspend finished */ #define PM_RESTORE_PREPARE 0x0005 /* Going to restore a saved image */ #define PM_POST_RESTORE 0x0006 /* Restore failed */ extern struct mutex system_transition_mutex; #ifdef CONFIG_PM_SLEEP void save_processor_state(void); void restore_processor_state(void); /* kernel/power/main.c */ extern int register_pm_notifier(struct notifier_block *nb); extern int unregister_pm_notifier(struct notifier_block *nb); extern void ksys_sync_helper(void); extern void pm_report_hw_sleep_time(u64 t); extern void pm_report_max_hw_sleep(u64 t); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ { .notifier_call = fn, .priority = pri }; \ register_pm_notifier(&fn##_nb); \ } /* drivers/base/power/wakeup.c */ extern bool events_check_enabled; static inline bool pm_suspended_storage(void) { return !gfp_has_io_fs(gfp_allowed_mask); } extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); extern void pm_system_cancel_wakeup(void); extern void pm_wakeup_clear(unsigned int irq_number); extern void pm_system_irq_wakeup(unsigned int irq_number); extern unsigned int pm_wakeup_irq(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); extern void pm_print_active_wakeup_sources(void); extern unsigned int lock_system_sleep(void); extern void 
unlock_system_sleep(unsigned int); #else /* !CONFIG_PM_SLEEP */ static inline int register_pm_notifier(struct notifier_block *nb) { return 0; } static inline int unregister_pm_notifier(struct notifier_block *nb) { return 0; } static inline void pm_report_hw_sleep_time(u64 t) {}; static inline void pm_report_max_hw_sleep(u64 t) {}; static inline void ksys_sync_helper(void) {} #define pm_notifier(fn, pri) do { (void)(fn); } while (0) static inline bool pm_suspended_storage(void) { return false; } static inline bool pm_wakeup_pending(void) { return false; } static inline void pm_system_wakeup(void) {} static inline void pm_wakeup_clear(bool reset) {} static inline void pm_system_irq_wakeup(unsigned int irq_number) {} static inline unsigned int lock_system_sleep(void) { return 0; } static inline void unlock_system_sleep(unsigned int flags) {} #endif /* !CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_SLEEP_DEBUG extern bool pm_print_times_enabled; extern bool pm_debug_messages_on; extern bool pm_debug_messages_should_print(void); static inline int pm_dyn_debug_messages_on(void) { #ifdef CONFIG_DYNAMIC_DEBUG return 1; #else return 0; #endif } #ifndef pr_fmt #define pr_fmt(fmt) "PM: " fmt #endif #define __pm_pr_dbg(fmt, ...) \ do { \ if (pm_debug_messages_should_print()) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ else if (pm_dyn_debug_messages_on()) \ pr_debug(fmt, ##__VA_ARGS__); \ } while (0) #define __pm_deferred_pr_dbg(fmt, ...) \ do { \ if (pm_debug_messages_should_print()) \ printk_deferred(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) #else #define pm_print_times_enabled (false) #define pm_debug_messages_on (false) #include <linux/printk.h> #define __pm_pr_dbg(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #define __pm_deferred_pr_dbg(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /** * pm_pr_dbg - print pm sleep debug messages * * If pm_debug_messages_on is enabled and the system is entering/leaving * suspend, print message. 
* If pm_debug_messages_on is disabled and CONFIG_DYNAMIC_DEBUG is enabled, * print message only from instances explicitly enabled on dynamic debug's * control. * If pm_debug_messages_on is disabled and CONFIG_DYNAMIC_DEBUG is disabled, * don't print message. */ #define pm_pr_dbg(fmt, ...) \ __pm_pr_dbg(fmt, ##__VA_ARGS__) #define pm_deferred_pr_dbg(fmt, ...) \ __pm_deferred_pr_dbg(fmt, ##__VA_ARGS__) #ifdef CONFIG_PM_AUTOSLEEP /* kernel/power/autosleep.c */ void queue_up_suspend_work(void); #else /* !CONFIG_PM_AUTOSLEEP */ static inline void queue_up_suspend_work(void) {} #endif /* !CONFIG_PM_AUTOSLEEP */ enum suspend_stat_step { SUSPEND_WORKING = 0, SUSPEND_FREEZE, SUSPEND_PREPARE, SUSPEND_SUSPEND, SUSPEND_SUSPEND_LATE, SUSPEND_SUSPEND_NOIRQ, SUSPEND_RESUME_NOIRQ, SUSPEND_RESUME_EARLY, SUSPEND_RESUME }; void dpm_save_failed_dev(const char *name); void dpm_save_failed_step(enum suspend_stat_step step); #endif /* _LINUX_SUSPEND_H */
9 9 5 5 10 10 5 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 // SPDX-License-Identifier: GPL-2.0-only /* * Transparent proxy support for Linux/iptables * * Copyright (c) 2006-2010 BalaBit IT Ltd. 
* Author: Balazs Scheidler, Krisztian Kovacs */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <net/checksum.h> #include <net/udp.h> #include <net/tcp.h> #include <net/inet_sock.h> #include <net/inet_hashtables.h> #include <linux/inetdevice.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <net/netfilter/ipv4/nf_defrag_ipv4.h> #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) #define XT_TPROXY_HAVE_IPV6 1 #include <net/if_inet6.h> #include <net/addrconf.h> #include <net/inet6_hashtables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif #include <net/netfilter/nf_tproxy.h> #include <linux/netfilter/xt_TPROXY.h> static unsigned int tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, u_int32_t mark_mask, u_int32_t mark_value) { const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp; struct sock *sk; hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); if (hp == NULL) return NF_DROP; /* check if there's an ongoing connection on the packet * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol, iph->saddr, iph->daddr, hp->source, hp->dest, skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED); laddr = nf_tproxy_laddr4(skb, laddr, iph->daddr); if (!lport) lport = hp->dest; /* UDP has no TCP_TIME_WAIT state, so we never enter here */ if (sk && sk->sk_state == TCP_TIME_WAIT) /* reopening a TIME_WAIT connection needs special handling */ sk = nf_tproxy_handle_time_wait4(net, skb, laddr, lport, sk); else if (!sk) /* no, there's no established connection, check if * there's a listener on the redirected addr/port */ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol, iph->saddr, laddr, hp->source, lport, skb->dev, NF_TPROXY_LOOKUP_LISTENER); /* NOTE: 
assign_sock consumes our sk reference */ if (sk && nf_tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~mark_mask) ^ mark_value; nf_tproxy_assign_sock(skb, sk); return NF_ACCEPT; } return NF_DROP; } static unsigned int tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi = par->targinfo; return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); } static unsigned int tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); } #ifdef XT_TPROXY_HAVE_IPV6 static unsigned int tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; struct udphdr _hdr, *hp; struct sock *sk; const struct in6_addr *laddr; __be16 lport; int thoff = 0; int tproto; tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); if (tproto < 0) return NF_DROP; hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); if (!hp) return NF_DROP; /* check if there's an ongoing connection on the packet * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED); laddr = nf_tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr); lport = tgi->lport ? 
tgi->lport : hp->dest; /* UDP has no TCP_TIME_WAIT state, so we never enter here */ if (sk && sk->sk_state == TCP_TIME_WAIT) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; /* reopening a TIME_WAIT connection needs special handling */ sk = nf_tproxy_handle_time_wait6(skb, tproto, thoff, xt_net(par), &tgi->laddr.in6, tgi->lport, sk); } else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, tproto, &iph->saddr, laddr, hp->source, lport, xt_in(par), NF_TPROXY_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ if (sk && nf_tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; nf_tproxy_assign_sock(skb, sk); return NF_ACCEPT; } return NF_DROP; } static int tproxy_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_ip6 *i = par->entryinfo; int err; err = nf_defrag_ipv6_enable(par->net); if (err) return err; if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && !(i->invflags & IP6T_INV_PROTO)) return 0; pr_info_ratelimited("Can be used only with -p tcp or -p udp\n"); return -EINVAL; } static void tproxy_tg6_destroy(const struct xt_tgdtor_param *par) { nf_defrag_ipv6_disable(par->net); } #endif static int tproxy_tg4_check(const struct xt_tgchk_param *par) { const struct ipt_ip *i = par->entryinfo; int err; err = nf_defrag_ipv4_enable(par->net); if (err) return err; if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && !(i->invflags & IPT_INV_PROTO)) return 0; pr_info_ratelimited("Can be used only with -p tcp or -p udp\n"); return -EINVAL; } static void tproxy_tg4_destroy(const struct xt_tgdtor_param *par) { nf_defrag_ipv4_disable(par->net); } static struct xt_target tproxy_tg_reg[] __read_mostly = { { .name = "TPROXY", .family = NFPROTO_IPV4, .table = "mangle", .target = 
tproxy_tg4_v0, .revision = 0, .targetsize = sizeof(struct xt_tproxy_target_info), .checkentry = tproxy_tg4_check, .destroy = tproxy_tg4_destroy, .hooks = 1 << NF_INET_PRE_ROUTING, .me = THIS_MODULE, }, { .name = "TPROXY", .family = NFPROTO_IPV4, .table = "mangle", .target = tproxy_tg4_v1, .revision = 1, .targetsize = sizeof(struct xt_tproxy_target_info_v1), .checkentry = tproxy_tg4_check, .destroy = tproxy_tg4_destroy, .hooks = 1 << NF_INET_PRE_ROUTING, .me = THIS_MODULE, }, #ifdef XT_TPROXY_HAVE_IPV6 { .name = "TPROXY", .family = NFPROTO_IPV6, .table = "mangle", .target = tproxy_tg6_v1, .revision = 1, .targetsize = sizeof(struct xt_tproxy_target_info_v1), .checkentry = tproxy_tg6_check, .destroy = tproxy_tg6_destroy, .hooks = 1 << NF_INET_PRE_ROUTING, .me = THIS_MODULE, }, #endif }; static int __init tproxy_tg_init(void) { return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg)); } static void __exit tproxy_tg_exit(void) { xt_unregister_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg)); } module_init(tproxy_tg_init); module_exit(tproxy_tg_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs"); MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module."); MODULE_ALIAS("ipt_TPROXY"); MODULE_ALIAS("ip6t_TPROXY");
47 6 18 98 38 192 192 50 25 199 43 42 133 5 5 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 
514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the UDP module. * * Version: @(#)udp.h 1.0.2 05/07/93 * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * * Fixes: * Alan Cox : Turned on udp checksums. I don't want to * chase 'memory corruption' bugs that aren't! 
*/ #ifndef _UDP_H #define _UDP_H #include <linux/list.h> #include <linux/bug.h> #include <net/inet_sock.h> #include <net/gso.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ip.h> #include <linux/ipv6.h> #include <linux/seq_file.h> #include <linux/poll.h> #include <linux/indirect_call_wrapper.h> /** * struct udp_skb_cb - UDP(-Lite) private variables * * @header: private variables used by IPv4/IPv6 * @cscov: checksum coverage length (UDP-Lite only) * @partial_cov: if set indicates partial csum coverage */ struct udp_skb_cb { union { struct inet_skb_parm h4; #if IS_ENABLED(CONFIG_IPV6) struct inet6_skb_parm h6; #endif } header; __u16 cscov; __u8 partial_cov; }; #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) /** * struct udp_hslot - UDP hash slot used by udp_table.hash/hash4 * * @head: head of list of sockets * @nulls_head: head of list of sockets, only used by hash4 * @count: number of sockets in 'head' list * @lock: spinlock protecting changes to head/count */ struct udp_hslot { union { struct hlist_head head; /* hash4 uses hlist_nulls to avoid moving wrongly onto another * hlist, because rehash() can happen with lookup(). 
*/ struct hlist_nulls_head nulls_head; }; int count; spinlock_t lock; } __aligned(2 * sizeof(long)); /** * struct udp_hslot_main - UDP hash slot used by udp_table.hash2 * * @hslot: basic hash slot * @hash4_cnt: number of sockets in hslot4 of the same * (local port, local address) */ struct udp_hslot_main { struct udp_hslot hslot; /* must be the first member */ #if !IS_ENABLED(CONFIG_BASE_SMALL) u32 hash4_cnt; #endif } __aligned(2 * sizeof(long)); #define UDP_HSLOT_MAIN(__hslot) ((struct udp_hslot_main *)(__hslot)) /** * struct udp_table - UDP table * * @hash: hash table, sockets are hashed on (local port) * @hash2: hash table, sockets are hashed on (local port, local address) * @hash4: hash table, connected sockets are hashed on * (local port, local address, remote port, remote address) * @mask: number of slots in hash tables, minus 1 * @log: log2(number of slots in hash table) */ struct udp_table { struct udp_hslot *hash; struct udp_hslot_main *hash2; #if !IS_ENABLED(CONFIG_BASE_SMALL) struct udp_hslot *hash4; #endif unsigned int mask; unsigned int log; }; extern struct udp_table udp_table; void udp_table_init(struct udp_table *, const char *); static inline struct udp_hslot *udp_hashslot(struct udp_table *table, const struct net *net, unsigned int num) { return &table->hash[udp_hashfn(net, num, table->mask)]; } /* * For secondary hash, net_hash_mix() is performed before calling * udp_hashslot2(), this explains difference with udp_hashslot() */ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table, unsigned int hash) { return &table->hash2[hash & table->mask].hslot; } #if IS_ENABLED(CONFIG_BASE_SMALL) static inline void udp_table_hash4_init(struct udp_table *table) { } static inline struct udp_hslot *udp_hashslot4(struct udp_table *table, unsigned int hash) { BUILD_BUG(); return NULL; } static inline bool udp_hashed4(const struct sock *sk) { return false; } static inline unsigned int udp_hash4_slot_size(void) { return 0; } static inline bool 
udp_has_hash4(const struct udp_hslot *hslot2) { return false; } static inline void udp_hash4_inc(struct udp_hslot *hslot2) { } static inline void udp_hash4_dec(struct udp_hslot *hslot2) { } #else /* !CONFIG_BASE_SMALL */ /* Must be called with table->hash2 initialized */ static inline void udp_table_hash4_init(struct udp_table *table) { table->hash4 = (void *)(table->hash2 + (table->mask + 1)); for (int i = 0; i <= table->mask; i++) { table->hash2[i].hash4_cnt = 0; INIT_HLIST_NULLS_HEAD(&table->hash4[i].nulls_head, i); table->hash4[i].count = 0; spin_lock_init(&table->hash4[i].lock); } } static inline struct udp_hslot *udp_hashslot4(struct udp_table *table, unsigned int hash) { return &table->hash4[hash & table->mask]; } static inline bool udp_hashed4(const struct sock *sk) { return !hlist_nulls_unhashed(&udp_sk(sk)->udp_lrpa_node); } static inline unsigned int udp_hash4_slot_size(void) { return sizeof(struct udp_hslot); } static inline bool udp_has_hash4(const struct udp_hslot *hslot2) { return UDP_HSLOT_MAIN(hslot2)->hash4_cnt; } static inline void udp_hash4_inc(struct udp_hslot *hslot2) { UDP_HSLOT_MAIN(hslot2)->hash4_cnt++; } static inline void udp_hash4_dec(struct udp_hslot *hslot2) { UDP_HSLOT_MAIN(hslot2)->hash4_cnt--; } #endif /* CONFIG_BASE_SMALL */ extern struct proto udp_prot; extern atomic_long_t udp_memory_allocated; DECLARE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); /* sysctl variables for udp */ extern long sysctl_udp_mem[3]; extern int sysctl_udp_rmem_min; extern int sysctl_udp_wmem_min; struct sk_buff; /* * Generic checksumming routines for UDP(-Lite) v4 and v6 */ static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb) { return (UDP_SKB_CB(skb)->cscov == skb->len ? 
__skb_checksum_complete(skb) : __skb_checksum_complete_head(skb, UDP_SKB_CB(skb)->cscov)); } static inline int udp_lib_checksum_complete(struct sk_buff *skb) { return !skb_csum_unnecessary(skb) && __udp_lib_checksum_complete(skb); } /** * udp_csum_outgoing - compute UDPv4/v6 checksum over fragments * @sk: socket we are writing to * @skb: sk_buff containing the filled-in UDP header * (checksum field must be zeroed out) */ static inline __wsum udp_csum_outgoing(struct sock *sk, struct sk_buff *skb) { __wsum csum = csum_partial(skb_transport_header(skb), sizeof(struct udphdr), 0); skb_queue_walk(&sk->sk_write_queue, skb) { csum = csum_add(csum, skb->csum); } return csum; } static inline __wsum udp_csum(struct sk_buff *skb) { __wsum csum = csum_partial(skb_transport_header(skb), sizeof(struct udphdr), skb->csum); for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) { csum = csum_add(csum, skb->csum); } return csum; } static inline __sum16 udp_v4_check(int len, __be32 saddr, __be32 daddr, __wsum base) { return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base); } void udp_set_csum(bool nocheck, struct sk_buff *skb, __be32 saddr, __be32 daddr, int len); static inline void udp_csum_pull_header(struct sk_buff *skb) { if (!skb->csum_valid && skb->ip_summed == CHECKSUM_NONE) skb->csum = csum_partial(skb->data, sizeof(struct udphdr), skb->csum); skb_pull_rcsum(skb, sizeof(struct udphdr)); UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr); } typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, __be16 dport); void udp_v6_early_demux(struct sk_buff *skb); INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features, bool is_ipv6); static inline void udp_lib_init_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); skb_queue_head_init(&up->reader_queue); up->forward_threshold = sk->sk_rcvbuf >> 2; set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); } /* 
hash routines shared between UDPv4/6 and UDP-Litev4/6 */ static inline int udp_lib_hash(struct sock *sk) { BUG(); return 0; } void udp_lib_unhash(struct sock *sk); void udp_lib_rehash(struct sock *sk, u16 new_hash, u16 new_hash4); u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport); static inline void udp_lib_close(struct sock *sk, long timeout) { sk_common_release(sk); } /* hash4 routines shared between UDPv4/6 */ #if IS_ENABLED(CONFIG_BASE_SMALL) static inline void udp_lib_hash4(struct sock *sk, u16 hash) { } static inline void udp4_hash4(struct sock *sk) { } #else /* !CONFIG_BASE_SMALL */ void udp_lib_hash4(struct sock *sk, u16 hash); void udp4_hash4(struct sock *sk); #endif /* CONFIG_BASE_SMALL */ int udp_lib_get_port(struct sock *sk, unsigned short snum, unsigned int hash2_nulladdr); u32 udp_flow_hashrnd(void); static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, int min, int max, bool use_eth) { u32 hash; if (min >= max) { /* Use default range */ inet_get_local_port_range(net, &min, &max); } hash = skb_get_hash(skb); if (unlikely(!hash)) { if (use_eth) { /* Can't find a normal hash, caller has indicated an * Ethernet packet so use that to compute a hash. */ hash = jhash(skb->data, 2 * ETH_ALEN, (__force u32) skb->protocol); } else { /* Can't derive any sort of hash for the packet, set * to some consistent random value. */ hash = udp_flow_hashrnd(); } } /* Since this is being sent on the wire obfuscate hash a bit * to minimize possibility that any useful information to an * attacker is leaked. Only upper 16 bits are relevant in the * computation for 16 bit port value. 
*/ hash ^= hash << 16; return htons((((u64) hash * (max - min)) >> 32) + min); } static inline int udp_rqueue_get(struct sock *sk) { return sk_rmem_alloc_get(sk) - READ_ONCE(udp_sk(sk)->forward_deficit); } static inline bool udp_sk_bound_dev_eq(const struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); #endif } /* net/ipv4/udp.c */ void udp_destruct_common(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off, int *err); static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, int *err) { int off = 0; return __skb_recv_udp(sk, flags, &off, err); } int udp_v4_early_demux(struct sk_buff *skb); bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_err(struct sk_buff *, u32); int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); void udp_splice_eof(struct socket *sock); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, int *karg); int udp_init_sock(struct sock *sk); int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t 
features, bool is_ipv6); int udp_lib_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int udp_lib_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen, int (*push_pending_frames)(struct sock *)); struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif); struct sock *__udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); /* UDP uses skb->dev_scratch to cache as much information as possible and avoid * possibly multiple cache miss on dequeue() */ struct udp_dev_scratch { /* skb->truesize and the stateless bit are embedded in a single field; * do not use a bitfield since the compiler emits better/smaller code * this way */ u32 _tsize_state; #if BITS_PER_LONG == 64 /* len and the bit needed to compute skb_csum_unnecessary * will be on cold cache lines at recvmsg time. * skb->len can be stored on 16 bits since the udp header has been * already validated and pulled. 
*/ u16 len; bool is_linear; bool csum_unnecessary; #endif }; static inline struct udp_dev_scratch *udp_skb_scratch(struct sk_buff *skb) { return (struct udp_dev_scratch *)&skb->dev_scratch; } #if BITS_PER_LONG == 64 static inline unsigned int udp_skb_len(struct sk_buff *skb) { return udp_skb_scratch(skb)->len; } static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) { return udp_skb_scratch(skb)->csum_unnecessary; } static inline bool udp_skb_is_linear(struct sk_buff *skb) { return udp_skb_scratch(skb)->is_linear; } #else static inline unsigned int udp_skb_len(struct sk_buff *skb) { return skb->len; } static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) { return skb_csum_unnecessary(skb); } static inline bool udp_skb_is_linear(struct sk_buff *skb) { return !skb_is_nonlinear(skb); } #endif static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, struct iov_iter *to) { return copy_to_iter_full(skb->data + off, len, to) ? 0 : -EFAULT; } /* * SNMP statistics for UDP and UDP-Lite */ #define UDP_INC_STATS(net, field, is_udplite) do { \ if (is_udplite) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ else SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) #define __UDP_INC_STATS(net, field, is_udplite) do { \ if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ else __SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) #define __UDP6_INC_STATS(net, field, is_udplite) do { \ if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_stats_in6, field);\ else __SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ } while(0) #define UDP6_INC_STATS(net, field, __lite) do { \ if (__lite) SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \ else SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ } while(0) #if IS_ENABLED(CONFIG_IPV6) #define __UDPX_MIB(sk, ipv4) \ ({ \ ipv4 ? (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ sock_net(sk)->mib.udp_statistics) : \ (IS_UDPLITE(sk) ? 
sock_net(sk)->mib.udplite_stats_in6 : \ sock_net(sk)->mib.udp_stats_in6); \ }) #else #define __UDPX_MIB(sk, ipv4) \ ({ \ IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ sock_net(sk)->mib.udp_statistics; \ }) #endif #define __UDPX_INC_STATS(sk, field) \ __SNMP_INC_STATS(__UDPX_MIB(sk, (sk)->sk_family == AF_INET), field) #ifdef CONFIG_PROC_FS struct udp_seq_afinfo { sa_family_t family; struct udp_table *udp_table; }; struct udp_iter_state { struct seq_net_private p; int bucket; }; void *udp_seq_start(struct seq_file *seq, loff_t *pos); void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos); void udp_seq_stop(struct seq_file *seq, void *v); extern const struct seq_operations udp_seq_ops; extern const struct seq_operations udp6_seq_ops; int udp4_proc_init(void); void udp4_proc_exit(void); #endif /* CONFIG_PROC_FS */ int udpv4_offload_init(void); void udp_init(void); DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); void udp_encap_enable(void); void udp_encap_disable(void); #if IS_ENABLED(CONFIG_IPV6) DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void); #endif static inline struct sk_buff *udp_rcv_segment(struct sock *sk, struct sk_buff *skb, bool ipv4) { netdev_features_t features = NETIF_F_SG; struct sk_buff *segs; /* Avoid csum recalculation by skb_segment unless userspace explicitly * asks for the final checksum values */ if (!inet_get_convert_csum(sk)) features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; /* UDP segmentation expects packets of type CHECKSUM_PARTIAL or * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial * packets in udp_gro_complete_segment. As does UDP GSO, verified by * udp_send_skb. But when those packets are looped in dev_loopback_xmit * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY. * Reset in this specific case, where PARTIAL is both correct and * required. 
*/ if (skb->pkt_type == PACKET_LOOPBACK) skb->ip_summed = CHECKSUM_PARTIAL; /* the GSO CB lays after the UDP one, no need to save and restore any * CB fragment */ segs = __skb_gso_segment(skb, features, false); if (IS_ERR_OR_NULL(segs)) { int segs_nr = skb_shinfo(skb)->gso_segs; atomic_add(segs_nr, &sk->sk_drops); SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr); kfree_skb(skb); return NULL; } consume_skb(skb); return segs; } static inline void udp_post_segment_fix_csum(struct sk_buff *skb) { /* UDP-lite can't land here - no GRO */ WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov); /* UDP packets generated with UDP_SEGMENT and traversing: * * UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx) * * can reach an UDP socket with CHECKSUM_NONE, because * __iptunnel_pull_header() converts CHECKSUM_PARTIAL into NONE. * SKB_GSO_UDP_L4 or SKB_GSO_FRAGLIST packets with no UDP tunnel will * have a valid checksum, as the GRO engine validates the UDP csum * before the aggregation and nobody strips such info in between. * Instead of adding another check in the tunnel fastpath, we can force * a valid csum after the segmentation. * Additionally fixup the UDP CB. */ UDP_SKB_CB(skb)->cscov = skb->len; if (skb->ip_summed == CHECKSUM_NONE && !skb->csum_valid) skb->csum_valid = 1; } #ifdef CONFIG_BPF_SYSCALL struct sk_psock; int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); #endif #endif /* _UDP_H */
245 238 7 77 61 945 13 591 1 174 19 2 766 1018 1024 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 /* SPDX-License-Identifier: GPL-2.0 */ #if !defined(_TRACE_KVMMMU_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KVMMMU_H #include <linux/tracepoint.h> #include <linux/trace_events.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM kvmmmu #define 
KVM_MMU_PAGE_FIELDS \ __field(__u8, mmu_valid_gen) \ __field(__u64, gfn) \ __field(__u32, role) \ __field(__u32, root_count) \ __field(bool, unsync) #define KVM_MMU_PAGE_ASSIGN(sp) \ __entry->mmu_valid_gen = sp->mmu_valid_gen; \ __entry->gfn = sp->gfn; \ __entry->role = sp->role.word; \ __entry->root_count = sp->root_count; \ __entry->unsync = sp->unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ const char *saved_ptr = trace_seq_buffer_ptr(p); \ static const char *access_str[] = { \ "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \ }; \ union kvm_mmu_page_role role; \ \ role.word = __entry->role; \ \ trace_seq_printf(p, "sp gen %u gfn %llx l%u %u-byte q%u%s %s%s" \ " %snxe %sad root %u %s%c", \ __entry->mmu_valid_gen, \ __entry->gfn, role.level, \ role.has_4_byte_gpte ? 4 : 8, \ role.quadrant, \ role.direct ? " direct" : "", \ access_str[role.access], \ role.invalid ? " invalid" : "", \ role.efer_nx ? "" : "!", \ role.ad_disabled ? "!" : "", \ __entry->root_count, \ __entry->unsync ? "unsync" : "sync", 0); \ saved_ptr; \ }) #define kvm_mmu_trace_pferr_flags \ { PFERR_PRESENT_MASK, "P" }, \ { PFERR_WRITE_MASK, "W" }, \ { PFERR_USER_MASK, "U" }, \ { PFERR_RSVD_MASK, "RSVD" }, \ { PFERR_FETCH_MASK, "F" } TRACE_DEFINE_ENUM(RET_PF_CONTINUE); TRACE_DEFINE_ENUM(RET_PF_RETRY); TRACE_DEFINE_ENUM(RET_PF_EMULATE); TRACE_DEFINE_ENUM(RET_PF_WRITE_PROTECTED); TRACE_DEFINE_ENUM(RET_PF_INVALID); TRACE_DEFINE_ENUM(RET_PF_FIXED); TRACE_DEFINE_ENUM(RET_PF_SPURIOUS); /* * A pagetable walk has started */ TRACE_EVENT( kvm_mmu_pagetable_walk, TP_PROTO(u64 addr, u32 pferr), TP_ARGS(addr, pferr), TP_STRUCT__entry( __field(__u64, addr) __field(__u32, pferr) ), TP_fast_assign( __entry->addr = addr; __entry->pferr = pferr; ), TP_printk("addr %llx pferr %x %s", __entry->addr, __entry->pferr, __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags)) ); /* We just walked a paging element */ TRACE_EVENT( kvm_mmu_paging_element, TP_PROTO(u64 pte, int level), TP_ARGS(pte, level), 
TP_STRUCT__entry( __field(__u64, pte) __field(__u32, level) ), TP_fast_assign( __entry->pte = pte; __entry->level = level; ), TP_printk("pte %llx level %u", __entry->pte, __entry->level) ); DECLARE_EVENT_CLASS(kvm_mmu_set_bit_class, TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), TP_ARGS(table_gfn, index, size), TP_STRUCT__entry( __field(__u64, gpa) ), TP_fast_assign( __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) + index * size; ), TP_printk("gpa %llx", __entry->gpa) ); /* We set a pte accessed bit */ DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_accessed_bit, TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), TP_ARGS(table_gfn, index, size) ); /* We set a pte dirty bit */ DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_dirty_bit, TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), TP_ARGS(table_gfn, index, size) ); TRACE_EVENT( kvm_mmu_walker_error, TP_PROTO(u32 pferr), TP_ARGS(pferr), TP_STRUCT__entry( __field(__u32, pferr) ), TP_fast_assign( __entry->pferr = pferr; ), TP_printk("pferr %x %s", __entry->pferr, __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags)) ); TRACE_EVENT( kvm_mmu_get_page, TP_PROTO(struct kvm_mmu_page *sp, bool created), TP_ARGS(sp, created), TP_STRUCT__entry( KVM_MMU_PAGE_FIELDS __field(bool, created) ), TP_fast_assign( KVM_MMU_PAGE_ASSIGN(sp) __entry->created = created; ), TP_printk("%s %s", KVM_MMU_PAGE_PRINTK(), __entry->created ? 
"new" : "existing") ); DECLARE_EVENT_CLASS(kvm_mmu_page_class, TP_PROTO(struct kvm_mmu_page *sp), TP_ARGS(sp), TP_STRUCT__entry( KVM_MMU_PAGE_FIELDS ), TP_fast_assign( KVM_MMU_PAGE_ASSIGN(sp) ), TP_printk("%s", KVM_MMU_PAGE_PRINTK()) ); DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_sync_page, TP_PROTO(struct kvm_mmu_page *sp), TP_ARGS(sp) ); DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page, TP_PROTO(struct kvm_mmu_page *sp), TP_ARGS(sp) ); DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, TP_PROTO(struct kvm_mmu_page *sp), TP_ARGS(sp) ); TRACE_EVENT( mark_mmio_spte, TP_PROTO(u64 *sptep, gfn_t gfn, u64 spte), TP_ARGS(sptep, gfn, spte), TP_STRUCT__entry( __field(void *, sptep) __field(gfn_t, gfn) __field(unsigned, access) __field(unsigned int, gen) ), TP_fast_assign( __entry->sptep = sptep; __entry->gfn = gfn; __entry->access = spte & ACC_ALL; __entry->gen = get_mmio_spte_generation(spte); ), TP_printk("sptep:%p gfn %llx access %x gen %x", __entry->sptep, __entry->gfn, __entry->access, __entry->gen) ); TRACE_EVENT( handle_mmio_page_fault, TP_PROTO(u64 addr, gfn_t gfn, unsigned access), TP_ARGS(addr, gfn, access), TP_STRUCT__entry( __field(u64, addr) __field(gfn_t, gfn) __field(unsigned, access) ), TP_fast_assign( __entry->addr = addr; __entry->gfn = gfn; __entry->access = access; ), TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, __entry->access) ); TRACE_EVENT( fast_page_fault, TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, u64 *sptep, u64 old_spte, int ret), TP_ARGS(vcpu, fault, sptep, old_spte, ret), TP_STRUCT__entry( __field(int, vcpu_id) __field(gpa_t, cr2_or_gpa) __field(u64, error_code) __field(u64 *, sptep) __field(u64, old_spte) __field(u64, new_spte) __field(int, ret) ), TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; __entry->cr2_or_gpa = fault->addr; __entry->error_code = fault->error_code; __entry->sptep = sptep; __entry->old_spte = old_spte; __entry->new_spte = *sptep; __entry->ret = ret; ), 
TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx" " new %llx spurious %d fixed %d", __entry->vcpu_id, __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|", kvm_mmu_trace_pferr_flags), __entry->sptep, __entry->old_spte, __entry->new_spte, __entry->ret == RET_PF_SPURIOUS, __entry->ret == RET_PF_FIXED ) ); TRACE_EVENT( kvm_mmu_zap_all_fast, TP_PROTO(struct kvm *kvm), TP_ARGS(kvm), TP_STRUCT__entry( __field(__u8, mmu_valid_gen) __field(unsigned int, mmu_used_pages) ), TP_fast_assign( __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen; __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages; ), TP_printk("kvm-mmu-valid-gen %u used_pages %x", __entry->mmu_valid_gen, __entry->mmu_used_pages ) ); TRACE_EVENT( check_mmio_spte, TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen), TP_ARGS(spte, kvm_gen, spte_gen), TP_STRUCT__entry( __field(unsigned int, kvm_gen) __field(unsigned int, spte_gen) __field(u64, spte) ), TP_fast_assign( __entry->kvm_gen = kvm_gen; __entry->spte_gen = spte_gen; __entry->spte = spte; ), TP_printk("spte %llx kvm_gen %x spte-gen %x valid %d", __entry->spte, __entry->kvm_gen, __entry->spte_gen, __entry->kvm_gen == __entry->spte_gen ) ); TRACE_EVENT( kvm_mmu_set_spte, TP_PROTO(int level, gfn_t gfn, u64 *sptep), TP_ARGS(level, gfn, sptep), TP_STRUCT__entry( __field(u64, gfn) __field(u64, spte) __field(u64, sptep) __field(u8, level) /* These depend on page entry type, so compute them now. */ __field(bool, r) __field(bool, x) __field(signed char, u) ), TP_fast_assign( __entry->gfn = gfn; __entry->spte = *sptep; __entry->sptep = virt_to_phys(sptep); __entry->level = level; __entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK); __entry->x = is_executable_pte(__entry->spte); __entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1; ), TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx", __entry->gfn, __entry->spte, __entry->r ? "r" : "-", __entry->spte & PT_WRITABLE_MASK ? 
"w" : "-", __entry->x ? "x" : "-", __entry->u == -1 ? "" : (__entry->u ? "u" : "-"), __entry->level, __entry->sptep ) ); TRACE_EVENT( kvm_mmu_spte_requested, TP_PROTO(struct kvm_page_fault *fault), TP_ARGS(fault), TP_STRUCT__entry( __field(u64, gfn) __field(u64, pfn) __field(u8, level) ), TP_fast_assign( __entry->gfn = fault->gfn; __entry->pfn = fault->pfn | (fault->gfn & (KVM_PAGES_PER_HPAGE(fault->goal_level) - 1)); __entry->level = fault->goal_level; ), TP_printk("gfn %llx pfn %llx level %d", __entry->gfn, __entry->pfn, __entry->level ) ); TRACE_EVENT( kvm_tdp_mmu_spte_changed, TP_PROTO(int as_id, gfn_t gfn, int level, u64 old_spte, u64 new_spte), TP_ARGS(as_id, gfn, level, old_spte, new_spte), TP_STRUCT__entry( __field(u64, gfn) __field(u64, old_spte) __field(u64, new_spte) /* Level cannot be larger than 5 on x86, so it fits in a u8. */ __field(u8, level) /* as_id can only be 0 or 1 x86, so it fits in a u8. */ __field(u8, as_id) ), TP_fast_assign( __entry->gfn = gfn; __entry->old_spte = old_spte; __entry->new_spte = new_spte; __entry->level = level; __entry->as_id = as_id; ), TP_printk("as id %d gfn %llx level %d old_spte %llx new_spte %llx", __entry->as_id, __entry->gfn, __entry->level, __entry->old_spte, __entry->new_spte ) ); TRACE_EVENT( kvm_mmu_split_huge_page, TP_PROTO(u64 gfn, u64 spte, int level, int errno), TP_ARGS(gfn, spte, level, errno), TP_STRUCT__entry( __field(u64, gfn) __field(u64, spte) __field(int, level) __field(int, errno) ), TP_fast_assign( __entry->gfn = gfn; __entry->spte = spte; __entry->level = level; __entry->errno = errno; ), TP_printk("gfn %llx spte %llx level %d errno %d", __entry->gfn, __entry->spte, __entry->level, __entry->errno) ); #endif /* _TRACE_KVMMMU_H */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH mmu #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE mmutrace /* This part must be outside protection */ #include <trace/define_trace.h>
98 98 99 98 12 87 81 107 107 36 22 7 80 101 101 308 310 15 4 50 252 33 33 33 4 4 2 2 2 2 2 2 1 52 23 3 4 7 4 3 2 4 7 4 30 2 14 13 1 13 1 10 4 10 4 11 3 10 4 1 1 24 23 1 13 13 21 19 1 11 2 1 1 27 3 11 12 1 12 1 11 2 11 2 8 5 9 4 11 11 10 4 1 91 77 13 32 14 116 3 101 109 110 107 61 53 9 45 16 36 25 56 2 3 57 4 61 47 14 51 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 
446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 
946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 Nicira, Inc. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/in.h> #include <linux/if_arp.h> #include <linux/init.h> #include <linux/in6.h> #include <linux/inetdevice.h> #include <linux/netfilter_ipv4.h> #include <linux/etherdevice.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/static_key.h> #include <net/ip.h> #include <net/icmp.h> #include <net/protocol.h> #include <net/ip_tunnels.h> #include <net/ip6_tunnel.h> #include <net/ip6_checksum.h> #include <net/arp.h> #include <net/checksum.h> #include <net/dsfield.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/dst_metadata.h> #include <net/geneve.h> #include <net/vxlan.h> #include <net/erspan.h> const struct ip_tunnel_encap_ops __rcu * iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; EXPORT_SYMBOL(iptun_encaps); const struct ip6_tnl_encap_ops __rcu * ip6tun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; EXPORT_SYMBOL(ip6tun_encaps); void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet) { int pkt_len = skb->len - skb_inner_network_offset(skb); struct net *net = dev_net(rt->dst.dev); struct net_device *dev = skb->dev; struct iphdr *iph; int err; skb_scrub_packet(skb, xnet); skb_clear_hash_if_not_l4(skb); skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); /* Push down and install the IP header. */ skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); iph = ip_hdr(skb); iph->version = 4; iph->ihl = sizeof(struct iphdr) >> 2; iph->frag_off = ip_mtu_locked(&rt->dst) ? 
0 : df; iph->protocol = proto; iph->tos = tos; iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out(net, sk, skb); if (dev) { if (unlikely(net_xmit_eval(err))) pkt_len = 0; iptunnel_xmit_stats(dev, pkt_len); } } EXPORT_SYMBOL_GPL(iptunnel_xmit); int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, bool raw_proto, bool xnet) { if (unlikely(!pskb_may_pull(skb, hdr_len))) return -ENOMEM; skb_pull_rcsum(skb, hdr_len); if (!raw_proto && inner_proto == htons(ETH_P_TEB)) { struct ethhdr *eh; if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) return -ENOMEM; eh = (struct ethhdr *)skb->data; if (likely(eth_proto_is_802_3(eh->h_proto))) skb->protocol = eh->h_proto; else skb->protocol = htons(ETH_P_802_2); } else { skb->protocol = inner_proto; } skb_clear_hash_if_not_l4(skb); __vlan_hwaccel_clear_tag(skb); skb_set_queue_mapping(skb, 0); skb_scrub_packet(skb, xnet); return iptunnel_pull_offloads(skb); } EXPORT_SYMBOL_GPL(__iptunnel_pull_header); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags) { IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { }; struct metadata_dst *res; struct ip_tunnel_info *dst, *src; if (!md || md->type != METADATA_IP_TUNNEL || md->u.tun_info.mode & IP_TUNNEL_INFO_TX) return NULL; src = &md->u.tun_info; res = metadata_dst_alloc(src->options_len, METADATA_IP_TUNNEL, flags); if (!res) return NULL; dst = &res->u.tun_info; dst->key.tun_id = src->key.tun_id; if (src->mode & IP_TUNNEL_INFO_IPV6) memcpy(&dst->key.u.ipv6.dst, &src->key.u.ipv6.src, sizeof(struct in6_addr)); else dst->key.u.ipv4.dst = src->key.u.ipv4.src; ip_tunnel_flags_copy(dst->key.tun_flags, src->key.tun_flags); dst->mode = src->mode | IP_TUNNEL_INFO_TX; ip_tunnel_info_opts_set(dst, ip_tunnel_info_opts(src), src->options_len, tun_flags); return res; } EXPORT_SYMBOL_GPL(iptunnel_metadata_reply); int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask) { int 
err; if (likely(!skb->encapsulation)) { skb_reset_inner_headers(skb); skb->encapsulation = 1; } if (skb_is_gso(skb)) { err = skb_header_unclone(skb, GFP_ATOMIC); if (unlikely(err)) return err; skb_shinfo(skb)->gso_type |= gso_type_mask; return 0; } if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_NONE; /* We clear encapsulation here to prevent badly-written * drivers potentially deciding to offload an inner checksum * if we set CHECKSUM_PARTIAL on the outer header. * This should go away when the drivers are all fixed. */ skb->encapsulation = 0; } return 0; } EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); /** * iptunnel_pmtud_build_icmp() - Build ICMP error message for PMTUD * @skb: Original packet with L2 header * @mtu: MTU value for ICMP error * * Return: length on success, negative error code if message couldn't be built. */ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) { const struct iphdr *iph = ip_hdr(skb); struct icmphdr *icmph; struct iphdr *niph; struct ethhdr eh; int len, err; if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr))) return -EINVAL; skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); pskb_pull(skb, ETH_HLEN); skb_reset_network_header(skb); err = pskb_trim(skb, 576 - sizeof(*niph) - sizeof(*icmph)); if (err) return err; len = skb->len + sizeof(*icmph); err = skb_cow(skb, sizeof(*niph) + sizeof(*icmph) + ETH_HLEN); if (err) return err; icmph = skb_push(skb, sizeof(*icmph)); *icmph = (struct icmphdr) { .type = ICMP_DEST_UNREACH, .code = ICMP_FRAG_NEEDED, .checksum = 0, .un.frag.__unused = 0, .un.frag.mtu = htons(mtu), }; icmph->checksum = csum_fold(skb_checksum(skb, 0, len, 0)); skb_reset_transport_header(skb); niph = skb_push(skb, sizeof(*niph)); *niph = (struct iphdr) { .ihl = sizeof(*niph) / 4u, .version = 4, .tos = 0, .tot_len = htons(len + sizeof(*niph)), .id = 0, .frag_off = htons(IP_DF), .ttl = iph->ttl, .protocol = IPPROTO_ICMP, .saddr = iph->daddr, .daddr = iph->saddr, }; ip_send_check(niph); 
skb_reset_network_header(skb); skb->ip_summed = CHECKSUM_NONE; eth_header(skb, skb->dev, ntohs(eh.h_proto), eh.h_source, eh.h_dest, 0); skb_reset_mac_header(skb); return skb->len; } /** * iptunnel_pmtud_check_icmp() - Trigger ICMP reply if needed and allowed * @skb: Buffer being sent by encapsulation, L2 headers expected * @mtu: Network MTU for path * * Return: 0 for no ICMP reply, length if built, negative value on error. */ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) { const struct icmphdr *icmph = icmp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); if (mtu < 576 || iph->frag_off != htons(IP_DF)) return 0; if (ipv4_is_lbcast(iph->daddr) || ipv4_is_multicast(iph->daddr) || ipv4_is_zeronet(iph->saddr) || ipv4_is_loopback(iph->saddr) || ipv4_is_lbcast(iph->saddr) || ipv4_is_multicast(iph->saddr)) return 0; if (iph->protocol == IPPROTO_ICMP && icmp_is_err(icmph->type)) return 0; return iptunnel_pmtud_build_icmp(skb, mtu); } #if IS_ENABLED(CONFIG_IPV6) /** * iptunnel_pmtud_build_icmpv6() - Build ICMPv6 error message for PMTUD * @skb: Original packet with L2 header * @mtu: MTU value for ICMPv6 error * * Return: length on success, negative error code if message couldn't be built. 
*/ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct icmp6hdr *icmp6h; struct ipv6hdr *nip6h; struct ethhdr eh; int len, err; __wsum csum; if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr))) return -EINVAL; skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); pskb_pull(skb, ETH_HLEN); skb_reset_network_header(skb); err = pskb_trim(skb, IPV6_MIN_MTU - sizeof(*nip6h) - sizeof(*icmp6h)); if (err) return err; len = skb->len + sizeof(*icmp6h); err = skb_cow(skb, sizeof(*nip6h) + sizeof(*icmp6h) + ETH_HLEN); if (err) return err; icmp6h = skb_push(skb, sizeof(*icmp6h)); *icmp6h = (struct icmp6hdr) { .icmp6_type = ICMPV6_PKT_TOOBIG, .icmp6_code = 0, .icmp6_cksum = 0, .icmp6_mtu = htonl(mtu), }; skb_reset_transport_header(skb); nip6h = skb_push(skb, sizeof(*nip6h)); *nip6h = (struct ipv6hdr) { .priority = 0, .version = 6, .flow_lbl = { 0 }, .payload_len = htons(len), .nexthdr = IPPROTO_ICMPV6, .hop_limit = ip6h->hop_limit, .saddr = ip6h->daddr, .daddr = ip6h->saddr, }; skb_reset_network_header(skb); csum = skb_checksum(skb, skb_transport_offset(skb), len, 0); icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len, IPPROTO_ICMPV6, csum); skb->ip_summed = CHECKSUM_NONE; eth_header(skb, skb->dev, ntohs(eh.h_proto), eh.h_source, eh.h_dest, 0); skb_reset_mac_header(skb); return skb->len; } /** * iptunnel_pmtud_check_icmpv6() - Trigger ICMPv6 reply if needed and allowed * @skb: Buffer being sent by encapsulation, L2 headers expected * @mtu: Network MTU for path * * Return: 0 for no ICMPv6 reply, length if built, negative value on error. 
*/ static int iptunnel_pmtud_check_icmpv6(struct sk_buff *skb, int mtu) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); int stype = ipv6_addr_type(&ip6h->saddr); u8 proto = ip6h->nexthdr; __be16 frag_off; int offset; if (mtu < IPV6_MIN_MTU) return 0; if (stype == IPV6_ADDR_ANY || stype == IPV6_ADDR_MULTICAST || stype == IPV6_ADDR_LOOPBACK) return 0; offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &proto, &frag_off); if (offset < 0 || (frag_off & htons(~0x7))) return 0; if (proto == IPPROTO_ICMPV6) { struct icmp6hdr *icmp6h; if (!pskb_may_pull(skb, skb_network_header(skb) + offset + 1 - skb->data)) return 0; icmp6h = (struct icmp6hdr *)(skb_network_header(skb) + offset); if (icmpv6_is_err(icmp6h->icmp6_type) || icmp6h->icmp6_type == NDISC_REDIRECT) return 0; } return iptunnel_pmtud_build_icmpv6(skb, mtu); } #endif /* IS_ENABLED(CONFIG_IPV6) */ /** * skb_tunnel_check_pmtu() - Check, update PMTU and trigger ICMP reply as needed * @skb: Buffer being sent by encapsulation, L2 headers expected * @encap_dst: Destination for tunnel encapsulation (outer IP) * @headroom: Encapsulation header size, bytes * @reply: Build matching ICMP or ICMPv6 message as a result * * L2 tunnel implementations that can carry IP and can be directly bridged * (currently UDP tunnels) can't always rely on IP forwarding paths to handle * PMTU discovery. In the bridged case, ICMP or ICMPv6 messages need to be built * based on payload and sent back by the encapsulation itself. * * For routable interfaces, we just need to update the PMTU for the destination. 
* * Return: 0 if ICMP error not needed, length if built, negative value on error */ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, int headroom, bool reply) { u32 mtu = dst_mtu(encap_dst) - headroom; if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) || (!skb_is_gso(skb) && (skb->len - skb_network_offset(skb)) <= mtu)) return 0; skb_dst_update_pmtu_no_confirm(skb, mtu); if (!reply || skb->pkt_type == PACKET_HOST) return 0; if (skb->protocol == htons(ETH_P_IP)) return iptunnel_pmtud_check_icmp(skb, mtu); #if IS_ENABLED(CONFIG_IPV6) if (skb->protocol == htons(ETH_P_IPV6)) return iptunnel_pmtud_check_icmpv6(skb, mtu); #endif return 0; } EXPORT_SYMBOL(skb_tunnel_check_pmtu); static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = { [LWTUNNEL_IP_UNSPEC] = { .strict_start_type = LWTUNNEL_IP_OPTS }, [LWTUNNEL_IP_ID] = { .type = NLA_U64 }, [LWTUNNEL_IP_DST] = { .type = NLA_U32 }, [LWTUNNEL_IP_SRC] = { .type = NLA_U32 }, [LWTUNNEL_IP_TTL] = { .type = NLA_U8 }, [LWTUNNEL_IP_TOS] = { .type = NLA_U8 }, [LWTUNNEL_IP_FLAGS] = { .type = NLA_U16 }, [LWTUNNEL_IP_OPTS] = { .type = NLA_NESTED }, }; static const struct nla_policy ip_opts_policy[LWTUNNEL_IP_OPTS_MAX + 1] = { [LWTUNNEL_IP_OPTS_GENEVE] = { .type = NLA_NESTED }, [LWTUNNEL_IP_OPTS_VXLAN] = { .type = NLA_NESTED }, [LWTUNNEL_IP_OPTS_ERSPAN] = { .type = NLA_NESTED }, }; static const struct nla_policy geneve_opt_policy[LWTUNNEL_IP_OPT_GENEVE_MAX + 1] = { [LWTUNNEL_IP_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, [LWTUNNEL_IP_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, [LWTUNNEL_IP_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 }, }; static const struct nla_policy vxlan_opt_policy[LWTUNNEL_IP_OPT_VXLAN_MAX + 1] = { [LWTUNNEL_IP_OPT_VXLAN_GBP] = { .type = NLA_U32 }, }; static const struct nla_policy erspan_opt_policy[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1] = { [LWTUNNEL_IP_OPT_ERSPAN_VER] = { .type = NLA_U8 }, [LWTUNNEL_IP_OPT_ERSPAN_INDEX] = { .type = NLA_U32 }, 
[LWTUNNEL_IP_OPT_ERSPAN_DIR] = { .type = NLA_U8 }, [LWTUNNEL_IP_OPT_ERSPAN_HWID] = { .type = NLA_U8 }, }; static int ip_tun_parse_opts_geneve(struct nlattr *attr, struct ip_tunnel_info *info, int opts_len, struct netlink_ext_ack *extack) { struct nlattr *tb[LWTUNNEL_IP_OPT_GENEVE_MAX + 1]; int data_len, err; err = nla_parse_nested(tb, LWTUNNEL_IP_OPT_GENEVE_MAX, attr, geneve_opt_policy, extack); if (err) return err; if (!tb[LWTUNNEL_IP_OPT_GENEVE_CLASS] || !tb[LWTUNNEL_IP_OPT_GENEVE_TYPE] || !tb[LWTUNNEL_IP_OPT_GENEVE_DATA]) return -EINVAL; attr = tb[LWTUNNEL_IP_OPT_GENEVE_DATA]; data_len = nla_len(attr); if (data_len % 4) return -EINVAL; if (info) { struct geneve_opt *opt = ip_tunnel_info_opts(info) + opts_len; memcpy(opt->opt_data, nla_data(attr), data_len); opt->length = data_len / 4; attr = tb[LWTUNNEL_IP_OPT_GENEVE_CLASS]; opt->opt_class = nla_get_be16(attr); attr = tb[LWTUNNEL_IP_OPT_GENEVE_TYPE]; opt->type = nla_get_u8(attr); __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags); } return sizeof(struct geneve_opt) + data_len; } static int ip_tun_parse_opts_vxlan(struct nlattr *attr, struct ip_tunnel_info *info, int opts_len, struct netlink_ext_ack *extack) { struct nlattr *tb[LWTUNNEL_IP_OPT_VXLAN_MAX + 1]; int err; err = nla_parse_nested(tb, LWTUNNEL_IP_OPT_VXLAN_MAX, attr, vxlan_opt_policy, extack); if (err) return err; if (!tb[LWTUNNEL_IP_OPT_VXLAN_GBP]) return -EINVAL; if (info) { struct vxlan_metadata *md = ip_tunnel_info_opts(info) + opts_len; attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP]; md->gbp = nla_get_u32(attr); md->gbp &= VXLAN_GBP_MASK; __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags); } return sizeof(struct vxlan_metadata); } static int ip_tun_parse_opts_erspan(struct nlattr *attr, struct ip_tunnel_info *info, int opts_len, struct netlink_ext_ack *extack) { struct nlattr *tb[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1]; int err; u8 ver; err = nla_parse_nested(tb, LWTUNNEL_IP_OPT_ERSPAN_MAX, attr, erspan_opt_policy, extack); if (err) return err; if 
(!tb[LWTUNNEL_IP_OPT_ERSPAN_VER]) return -EINVAL; ver = nla_get_u8(tb[LWTUNNEL_IP_OPT_ERSPAN_VER]); if (ver == 1) { if (!tb[LWTUNNEL_IP_OPT_ERSPAN_INDEX]) return -EINVAL; } else if (ver == 2) { if (!tb[LWTUNNEL_IP_OPT_ERSPAN_DIR] || !tb[LWTUNNEL_IP_OPT_ERSPAN_HWID]) return -EINVAL; } else { return -EINVAL; } if (info) { struct erspan_metadata *md = ip_tunnel_info_opts(info) + opts_len; md->version = ver; if (ver == 1) { attr = tb[LWTUNNEL_IP_OPT_ERSPAN_INDEX]; md->u.index = nla_get_be32(attr); } else { attr = tb[LWTUNNEL_IP_OPT_ERSPAN_DIR]; md->u.md2.dir = nla_get_u8(attr); attr = tb[LWTUNNEL_IP_OPT_ERSPAN_HWID]; set_hwid(&md->u.md2, nla_get_u8(attr)); } __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags); } return sizeof(struct erspan_metadata); } static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, struct netlink_ext_ack *extack) { int err, rem, opt_len, opts_len = 0; struct nlattr *nla; u32 type = 0; if (!attr) return 0; err = nla_validate(nla_data(attr), nla_len(attr), LWTUNNEL_IP_OPTS_MAX, ip_opts_policy, extack); if (err) return err; nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) { switch (nla_type(nla)) { case LWTUNNEL_IP_OPTS_GENEVE: if (type && type != IP_TUNNEL_GENEVE_OPT_BIT) return -EINVAL; opt_len = ip_tun_parse_opts_geneve(nla, info, opts_len, extack); if (opt_len < 0) return opt_len; opts_len += opt_len; if (opts_len > IP_TUNNEL_OPTS_MAX) return -EINVAL; type = IP_TUNNEL_GENEVE_OPT_BIT; break; case LWTUNNEL_IP_OPTS_VXLAN: if (type) return -EINVAL; opt_len = ip_tun_parse_opts_vxlan(nla, info, opts_len, extack); if (opt_len < 0) return opt_len; opts_len += opt_len; type = IP_TUNNEL_VXLAN_OPT_BIT; break; case LWTUNNEL_IP_OPTS_ERSPAN: if (type) return -EINVAL; opt_len = ip_tun_parse_opts_erspan(nla, info, opts_len, extack); if (opt_len < 0) return opt_len; opts_len += opt_len; type = IP_TUNNEL_ERSPAN_OPT_BIT; break; default: return -EINVAL; } } return opts_len; } static int ip_tun_get_optlen(struct nlattr *attr, 
struct netlink_ext_ack *extack) { return ip_tun_parse_opts(attr, NULL, extack); } static int ip_tun_set_opts(struct nlattr *attr, struct ip_tunnel_info *info, struct netlink_ext_ack *extack) { return ip_tun_parse_opts(attr, info, extack); } static int ip_tun_build_state(struct net *net, struct nlattr *attr, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) { struct nlattr *tb[LWTUNNEL_IP_MAX + 1]; struct lwtunnel_state *new_state; struct ip_tunnel_info *tun_info; int err, opt_len; err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, extack); if (err < 0) return err; opt_len = ip_tun_get_optlen(tb[LWTUNNEL_IP_OPTS], extack); if (opt_len < 0) return opt_len; new_state = lwtunnel_state_alloc(sizeof(*tun_info) + opt_len); if (!new_state) return -ENOMEM; new_state->type = LWTUNNEL_ENCAP_IP; tun_info = lwt_tun_info(new_state); err = ip_tun_set_opts(tb[LWTUNNEL_IP_OPTS], tun_info, extack); if (err < 0) { lwtstate_free(new_state); return err; } #ifdef CONFIG_DST_CACHE err = dst_cache_init(&tun_info->dst_cache, GFP_KERNEL); if (err) { lwtstate_free(new_state); return err; } #endif if (tb[LWTUNNEL_IP_ID]) tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP_ID]); if (tb[LWTUNNEL_IP_DST]) tun_info->key.u.ipv4.dst = nla_get_in_addr(tb[LWTUNNEL_IP_DST]); if (tb[LWTUNNEL_IP_SRC]) tun_info->key.u.ipv4.src = nla_get_in_addr(tb[LWTUNNEL_IP_SRC]); if (tb[LWTUNNEL_IP_TTL]) tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP_TTL]); if (tb[LWTUNNEL_IP_TOS]) tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]); if (tb[LWTUNNEL_IP_FLAGS]) { IP_TUNNEL_DECLARE_FLAGS(flags); ip_tunnel_flags_from_be16(flags, nla_get_be16(tb[LWTUNNEL_IP_FLAGS])); ip_tunnel_clear_options_present(flags); ip_tunnel_flags_or(tun_info->key.tun_flags, tun_info->key.tun_flags, flags); } tun_info->mode = IP_TUNNEL_INFO_TX; tun_info->options_len = opt_len; *ts = new_state; return 0; } static void ip_tun_destroy_state(struct lwtunnel_state *lwtstate) { 
#ifdef CONFIG_DST_CACHE struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); dst_cache_destroy(&tun_info->dst_cache); #endif } static int ip_tun_fill_encap_opts_geneve(struct sk_buff *skb, struct ip_tunnel_info *tun_info) { struct geneve_opt *opt; struct nlattr *nest; int offset = 0; nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_GENEVE); if (!nest) return -ENOMEM; while (tun_info->options_len > offset) { opt = ip_tunnel_info_opts(tun_info) + offset; if (nla_put_be16(skb, LWTUNNEL_IP_OPT_GENEVE_CLASS, opt->opt_class) || nla_put_u8(skb, LWTUNNEL_IP_OPT_GENEVE_TYPE, opt->type) || nla_put(skb, LWTUNNEL_IP_OPT_GENEVE_DATA, opt->length * 4, opt->opt_data)) { nla_nest_cancel(skb, nest); return -ENOMEM; } offset += sizeof(*opt) + opt->length * 4; } nla_nest_end(skb, nest); return 0; } static int ip_tun_fill_encap_opts_vxlan(struct sk_buff *skb, struct ip_tunnel_info *tun_info) { struct vxlan_metadata *md; struct nlattr *nest; nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_VXLAN); if (!nest) return -ENOMEM; md = ip_tunnel_info_opts(tun_info); if (nla_put_u32(skb, LWTUNNEL_IP_OPT_VXLAN_GBP, md->gbp)) { nla_nest_cancel(skb, nest); return -ENOMEM; } nla_nest_end(skb, nest); return 0; } static int ip_tun_fill_encap_opts_erspan(struct sk_buff *skb, struct ip_tunnel_info *tun_info) { struct erspan_metadata *md; struct nlattr *nest; nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_ERSPAN); if (!nest) return -ENOMEM; md = ip_tunnel_info_opts(tun_info); if (nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_VER, md->version)) goto err; if (md->version == 1 && nla_put_be32(skb, LWTUNNEL_IP_OPT_ERSPAN_INDEX, md->u.index)) goto err; if (md->version == 2 && (nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_DIR, md->u.md2.dir) || nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_HWID, get_hwid(&md->u.md2)))) goto err; nla_nest_end(skb, nest); return 0; err: nla_nest_cancel(skb, nest); return -ENOMEM; } static int ip_tun_fill_encap_opts(struct sk_buff *skb, int type, struct ip_tunnel_info *tun_info) { 
struct nlattr *nest; int err = 0; if (!ip_tunnel_is_options_present(tun_info->key.tun_flags)) return 0; nest = nla_nest_start_noflag(skb, type); if (!nest) return -ENOMEM; if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_info->key.tun_flags)) err = ip_tun_fill_encap_opts_geneve(skb, tun_info); else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_info->key.tun_flags)) err = ip_tun_fill_encap_opts_vxlan(skb, tun_info); else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags)) err = ip_tun_fill_encap_opts_erspan(skb, tun_info); if (err) { nla_nest_cancel(skb, nest); return err; } nla_nest_end(skb, nest); return 0; } static int ip_tun_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id, LWTUNNEL_IP_PAD) || nla_put_in_addr(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) || nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) || nla_put_be16(skb, LWTUNNEL_IP_FLAGS, ip_tunnel_flags_to_be16(tun_info->key.tun_flags)) || ip_tun_fill_encap_opts(skb, LWTUNNEL_IP_OPTS, tun_info)) return -ENOMEM; return 0; } static int ip_tun_opts_nlsize(struct ip_tunnel_info *info) { int opt_len; if (!ip_tunnel_is_options_present(info->key.tun_flags)) return 0; opt_len = nla_total_size(0); /* LWTUNNEL_IP_OPTS */ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) { struct geneve_opt *opt; int offset = 0; opt_len += nla_total_size(0); /* LWTUNNEL_IP_OPTS_GENEVE */ while (info->options_len > offset) { opt = ip_tunnel_info_opts(info) + offset; opt_len += nla_total_size(2) /* OPT_GENEVE_CLASS */ + nla_total_size(1) /* OPT_GENEVE_TYPE */ + nla_total_size(opt->length * 4); /* OPT_GENEVE_DATA */ offset += sizeof(*opt) + opt->length * 4; } } else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) { opt_len += nla_total_size(0) /* 
LWTUNNEL_IP_OPTS_VXLAN */ + nla_total_size(4); /* OPT_VXLAN_GBP */ } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) { struct erspan_metadata *md = ip_tunnel_info_opts(info); opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_ERSPAN */ + nla_total_size(1) /* OPT_ERSPAN_VER */ + (md->version == 1 ? nla_total_size(4) /* OPT_ERSPAN_INDEX (v1) */ : nla_total_size(1) + nla_total_size(1)); /* OPT_ERSPAN_DIR + HWID (v2) */ } return opt_len; } static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate) { return nla_total_size_64bit(8) /* LWTUNNEL_IP_ID */ + nla_total_size(4) /* LWTUNNEL_IP_DST */ + nla_total_size(4) /* LWTUNNEL_IP_SRC */ + nla_total_size(1) /* LWTUNNEL_IP_TOS */ + nla_total_size(1) /* LWTUNNEL_IP_TTL */ + nla_total_size(2) /* LWTUNNEL_IP_FLAGS */ + ip_tun_opts_nlsize(lwt_tun_info(lwtstate)); /* LWTUNNEL_IP_OPTS */ } static int ip_tun_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) { struct ip_tunnel_info *info_a = lwt_tun_info(a); struct ip_tunnel_info *info_b = lwt_tun_info(b); return memcmp(info_a, info_b, sizeof(info_a->key)) || info_a->mode != info_b->mode || info_a->options_len != info_b->options_len || memcmp(ip_tunnel_info_opts(info_a), ip_tunnel_info_opts(info_b), info_a->options_len); } static const struct lwtunnel_encap_ops ip_tun_lwt_ops = { .build_state = ip_tun_build_state, .destroy_state = ip_tun_destroy_state, .fill_encap = ip_tun_fill_encap_info, .get_encap_size = ip_tun_encap_nlsize, .cmp_encap = ip_tun_cmp_encap, .owner = THIS_MODULE, }; static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { [LWTUNNEL_IP6_UNSPEC] = { .strict_start_type = LWTUNNEL_IP6_OPTS }, [LWTUNNEL_IP6_ID] = { .type = NLA_U64 }, [LWTUNNEL_IP6_DST] = { .len = sizeof(struct in6_addr) }, [LWTUNNEL_IP6_SRC] = { .len = sizeof(struct in6_addr) }, [LWTUNNEL_IP6_HOPLIMIT] = { .type = NLA_U8 }, [LWTUNNEL_IP6_TC] = { .type = NLA_U8 }, [LWTUNNEL_IP6_FLAGS] = { .type = NLA_U16 }, [LWTUNNEL_IP6_OPTS] = { .type = NLA_NESTED }, }; 
static int ip6_tun_build_state(struct net *net, struct nlattr *attr, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) { struct nlattr *tb[LWTUNNEL_IP6_MAX + 1]; struct lwtunnel_state *new_state; struct ip_tunnel_info *tun_info; int err, opt_len; err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy, extack); if (err < 0) return err; opt_len = ip_tun_get_optlen(tb[LWTUNNEL_IP6_OPTS], extack); if (opt_len < 0) return opt_len; new_state = lwtunnel_state_alloc(sizeof(*tun_info) + opt_len); if (!new_state) return -ENOMEM; new_state->type = LWTUNNEL_ENCAP_IP6; tun_info = lwt_tun_info(new_state); err = ip_tun_set_opts(tb[LWTUNNEL_IP6_OPTS], tun_info, extack); if (err < 0) { lwtstate_free(new_state); return err; } if (tb[LWTUNNEL_IP6_ID]) tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP6_ID]); if (tb[LWTUNNEL_IP6_DST]) tun_info->key.u.ipv6.dst = nla_get_in6_addr(tb[LWTUNNEL_IP6_DST]); if (tb[LWTUNNEL_IP6_SRC]) tun_info->key.u.ipv6.src = nla_get_in6_addr(tb[LWTUNNEL_IP6_SRC]); if (tb[LWTUNNEL_IP6_HOPLIMIT]) tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP6_HOPLIMIT]); if (tb[LWTUNNEL_IP6_TC]) tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]); if (tb[LWTUNNEL_IP6_FLAGS]) { IP_TUNNEL_DECLARE_FLAGS(flags); __be16 data; data = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]); ip_tunnel_flags_from_be16(flags, data); ip_tunnel_clear_options_present(flags); ip_tunnel_flags_or(tun_info->key.tun_flags, tun_info->key.tun_flags, flags); } tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6; tun_info->options_len = opt_len; *ts = new_state; return 0; } static int ip6_tun_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id, LWTUNNEL_IP6_PAD) || nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) || nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || 
nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) || nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, ip_tunnel_flags_to_be16(tun_info->key.tun_flags)) || ip_tun_fill_encap_opts(skb, LWTUNNEL_IP6_OPTS, tun_info)) return -ENOMEM; return 0; } static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate) { return nla_total_size_64bit(8) /* LWTUNNEL_IP6_ID */ + nla_total_size(16) /* LWTUNNEL_IP6_DST */ + nla_total_size(16) /* LWTUNNEL_IP6_SRC */ + nla_total_size(1) /* LWTUNNEL_IP6_HOPLIMIT */ + nla_total_size(1) /* LWTUNNEL_IP6_TC */ + nla_total_size(2) /* LWTUNNEL_IP6_FLAGS */ + ip_tun_opts_nlsize(lwt_tun_info(lwtstate)); /* LWTUNNEL_IP6_OPTS */ } static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = { .build_state = ip6_tun_build_state, .fill_encap = ip6_tun_fill_encap_info, .get_encap_size = ip6_tun_encap_nlsize, .cmp_encap = ip_tun_cmp_encap, .owner = THIS_MODULE, }; void __init ip_tunnel_core_init(void) { /* If you land here, make sure whether increasing ip_tunnel_info's * options_len is a reasonable choice with its usage in front ends * (f.e., it's part of flow keys, etc). */ BUILD_BUG_ON(IP_TUNNEL_OPTS_MAX != 255); lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP); lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6); } DEFINE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt); EXPORT_SYMBOL(ip_tunnel_metadata_cnt); void ip_tunnel_need_metadata(void) { static_branch_inc(&ip_tunnel_metadata_cnt); } EXPORT_SYMBOL_GPL(ip_tunnel_need_metadata); void ip_tunnel_unneed_metadata(void) { static_branch_dec(&ip_tunnel_metadata_cnt); } EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata); /* Returns either the correct skb->protocol value, or 0 if invalid. 
*/ __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb) { if (skb_network_header(skb) >= skb->head && (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) && ip_hdr(skb)->version == 4) return htons(ETH_P_IP); if (skb_network_header(skb) >= skb->head && (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) && ipv6_hdr(skb)->version == 6) return htons(ETH_P_IPV6); return 0; } EXPORT_SYMBOL(ip_tunnel_parse_protocol); const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol }; EXPORT_SYMBOL(ip_tunnel_header_ops); /* This function returns true when ENCAP attributes are present in the nl msg */ bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *encap) { bool ret = false; memset(encap, 0, sizeof(*encap)); if (!data) return ret; if (data[IFLA_IPTUN_ENCAP_TYPE]) { ret = true; encap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); } if (data[IFLA_IPTUN_ENCAP_FLAGS]) { ret = true; encap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); } if (data[IFLA_IPTUN_ENCAP_SPORT]) { ret = true; encap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); } if (data[IFLA_IPTUN_ENCAP_DPORT]) { ret = true; encap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); } return ret; } EXPORT_SYMBOL_GPL(ip_tunnel_netlink_encap_parms); void ip_tunnel_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm_kern *parms) { if (data[IFLA_IPTUN_LINK]) parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); if (data[IFLA_IPTUN_LOCAL]) parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]); if (data[IFLA_IPTUN_REMOTE]) parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]); if (data[IFLA_IPTUN_TTL]) { parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); if (parms->iph.ttl) parms->iph.frag_off = htons(IP_DF); } if (data[IFLA_IPTUN_TOS]) parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]); if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) parms->iph.frag_off = 
htons(IP_DF); if (data[IFLA_IPTUN_FLAGS]) { __be16 flags; flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); ip_tunnel_flags_from_be16(parms->i_flags, flags); } if (data[IFLA_IPTUN_PROTO]) parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); } EXPORT_SYMBOL_GPL(ip_tunnel_netlink_parms);
4 7 6 3 11 11 7 12 12 11 8 12 12 7 5 2 6 1 1 6 1 1 1 1 1 7 7 7 1 11 11 7 74 74 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 // SPDX-License-Identifier: GPL-2.0 #include <linux/in.h> #include <linux/inet.h> #include <linux/list.h> #include <linux/module.h> #include <linux/net.h> #include <linux/proc_fs.h> #include <linux/rculist.h> #include <linux/seq_file.h> #include <linux/socket.h> #include <net/inet_sock.h> #include <net/kcm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/tcp.h> #ifdef CONFIG_PROC_FS static struct kcm_mux *kcm_get_first(struct seq_file *seq) { 
struct net *net = seq_file_net(seq); struct kcm_net *knet = net_generic(net, kcm_net_id); return list_first_or_null_rcu(&knet->mux_list, struct kcm_mux, kcm_mux_list); } static struct kcm_mux *kcm_get_next(struct kcm_mux *mux) { struct kcm_net *knet = mux->knet; return list_next_or_null_rcu(&knet->mux_list, &mux->kcm_mux_list, struct kcm_mux, kcm_mux_list); } static struct kcm_mux *kcm_get_idx(struct seq_file *seq, loff_t pos) { struct net *net = seq_file_net(seq); struct kcm_net *knet = net_generic(net, kcm_net_id); struct kcm_mux *m; list_for_each_entry_rcu(m, &knet->mux_list, kcm_mux_list) { if (!pos) return m; --pos; } return NULL; } static void *kcm_seq_next(struct seq_file *seq, void *v, loff_t *pos) { void *p; if (v == SEQ_START_TOKEN) p = kcm_get_first(seq); else p = kcm_get_next(v); ++*pos; return p; } static void *kcm_seq_start(struct seq_file *seq, loff_t *pos) __acquires(rcu) { rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; else return kcm_get_idx(seq, *pos - 1); } static void kcm_seq_stop(struct seq_file *seq, void *v) __releases(rcu) { rcu_read_unlock(); } struct kcm_proc_mux_state { struct seq_net_private p; int idx; }; static void kcm_format_mux_header(struct seq_file *seq) { struct net *net = seq_file_net(seq); struct kcm_net *knet = net_generic(net, kcm_net_id); seq_printf(seq, "*** KCM statistics (%d MUX) ****\n", knet->count); seq_printf(seq, "%-14s %-10s %-16s %-10s %-16s %-8s %-8s %-8s %-8s %s", "Object", "RX-Msgs", "RX-Bytes", "TX-Msgs", "TX-Bytes", "Recv-Q", "Rmem", "Send-Q", "Smem", "Status"); /* XXX: pdsts header stuff here */ seq_puts(seq, "\n"); } static void kcm_format_sock(struct kcm_sock *kcm, struct seq_file *seq, int i, int *len) { seq_printf(seq, " kcm-%-7u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8s ", kcm->index, kcm->stats.rx_msgs, kcm->stats.rx_bytes, kcm->stats.tx_msgs, kcm->stats.tx_bytes, kcm->sk.sk_receive_queue.qlen, sk_rmem_alloc_get(&kcm->sk), kcm->sk.sk_write_queue.qlen, "-"); if (kcm->tx_psock) 
seq_printf(seq, "Psck-%u ", kcm->tx_psock->index); if (kcm->tx_wait) seq_puts(seq, "TxWait "); if (kcm->tx_wait_more) seq_puts(seq, "WMore "); if (kcm->rx_wait) seq_puts(seq, "RxWait "); seq_puts(seq, "\n"); } static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, int i, int *len) { seq_printf(seq, " psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ", psock->index, psock->strp.stats.msgs, psock->strp.stats.bytes, psock->stats.tx_msgs, psock->stats.tx_bytes, psock->sk->sk_receive_queue.qlen, atomic_read(&psock->sk->sk_rmem_alloc), psock->sk->sk_write_queue.qlen, refcount_read(&psock->sk->sk_wmem_alloc)); if (psock->done) seq_puts(seq, "Done "); if (psock->tx_stopped) seq_puts(seq, "TxStop "); if (psock->strp.stopped) seq_puts(seq, "RxStop "); if (psock->tx_kcm) seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index); if (!psock->strp.paused && !psock->ready_rx_msg) { if (psock->sk->sk_receive_queue.qlen) { if (psock->strp.need_bytes) seq_printf(seq, "RxWait=%u ", psock->strp.need_bytes); else seq_printf(seq, "RxWait "); } } else { if (psock->strp.paused) seq_puts(seq, "RxPause "); if (psock->ready_rx_msg) seq_puts(seq, "RdyRx "); } seq_puts(seq, "\n"); } static void kcm_format_mux(struct kcm_mux *mux, loff_t idx, struct seq_file *seq) { int i, len; struct kcm_sock *kcm; struct kcm_psock *psock; /* mux information */ seq_printf(seq, "%-6s%-8s %-10llu %-16llu %-10llu %-16llu %-8s %-8s %-8s %-8s ", "mux", "", mux->stats.rx_msgs, mux->stats.rx_bytes, mux->stats.tx_msgs, mux->stats.tx_bytes, "-", "-", "-", "-"); seq_printf(seq, "KCMs: %d, Psocks %d\n", mux->kcm_socks_cnt, mux->psocks_cnt); /* kcm sock information */ i = 0; spin_lock_bh(&mux->lock); list_for_each_entry(kcm, &mux->kcm_socks, kcm_sock_list) { kcm_format_sock(kcm, seq, i, &len); i++; } i = 0; list_for_each_entry(psock, &mux->psocks, psock_list) { kcm_format_psock(psock, seq, i, &len); i++; } spin_unlock_bh(&mux->lock); } static int kcm_seq_show(struct seq_file *seq, void *v) { 
struct kcm_proc_mux_state *mux_state; mux_state = seq->private; if (v == SEQ_START_TOKEN) { mux_state->idx = 0; kcm_format_mux_header(seq); } else { kcm_format_mux(v, mux_state->idx, seq); mux_state->idx++; } return 0; } static const struct seq_operations kcm_seq_ops = { .show = kcm_seq_show, .start = kcm_seq_start, .next = kcm_seq_next, .stop = kcm_seq_stop, }; static int kcm_stats_seq_show(struct seq_file *seq, void *v) { struct kcm_psock_stats psock_stats; struct kcm_mux_stats mux_stats; struct strp_aggr_stats strp_stats; struct kcm_mux *mux; struct kcm_psock *psock; struct net *net = seq->private; struct kcm_net *knet = net_generic(net, kcm_net_id); memset(&mux_stats, 0, sizeof(mux_stats)); memset(&psock_stats, 0, sizeof(psock_stats)); memset(&strp_stats, 0, sizeof(strp_stats)); mutex_lock(&knet->mutex); aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats); aggregate_psock_stats(&knet->aggregate_psock_stats, &psock_stats); aggregate_strp_stats(&knet->aggregate_strp_stats, &strp_stats); list_for_each_entry(mux, &knet->mux_list, kcm_mux_list) { spin_lock_bh(&mux->lock); aggregate_mux_stats(&mux->stats, &mux_stats); aggregate_psock_stats(&mux->aggregate_psock_stats, &psock_stats); aggregate_strp_stats(&mux->aggregate_strp_stats, &strp_stats); list_for_each_entry(psock, &mux->psocks, psock_list) { aggregate_psock_stats(&psock->stats, &psock_stats); save_strp_stats(&psock->strp, &strp_stats); } spin_unlock_bh(&mux->lock); } mutex_unlock(&knet->mutex); seq_printf(seq, "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s\n", "MUX", "RX-Msgs", "RX-Bytes", "TX-Msgs", "TX-Bytes", "TX-Retries", "Attach", "Unattach", "UnattchRsvd", "RX-RdyDrops"); seq_printf(seq, "%-8s %-10llu %-16llu %-10llu %-16llu %-10u %-10u %-10u %-10u %-10u\n", "", mux_stats.rx_msgs, mux_stats.rx_bytes, mux_stats.tx_msgs, mux_stats.tx_bytes, mux_stats.tx_retries, mux_stats.psock_attach, mux_stats.psock_unattach_rsvd, mux_stats.psock_unattach, mux_stats.rx_ready_drops); seq_printf(seq, 
"%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", "Psock", "RX-Msgs", "RX-Bytes", "TX-Msgs", "TX-Bytes", "Reserved", "Unreserved", "RX-Aborts", "RX-Intr", "RX-Unrecov", "RX-MemFail", "RX-NeedMor", "RX-BadLen", "RX-TooBig", "RX-Timeout", "TX-Aborts"); seq_printf(seq, "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", "", strp_stats.msgs, strp_stats.bytes, psock_stats.tx_msgs, psock_stats.tx_bytes, psock_stats.reserved, psock_stats.unreserved, strp_stats.aborts, strp_stats.interrupted, strp_stats.unrecov_intr, strp_stats.mem_fail, strp_stats.need_more_hdr, strp_stats.bad_hdr_len, strp_stats.msg_too_big, strp_stats.msg_timeouts, psock_stats.tx_aborts); return 0; } static int kcm_proc_init_net(struct net *net) { if (!proc_create_net_single("kcm_stats", 0444, net->proc_net, kcm_stats_seq_show, NULL)) goto out_kcm_stats; if (!proc_create_net("kcm", 0444, net->proc_net, &kcm_seq_ops, sizeof(struct kcm_proc_mux_state))) goto out_kcm; return 0; out_kcm: remove_proc_entry("kcm_stats", net->proc_net); out_kcm_stats: return -ENOMEM; } static void kcm_proc_exit_net(struct net *net) { remove_proc_entry("kcm", net->proc_net); remove_proc_entry("kcm_stats", net->proc_net); } static struct pernet_operations kcm_net_ops = { .init = kcm_proc_init_net, .exit = kcm_proc_exit_net, }; int __init kcm_proc_init(void) { return register_pernet_subsys(&kcm_net_ops); } void __exit kcm_proc_exit(void) { unregister_pernet_subsys(&kcm_net_ops); } #endif /* CONFIG_PROC_FS */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_IO_H
#define _ASM_X86_IO_H

/*
 * This file contains the definitions for the x86 IO instructions
 * inb/inw/inl/outb/outw/outl and the "string versions" of the same
 * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
 * versions of the single-IO instructions (inb_p/inw_p/..).
 *
 * This file is not meant to be obfuscating: it's just complicated
 * to (a) handle it all in a way that makes gcc able to optimize it
 * as well as possible and (b) trying to avoid writing the same thing
 * over and over again with slight variations and possibly making a
 * mistake somewhere.
 */

/*
 * Thanks to James van Artsdalen for a better timing-fix than
 * the two short jumps: using outb's to a nonexistent port seems
 * to guarantee better timings even on fast machines.
 *
 * On the other hand, I'd like to be sure of a non-existent port:
 * I feel a bit unsafe about using 0x80 (should be safe, though)
 *
 *		Linus
 */

 /*
  *  Bit simplified and optimized by Jan Hubicka
  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
  *
  *  isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
  *  isa_read[wl] and isa_write[wl] fixed
  *  - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  */

#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/cc_platform.h>
#include <asm/page.h>
#include <asm/early_ioremap.h>
#include <asm/pgtable_types.h>
#include <asm/shared/io.h>
#include <asm/special_insns.h>

/*
 * build_mmio_read() defines a static inline accessor @name that loads a
 * value of @type from @addr with a single "mov<size>" instruction.  @reg is
 * the output constraint; @barrier is either a clobber list (":"memory"" for
 * the plain accessors below) or empty (for the "__"-prefixed ones).
 */
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \
:"m" (*(volatile type __force *)addr) barrier); return ret; }

/*
 * build_mmio_write() is the store counterpart: it defines @name, which
 * writes @val to @addr with a single "mov<size>" instruction.
 */
#define build_mmio_write(name, size, type, reg, barrier) \
static inline void name(type val, volatile void __iomem *addr) \
{ asm volatile("mov" size " %0,%1": :reg (val), \
"m" (*(volatile type __force *)addr) barrier); }

build_mmio_read(readb, "b", unsigned char, "=q", :"memory")
build_mmio_read(readw, "w", unsigned short, "=r", :"memory")
build_mmio_read(readl, "l", unsigned int, "=r", :"memory")

build_mmio_read(__readb, "b", unsigned char, "=q", )
build_mmio_read(__readw, "w", unsigned short, "=r", )
build_mmio_read(__readl, "l", unsigned int, "=r", )

build_mmio_write(writeb, "b", unsigned char, "q", :"memory")
build_mmio_write(writew, "w", unsigned short, "r", :"memory")
build_mmio_write(writel, "l", unsigned int, "r", :"memory")

build_mmio_write(__writeb, "b", unsigned char, "q", )
build_mmio_write(__writew, "w", unsigned short, "r", )
build_mmio_write(__writel, "l", unsigned int, "r", )

/*
 * The *_relaxed() and __raw_*() names below all resolve to the "__"
 * variants, i.e. the accessors built without the "memory" clobber.
 */
#define readb readb
#define readw readw
#define readl readl
#define readb_relaxed(a) __readb(a)
#define readw_relaxed(a) __readw(a)
#define readl_relaxed(a) __readl(a)
#define __raw_readb __readb
#define __raw_readw __readw
#define __raw_readl __readl

#define writeb writeb
#define writew writew
#define writel writel
#define writeb_relaxed(v, a) __writeb(v, a)
#define writew_relaxed(v, a) __writew(v, a)
#define writel_relaxed(v, a) __writel(v, a)
#define __raw_writeb __writeb
#define __raw_writew __writew
#define __raw_writel __writel

/* 64-bit accessors are only generated when CONFIG_X86_64 is set. */
#ifdef CONFIG_X86_64

build_mmio_read(readq, "q", u64, "=r", :"memory")
build_mmio_read(__readq, "q", u64, "=r", )
build_mmio_write(writeq, "q", u64, "r", :"memory")
build_mmio_write(__writeq, "q", u64, "r", )

#define readq_relaxed(a)	__readq(a)
#define writeq_relaxed(v, a)	__writeq(v, a)

#define __raw_readq		__readq
#define __raw_writeq		__writeq

/* Let people know that we have them */
#define readq			readq
#define writeq			writeq

#endif

#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);

/**
 *	virt_to_phys	-	map virtual addresses to physical
 *	@address: address to remap
 *
 *	The returned physical address is the physical (CPU) mapping for
 *	the memory address given. It is only valid to use this function on
 *	addresses directly mapped or allocated via kmalloc.
 *
 *	This function does not give bus mappings for DMA transfers. In
 *	almost all conceivable cases a device driver should not be using
 *	this function
 */

static inline phys_addr_t virt_to_phys(volatile void *address)
{
	return __pa(address);
}
#define virt_to_phys virt_to_phys

/**
 *	phys_to_virt	-	map physical address to virtual
 *	@address: address to remap
 *
 *	The returned virtual address is a current CPU mapping for
 *	the memory address given. It is only valid to use this function on
 *	addresses that have a kernel mapping
 *
 *	This function does not handle bus mappings for DMA transfers. In
 *	almost all conceivable cases a device driver should not be using
 *	this function
 */

static inline void *phys_to_virt(phys_addr_t address)
{
	return __va(address);
}
#define phys_to_virt phys_to_virt

/*
 * ISA I/O bus memory addresses are 1:1 with the physical address.
 * However, we truncate the address to unsigned int to avoid undesirable
 * promotions in legacy drivers.
 */
static inline unsigned int isa_virt_to_bus(volatile void *address)
{
	return (unsigned int)virt_to_phys(address);
}
#define isa_bus_to_virt		phys_to_virt

/*
 * The default ioremap() behavior is non-cached; if you need something
 * else, you probably want one of the following.
 */
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
#define ioremap_uc ioremap_uc
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
#define ioremap_cache ioremap_cache
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val);
#define ioremap_prot ioremap_prot
extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size);
#define ioremap_encrypted ioremap_encrypted

/**
 * ioremap     -   map bus memory into CPU space
 * @offset:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * If the area you are trying to map is a PCI BAR you should have a
 * look at pci_iomap().
 */
void __iomem *ioremap(resource_size_t offset, unsigned long size);
#define ioremap ioremap

extern void iounmap(volatile void __iomem *addr);
#define iounmap iounmap

#ifdef __KERNEL__

void memcpy_fromio(void *, const volatile void __iomem *, size_t);
void memcpy_toio(volatile void __iomem *, const void *, size_t);
void memset_io(volatile void __iomem *, int, size_t);

#define memcpy_fromio memcpy_fromio
#define memcpy_toio memcpy_toio
#define memset_io memset_io

#ifdef CONFIG_X86_64
/*
 * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for
 * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy
 * loop.
 */
/* Copy @count 32-bit words from @from to @to using "rep movsl". */
static inline void __iowrite32_copy(void __iomem *to, const void *from,
				    size_t count)
{
	asm volatile("rep ; movsl"
		     : "=&c"(count), "=&D"(to), "=&S"(from)
		     : "0"(count), "1"(to), "2"(from)
		     : "memory");
}
#define __iowrite32_copy __iowrite32_copy
#endif

/*
 * ISA space is 'always mapped' on a typical x86 system, no need to
 * explicitly ioremap() it. The fact that the ISA IO space is mapped
 * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
 * are physical addresses. The following constant pointer can be
 * used as the IO-area pointer (it can be iounmapped as well, so the
 * analogy with PCI is quite large):
 */
#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))

#endif /* __KERNEL__ */

extern void native_io_delay(void);

extern int io_delay_type;
extern void io_delay_init(void);

#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
#else

/*
 * Non-paravirt I/O delay: one native_io_delay() call, or four in total
 * when REALLY_SLOW_IO is defined.
 */
static inline void slow_down_io(void)
{
	native_io_delay();
#ifdef REALLY_SLOW_IO
	native_io_delay();
	native_io_delay();
	native_io_delay();
#endif
}

#endif

/*
 * BUILDIO(bwl, type) generates four helpers for one access width:
 *   out<bwl>_p() / in<bwl>_p()  - a single port access followed by
 *                                 slow_down_io();
 *   outs<bwl>()  / ins<bwl>()   - transfer @count elements between @addr and
 *                                 @port, either one element at a time when
 *                                 cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)
 *                                 is true, or with a "rep" string instruction
 *                                 otherwise.
 */
#define BUILDIO(bwl, type)						\
static inline void out##bwl##_p(type value, u16 port)			\
{									\
	out##bwl(value, port);						\
	slow_down_io();							\
}									\
									\
static inline type in##bwl##_p(u16 port)				\
{									\
	type value = in##bwl(port);					\
	slow_down_io();							\
	return value;							\
}									\
									\
static inline void outs##bwl(u16 port, const void *addr, unsigned long count) \
{									\
	if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) {		\
		type *value = (type *)addr;				\
		while (count) {						\
			out##bwl(*value, port);				\
			value++;					\
			count--;					\
		}							\
	} else {							\
		asm volatile("rep; outs" #bwl				\
			     : "+S"(addr), "+c"(count)			\
			     : "d"(port) : "memory");			\
	}								\
}									\
									\
static inline void ins##bwl(u16 port, void *addr, unsigned long count)	\
{									\
	if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) {		\
		type *value = (type *)addr;				\
		while (count) {						\
			*value = in##bwl(port);				\
			value++;					\
			count--;					\
		}							\
	} else {							\
		asm volatile("rep; ins" #bwl				\
			     : "+D"(addr), "+c"(count)			\
			     : "d"(port) : "memory");			\
	}								\
}

BUILDIO(b, u8)
BUILDIO(w, u16)
BUILDIO(l, u32)
#undef BUILDIO

#define inb_p inb_p
#define inw_p inw_p
#define inl_p inl_p
#define insb insb
#define insw insw
#define insl insl

#define outb_p outb_p
#define outw_p outw_p
#define outl_p outl_p
#define outsb outsb
#define outsw outsw
#define outsl outsl

extern void *xlate_dev_mem_ptr(phys_addr_t phys);
extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);

#define xlate_dev_mem_ptr xlate_dev_mem_ptr
#define unxlate_dev_mem_ptr unxlate_dev_mem_ptr

extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
				enum page_cache_mode pcm);
extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
#define ioremap_wc ioremap_wc
extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size);
#define ioremap_wt ioremap_wt

extern bool is_early_ioremap_ptep(pte_t *ptep);

#define IO_SPACE_LIMIT 0xffff

#include <asm-generic/io.h>
#undef PCI_IOBASE

#ifdef CONFIG_MTRR
extern int __must_check arch_phys_wc_index(int handle);
#define arch_phys_wc_index arch_phys_wc_index

extern int __must_check arch_phys_wc_add(unsigned long base,
					 unsigned long size);
extern void arch_phys_wc_del(int handle);
#define arch_phys_wc_add arch_phys_wc_add
#endif

#ifdef CONFIG_X86_PAT
extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size);
extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size);
#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
#endif

#ifdef CONFIG_AMD_MEM_ENCRYPT
extern bool arch_memremap_can_ram_remap(resource_size_t offset,
					unsigned long size,
					unsigned long flags);
#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap

extern bool phys_mem_access_encrypted(unsigned long phys_addr,
				      unsigned long size);
#else
/* Stub used when CONFIG_AMD_MEM_ENCRYPT is not set: always returns true. */
static inline bool phys_mem_access_encrypted(unsigned long phys_addr,
					     unsigned long size)
{
	return true;
}
#endif

/**
 * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units
 * @dst: destination, in MMIO space (must be 512-bit aligned)
 * @src: source
 * @count: number of 512 bits quantities to submit
 *
 * Submit data from kernel space to MMIO space, in units of 512 bits at a
 * time.  Order of access is not guaranteed, nor is a memory barrier
 * performed afterwards.
 *
 * Warning: Do not use this helper unless your driver has checked that the CPU
 * instruction is supported on the platform.
 */
static inline void iosubmit_cmds512(void __iomem *dst, const void *src,
				    size_t count)
{
	const u8 *from = src;
	const u8 *end = from + count * 64;

	/* @dst stays fixed; only the source advances, 64 bytes per iteration. */
	while (from < end) {
		movdir64b_io(dst, from);
		from += 64;
	}
}

#endif /* _ASM_X86_IO_H */
1 3 12 14 4 23 18 5 18 18 18 2 10 12 18 6 12 1 11 1 11 12 13 10 2 2 1 1 1 15 5 1 9 8 1 7 2 1 4 5 4 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/fsnotify.h> #include <linux/namei.h> #include <linux/io_uring.h> #include <uapi/linux/io_uring.h> #include "../fs/internal.h" #include "io_uring.h" #include "rsrc.h" #include "openclose.h" struct io_open { struct file *file; int dfd; u32 file_slot; struct filename *filename; struct open_how how; unsigned long nofile; }; struct io_close { struct file *file; int fd; u32 file_slot; }; struct io_fixed_install { struct file *file; unsigned int o_flags; }; static bool io_openat_force_async(struct io_open *open) { /* * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open, * it'll always -EAGAIN. 
Note that we test for __O_TMPFILE because * O_TMPFILE includes O_DIRECTORY, which isn't a flag we need to force * async for. */ return open->how.flags & (O_TRUNC | O_CREAT | __O_TMPFILE); } static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_open *open = io_kiocb_to_cmd(req, struct io_open); const char __user *fname; int ret; if (unlikely(sqe->buf_index)) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; /* open.how should be already initialised */ if (!(open->how.flags & O_PATH) && force_o_largefile()) open->how.flags |= O_LARGEFILE; open->dfd = READ_ONCE(sqe->fd); fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); open->filename = getname(fname); if (IS_ERR(open->filename)) { ret = PTR_ERR(open->filename); open->filename = NULL; return ret; } open->file_slot = READ_ONCE(sqe->file_index); if (open->file_slot && (open->how.flags & O_CLOEXEC)) return -EINVAL; open->nofile = rlimit(RLIMIT_NOFILE); req->flags |= REQ_F_NEED_CLEANUP; if (io_openat_force_async(open)) req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_open *open = io_kiocb_to_cmd(req, struct io_open); u64 mode = READ_ONCE(sqe->len); u64 flags = READ_ONCE(sqe->open_flags); open->how = build_open_how(flags, mode); return __io_openat_prep(req, sqe); } int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_open *open = io_kiocb_to_cmd(req, struct io_open); struct open_how __user *how; size_t len; int ret; how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); len = READ_ONCE(sqe->len); if (len < OPEN_HOW_SIZE_VER0) return -EINVAL; ret = copy_struct_from_user(&open->how, sizeof(open->how), how, len); if (ret) return ret; return __io_openat_prep(req, sqe); } int io_openat2(struct io_kiocb *req, unsigned int issue_flags) { struct io_open *open = io_kiocb_to_cmd(req, struct io_open); struct open_flags op; struct file *file; bool resolve_nonblock, 
nonblock_set; bool fixed = !!open->file_slot; int ret; ret = build_open_flags(&open->how, &op); if (ret) goto err; nonblock_set = op.open_flag & O_NONBLOCK; resolve_nonblock = open->how.resolve & RESOLVE_CACHED; if (issue_flags & IO_URING_F_NONBLOCK) { WARN_ON_ONCE(io_openat_force_async(open)); op.lookup_flags |= LOOKUP_CACHED; op.open_flag |= O_NONBLOCK; } if (!fixed) { ret = __get_unused_fd_flags(open->how.flags, open->nofile); if (ret < 0) goto err; } file = do_filp_open(open->dfd, open->filename, &op); if (IS_ERR(file)) { /* * We could hang on to this 'fd' on retrying, but seems like * marginal gain for something that is now known to be a slower * path. So just put it, and we'll get a new one when we retry. */ if (!fixed) put_unused_fd(ret); ret = PTR_ERR(file); /* only retry if RESOLVE_CACHED wasn't already set by application */ if (ret == -EAGAIN && (!resolve_nonblock && (issue_flags & IO_URING_F_NONBLOCK))) return -EAGAIN; goto err; } if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set) file->f_flags &= ~O_NONBLOCK; if (!fixed) fd_install(ret, file); else ret = io_fixed_fd_install(req, issue_flags, file, open->file_slot); err: putname(open->filename); req->flags &= ~REQ_F_NEED_CLEANUP; if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_OK; } int io_openat(struct io_kiocb *req, unsigned int issue_flags) { return io_openat2(req, issue_flags); } void io_open_cleanup(struct io_kiocb *req) { struct io_open *open = io_kiocb_to_cmd(req, struct io_open); if (open->filename) putname(open->filename); } int __io_close_fixed(struct io_ring_ctx *ctx, unsigned int issue_flags, unsigned int offset) { int ret; io_ring_submit_lock(ctx, issue_flags); ret = io_fixed_fd_remove(ctx, offset); io_ring_submit_unlock(ctx, issue_flags); return ret; } static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) { struct io_close *close = io_kiocb_to_cmd(req, struct io_close); return __io_close_fixed(req->ctx, issue_flags, close->file_slot 
- 1); } int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_close *close = io_kiocb_to_cmd(req, struct io_close); if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index) return -EINVAL; if (req->flags & REQ_F_FIXED_FILE) return -EBADF; close->fd = READ_ONCE(sqe->fd); close->file_slot = READ_ONCE(sqe->file_index); if (close->file_slot && close->fd) return -EINVAL; return 0; } int io_close(struct io_kiocb *req, unsigned int issue_flags) { struct files_struct *files = current->files; struct io_close *close = io_kiocb_to_cmd(req, struct io_close); struct file *file; int ret = -EBADF; if (close->file_slot) { ret = io_close_fixed(req, issue_flags); goto err; } spin_lock(&files->file_lock); file = files_lookup_fd_locked(files, close->fd); if (!file || io_is_uring_fops(file)) { spin_unlock(&files->file_lock); goto err; } /* if the file has a flush method, be safe and punt to async */ if (file->f_op->flush && (issue_flags & IO_URING_F_NONBLOCK)) { spin_unlock(&files->file_lock); return -EAGAIN; } file = file_close_fd_locked(files, close->fd); spin_unlock(&files->file_lock); if (!file) goto err; /* No ->flush() or already async, safely close from here */ ret = filp_close(file, current->files); err: if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_OK; } int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_fixed_install *ifi; unsigned int flags; if (sqe->off || sqe->addr || sqe->len || sqe->buf_index || sqe->splice_fd_in || sqe->addr3) return -EINVAL; /* must be a fixed file */ if (!(req->flags & REQ_F_FIXED_FILE)) return -EBADF; flags = READ_ONCE(sqe->install_fd_flags); if (flags & ~IORING_FIXED_FD_NO_CLOEXEC) return -EINVAL; /* ensure the task's creds are used when installing/receiving fds */ if (req->flags & REQ_F_CREDS) return -EPERM; /* default to O_CLOEXEC, disable if IORING_FIXED_FD_NO_CLOEXEC is set */ ifi = io_kiocb_to_cmd(req, struct io_fixed_install); 
ifi->o_flags = O_CLOEXEC; if (flags & IORING_FIXED_FD_NO_CLOEXEC) ifi->o_flags = 0; return 0; } int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags) { struct io_fixed_install *ifi; int ret; ifi = io_kiocb_to_cmd(req, struct io_fixed_install); ret = receive_fd(req->file, NULL, ifi->o_flags); if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_OK; }
50 135 22 21 22 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 // SPDX-License-Identifier: GPL-2.0-only /* dummy.c: a dummy net driver The purpose of this driver is to provide a device to point a route through, but not to actually transmit packets. Why? If you have a machine whose only connection is an occasional PPP/SLIP/PLIP link, you can only connect to your own hostname when the link is up. Otherwise you have to use localhost. This isn't very consistent. One solution is to set up a dummy link using PPP/SLIP/PLIP, but this seems (to me) too much overhead for too little gain. This driver provides a small alternative. Thus you can do [when not running slip] ifconfig dummy slip.addr.ess.here up [to go to slip] ifconfig dummy down dip whatever This was written by looking at Donald Becker's skeleton driver and the loopback driver. I then threw away anything that didn't apply! Thanks to Alan Cox for the key clue on what to do with misguided packets. 
Nick Holloway, 27th May 1994 [I tweaked this explanation a little but that's all] Alan Cox, 30th May 1994 */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/init.h> #include <linux/moduleparam.h> #include <linux/rtnetlink.h> #include <linux/net_tstamp.h> #include <net/rtnetlink.h> #include <linux/u64_stats_sync.h> #define DRV_NAME "dummy" static int numdummies = 1; /* fake multicast ability */ static void set_multicast_list(struct net_device *dev) { } static void dummy_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { dev_lstats_read(dev, &stats->tx_packets, &stats->tx_bytes); } static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev) { dev_lstats_add(dev, skb->len); skb_tx_timestamp(skb); dev_kfree_skb(skb); return NETDEV_TX_OK; } static int dummy_dev_init(struct net_device *dev) { dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; netdev_lockdep_set_classes(dev); return 0; } static int dummy_change_carrier(struct net_device *dev, bool new_carrier) { if (new_carrier) netif_carrier_on(dev); else netif_carrier_off(dev); return 0; } static const struct net_device_ops dummy_netdev_ops = { .ndo_init = dummy_dev_init, .ndo_start_xmit = dummy_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = set_multicast_list, .ndo_set_mac_address = eth_mac_addr, .ndo_get_stats64 = dummy_get_stats64, .ndo_change_carrier = dummy_change_carrier, }; static const struct ethtool_ops dummy_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, }; static void dummy_setup(struct net_device *dev) { ether_setup(dev); /* Initialize the device structure. */ dev->netdev_ops = &dummy_netdev_ops; dev->ethtool_ops = &dummy_ethtool_ops; dev->needs_free_netdev = true; /* Fill in device structure with ethernet-generic values. 
*/ dev->flags |= IFF_NOARP; dev->flags &= ~IFF_MULTICAST; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; dev->lltx = true; dev->features |= NETIF_F_SG | NETIF_F_FRAGLIST; dev->features |= NETIF_F_GSO_SOFTWARE; dev->features |= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA; dev->features |= NETIF_F_GSO_ENCAP_ALL; dev->hw_features |= dev->features; dev->hw_enc_features |= dev->features; eth_hw_addr_random(dev); dev->min_mtu = 0; dev->max_mtu = 0; } static int dummy_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) return -EINVAL; if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) return -EADDRNOTAVAIL; } return 0; } static struct rtnl_link_ops dummy_link_ops __read_mostly = { .kind = DRV_NAME, .setup = dummy_setup, .validate = dummy_validate, }; /* Number of dummy devices to be set up by this module. */ module_param(numdummies, int, 0); MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices"); static int __init dummy_init_one(void) { struct net_device *dev_dummy; int err; dev_dummy = alloc_netdev(0, "dummy%d", NET_NAME_ENUM, dummy_setup); if (!dev_dummy) return -ENOMEM; dev_dummy->rtnl_link_ops = &dummy_link_ops; err = register_netdevice(dev_dummy); if (err < 0) goto err; return 0; err: free_netdev(dev_dummy); return err; } static int __init dummy_init_module(void) { int i, err = 0; err = rtnl_link_register(&dummy_link_ops); if (err < 0) return err; rtnl_net_lock(&init_net); for (i = 0; i < numdummies && !err; i++) { err = dummy_init_one(); cond_resched(); } rtnl_net_unlock(&init_net); if (err < 0) rtnl_link_unregister(&dummy_link_ops); return err; } static void __exit dummy_cleanup_module(void) { rtnl_link_unregister(&dummy_link_ops); } module_init(dummy_init_module); module_exit(dummy_cleanup_module); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Dummy netdevice driver which discards all packets sent to it"); MODULE_ALIAS_RTNL_LINK(DRV_NAME);
11 11 3 5 1 4 11 81 70 11 11 11 1 3 11 2 2 4 4 4 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 // SPDX-License-Identifier: LGPL-2.1 /* * Copyright IBM Corporation, 2010 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> */ #include <linux/module.h> #include <linux/fs.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/posix_acl_xattr.h> #include "xattr.h" #include "acl.h" #include "v9fs.h" #include "v9fs_vfs.h" #include "fid.h" static struct posix_acl *v9fs_fid_get_acl(struct p9_fid *fid, const char *name) { ssize_t size; void *value = NULL; struct posix_acl *acl = NULL; size = v9fs_fid_xattr_get(fid, name, NULL, 0); if (size < 0) return ERR_PTR(size); if (size == 0) return 
ERR_PTR(-ENODATA); value = kzalloc(size, GFP_NOFS); if (!value) return ERR_PTR(-ENOMEM); size = v9fs_fid_xattr_get(fid, name, value, size); if (size < 0) acl = ERR_PTR(size); else if (size == 0) acl = ERR_PTR(-ENODATA); else acl = posix_acl_from_xattr(&init_user_ns, value, size); kfree(value); return acl; } static struct posix_acl *v9fs_acl_get(struct dentry *dentry, const char *name) { struct p9_fid *fid; struct posix_acl *acl = NULL; fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return ERR_CAST(fid); acl = v9fs_fid_get_acl(fid, name); p9_fid_put(fid); return acl; } static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, const char *name) { int retval; struct posix_acl *acl = NULL; acl = v9fs_fid_get_acl(fid, name); if (!IS_ERR(acl)) return acl; retval = PTR_ERR(acl); if (retval == -ENODATA || retval == -ENOSYS || retval == -EOPNOTSUPP) return NULL; /* map everything else to -EIO */ return ERR_PTR(-EIO); } int v9fs_get_acl(struct inode *inode, struct p9_fid *fid) { int retval = 0; struct posix_acl *pacl, *dacl; struct v9fs_session_info *v9ses; v9ses = v9fs_inode2v9ses(inode); if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) || ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) { set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL); set_cached_acl(inode, ACL_TYPE_ACCESS, NULL); return 0; } /* get the default/access acl values and cache them */ dacl = __v9fs_get_acl(fid, XATTR_NAME_POSIX_ACL_DEFAULT); pacl = __v9fs_get_acl(fid, XATTR_NAME_POSIX_ACL_ACCESS); if (!IS_ERR(dacl) && !IS_ERR(pacl)) { set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl); set_cached_acl(inode, ACL_TYPE_ACCESS, pacl); } else retval = -EIO; if (!IS_ERR(dacl)) posix_acl_release(dacl); if (!IS_ERR(pacl)) posix_acl_release(pacl); return retval; } static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type) { struct posix_acl *acl; /* * 9p Always cache the acl value when * instantiating the inode (v9fs_inode_from_fid) */ acl = get_cached_acl(inode, type); 
BUG_ON(is_uncached_acl(acl)); return acl; } struct posix_acl *v9fs_iop_get_inode_acl(struct inode *inode, int type, bool rcu) { struct v9fs_session_info *v9ses; if (rcu) return ERR_PTR(-ECHILD); v9ses = v9fs_inode2v9ses(inode); if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) || ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) { /* * On access = client and acl = on mode get the acl * values from the server */ return NULL; } return v9fs_get_cached_acl(inode, type); } struct posix_acl *v9fs_iop_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type) { struct v9fs_session_info *v9ses; v9ses = v9fs_dentry2v9ses(dentry); /* We allow set/get/list of acl when access=client is not specified. */ if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) return v9fs_acl_get(dentry, posix_acl_xattr_name(type)); return v9fs_get_cached_acl(d_inode(dentry), type); } int v9fs_iop_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int retval; size_t size = 0; void *value = NULL; const char *acl_name; struct v9fs_session_info *v9ses; struct inode *inode = d_inode(dentry); if (acl) { retval = posix_acl_valid(inode->i_sb->s_user_ns, acl); if (retval) goto err_out; size = posix_acl_xattr_size(acl->a_count); value = kzalloc(size, GFP_NOFS); if (!value) { retval = -ENOMEM; goto err_out; } retval = posix_acl_to_xattr(&init_user_ns, acl, value, size); if (retval < 0) goto err_out; } /* * set the attribute on the remote. Without even looking at the * xattr value. 
We leave it to the server to validate */ acl_name = posix_acl_xattr_name(type); v9ses = v9fs_dentry2v9ses(dentry); if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { retval = v9fs_xattr_set(dentry, acl_name, value, size, 0); goto err_out; } if (S_ISLNK(inode->i_mode)) { retval = -EOPNOTSUPP; goto err_out; } if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) { retval = -EPERM; goto err_out; } switch (type) { case ACL_TYPE_ACCESS: if (acl) { struct iattr iattr = {}; struct posix_acl *acl_mode = acl; retval = posix_acl_update_mode(&nop_mnt_idmap, inode, &iattr.ia_mode, &acl_mode); if (retval) goto err_out; if (!acl_mode) { /* * ACL can be represented by the mode bits. * So don't update ACL below. */ kfree(value); value = NULL; size = 0; } iattr.ia_valid = ATTR_MODE; /* * FIXME should we update ctime ? * What is the following setxattr update the mode ? */ v9fs_vfs_setattr_dotl(&nop_mnt_idmap, dentry, &iattr); } break; case ACL_TYPE_DEFAULT: if (!S_ISDIR(inode->i_mode)) { retval = acl ? 
-EINVAL : 0; goto err_out; } break; } retval = v9fs_xattr_set(dentry, acl_name, value, size, 0); if (!retval) set_cached_acl(inode, type, acl); err_out: kfree(value); return retval; } static int v9fs_set_acl(struct p9_fid *fid, int type, struct posix_acl *acl) { int retval; char *name; size_t size; void *buffer; if (!acl) return 0; /* Set a setxattr request to server */ size = posix_acl_xattr_size(acl->a_count); buffer = kmalloc(size, GFP_KERNEL); if (!buffer) return -ENOMEM; retval = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); if (retval < 0) goto err_free_out; switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: name = XATTR_NAME_POSIX_ACL_DEFAULT; break; default: BUG(); } retval = v9fs_fid_xattr_set(fid, name, buffer, size, 0); err_free_out: kfree(buffer); return retval; } int v9fs_acl_chmod(struct inode *inode, struct p9_fid *fid) { int retval = 0; struct posix_acl *acl; if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS); if (acl) { retval = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); if (retval) return retval; set_cached_acl(inode, ACL_TYPE_ACCESS, acl); retval = v9fs_set_acl(fid, ACL_TYPE_ACCESS, acl); posix_acl_release(acl); } return retval; } int v9fs_set_create_acl(struct inode *inode, struct p9_fid *fid, struct posix_acl *dacl, struct posix_acl *acl) { set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl); set_cached_acl(inode, ACL_TYPE_ACCESS, acl); v9fs_set_acl(fid, ACL_TYPE_DEFAULT, dacl); v9fs_set_acl(fid, ACL_TYPE_ACCESS, acl); return 0; } void v9fs_put_acl(struct posix_acl *dacl, struct posix_acl *acl) { posix_acl_release(dacl); posix_acl_release(acl); } int v9fs_acl_mode(struct inode *dir, umode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl) { int retval = 0; umode_t mode = *modep; struct posix_acl *acl = NULL; if (!S_ISLNK(mode)) { acl = v9fs_get_cached_acl(dir, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) return PTR_ERR(acl); if (!acl) 
mode &= ~current_umask(); } if (acl) { if (S_ISDIR(mode)) *dpacl = posix_acl_dup(acl); retval = __posix_acl_create(&acl, GFP_NOFS, &mode); if (retval < 0) return retval; if (retval > 0) *pacl = acl; else posix_acl_release(acl); } *modep = mode; return 0; }
41 1 64 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 /* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * Definitions for the SMC module (socket related) * * Copyright IBM Corp. 
2016 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> */ #ifndef __SMC_H #define __SMC_H #include <linux/socket.h> #include <linux/types.h> #include <linux/compiler.h> /* __aligned */ #include <net/genetlink.h> #include <net/sock.h> #include "smc_ib.h" #define SMC_V1 1 /* SMC version V1 */ #define SMC_V2 2 /* SMC version V2 */ #define SMC_RELEASE_0 0 #define SMC_RELEASE_1 1 #define SMC_RELEASE SMC_RELEASE_1 /* the latest release version */ #define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ #define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ #define SMC_AUTOCORKING_DEFAULT_SIZE 0x10000 /* 64K by default */ extern struct proto smc_proto; extern struct proto smc_proto6; extern struct smc_hashinfo smc_v4_hashinfo; extern struct smc_hashinfo smc_v6_hashinfo; int smc_hash_sk(struct sock *sk); void smc_unhash_sk(struct sock *sk); void smc_release_cb(struct sock *sk); int smc_release(struct socket *sock); int smc_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); int smc_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags); int smc_accept(struct socket *sock, struct socket *new_sock, struct proto_accept_arg *arg); int smc_getname(struct socket *sock, struct sockaddr *addr, int peer); __poll_t smc_poll(struct file *file, struct socket *sock, poll_table *wait); int smc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int smc_listen(struct socket *sock, int backlog); int smc_shutdown(struct socket *sock, int how); int smc_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen); int smc_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len); int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); /* smc sock initialization */ void 
smc_sk_init(struct net *net, struct sock *sk, int protocol); /* clcsock initialization */ int smc_create_clcsk(struct net *net, struct sock *sk, int family); #ifdef ATOMIC64_INIT #define KERNEL_HAS_ATOMIC64 #endif enum smc_state { /* possible states of an SMC socket */ SMC_ACTIVE = 1, SMC_INIT = 2, SMC_CLOSED = 7, SMC_LISTEN = 10, /* normal close */ SMC_PEERCLOSEWAIT1 = 20, SMC_PEERCLOSEWAIT2 = 21, SMC_APPFINCLOSEWAIT = 24, SMC_APPCLOSEWAIT1 = 22, SMC_APPCLOSEWAIT2 = 23, SMC_PEERFINCLOSEWAIT = 25, /* abnormal close */ SMC_PEERABORTWAIT = 26, SMC_PROCESSABORT = 27, }; enum smc_supplemental_features { SMC_SPF_EMULATED_ISM_DEV = 0, }; #define SMC_FEATURE_MASK \ (BIT(SMC_SPF_EMULATED_ISM_DEV)) struct smc_link_group; struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */ union { u8 type; #if defined(__BIG_ENDIAN_BITFIELD) struct { u8 llc_version:4, llc_type:4; }; #elif defined(__LITTLE_ENDIAN_BITFIELD) struct { u8 llc_type:4, llc_version:4; }; #endif }; } __aligned(1); struct smc_cdc_conn_state_flags { #if defined(__BIG_ENDIAN_BITFIELD) u8 peer_done_writing : 1; /* Sending done indicator */ u8 peer_conn_closed : 1; /* Peer connection closed indicator */ u8 peer_conn_abort : 1; /* Abnormal close indicator */ u8 reserved : 5; #elif defined(__LITTLE_ENDIAN_BITFIELD) u8 reserved : 5; u8 peer_conn_abort : 1; u8 peer_conn_closed : 1; u8 peer_done_writing : 1; #endif }; struct smc_cdc_producer_flags { #if defined(__BIG_ENDIAN_BITFIELD) u8 write_blocked : 1; /* Writing Blocked, no rx buf space */ u8 urg_data_pending : 1; /* Urgent Data Pending */ u8 urg_data_present : 1; /* Urgent Data Present */ u8 cons_curs_upd_req : 1; /* cursor update requested */ u8 failover_validation : 1;/* message replay due to failover */ u8 reserved : 3; #elif defined(__LITTLE_ENDIAN_BITFIELD) u8 reserved : 3; u8 failover_validation : 1; u8 cons_curs_upd_req : 1; u8 urg_data_present : 1; u8 urg_data_pending : 1; u8 write_blocked : 1; #endif }; /* in host byte order */ union 
smc_host_cursor { /* SMC cursor - an offset in an RMBE */ struct { u16 reserved; u16 wrap; /* window wrap sequence number */ u32 count; /* cursor (= offset) part */ }; #ifdef KERNEL_HAS_ATOMIC64 atomic64_t acurs; /* for atomic processing */ #else u64 acurs; /* for atomic processing */ #endif } __aligned(8); /* in host byte order, except for flag bitfields in network byte order */ struct smc_host_cdc_msg { /* Connection Data Control message */ struct smc_wr_rx_hdr common; /* .type = 0xFE */ u8 len; /* length = 44 */ u16 seqno; /* connection seq # */ u32 token; /* alert_token */ union smc_host_cursor prod; /* producer cursor */ union smc_host_cursor cons; /* consumer cursor, * piggy backed "ack" */ struct smc_cdc_producer_flags prod_flags; /* conn. tx/rx status */ struct smc_cdc_conn_state_flags conn_state_flags; /* peer conn. status*/ u8 reserved[18]; } __aligned(8); enum smc_urg_state { SMC_URG_VALID = 1, /* data present */ SMC_URG_NOTYET = 2, /* data pending */ SMC_URG_READ = 3, /* data was already read */ }; struct smc_mark_woken { bool woken; void *key; wait_queue_entry_t wait_entry; }; struct smc_connection { struct rb_node alert_node; struct smc_link_group *lgr; /* link group of connection */ struct smc_link *lnk; /* assigned SMC-R link */ u32 alert_token_local; /* unique conn. id */ u8 peer_rmbe_idx; /* from tcp handshake */ int peer_rmbe_size; /* size of peer rx buffer */ atomic_t peer_rmbe_space;/* remaining free bytes in peer * rmbe */ int rtoken_idx; /* idx to peer RMB rkey/addr */ struct smc_buf_desc *sndbuf_desc; /* send buffer descriptor */ struct smc_buf_desc *rmb_desc; /* RMBE descriptor */ int rmbe_size_comp; /* compressed notation */ int rmbe_update_limit; /* lower limit for consumer * cursor update */ struct smc_host_cdc_msg local_tx_ctrl; /* host byte order staging * buffer for CDC msg send * .prod cf. TCP snd_nxt * .cons cf. 
TCP sends ack */ union smc_host_cursor local_tx_ctrl_fin; /* prod crsr - confirmed by peer */ union smc_host_cursor tx_curs_prep; /* tx - prepared data * snd_max..wmem_alloc */ union smc_host_cursor tx_curs_sent; /* tx - sent data * snd_nxt ? */ union smc_host_cursor tx_curs_fin; /* tx - confirmed by peer * snd-wnd-begin ? */ atomic_t sndbuf_space; /* remaining space in sndbuf */ u16 tx_cdc_seq; /* sequence # for CDC send */ u16 tx_cdc_seq_fin; /* sequence # - tx completed */ spinlock_t send_lock; /* protect wr_sends */ atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe * - inc when post wqe, * - dec on polled tx cqe */ wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl. * .prod cf. TCP rcv_nxt * .cons cf. TCP snd_una */ union smc_host_cursor rx_curs_confirmed; /* confirmed to peer * source of snd_una ? 
*/ union smc_host_cursor urg_curs; /* points at urgent byte */ enum smc_urg_state urg_state; bool urg_tx_pend; /* urgent data staged */ bool urg_rx_skip_pend; /* indicate urgent oob data * read, but previous regular * data still pending */ char urg_rx_byte; /* urgent byte */ bool tx_in_release_sock; /* flush pending tx data in * sock release_cb() */ atomic_t bytes_to_rcv; /* arrived data, * not yet received */ atomic_t splice_pending; /* number of spliced bytes * pending processing */ #ifndef KERNEL_HAS_ATOMIC64 spinlock_t acurs_lock; /* protect cursors */ #endif struct work_struct close_work; /* peer sent some closing */ struct work_struct abort_work; /* abort the connection */ struct tasklet_struct rx_tsklet; /* Receiver tasklet for SMC-D */ u8 rx_off; /* receive offset: * 0 for SMC-R, 32 for SMC-D */ u64 peer_token; /* SMC-D token of peer */ u8 killed : 1; /* abnormal termination */ u8 freed : 1; /* normal termination */ u8 out_of_sync : 1; /* out of sync with peer */ }; struct smc_sock { /* smc sock container */ struct sock sk; #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *pinet6; #endif struct socket *clcsock; /* internal tcp socket */ void (*clcsk_state_change)(struct sock *sk); /* original stat_change fct. */ void (*clcsk_data_ready)(struct sock *sk); /* original data_ready fct. */ void (*clcsk_write_space)(struct sock *sk); /* original write_space fct. */ void (*clcsk_error_report)(struct sock *sk); /* original error_report fct. 
*/ struct smc_connection conn; /* smc connection */ struct smc_sock *listen_smc; /* listen parent */ struct work_struct connect_work; /* handle non-blocking connect*/ struct work_struct tcp_listen_work;/* handle tcp socket accepts */ struct work_struct smc_listen_work;/* prepare new accept socket */ struct list_head accept_q; /* sockets to be accepted */ spinlock_t accept_q_lock; /* protects accept_q */ bool limit_smc_hs; /* put constraint on handshake */ bool use_fallback; /* fallback to tcp */ int fallback_rsn; /* reason for fallback */ u32 peer_diagnosis; /* decline reason from peer */ atomic_t queued_smc_hs; /* queued smc handshakes */ struct inet_connection_sock_af_ops af_ops; const struct inet_connection_sock_af_ops *ori_af_ops; /* original af ops */ int sockopt_defer_accept; /* sockopt TCP_DEFER_ACCEPT * value */ u8 wait_close_tx_prepared : 1; /* shutdown wr or close * started, waiting for unsent * data to be sent */ u8 connect_nonblock : 1; /* non-blocking connect in * flight */ struct mutex clcsock_release_lock; /* protects clcsock of a listen * socket * */ }; #define smc_sk(ptr) container_of_const(ptr, struct smc_sock, sk) static inline void smc_init_saved_callbacks(struct smc_sock *smc) { smc->clcsk_state_change = NULL; smc->clcsk_data_ready = NULL; smc->clcsk_write_space = NULL; smc->clcsk_error_report = NULL; } static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk) { return (struct smc_sock *) ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY); } /* save target_cb in saved_cb, and replace target_cb with new_cb */ static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *), void (*new_cb)(struct sock *), void (**saved_cb)(struct sock *)) { /* only save once */ if (!*saved_cb) *saved_cb = *target_cb; *target_cb = new_cb; } /* restore target_cb to saved_cb, and reset saved_cb to NULL */ static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *), void (**saved_cb)(struct sock *)) { if 
(!*saved_cb) return; *target_cb = *saved_cb; *saved_cb = NULL; } extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ extern struct workqueue_struct *smc_close_wq; /* wq for close work */ #define SMC_SYSTEMID_LEN 8 extern u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */ #define ntohll(x) be64_to_cpu(x) #define htonll(x) cpu_to_be64(x) /* convert an u32 value into network byte order, store it into a 3 byte field */ static inline void hton24(u8 *net, u32 host) { __be32 t; t = cpu_to_be32(host); memcpy(net, ((u8 *)&t) + 1, 3); } /* convert a received 3 byte field into host byte order*/ static inline u32 ntoh24(u8 *net) { __be32 t = 0; memcpy(((u8 *)&t) + 1, net, 3); return be32_to_cpu(t); } #ifdef CONFIG_XFRM static inline bool using_ipsec(struct smc_sock *smc) { return (smc->clcsock->sk->sk_policy[0] || smc->clcsock->sk->sk_policy[1]) ? true : false; } #else static inline bool using_ipsec(struct smc_sock *smc) { return false; } #endif struct smc_gidlist; struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); void smc_close_non_accepted(struct sock *sk); void smc_fill_gid_list(struct smc_link_group *lgr, struct smc_gidlist *gidlist, struct smc_ib_device *known_dev, u8 *known_gid); /* smc handshake limitation interface for netlink */ int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb); int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info); int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info); static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag) { set_bit(flag, &sk->sk_flags); } #endif /* __SMC_H */
2 2 2 2 2 2 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 // SPDX-License-Identifier: GPL-2.0-only /* * Line 6 Pod HD * * Copyright (C) 2011 Stefan Hajnoczi <stefanha@gmail.com> * Copyright (C) 2015 Andrej Krutak <dev@andree.sk> * Copyright (C) 2017 Hans P. Moller <hmoller@uc.cl> */ #include <linux/usb.h> #include <linux/slab.h> #include <linux/module.h> #include <sound/core.h> #include <sound/control.h> #include <sound/pcm.h> #include "driver.h" #include "pcm.h" #define PODHD_STARTUP_DELAY 500 enum { LINE6_PODHD300, LINE6_PODHD400, LINE6_PODHD500, LINE6_PODX3, LINE6_PODX3LIVE, LINE6_PODHD500X, LINE6_PODHDDESKTOP }; struct usb_line6_podhd { /* Generic Line 6 USB data */ struct usb_line6 line6; /* Serial number of device */ u32 serial_number; /* Firmware version */ int firmware_version; /* Monitor level */ int monitor_level; }; #define line6_to_podhd(x) container_of(x, struct usb_line6_podhd, line6) static const struct snd_ratden podhd_ratden = { .num_min = 48000, .num_max = 48000, .num_step = 1, .den = 1, }; static struct line6_pcm_properties podhd_pcm_properties = { .playback_hw = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_SYNC_START), .formats = SNDRV_PCM_FMTBIT_S24_3LE, .rates = SNDRV_PCM_RATE_48000, .rate_min = 48000, .rate_max = 48000, .channels_min = 2, .channels_max = 2, .buffer_bytes_max = 60000, .period_bytes_min = 64, .period_bytes_max = 8192, .periods_min = 1, .periods_max = 1024}, .capture_hw = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_SYNC_START), .formats = SNDRV_PCM_FMTBIT_S24_3LE, .rates = SNDRV_PCM_RATE_48000, .rate_min = 48000, .rate_max = 48000, .channels_min = 2, .channels_max = 2, .buffer_bytes_max = 60000, .period_bytes_min = 64, .period_bytes_max = 
8192, .periods_min = 1, .periods_max = 1024}, .rates = { .nrats = 1, .rats = &podhd_ratden}, .bytes_per_channel = 3 /* SNDRV_PCM_FMTBIT_S24_3LE */ }; static struct line6_pcm_properties podx3_pcm_properties = { .playback_hw = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_SYNC_START), .formats = SNDRV_PCM_FMTBIT_S24_3LE, .rates = SNDRV_PCM_RATE_48000, .rate_min = 48000, .rate_max = 48000, .channels_min = 2, .channels_max = 2, .buffer_bytes_max = 60000, .period_bytes_min = 64, .period_bytes_max = 8192, .periods_min = 1, .periods_max = 1024}, .capture_hw = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_SYNC_START), .formats = SNDRV_PCM_FMTBIT_S24_3LE, .rates = SNDRV_PCM_RATE_48000, .rate_min = 48000, .rate_max = 48000, /* 1+2: Main signal (out), 3+4: Tone 1, * 5+6: Tone 2, 7+8: raw */ .channels_min = 8, .channels_max = 8, .buffer_bytes_max = 60000, .period_bytes_min = 64, .period_bytes_max = 8192, .periods_min = 1, .periods_max = 1024}, .rates = { .nrats = 1, .rats = &podhd_ratden}, .bytes_per_channel = 3 /* SNDRV_PCM_FMTBIT_S24_3LE */ }; static struct usb_driver podhd_driver; static ssize_t serial_number_show(struct device *dev, struct device_attribute *attr, char *buf) { struct snd_card *card = dev_to_snd_card(dev); struct usb_line6_podhd *pod = card->private_data; return sysfs_emit(buf, "%u\n", pod->serial_number); } static ssize_t firmware_version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct snd_card *card = dev_to_snd_card(dev); struct usb_line6_podhd *pod = card->private_data; return sysfs_emit(buf, "%06x\n", pod->firmware_version); } static DEVICE_ATTR_RO(firmware_version); static DEVICE_ATTR_RO(serial_number); static struct attribute *podhd_dev_attrs[] = { &dev_attr_firmware_version.attr, &dev_attr_serial_number.attr, NULL }; 
static const struct attribute_group podhd_dev_attr_group = { .name = "podhd", .attrs = podhd_dev_attrs, }; /* * POD X3 startup procedure. * * May be compatible with other POD HD's, since it's also similar to the * previous POD setup. In any case, it doesn't seem to be required for the * audio nor bulk interfaces to work. */ static int podhd_dev_start(struct usb_line6_podhd *pod) { int ret; u8 init_bytes[8]; int i; struct usb_device *usbdev = pod->line6.usbdev; ret = usb_control_msg_send(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, 0x11, 0, NULL, 0, LINE6_TIMEOUT, GFP_KERNEL); if (ret) { dev_err(pod->line6.ifcdev, "read request failed (error %d)\n", ret); goto exit; } /* NOTE: looks like some kind of ping message */ ret = usb_control_msg_recv(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x11, 0x0, init_bytes, 3, LINE6_TIMEOUT, GFP_KERNEL); if (ret) { dev_err(pod->line6.ifcdev, "receive length failed (error %d)\n", ret); goto exit; } pod->firmware_version = (init_bytes[0] << 16) | (init_bytes[1] << 8) | (init_bytes[2] << 0); for (i = 0; i <= 16; i++) { ret = line6_read_data(&pod->line6, 0xf000 + 0x08 * i, init_bytes, 8); if (ret < 0) goto exit; } ret = usb_control_msg_send(usbdev, 0, USB_REQ_SET_FEATURE, USB_TYPE_STANDARD | USB_RECIP_DEVICE | USB_DIR_OUT, 1, 0, NULL, 0, LINE6_TIMEOUT, GFP_KERNEL); exit: return ret; } static void podhd_startup(struct usb_line6 *line6) { struct usb_line6_podhd *pod = line6_to_podhd(line6); podhd_dev_start(pod); line6_read_serial_number(&pod->line6, &pod->serial_number); if (snd_card_register(line6->card)) dev_err(line6->ifcdev, "Failed to register POD HD card.\n"); } static void podhd_disconnect(struct usb_line6 *line6) { struct usb_line6_podhd *pod = line6_to_podhd(line6); if (pod->line6.properties->capabilities & LINE6_CAP_CONTROL_INFO) { struct usb_interface *intf; intf = usb_ifnum_to_if(line6->usbdev, pod->line6.properties->ctrl_if); if (intf) usb_driver_release_interface(&podhd_driver, 
intf); } } static const unsigned int float_zero_to_one_lookup[] = { 0x00000000, 0x3c23d70a, 0x3ca3d70a, 0x3cf5c28f, 0x3d23d70a, 0x3d4ccccd, 0x3d75c28f, 0x3d8f5c29, 0x3da3d70a, 0x3db851ec, 0x3dcccccd, 0x3de147ae, 0x3df5c28f, 0x3e051eb8, 0x3e0f5c29, 0x3e19999a, 0x3e23d70a, 0x3e2e147b, 0x3e3851ec, 0x3e428f5c, 0x3e4ccccd, 0x3e570a3d, 0x3e6147ae, 0x3e6b851f, 0x3e75c28f, 0x3e800000, 0x3e851eb8, 0x3e8a3d71, 0x3e8f5c29, 0x3e947ae1, 0x3e99999a, 0x3e9eb852, 0x3ea3d70a, 0x3ea8f5c3, 0x3eae147b, 0x3eb33333, 0x3eb851ec, 0x3ebd70a4, 0x3ec28f5c, 0x3ec7ae14, 0x3ecccccd, 0x3ed1eb85, 0x3ed70a3d, 0x3edc28f6, 0x3ee147ae, 0x3ee66666, 0x3eeb851f, 0x3ef0a3d7, 0x3ef5c28f, 0x3efae148, 0x3f000000, 0x3f028f5c, 0x3f051eb8, 0x3f07ae14, 0x3f0a3d71, 0x3f0ccccd, 0x3f0f5c29, 0x3f11eb85, 0x3f147ae1, 0x3f170a3d, 0x3f19999a, 0x3f1c28f6, 0x3f1eb852, 0x3f2147ae, 0x3f23d70a, 0x3f266666, 0x3f28f5c3, 0x3f2b851f, 0x3f2e147b, 0x3f30a3d7, 0x3f333333, 0x3f35c28f, 0x3f3851ec, 0x3f3ae148, 0x3f3d70a4, 0x3f400000, 0x3f428f5c, 0x3f451eb8, 0x3f47ae14, 0x3f4a3d71, 0x3f4ccccd, 0x3f4f5c29, 0x3f51eb85, 0x3f547ae1, 0x3f570a3d, 0x3f59999a, 0x3f5c28f6, 0x3f5eb852, 0x3f6147ae, 0x3f63d70a, 0x3f666666, 0x3f68f5c3, 0x3f6b851f, 0x3f6e147b, 0x3f70a3d7, 0x3f733333, 0x3f75c28f, 0x3f7851ec, 0x3f7ae148, 0x3f7d70a4, 0x3f800000 }; static void podhd_set_monitor_level(struct usb_line6_podhd *podhd, int value) { unsigned int fl; static const unsigned char msg[16] = { /* Chunk is 0xc bytes (without first word) */ 0x0c, 0x00, /* First chunk in the message */ 0x01, 0x00, /* Message size is 2 4-byte words */ 0x02, 0x00, /* Unknown */ 0x04, 0x41, /* Unknown */ 0x04, 0x00, 0x13, 0x00, /* Volume, LE float32, 0.0 - 1.0 */ 0x00, 0x00, 0x00, 0x00 }; unsigned char *buf; buf = kmemdup(msg, sizeof(msg), GFP_KERNEL); if (!buf) return; if (value < 0) value = 0; if (value >= ARRAY_SIZE(float_zero_to_one_lookup)) value = ARRAY_SIZE(float_zero_to_one_lookup) - 1; fl = float_zero_to_one_lookup[value]; buf[12] = (fl >> 0) & 0xff; buf[13] = (fl >> 8) & 0xff; 
buf[14] = (fl >> 16) & 0xff; buf[15] = (fl >> 24) & 0xff; line6_send_raw_message(&podhd->line6, buf, sizeof(msg)); kfree(buf); podhd->monitor_level = value; } /* control info callback */ static int snd_podhd_control_monitor_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; uinfo->count = 1; uinfo->value.integer.min = 0; uinfo->value.integer.max = 100; uinfo->value.integer.step = 1; return 0; } /* control get callback */ static int snd_podhd_control_monitor_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_line6_pcm *line6pcm = snd_kcontrol_chip(kcontrol); struct usb_line6_podhd *podhd = line6_to_podhd(line6pcm->line6); ucontrol->value.integer.value[0] = podhd->monitor_level; return 0; } /* control put callback */ static int snd_podhd_control_monitor_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_line6_pcm *line6pcm = snd_kcontrol_chip(kcontrol); struct usb_line6_podhd *podhd = line6_to_podhd(line6pcm->line6); if (ucontrol->value.integer.value[0] == podhd->monitor_level) return 0; podhd_set_monitor_level(podhd, ucontrol->value.integer.value[0]); return 1; } /* control definition */ static const struct snd_kcontrol_new podhd_control_monitor = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Monitor Playback Volume", .index = 0, .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, .info = snd_podhd_control_monitor_info, .get = snd_podhd_control_monitor_get, .put = snd_podhd_control_monitor_put }; /* Try to init POD HD device. 
*/ static int podhd_init(struct usb_line6 *line6, const struct usb_device_id *id) { int err; struct usb_line6_podhd *pod = line6_to_podhd(line6); struct usb_interface *intf; line6->disconnect = podhd_disconnect; line6->startup = podhd_startup; if (pod->line6.properties->capabilities & LINE6_CAP_CONTROL) { /* claim the data interface */ intf = usb_ifnum_to_if(line6->usbdev, pod->line6.properties->ctrl_if); if (!intf) { dev_err(pod->line6.ifcdev, "interface %d not found\n", pod->line6.properties->ctrl_if); return -ENODEV; } err = usb_driver_claim_interface(&podhd_driver, intf, NULL); if (err != 0) { dev_err(pod->line6.ifcdev, "can't claim interface %d, error %d\n", pod->line6.properties->ctrl_if, err); return err; } } if (pod->line6.properties->capabilities & LINE6_CAP_CONTROL_INFO) { /* create sysfs entries: */ err = snd_card_add_dev_attr(line6->card, &podhd_dev_attr_group); if (err < 0) return err; } if (pod->line6.properties->capabilities & LINE6_CAP_PCM) { /* initialize PCM subsystem: */ err = line6_init_pcm(line6, (id->driver_info == LINE6_PODX3 || id->driver_info == LINE6_PODX3LIVE) ? 
&podx3_pcm_properties : &podhd_pcm_properties); if (err < 0) return err; } if (pod->line6.properties->capabilities & LINE6_CAP_HWMON_CTL) { podhd_set_monitor_level(pod, 100); err = snd_ctl_add(line6->card, snd_ctl_new1(&podhd_control_monitor, line6->line6pcm)); if (err < 0) return err; } if (!(pod->line6.properties->capabilities & LINE6_CAP_CONTROL_INFO)) { /* register USB audio system directly */ return snd_card_register(line6->card); } /* init device and delay registering */ schedule_delayed_work(&line6->startup_work, msecs_to_jiffies(PODHD_STARTUP_DELAY)); return 0; } #define LINE6_DEVICE(prod) USB_DEVICE(0x0e41, prod) #define LINE6_IF_NUM(prod, n) USB_DEVICE_INTERFACE_NUMBER(0x0e41, prod, n) /* table of devices that work with this driver */ static const struct usb_device_id podhd_id_table[] = { /* TODO: no need to alloc data interfaces when only audio is used */ { LINE6_DEVICE(0x5057), .driver_info = LINE6_PODHD300 }, { LINE6_DEVICE(0x5058), .driver_info = LINE6_PODHD400 }, { LINE6_IF_NUM(0x414D, 0), .driver_info = LINE6_PODHD500 }, { LINE6_IF_NUM(0x414A, 0), .driver_info = LINE6_PODX3 }, { LINE6_IF_NUM(0x414B, 0), .driver_info = LINE6_PODX3LIVE }, { LINE6_IF_NUM(0x4159, 0), .driver_info = LINE6_PODHD500X }, { LINE6_IF_NUM(0x4156, 0), .driver_info = LINE6_PODHDDESKTOP }, {} }; MODULE_DEVICE_TABLE(usb, podhd_id_table); static const struct line6_properties podhd_properties_table[] = { [LINE6_PODHD300] = { .id = "PODHD300", .name = "POD HD300", .capabilities = LINE6_CAP_PCM | LINE6_CAP_HWMON, .altsetting = 5, .ep_ctrl_r = 0x84, .ep_ctrl_w = 0x03, .ep_audio_r = 0x82, .ep_audio_w = 0x01, }, [LINE6_PODHD400] = { .id = "PODHD400", .name = "POD HD400", .capabilities = LINE6_CAP_PCM | LINE6_CAP_HWMON, .altsetting = 5, .ep_ctrl_r = 0x84, .ep_ctrl_w = 0x03, .ep_audio_r = 0x82, .ep_audio_w = 0x01, }, [LINE6_PODHD500] = { .id = "PODHD500", .name = "POD HD500", .capabilities = LINE6_CAP_PCM | LINE6_CAP_CONTROL | LINE6_CAP_HWMON | LINE6_CAP_HWMON_CTL, .altsetting = 1, 
.ctrl_if = 1, .ep_ctrl_r = 0x81, .ep_ctrl_w = 0x01, .ep_audio_r = 0x86, .ep_audio_w = 0x02, }, [LINE6_PODX3] = { .id = "PODX3", .name = "POD X3", .capabilities = LINE6_CAP_CONTROL | LINE6_CAP_CONTROL_INFO | LINE6_CAP_PCM | LINE6_CAP_HWMON | LINE6_CAP_IN_NEEDS_OUT, .altsetting = 1, .ep_ctrl_r = 0x81, .ep_ctrl_w = 0x01, .ctrl_if = 1, .ep_audio_r = 0x86, .ep_audio_w = 0x02, }, [LINE6_PODX3LIVE] = { .id = "PODX3LIVE", .name = "POD X3 LIVE", .capabilities = LINE6_CAP_CONTROL | LINE6_CAP_CONTROL_INFO | LINE6_CAP_PCM | LINE6_CAP_HWMON | LINE6_CAP_IN_NEEDS_OUT, .altsetting = 1, .ep_ctrl_r = 0x81, .ep_ctrl_w = 0x01, .ctrl_if = 1, .ep_audio_r = 0x86, .ep_audio_w = 0x02, }, [LINE6_PODHD500X] = { .id = "PODHD500X", .name = "POD HD500X", .capabilities = LINE6_CAP_CONTROL | LINE6_CAP_HWMON_CTL | LINE6_CAP_PCM | LINE6_CAP_HWMON, .altsetting = 1, .ep_ctrl_r = 0x81, .ep_ctrl_w = 0x01, .ctrl_if = 1, .ep_audio_r = 0x86, .ep_audio_w = 0x02, }, [LINE6_PODHDDESKTOP] = { .id = "PODHDDESKTOP", .name = "POD HDDESKTOP", .capabilities = LINE6_CAP_CONTROL | LINE6_CAP_PCM | LINE6_CAP_HWMON, .altsetting = 1, .ep_ctrl_r = 0x81, .ep_ctrl_w = 0x01, .ctrl_if = 1, .ep_audio_r = 0x86, .ep_audio_w = 0x02, }, }; /* Probe USB device. */ static int podhd_probe(struct usb_interface *interface, const struct usb_device_id *id) { return line6_probe(interface, id, "Line6-PODHD", &podhd_properties_table[id->driver_info], podhd_init, sizeof(struct usb_line6_podhd)); } static struct usb_driver podhd_driver = { .name = KBUILD_MODNAME, .probe = podhd_probe, .disconnect = line6_disconnect, #ifdef CONFIG_PM .suspend = line6_suspend, .resume = line6_resume, .reset_resume = line6_resume, #endif .id_table = podhd_id_table, }; module_usb_driver(podhd_driver); MODULE_DESCRIPTION("Line 6 PODHD USB driver"); MODULE_LICENSE("GPL");
23 1 1 11 11 12 12 12 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi) * Copyright (C) Darryl Miles G7LED (dlm@g7led.demon.co.uk) * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org) */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/jiffies.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> static void ax25_heartbeat_expiry(struct timer_list *); static void ax25_t1timer_expiry(struct timer_list *); static void ax25_t2timer_expiry(struct timer_list *); static void ax25_t3timer_expiry(struct timer_list *); static 
void ax25_idletimer_expiry(struct timer_list *); void ax25_setup_timers(ax25_cb *ax25) { timer_setup(&ax25->timer, ax25_heartbeat_expiry, 0); timer_setup(&ax25->t1timer, ax25_t1timer_expiry, 0); timer_setup(&ax25->t2timer, ax25_t2timer_expiry, 0); timer_setup(&ax25->t3timer, ax25_t3timer_expiry, 0); timer_setup(&ax25->idletimer, ax25_idletimer_expiry, 0); } void ax25_start_heartbeat(ax25_cb *ax25) { mod_timer(&ax25->timer, jiffies + 5 * HZ); } void ax25_start_t1timer(ax25_cb *ax25) { mod_timer(&ax25->t1timer, jiffies + ax25->t1); } void ax25_start_t2timer(ax25_cb *ax25) { mod_timer(&ax25->t2timer, jiffies + ax25->t2); } void ax25_start_t3timer(ax25_cb *ax25) { if (ax25->t3 > 0) mod_timer(&ax25->t3timer, jiffies + ax25->t3); else del_timer(&ax25->t3timer); } void ax25_start_idletimer(ax25_cb *ax25) { if (ax25->idle > 0) mod_timer(&ax25->idletimer, jiffies + ax25->idle); else del_timer(&ax25->idletimer); } void ax25_stop_heartbeat(ax25_cb *ax25) { del_timer(&ax25->timer); } void ax25_stop_t1timer(ax25_cb *ax25) { del_timer(&ax25->t1timer); } void ax25_stop_t2timer(ax25_cb *ax25) { del_timer(&ax25->t2timer); } void ax25_stop_t3timer(ax25_cb *ax25) { del_timer(&ax25->t3timer); } void ax25_stop_idletimer(ax25_cb *ax25) { del_timer(&ax25->idletimer); } int ax25_t1timer_running(ax25_cb *ax25) { return timer_pending(&ax25->t1timer); } unsigned long ax25_display_timer(struct timer_list *timer) { long delta = timer->expires - jiffies; if (!timer_pending(timer)) return 0; return max(0L, delta); } EXPORT_SYMBOL(ax25_display_timer); static void ax25_heartbeat_expiry(struct timer_list *t) { int proto = AX25_PROTO_STD_SIMPLEX; ax25_cb *ax25 = from_timer(ax25, t, timer); if (ax25->ax25_dev) proto = ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]; switch (proto) { case AX25_PROTO_STD_SIMPLEX: case AX25_PROTO_STD_DUPLEX: ax25_std_heartbeat_expiry(ax25); break; #ifdef CONFIG_AX25_DAMA_SLAVE case AX25_PROTO_DAMA_SLAVE: if (ax25->ax25_dev->dama.slave) ax25_ds_heartbeat_expiry(ax25); else 
ax25_std_heartbeat_expiry(ax25); break; #endif } } static void ax25_t1timer_expiry(struct timer_list *t) { ax25_cb *ax25 = from_timer(ax25, t, t1timer); switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { case AX25_PROTO_STD_SIMPLEX: case AX25_PROTO_STD_DUPLEX: ax25_std_t1timer_expiry(ax25); break; #ifdef CONFIG_AX25_DAMA_SLAVE case AX25_PROTO_DAMA_SLAVE: if (!ax25->ax25_dev->dama.slave) ax25_std_t1timer_expiry(ax25); break; #endif } } static void ax25_t2timer_expiry(struct timer_list *t) { ax25_cb *ax25 = from_timer(ax25, t, t2timer); switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { case AX25_PROTO_STD_SIMPLEX: case AX25_PROTO_STD_DUPLEX: ax25_std_t2timer_expiry(ax25); break; #ifdef CONFIG_AX25_DAMA_SLAVE case AX25_PROTO_DAMA_SLAVE: if (!ax25->ax25_dev->dama.slave) ax25_std_t2timer_expiry(ax25); break; #endif } } static void ax25_t3timer_expiry(struct timer_list *t) { ax25_cb *ax25 = from_timer(ax25, t, t3timer); switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { case AX25_PROTO_STD_SIMPLEX: case AX25_PROTO_STD_DUPLEX: ax25_std_t3timer_expiry(ax25); break; #ifdef CONFIG_AX25_DAMA_SLAVE case AX25_PROTO_DAMA_SLAVE: if (ax25->ax25_dev->dama.slave) ax25_ds_t3timer_expiry(ax25); else ax25_std_t3timer_expiry(ax25); break; #endif } } static void ax25_idletimer_expiry(struct timer_list *t) { ax25_cb *ax25 = from_timer(ax25, t, idletimer); switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { case AX25_PROTO_STD_SIMPLEX: case AX25_PROTO_STD_DUPLEX: ax25_std_idletimer_expiry(ax25); break; #ifdef CONFIG_AX25_DAMA_SLAVE case AX25_PROTO_DAMA_SLAVE: if (ax25->ax25_dev->dama.slave) ax25_ds_idletimer_expiry(ax25); else ax25_std_idletimer_expiry(ax25); break; #endif } }
334 54 337 107 107 57 103 6 107 107 84 61 1082 2091 1082 2093 1142 1128 1144 337 1143 1055 810 60 2090 2094 2093 2092 2091 2093 1914 1917 1914 268 2091 2089 12 2093 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/mm_inline.h>
#include <linux/pagemap.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/swap.h>
#include <linux/rmap.h>

#include <asm/pgalloc.h>
#include <asm/tlb.h>

#ifndef CONFIG_MMU_GATHER_NO_GATHER

/*
 * Make tlb->active point at a batch with room for more pages.
 *
 * Reuses the batch already linked after the active one if there is one,
 * otherwise allocates a fresh page (GFP_NOWAIT | __GFP_NOWARN) and links it.
 *
 * Returns false, leaving tlb->active unchanged, when delayed rmaps are
 * pending on a non-local batch, when MAX_GATHER_BATCH_COUNT batches have
 * already been allocated, or when the page allocation fails.
 */
static bool tlb_next_batch(struct mmu_gather *tlb)
{
	struct mmu_gather_batch *batch;

	/* Limit batching if we have delayed rmaps pending */
	if (tlb->delayed_rmap && tlb->active != &tlb->local)
		return false;

	batch = tlb->active;
	if (batch->next) {
		tlb->active = batch->next;
		return true;
	}

	if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
		return false;

	batch = (void *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
	if (!batch)
		return false;

	tlb->batch_count++;
	batch->next = NULL;
	batch->nr   = 0;
	batch->max  = MAX_GATHER_BATCH;

	tlb->active->next = batch;
	tlb->active = batch;

	return true;
}

#ifdef CONFIG_SMP
/*
 * Remove the rmap for every entry of @batch that carries
 * ENCODED_PAGE_BIT_DELAY_RMAP.  An entry that also carries
 * ENCODED_PAGE_BIT_NR_PAGES_NEXT takes its page count from the following
 * slot, which is consumed here (++i) and therefore skipped by the loop.
 */
static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_struct *vma)
{
	struct encoded_page **pages = batch->encoded_pages;

	for (int i = 0; i < batch->nr; i++) {
		struct encoded_page *enc = pages[i];

		if (encoded_page_flags(enc) & ENCODED_PAGE_BIT_DELAY_RMAP) {
			struct page *page = encoded_page_ptr(enc);
			unsigned int nr_pages = 1;

			if (unlikely(encoded_page_flags(enc) &
				     ENCODED_PAGE_BIT_NR_PAGES_NEXT))
				nr_pages = encoded_nr_pages(pages[++i]);

			folio_remove_rmap_ptes(page_folio(page), page, nr_pages,
					       vma);
		}
	}
}

/**
 * tlb_flush_rmaps - do pending rmap removals after we have flushed the TLB
 * @tlb: the current mmu_gather
 * @vma: The memory area from which the pages are being removed.
 *
 * Note that because of how tlb_next_batch() above works, we will
 * never start multiple new batches with pending delayed rmaps, so
 * we only need to walk through the current active batch and the
 * original local one.
 */
void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
	if (!tlb->delayed_rmap)
		return;

	tlb_flush_rmap_batch(&tlb->local, vma);
	if (tlb->active != &tlb->local)
		tlb_flush_rmap_batch(tlb->active, vma);
	tlb->delayed_rmap = 0;
}
#endif

/*
 * We might end up freeing a lot of pages. Reschedule on a regular
 * basis to avoid soft lockups in configurations without full
 * preemption enabled. The magic number of 512 folios seems to work.
 */
#define MAX_NR_FOLIOS_PER_FREE		512

/*
 * Free all entries of @batch in chunks, calling cond_resched() after each
 * chunk.  On return batch->nr is 0.
 */
static void __tlb_batch_free_encoded_pages(struct mmu_gather_batch *batch)
{
	struct encoded_page **pages = batch->encoded_pages;
	unsigned int nr, nr_pages;

	while (batch->nr) {
		if (!page_poisoning_enabled_static() && !want_init_on_free()) {
			nr = min(MAX_NR_FOLIOS_PER_FREE, batch->nr);

			/*
			 * Make sure we cover page + nr_pages, and don't leave
			 * nr_pages behind when capping the number of entries.
			 */
			if (unlikely(encoded_page_flags(pages[nr - 1]) &
				     ENCODED_PAGE_BIT_NR_PAGES_NEXT))
				nr++;
		} else {
			/*
			 * With page poisoning and init_on_free, the time it
			 * takes to free memory grows proportionally with the
			 * actual memory size. Therefore, limit based on the
			 * actual memory size and not the number of involved
			 * folios.
			 */
			for (nr = 0, nr_pages = 0;
			     nr < batch->nr && nr_pages < MAX_NR_FOLIOS_PER_FREE;
			     nr++) {
				if (unlikely(encoded_page_flags(pages[nr]) &
					     ENCODED_PAGE_BIT_NR_PAGES_NEXT))
					nr_pages += encoded_nr_pages(pages[++nr]);
				else
					nr_pages++;
			}
		}

		free_pages_and_swap_cache(pages, nr);
		pages += nr;
		batch->nr -= nr;

		cond_resched();
	}
}

/*
 * Free the pages of every batch, starting at the local one and stopping at
 * the first batch that is empty, then make the local batch active again.
 * The batch pages themselves stay linked for reuse.
 */
static void tlb_batch_pages_flush(struct mmu_gather *tlb)
{
	struct mmu_gather_batch *batch;

	for (batch = &tlb->local; batch && batch->nr; batch = batch->next)
		__tlb_batch_free_encoded_pages(batch);
	tlb->active = &tlb->local;
}

/* Free every batch page linked after the local batch and unlink them. */
static void tlb_batch_list_free(struct mmu_gather *tlb)
{
	struct mmu_gather_batch *batch, *next;

	for (batch = tlb->local.next; batch; batch = next) {
		next = batch->next;
		free_pages((unsigned long)batch, 0);
	}
	tlb->local.next = NULL;
}

/*
 * Queue @page (covering @nr_pages pages) in the active batch.  When
 * @nr_pages is not 1 a second slot holding the encoded page count is queued
 * right after the page entry.
 *
 * Returns true when the active batch is (nearly) full and no further batch
 * could be obtained, false otherwise.
 */
static bool __tlb_remove_folio_pages_size(struct mmu_gather *tlb,
		struct page *page, unsigned int nr_pages, bool delay_rmap,
		int page_size)
{
	int flags = delay_rmap ? ENCODED_PAGE_BIT_DELAY_RMAP : 0;
	struct mmu_gather_batch *batch;

	VM_BUG_ON(!tlb->end);

#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
	VM_WARN_ON(tlb->page_size != page_size);
	VM_WARN_ON_ONCE(nr_pages != 1 && page_size != PAGE_SIZE);
	VM_WARN_ON_ONCE(page_folio(page) != page_folio(page + nr_pages - 1));
#endif

	batch = tlb->active;
	/*
	 * Add the page and check if we are full. If so
	 * force a flush.
	 */
	if (likely(nr_pages == 1)) {
		batch->encoded_pages[batch->nr++] = encode_page(page, flags);
	} else {
		flags |= ENCODED_PAGE_BIT_NR_PAGES_NEXT;
		batch->encoded_pages[batch->nr++] = encode_page(page, flags);
		batch->encoded_pages[batch->nr++] = encode_nr_pages(nr_pages);
	}
	/*
	 * Make sure that we can always add another "page" + "nr_pages",
	 * requiring two entries instead of only a single one.
	 */
	if (batch->nr >= batch->max - 1) {
		if (!tlb_next_batch(tlb))
			return true;
		batch = tlb->active;
	}
	VM_BUG_ON_PAGE(batch->nr > batch->max - 1, page);

	return false;
}

/* Queue @nr_pages pages starting at @page, using PAGE_SIZE as page size. */
bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page,
		unsigned int nr_pages, bool delay_rmap)
{
	return __tlb_remove_folio_pages_size(tlb, page, nr_pages, delay_rmap,
					     PAGE_SIZE);
}

/* Queue a single @page of size @page_size. */
bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
		bool delay_rmap, int page_size)
{
	return __tlb_remove_folio_pages_size(tlb, page, 1, delay_rmap, page_size);
}

#endif /* CONFIG_MMU_GATHER_NO_GATHER */

#ifdef CONFIG_MMU_GATHER_TABLE_FREE

/* Free every table recorded in @batch, then the batch page itself. */
static void __tlb_remove_table_free(struct mmu_table_batch *batch)
{
	int i;

	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);

	free_page((unsigned long)batch);
}

#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE

/*
 * Semi RCU freeing of the page directories.
 *
 * This is needed by some architectures to implement software pagetable walkers.
 *
 * gup_fast() and other software pagetable walkers do a lockless page-table
 * walk and therefore needs some synchronization with the freeing of the page
 * directories. The chosen means to accomplish that is by disabling IRQs over
 * the walk.
 *
 * Architectures that use IPIs to flush TLBs will then automagically DTRT,
 * since we unlink the page, flush TLBs, free the page. Since the disabling of
 * IRQs delays the completion of the TLB flush we can never observe an already
 * freed page.
 *
 * Architectures that do not have this (PPC) need to delay the freeing by some
 * other means, this is that means.
 *
 * What we do is batch the freed directory pages (tables) and RCU free them.
 * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
 * holds off grace periods.
 *
 * However, in order to batch these pages we need to allocate storage, this
 * allocation is deep inside the MM code and can thus easily fail on memory
 * pressure. To guarantee progress we fall back to single table freeing, see
 * the implementation of tlb_remove_table_one().
 *
 */

static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

void tlb_remove_table_sync_one(void)
{
	/*
	 * This isn't an RCU grace period and hence the page-tables cannot be
	 * assumed to be actually RCU-freed.
	 *
	 * It is however sufficient for software page-table walkers that rely on
	 * IRQ disabling.
	 */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
}

/* RCU callback: free the batch that embeds @head. */
static void tlb_remove_table_rcu(struct rcu_head *head)
{
	__tlb_remove_table_free(container_of(head, struct mmu_table_batch, rcu));
}

/* Hand @batch to call_rcu() so it is freed from the RCU callback above. */
static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
	call_rcu(&batch->rcu, tlb_remove_table_rcu);
}

#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */

/* Without RCU table freeing the batch is freed immediately. */
static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
	__tlb_remove_table_free(batch);
}

#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */

/*
 * If we want tlb_remove_table() to imply TLB invalidates.
 */
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
	if (tlb_needs_table_invalidate()) {
		/*
		 * Invalidate page-table caches used by hardware walkers. Then
		 * we still need to RCU-sched wait while freeing the pages
		 * because software walkers can still be in-flight.
		 */
		tlb_flush_mmu_tlbonly(tlb);
	}
}

/*
 * Fallback used when no batch page could be allocated: synchronize via
 * tlb_remove_table_sync_one() and free the single @table right away.
 */
static void tlb_remove_table_one(void *table)
{
	tlb_remove_table_sync_one();
	__tlb_remove_table(table);
}

/* If a table batch is pending, invalidate, hand it off for freeing, and drop it. */
static void tlb_table_flush(struct mmu_gather *tlb)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch) {
		tlb_table_invalidate(tlb);
		tlb_remove_table_free(*batch);
		*batch = NULL;
	}
}

/*
 * Queue @table for freeing.  A batch page is allocated on first use; if that
 * allocation fails the table is freed on its own via tlb_remove_table_one().
 * The batch is flushed once it holds MAX_TABLE_BATCH tables.
 */
void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch == NULL) {
		*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
		if (*batch == NULL) {
			tlb_table_invalidate(tlb);
			tlb_remove_table_one(table);
			return;
		}
		(*batch)->nr = 0;
	}

	(*batch)->tables[(*batch)->nr++] = table;
	if ((*batch)->nr == MAX_TABLE_BATCH)
		tlb_table_flush(tlb);
}

static inline void tlb_table_init(struct mmu_gather *tlb)
{
	tlb->batch = NULL;
}

#else /* !CONFIG_MMU_GATHER_TABLE_FREE */

/* No table batching configured: nothing to flush or initialize. */
static inline void tlb_table_flush(struct mmu_gather *tlb) { }
static inline void tlb_table_init(struct mmu_gather *tlb) { }

#endif /* CONFIG_MMU_GATHER_TABLE_FREE */

/* Release everything that was gathered: page tables first, then pages. */
static void tlb_flush_mmu_free(struct mmu_gather *tlb)
{
	tlb_table_flush(tlb);
#ifndef CONFIG_MMU_GATHER_NO_GATHER
	tlb_batch_pages_flush(tlb);
#endif
}

/* Flush the TLB, then free what was gathered. */
void tlb_flush_mmu(struct mmu_gather *tlb)
{
	tlb_flush_mmu_tlbonly(tlb);
	tlb_flush_mmu_free(tlb);
}

/*
 * Common initialization behind tlb_gather_mmu() and tlb_gather_mmu_fullmm():
 * reset all gather state in @tlb, record @mm and @fullmm, and account the
 * pending flush on @mm via inc_tlb_flush_pending().
 */
static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
			     bool fullmm)
{
	tlb->mm = mm;
	tlb->fullmm = fullmm;

#ifndef CONFIG_MMU_GATHER_NO_GATHER
	tlb->need_flush_all = 0;
	tlb->local.next = NULL;
	tlb->local.nr   = 0;
	tlb->local.max  = ARRAY_SIZE(tlb->__pages);
	tlb->active     = &tlb->local;
	tlb->batch_count = 0;
#endif
	tlb->delayed_rmap = 0;

	tlb_table_init(tlb);
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
	tlb->page_size = 0;
#endif

	__tlb_reset_range(tlb);
	inc_tlb_flush_pending(tlb->mm);
}

/**
 * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
	__tlb_gather_mmu(tlb, mm, false);
}

/**
 * tlb_gather_mmu_fullmm - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * In this case, @mm is without users and we're going to destroy the
 * full address space (exit/execve).
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
{
	__tlb_gather_mmu(tlb, mm, true);
}

/**
 * tlb_finish_mmu - finish an mmu_gather structure
 * @tlb: the mmu_gather structure to finish
 *
 * Called at the end of the shootdown operation to free up any resources that
 * were required.
 */
void tlb_finish_mmu(struct mmu_gather *tlb)
{
	/*
	 * If parallel threads are doing PTE changes on the same range
	 * under a non-exclusive lock (e.g., mmap_lock read-side) but defer
	 * the TLB flush by batching, one thread may end up seeing
	 * inconsistent PTEs, resulting in stale TLB entries. So flush the
	 * TLB forcefully if we detect parallel PTE batching threads.
	 *
	 * However, some syscalls, e.g. munmap(), may free page tables; this
	 * requires a forced flush of everything in the given range.
	 * Otherwise this may result in stale TLB entries on some
	 * architectures, e.g. aarch64, that can specify which TLB level to
	 * flush.
	 */
	if (mm_tlb_flush_nested(tlb->mm)) {
		/*
		 * The aarch64 yields better performance with fullmm by
		 * avoiding multiple CPUs spamming TLBI messages at the
		 * same time.
		 *
		 * On x86 non-fullmm doesn't yield significant difference
		 * against fullmm.
		 */
		tlb->fullmm = 1;
		__tlb_reset_range(tlb);
		tlb->freed_tables = 1;
	}

	tlb_flush_mmu(tlb);

#ifndef CONFIG_MMU_GATHER_NO_GATHER
	tlb_batch_list_free(tlb);
#endif
	dec_tlb_flush_pending(tlb->mm);
}
7 240 41 22 247 541 540 414 540 33 595 67 67 637 636 594 595 594 96 467 91 6 24 55 308 437 436 293 5 135 21 78 77 319 337 183 180 171 338 330 4 5 333 12 59 296 298 300 302 301 339 339 340 340 60 318 14 52 334 7 299 106 302 126 251 268 53 251 50 295 25 197 230 185 46 230 337 1 341 339 339 340 340 342 341 60 127 71 181 1 236 14 1 21 37 36 261 2 51 2 308 300 15 315 315 314 378 378 351 57 172 63 404 380 379 194 31 141 240 133 125 240 411 3 25 1 381 63 410 1 62 347 345 351 2 9 5 4 7 4 168 15 152 148 5 6 253 173 1 77 2 243 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 
897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 
1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 // SPDX-License-Identifier: GPL-2.0 /* * This file contains the procedures for the handling of select and poll * * Created for Linux based loosely upon Mathius Lattner's minix * patches by Peter MacDonald. Heavily edited by Linus. * * 4 February 1994 * COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS * flag set in its personality we do *not* modify the given timeout * parameter to reflect time remaining. * * 24 January 2000 * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). */ #include <linux/compat.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/sched/rt.h> #include <linux/syscalls.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/personality.h> /* for STICKY_TIMEOUTS */ #include <linux/file.h> #include <linux/fdtable.h> #include <linux/fs.h> #include <linux/rcupdate.h> #include <linux/hrtimer.h> #include <linux/freezer.h> #include <net/busy_poll.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> /* * Estimate expected accuracy in ns from a timeval. * * After quite a bit of churning around, we've settled on * a simple thing of taking 0.1% of the timeout as the * slack, with a cap of 100 msec. * "nice" tasks get a 0.5% slack instead. 
*
 * Consider this comment an open invitation to come up with even
 * better solutions..
 */

#define MAX_SLACK	(100 * NSEC_PER_MSEC)

/*
 * Slack for the relative timeout @tv: tv / divfactor, clamped to MAX_SLACK.
 * divfactor is 1000 (0.1%), or 200 (0.5%) when task_nice(current) > 0.
 * A negative tv_sec yields 0.
 */
static long __estimate_accuracy(struct timespec64 *tv)
{
	long slack;
	int divfactor = 1000;

	if (tv->tv_sec < 0)
		return 0;

	if (task_nice(current) > 0)
		divfactor = divfactor / 5;

	/*
	 * Anything this large ends up capped anyway; presumably this early
	 * exit also keeps the multiplication below from overflowing.
	 */
	if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
		return MAX_SLACK;

	slack = tv->tv_nsec / divfactor;
	slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);

	if (slack > MAX_SLACK)
		return MAX_SLACK;

	return slack;
}

/*
 * Slack to use when sleeping until the point in time @tv.  Returns 0 when
 * current->timer_slack_ns is 0; otherwise the larger of the task's timer
 * slack and the estimate computed from the time remaining until @tv.
 */
u64 select_estimate_accuracy(struct timespec64 *tv)
{
	u64 ret;
	struct timespec64 now;
	u64 slack = current->timer_slack_ns;

	if (slack == 0)
		return 0;

	ktime_get_ts64(&now);
	/* "now" is reused to hold the time remaining until *tv */
	now = timespec64_sub(*tv, now);
	ret = __estimate_accuracy(&now);
	if (ret < slack)
		return slack;
	return ret;
}

/*
 * One out-of-line chunk of poll_table_entry slots.  @entry points at the
 * next unused slot in @entries; @next links to the previously allocated
 * chunk.
 */
struct poll_table_page {
	struct poll_table_page * next;
	struct poll_table_entry * entry;
	struct poll_table_entry entries[];
};

/* True when one more entry would extend past PAGE_SIZE bytes from @table */
#define POLL_TABLE_FULL(table) \
	((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))

/*
 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 * I have rewritten this, taking some shortcuts: This code may not be easy to
 * follow, but it should be free of race-conditions, and it's practical. If you
 * understand what I'm doing here, then you understand how the linux
 * sleep/wakeup mechanism works.
 *
 * Two very simple procedures, poll_wait() and poll_freewait() make all the
 * work.  poll_wait() is an inline-function defined in <linux/poll.h>,
 * as all select/poll functions have to call it to add an entry to the
 * poll table.
*/
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
		       poll_table *p);

/*
 * Initialise @pwq for a new wait: install __pollwait() as the queueing
 * callback, record current as the polling task and reset the
 * triggered/error state and the entry storage.
 */
void poll_initwait(struct poll_wqueues *pwq)
{
	init_poll_funcptr(&pwq->pt, __pollwait);
	pwq->polling_task = current;
	pwq->triggered = 0;
	pwq->error = 0;
	pwq->table = NULL;
	pwq->inline_index = 0;
}
EXPORT_SYMBOL(poll_initwait);

/* Take @entry off its wait queue and drop the file reference it holds */
static void free_poll_entry(struct poll_table_entry *entry)
{
	remove_wait_queue(entry->wait_address, &entry->wait);
	fput(entry->filp);
}

/*
 * Release every entry registered through @pwq: the inline entries first,
 * then the entries of each out-of-line page, freeing each page once its
 * entries have been released.
 */
void poll_freewait(struct poll_wqueues *pwq)
{
	struct poll_table_page * p = pwq->table;
	int i;
	for (i = 0; i < pwq->inline_index; i++)
		free_poll_entry(pwq->inline_entries + i);
	while (p) {
		struct poll_table_entry * entry;
		struct poll_table_page *old;

		/* walk backwards from the first unused slot to entries[0] */
		entry = p->entry;
		do {
			entry--;
			free_poll_entry(entry);
		} while (entry > p->entries);
		old = p;
		p = p->next;
		free_page((unsigned long) old);
	}
}
EXPORT_SYMBOL(poll_freewait);

/*
 * Hand out the next unused entry slot: inline slots are used first, then
 * slots from the most recently allocated page.  A new page is allocated
 * when there is none yet or the current one is full.  On allocation
 * failure p->error is set to -ENOMEM and NULL is returned.
 */
static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
{
	struct poll_table_page *table = p->table;

	if (p->inline_index < N_INLINE_POLL_ENTRIES)
		return p->inline_entries + p->inline_index++;

	if (!table || POLL_TABLE_FULL(table)) {
		struct poll_table_page *new_table;

		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
		if (!new_table) {
			p->error = -ENOMEM;
			return NULL;
		}
		new_table->entry = new_table->entries;
		/* push the new page onto the front of the list */
		new_table->next = table;
		p->table = new_table;
		table = new_table;
	}

	return table->entry++;
}

/* Mark the owning poll_wqueues as triggered and wake its polling task */
static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct poll_wqueues *pwq = wait->private;
	DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);

	/*
	 * Although this function is called under waitqueue lock, LOCK
	 * doesn't imply write barrier and the users expect write
	 * barrier semantics on wakeup functions.  The following
	 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
	 * and is paired with smp_store_mb() in poll_schedule_timeout.
	 */
	smp_wmb();
	pwq->triggered = 1;

	/*
	 * Perform the default wake up operation using a dummy
	 * waitqueue.
*
	 * TODO: This is hacky but there currently is no interface to
	 * pass in @sync.  @sync is scheduled to be removed and once
	 * that happens, wake_up_process() can be used directly.
	 */
	return default_wake_function(&dummy_wait, mode, sync, key);
}

/*
 * Wake-queue callback installed by __pollwait().  A wakeup that carries a
 * key with none of the events this entry asked for is ignored (returns 0);
 * everything else is passed on to __pollwake().
 */
static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct poll_table_entry *entry;

	entry = container_of(wait, struct poll_table_entry, wait);
	if (key && !(key_to_poll(key) & entry->key))
		return 0;
	return __pollwake(wait, mode, sync, key);
}

/* Add a new entry */
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
				poll_table *p)
{
	struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
	struct poll_table_entry *entry = poll_get_entry(pwq);
	/* poll_get_entry() has already recorded the error in pwq->error */
	if (!entry)
		return;
	/* the reference taken here is dropped again in free_poll_entry() */
	entry->filp = get_file(filp);
	entry->wait_address = wait_address;
	entry->key = p->_key;
	init_waitqueue_func_entry(&entry->wait, pollwake);
	entry->wait.private = pwq;
	add_wait_queue(wait_address, &entry->wait);
}

/*
 * Put the current task into @state and, unless @pwq has already been
 * triggered, sleep via schedule_hrtimeout_range() with the absolute expiry
 * @expires and @slack.  Returns -EINTR when no sleep took place because
 * the queue was already triggered, otherwise the value returned by
 * schedule_hrtimeout_range().  pwq->triggered is cleared before returning.
 */
static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
			  ktime_t *expires, unsigned long slack)
{
	int rc = -EINTR;

	set_current_state(state);
	if (!pwq->triggered)
		rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
	__set_current_state(TASK_RUNNING);

	/*
	 * Prepare for the next iteration.
	 *
	 * The following smp_store_mb() serves two purposes.  First, it's
	 * the counterpart rmb of the wmb in pollwake() such that data
	 * written before wake up is always visible after wake up.
	 * Second, the full barrier guarantees that triggered clearing
	 * doesn't pass event check of the next iteration.  Note that
	 * this problem doesn't exist for the first iteration as
	 * add_wait_queue() has full barrier semantics.
*/
	smp_store_mb(pwq->triggered, 0);

	return rc;
}

/**
 * poll_select_set_timeout - helper function to setup the timeout value
 * @to:		pointer to timespec64 variable for the final timeout
 * @sec:	seconds (from user space)
 * @nsec:	nanoseconds (from user space)
 *
 * Note, we do not use a timespec for the user space value here, That
 * way we can use the function for timeval and compat interfaces as well.
 *
 * A zero sec/nsec pair is stored as a zero @to; any other value is turned
 * into an absolute end time by adding it to the current ktime_get_ts64()
 * reading.
 *
 * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
 */
int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec)
{
	struct timespec64 ts = {.tv_sec = sec, .tv_nsec = nsec};

	if (!timespec64_valid(&ts))
		return -EINVAL;

	/* Optimize for the zero timeout value here */
	if (!sec && !nsec) {
		to->tv_sec = to->tv_nsec = 0;
	} else {
		ktime_get_ts64(to);
		*to = timespec64_add_safe(*to, ts);
	}
	return 0;
}

/* User-space layout that poll_select_finish() uses to write the timeout back */
enum poll_time_type {
	PT_TIMEVAL = 0,
	PT_OLD_TIMEVAL = 1,
	PT_TIMESPEC = 2,
	PT_OLD_TIMESPEC = 3,
};

/*
 * Common tail of the select/poll-family syscalls.  Restores the saved
 * signal mask unless @ret is -ERESTARTNOHAND and, when the caller passed a
 * timeout pointer @p, writes the time remaining until @end_time (clamped
 * at zero) back to user space in the layout selected by @pt_type.
 *
 * Returns @ret unchanged when there is nothing to write back or the write
 * succeeded.  When the write-back is skipped for STICKY_TIMEOUTS or fails,
 * -ERESTARTNOHAND is converted to -EINTR.
 */
static int poll_select_finish(struct timespec64 *end_time,
			      void __user *p,
			      enum poll_time_type pt_type, int ret)
{
	struct timespec64 rts;

	restore_saved_sigmask_unless(ret == -ERESTARTNOHAND);

	if (!p)
		return ret;

	if (current->personality & STICKY_TIMEOUTS)
		goto sticky;

	/* No update for zero timeout */
	if (!end_time->tv_sec && !end_time->tv_nsec)
		return ret;

	ktime_get_ts64(&rts);
	rts = timespec64_sub(*end_time, rts);
	/* already past the end time: report zero remaining */
	if (rts.tv_sec < 0)
		rts.tv_sec = rts.tv_nsec = 0;


	switch (pt_type) {
	case PT_TIMEVAL:
		{
			struct __kernel_old_timeval rtv;

			/* zero first if the struct is larger than its two fields */
			if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
				memset(&rtv, 0, sizeof(rtv));
			rtv.tv_sec = rts.tv_sec;
			rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
			if (!copy_to_user(p, &rtv, sizeof(rtv)))
				return ret;
		}
		break;
	case PT_OLD_TIMEVAL:
		{
			struct old_timeval32 rtv;

			rtv.tv_sec = rts.tv_sec;
			rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
			if (!copy_to_user(p, &rtv, sizeof(rtv)))
				return ret;
		}
		break;
	case PT_TIMESPEC:
		if (!put_timespec64(&rts, p))
			return ret;
		break;
	case PT_OLD_TIMESPEC:
		if (!put_old_timespec32(&rts, p))
			return ret;
break;
	default:
		BUG();
	}
	/*
	 * If an application puts its timeval in read-only memory, we
	 * don't want the Linux-specific update to the timeval to
	 * cause a fault after the select has completed
	 * successfully. However, because we're not updating the
	 * timeval, we can't restart the system call.
	 */

sticky:
	if (ret == -ERESTARTNOHAND)
		ret = -EINTR;
	return ret;
}

/*
 * Scalable version of the fd_set.
 */

typedef struct {
	unsigned long *in, *out, *ex;
	unsigned long *res_in, *res_out, *res_ex;
} fd_set_bits;

/*
 * How many longwords for "nr" bits?
 */
#define FDS_BITPERLONG	(8*sizeof(long))
#define FDS_LONGS(nr)	(((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG)
#define FDS_BYTES(nr)	(FDS_LONGS(nr)*sizeof(long))

/*
 * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
 *
 * Copies FDS_BYTES(nr) bytes from @ufdset into @fdset, or zeroes @fdset
 * when @ufdset is NULL.  Returns 0 or -EFAULT.
 */
static inline
int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
{
	nr = FDS_BYTES(nr);
	if (ufdset)
		return copy_from_user(fdset, ufdset, nr) ? -EFAULT : 0;

	memset(fdset, 0, nr);
	return 0;
}

/*
 * Copy FDS_BYTES(nr) bytes of @fdset out to @ufdset.  A NULL @ufdset is a
 * no-op returning 0; otherwise the __copy_to_user() result is returned,
 * which callers treat as failure when non-zero.
 */
static inline unsigned long __must_check
set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
{
	if (ufdset)
		return __copy_to_user(ufdset, fdset, FDS_BYTES(nr));
	return 0;
}

/* Clear the FDS_BYTES(nr) bytes of @fdset */
static inline
void zero_fd_set(unsigned long nr, unsigned long *fdset)
{
	memset(fdset, 0, FDS_BYTES(nr));
}

/* Pointer to the n-th long of the in/out/ex bitmap */
#define FDS_IN(fds, n)		(fds->in + n)
#define FDS_OUT(fds, n)		(fds->out + n)
#define FDS_EX(fds, n)		(fds->ex + n)

/* Union of the n-th long of all three request bitmaps */
#define BITS(fds, n)	(*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n))

/*
 * Check the first @n bits of the request bitmaps against the open-fd
 * bitmap of the current task.  Returns -EBADF if a requested fd is not
 * open, otherwise the highest requested fd plus one (0 if none is set).
 */
static int max_select_fd(unsigned long n, fd_set_bits *fds)
{
	unsigned long *open_fds;
	unsigned long set;
	int max;
	struct fdtable *fdt;

	/* handle last in-complete long-word first */
	set = ~(~0UL << (n & (BITS_PER_LONG-1)));
	n /= BITS_PER_LONG;
	fdt = files_fdtable(current->files);
	open_fds = fdt->open_fds + n;
	max = 0;
	if (set) {
		set &= BITS(fds, n);
		if (set) {
			if (!(set & ~*open_fds))
				goto get_max;
			return -EBADF;
		}
	}
	/* then walk the complete long-words from the top down */
	while (n) {
		open_fds--;
		n--;
		set = BITS(fds, n);
		if (!set)
			continue;
		if (set & ~*open_fds)
			return
-EBADF;
		/* max already found; keep going only to validate lower words */
		if (max)
			continue;
get_max:
		/* position of the highest set bit in this word, plus one */
		do {
			max++;
			set >>= 1;
		} while (set);
		max += n * BITS_PER_LONG;
	}

	return max;
}

/* Event masks that count as "readable", "writable" and "exceptional" */
#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\
			EPOLLNVAL)
#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\
			 EPOLLNVAL)
#define POLLEX_SET (EPOLLPRI | EPOLLNVAL)

/*
 * Poll a single descriptor on behalf of do_select().  The wait key always
 * contains POLLEX_SET and @ll_flag, plus POLLIN_SET / POLLOUT_SET when
 * @bit is set in @in / @out.  Returns EPOLLNVAL when @fd has no file,
 * otherwise the vfs_poll() mask.
 */
static inline __poll_t select_poll_one(int fd, poll_table *wait, unsigned long in,
				unsigned long out, unsigned long bit,
				__poll_t ll_flag)
{
	CLASS(fd, f)(fd);

	if (fd_empty(f))
		return EPOLLNVAL;

	wait->_key = POLLEX_SET | ll_flag;
	if (in & bit)
		wait->_key |= POLLIN_SET;
	if (out & bit)
		wait->_key |= POLLOUT_SET;

	return vfs_poll(fd_file(f), wait);
}

/*
 * Core of select(): poll the descriptors named in @fds (first @n bits)
 * until at least one is ready, the end time is reached, a signal is
 * pending or registering a waiter failed.  Ready bits are stored in the
 * res_* bitmaps.  Returns the number of ready bits, 0, or a negative
 * errno (-EBADF from max_select_fd(), or the poll table error).
 */
static noinline_for_stack int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
{
	ktime_t expire, *to = NULL;
	struct poll_wqueues table;
	poll_table *wait;
	int retval, i, timed_out = 0;
	u64 slack = 0;
	__poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
	unsigned long busy_start = 0;

	rcu_read_lock();
	retval = max_select_fd(n, fds);
	rcu_read_unlock();

	if (retval < 0)
		return retval;
	/* no need to scan past the highest requested descriptor */
	n = retval;

	poll_initwait(&table);
	wait = &table.pt;
	/* zero timeout: poll once, never register waiters */
	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
		wait->_qproc = NULL;
		timed_out = 1;
	}

	if (end_time && !timed_out)
		slack = select_estimate_accuracy(end_time);

	retval = 0;
	for (;;) {
		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
		bool can_busy_loop = false;

		inp = fds->in; outp = fds->out; exp = fds->ex;
		rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;

		/* one iteration per long-word of the bitmaps */
		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
			unsigned long in, out, ex, all_bits, bit = 1, j;
			unsigned long res_in = 0, res_out = 0, res_ex = 0;
			__poll_t mask;

			in = *inp++; out = *outp++; ex = *exp++;
			all_bits = in | out | ex;
			/* nothing requested in this word: skip it entirely */
			if (all_bits == 0) {
				i += BITS_PER_LONG;
				continue;
			}

			for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
				if (i >= n)
					break;
				if (!(bit & all_bits))
					continue;
				mask = select_poll_one(i, wait, in, out, bit,
						       busy_flag);
				if ((mask & POLLIN_SET) && (in
& bit)) {
					res_in |= bit;
					retval++;
					/* an event was found: stop registering waiters */
					wait->_qproc = NULL;
				}
				if ((mask & POLLOUT_SET) && (out & bit)) {
					res_out |= bit;
					retval++;
					wait->_qproc = NULL;
				}
				if ((mask & POLLEX_SET) && (ex & bit)) {
					res_ex |= bit;
					retval++;
					wait->_qproc = NULL;
				}
				/* got something, stop busy polling */
				if (retval) {
					can_busy_loop = false;
					busy_flag = 0;

				/*
				 * only remember a returned
				 * POLL_BUSY_LOOP if we asked for it
				 */
				} else if (busy_flag & mask)
					can_busy_loop = true;

			}
			/* result words were zeroed by the caller; only store hits */
			if (res_in)
				*rinp = res_in;
			if (res_out)
				*routp = res_out;
			if (res_ex)
				*rexp = res_ex;
			cond_resched();
		}
		/* first full pass is done: later passes must not queue again */
		wait->_qproc = NULL;
		if (retval || timed_out || signal_pending(current))
			break;
		if (table.error) {
			retval = table.error;
			break;
		}

		/* only if found POLL_BUSY_LOOP sockets && not out of time */
		if (can_busy_loop && !need_resched()) {
			if (!busy_start) {
				busy_start = busy_loop_current_time();
				continue;
			}
			if (!busy_loop_timeout(busy_start))
				continue;
		}
		/* busy polling is over for the rest of this call */
		busy_flag = 0;

		/*
		 * If this is the first loop and we have a timeout
		 * given, then we convert to ktime_t and set the to
		 * pointer to the expiry value.
		 */
		if (end_time && !to) {
			expire = timespec64_to_ktime(*end_time);
			to = &expire;
		}

		/* a zero return is taken as "timeout expired": do one last pass */
		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
					   to, slack))
			timed_out = 1;
	}

	poll_freewait(&table);

	return retval;
}

/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND which restart only when you want to.
*/
int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
			   fd_set __user *exp, struct timespec64 *end_time)
{
	fd_set_bits fds;
	void *bits;
	int ret, max_fds;
	size_t size, alloc_size;
	struct fdtable *fdt;
	/* Allocate small arguments on the stack to save memory and be faster */
	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];

	ret = -EINVAL;
	if (n < 0)
		goto out_nofds;

	/* max_fds can increase, so grab it once to avoid race */
	rcu_read_lock();
	fdt = files_fdtable(current->files);
	max_fds = fdt->max_fds;
	rcu_read_unlock();
	/* silently clamp n to the size of the fd table */
	if (n > max_fds)
		n = max_fds;

	/*
	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
	 * since we used fdset we need to allocate memory in units of
	 * long-words. 
	 */
	size = FDS_BYTES(n);
	bits = stack_fds;
	if (size > sizeof(stack_fds) / 6) {
		/* Not enough space in on-stack array; must allocate with kvmalloc() */
		ret = -ENOMEM;
		/* guard the 6 * size multiplication against overflow */
		if (size > (SIZE_MAX / 6))
			goto out_nofds;

		alloc_size = 6 * size;
		bits = kvmalloc(alloc_size, GFP_KERNEL);
		if (!bits)
			goto out_nofds;
	}
	/* carve the buffer into six consecutive bitmaps of "size" bytes */
	fds.in      = bits;
	fds.out     = bits +   size;
	fds.ex      = bits + 2*size;
	fds.res_in  = bits + 3*size;
	fds.res_out = bits + 4*size;
	fds.res_ex  = bits + 5*size;

	if ((ret = get_fd_set(n, inp, fds.in)) ||
	    (ret = get_fd_set(n, outp, fds.out)) ||
	    (ret = get_fd_set(n, exp, fds.ex)))
		goto out;
	zero_fd_set(n, fds.res_in);
	zero_fd_set(n, fds.res_out);
	zero_fd_set(n, fds.res_ex);

	ret = do_select(n, &fds, end_time);

	if (ret < 0)
		goto out;
	if (!ret) {
		/* nothing ready: restartable if a signal is pending, else 0 */
		ret = -ERESTARTNOHAND;
		if (signal_pending(current))
			goto out;
		ret = 0;
	}

	/* copy the result bitmaps back over the user's input sets */
	if (set_fd_set(n, inp, fds.res_in) ||
	    set_fd_set(n, outp, fds.res_out) ||
	    set_fd_set(n, exp, fds.res_ex))
		ret = -EFAULT;

out:
	if (bits != stack_fds)
		kvfree(bits);
out_nofds:
	return ret;
}

/*
 * select() with a struct __kernel_old_timeval timeout: convert *@tvp (if
 * given) into an absolute end time, run core_sys_select() and let
 * poll_select_finish() write the remaining time back.
 */
static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
		       fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
{
	struct timespec64 end_time, *to = NULL;
	struct __kernel_old_timeval tv;
	int ret;

	if (tvp) {
		if (copy_from_user(&tv, tvp, sizeof(tv)))
			return -EFAULT;

		to = &end_time;
		/* tv_usec need not be normalized: fold whole seconds into sec */
		if
(poll_select_set_timeout(to,
				tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
				(tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
			return -EINVAL;
	}

	ret = core_sys_select(n, inp, outp, exp, to);
	return poll_select_finish(&end_time, tvp, PT_TIMEVAL, ret);
}

/* select(2) entry point: thin wrapper around kern_select() */
SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
		fd_set __user *, exp, struct __kernel_old_timeval __user *, tvp)
{
	return kern_select(n, inp, outp, exp, tvp);
}

/*
 * Common implementation of the pselect6 variants.  @type says whether
 * @tsp points to a 64-bit (PT_TIMESPEC) or a 32-bit (PT_OLD_TIMESPEC)
 * timespec; any other value is a BUG().  The user signal mask is installed
 * with set_user_sigmask() before selecting, and poll_select_finish()
 * handles restoring it and writing the remaining time back.
 */
static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
		       fd_set __user *exp, void __user *tsp,
		       const sigset_t __user *sigmask, size_t sigsetsize,
		       enum poll_time_type type)
{
	struct timespec64 ts, end_time, *to = NULL;
	int ret;

	if (tsp) {
		switch (type) {
		case PT_TIMESPEC:
			if (get_timespec64(&ts, tsp))
				return -EFAULT;
			break;
		case PT_OLD_TIMESPEC:
			if (get_old_timespec32(&ts, tsp))
				return -EFAULT;
			break;
		default:
			BUG();
		}

		to = &end_time;
		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
			return -EINVAL;
	}

	ret = set_user_sigmask(sigmask, sigsetsize);
	if (ret)
		return ret;

	ret = core_sys_select(n, inp, outp, exp, to);
	return poll_select_finish(&end_time, tsp, type, ret);
}

/*
 * Most architectures can't handle 7-argument syscalls. So we provide a
 * 6-argument version where the sixth argument is a pointer to a structure
 * which has a pointer to the sigset_t itself followed by a size_t containing
 * the sigset size.
*/ struct sigset_argpack { sigset_t __user *p; size_t size; }; static inline int get_sigset_argpack(struct sigset_argpack *to, struct sigset_argpack __user *from) { // the path is hot enough for overhead of copy_from_user() to matter if (from) { if (can_do_masked_user_access()) from = masked_user_access_begin(from); else if (!user_read_access_begin(from, sizeof(*from))) return -EFAULT; unsafe_get_user(to->p, &from->p, Efault); unsafe_get_user(to->size, &from->size, Efault); user_read_access_end(); } return 0; Efault: user_access_end(); return -EFAULT; } SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp, fd_set __user *, exp, struct __kernel_timespec __user *, tsp, void __user *, sig) { struct sigset_argpack x = {NULL, 0}; if (get_sigset_argpack(&x, sig)) return -EFAULT; return do_pselect(n, inp, outp, exp, tsp, x.p, x.size, PT_TIMESPEC); } #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT) SYSCALL_DEFINE6(pselect6_time32, int, n, fd_set __user *, inp, fd_set __user *, outp, fd_set __user *, exp, struct old_timespec32 __user *, tsp, void __user *, sig) { struct sigset_argpack x = {NULL, 0}; if (get_sigset_argpack(&x, sig)) return -EFAULT; return do_pselect(n, inp, outp, exp, tsp, x.p, x.size, PT_OLD_TIMESPEC); } #endif #ifdef __ARCH_WANT_SYS_OLD_SELECT struct sel_arg_struct { unsigned long n; fd_set __user *inp, *outp, *exp; struct __kernel_old_timeval __user *tvp; }; SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg) { struct sel_arg_struct a; if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; return kern_select(a.n, a.inp, a.outp, a.exp, a.tvp); } #endif struct poll_list { struct poll_list *next; unsigned int len; struct pollfd entries[] __counted_by(len); }; #define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd)) /* * Fish for pollable events on the pollfd->fd file descriptor. 
We're only
 * interested in events matching the pollfd->events mask, and the result
 * matching that mask is both recorded in pollfd->revents and returned. The
 * pwait poll_table will be used by the fd-provided poll handler for waiting,
 * if pwait->_qproc is non-NULL.
 */
static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
				     bool *can_busy_poll,
				     __poll_t busy_flag)
{
	int fd = pollfd->fd;
	__poll_t mask, filter;

	/* negative descriptors are ignored and report no events */
	if (fd < 0)
		return 0;

	CLASS(fd, f)(fd);
	if (fd_empty(f))
		return EPOLLNVAL;

	/* userland u16 ->events contains POLL... bitmap */
	filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
	pwait->_key = filter | busy_flag;
	mask = vfs_poll(fd_file(f), pwait);
	if (mask & busy_flag)
		*can_busy_poll = true;
	return mask & filter;		/* Mask out unneeded events. */
}

/*
 * Core of poll(): scan every pollfd in @list until at least one reports an
 * event, the end time is reached, a signal is pending or registering a
 * waiter failed.  Returns the number of entries with events, 0, the poll
 * table error, or -ERESTARTNOHAND when interrupted by a signal.
 */
static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
		   struct timespec64 *end_time)
{
	poll_table* pt = &wait->pt;
	ktime_t expire, *to = NULL;
	int timed_out = 0, count = 0;
	u64 slack = 0;
	__poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
	unsigned long busy_start = 0;

	/* Optimise the no-wait case */
	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
		pt->_qproc = NULL;
		timed_out = 1;
	}

	if (end_time && !timed_out)
		slack = select_estimate_accuracy(end_time);

	for (;;) {
		struct poll_list *walk;
		bool can_busy_loop = false;

		for (walk = list; walk != NULL; walk = walk->next) {
			struct pollfd * pfd, * pfd_end;

			pfd = walk->entries;
			pfd_end = pfd + walk->len;
			for (; pfd != pfd_end; pfd++) {
				__poll_t mask;
				/*
				 * Fish for events. If we found one, record it
				 * and kill poll_table->_qproc, so we don't
				 * needlessly register any other waiters after
				 * this. They'll get immediately deregistered
				 * when we break out and return.
*/
				mask = do_pollfd(pfd, pt, &can_busy_loop, busy_flag);
				pfd->revents = mangle_poll(mask);
				if (mask) {
					count++;
					pt->_qproc = NULL;
					/* found something, stop busy polling */
					busy_flag = 0;
					can_busy_loop = false;
				}
			}
		}
		/*
		 * All waiters have already been registered, so don't provide
		 * a poll_table->_qproc to them on the next loop iteration.
		 */
		pt->_qproc = NULL;
		if (!count) {
			/* no events: report a registration error or a pending signal */
			count = wait->error;
			if (signal_pending(current))
				count = -ERESTARTNOHAND;
		}
		if (count || timed_out)
			break;

		/* only if found POLL_BUSY_LOOP sockets && not out of time */
		if (can_busy_loop && !need_resched()) {
			if (!busy_start) {
				busy_start = busy_loop_current_time();
				continue;
			}
			if (!busy_loop_timeout(busy_start))
				continue;
		}
		/* busy polling is over for the rest of this call */
		busy_flag = 0;

		/*
		 * If this is the first loop and we have a timeout
		 * given, then we convert to ktime_t and set the to
		 * pointer to the expiry value.
		 */
		if (end_time && !to) {
			expire = timespec64_to_ktime(*end_time);
			to = &expire;
		}

		/* a zero return is taken as "timeout expired": do one last pass */
		if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
			timed_out = 1;
	}
	return count;
}

/* Number of pollfd entries that fit in the on-stack buffer after the header */
#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
			sizeof(struct pollfd))

/*
 * Copy @nfds pollfd entries in from @ufds (the first chunk on the stack,
 * further chunks from kmalloc()), run do_poll() on them and copy the
 * revents fields back out.  Returns the do_poll() result, -EINVAL when
 * @nfds exceeds RLIMIT_NOFILE, -ENOMEM or -EFAULT.
 */
static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
		struct timespec64 *end_time)
{
	struct poll_wqueues table;
	int err = -EFAULT, fdcount;
	/* Allocate small arguments on the stack to save memory and be
	   faster - use long to make sure the buffer is aligned properly
	   on 64 bit archs to avoid unaligned access */
	long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
	struct poll_list *const head = (struct poll_list *)stack_pps;
 	struct poll_list *walk = head;
	unsigned int todo = nfds;
	unsigned int len;

	if (nfds > rlimit(RLIMIT_NOFILE))
		return -EINVAL;

	len = min_t(unsigned int, nfds, N_STACK_PPS);
	for (;;) {
		walk->next = NULL;
		walk->len = len;
		if (!len)
			break;

		/* nfds - todo entries have been copied in so far */
		if (copy_from_user(walk->entries, ufds + nfds-todo,
					sizeof(struct pollfd) * walk->len))
			goto out_fds;

		if (walk->len >= todo)
			break;
		todo -= walk->len;

		len = min(todo, POLLFD_PER_PAGE);
		walk =
walk->next = kmalloc(struct_size(walk, entries, len),
					    GFP_KERNEL);
		if (!walk) {
			err = -ENOMEM;
			goto out_fds;
		}
	}

	poll_initwait(&table);
	fdcount = do_poll(head, &table, end_time);
	poll_freewait(&table);

	if (!user_write_access_begin(ufds, nfds * sizeof(*ufds)))
		goto out_fds;

	/* only the revents field of each entry is written back */
	for (walk = head; walk; walk = walk->next) {
		struct pollfd *fds = walk->entries;
		unsigned int j;

		for (j = walk->len; j; fds++, ufds++, j--)
			unsafe_put_user(fds->revents, &ufds->revents, Efault);
  	}
	user_write_access_end();

	err = fdcount;
out_fds:
	/* head lives on the stack; only the chained chunks are freed */
	walk = head->next;
	while (walk) {
		struct poll_list *pos = walk;
		walk = walk->next;
		kfree(pos);
	}

	return err;

Efault:
	user_write_access_end();
	err = -EFAULT;
	goto out_fds;
}

/*
 * Restart handler for poll(): re-run do_sys_poll() with the arguments and
 * absolute end time saved in @restart_block, and re-arm the restart if the
 * call is interrupted again.
 */
static long do_restart_poll(struct restart_block *restart_block)
{
	struct pollfd __user *ufds = restart_block->poll.ufds;
	int nfds = restart_block->poll.nfds;
	struct timespec64 *to = NULL, end_time;
	int ret;

	if (restart_block->poll.has_timeout) {
		end_time.tv_sec = restart_block->poll.tv_sec;
		end_time.tv_nsec = restart_block->poll.tv_nsec;
		to = &end_time;
	}

	ret = do_sys_poll(ufds, nfds, to);

	if (ret == -ERESTARTNOHAND)
		ret = set_restart_fn(restart_block, do_restart_poll);

	return ret;
}

/*
 * poll(2): a negative @timeout_msecs means no timeout.  When interrupted,
 * the arguments and the absolute end time are saved in the restart block
 * so that do_restart_poll() can resume the call.
 */
SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
		int, timeout_msecs)
{
	struct timespec64 end_time, *to = NULL;
	int ret;

	if (timeout_msecs >= 0) {
		to = &end_time;
		poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
			NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
	}

	ret = do_sys_poll(ufds, nfds, to);

	if (ret == -ERESTARTNOHAND) {
		struct restart_block *restart_block;

		restart_block = &current->restart_block;
		restart_block->poll.ufds = ufds;
		restart_block->poll.nfds = nfds;

		if (timeout_msecs >= 0) {
			restart_block->poll.tv_sec = end_time.tv_sec;
			restart_block->poll.tv_nsec = end_time.tv_nsec;
			restart_block->poll.has_timeout = 1;
		} else
			restart_block->poll.has_timeout = 0;

		ret = set_restart_fn(restart_block, do_restart_poll);
	}
	return ret;
}

/* ppoll(2) with a 64-bit timespec and a temporary signal mask */
SYSCALL_DEFINE5(ppoll, struct pollfd
__user *, ufds, unsigned int, nfds,
		struct __kernel_timespec __user *, tsp, const sigset_t __user *, sigmask,
		size_t, sigsetsize)
{
	struct timespec64 ts, end_time, *to = NULL;
	int ret;

	if (tsp) {
		if (get_timespec64(&ts, tsp))
			return -EFAULT;

		to = &end_time;
		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
			return -EINVAL;
	}

	ret = set_user_sigmask(sigmask, sigsetsize);
	if (ret)
		return ret;

	ret = do_sys_poll(ufds, nfds, to);
	return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret);
}

#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)

/* ppoll variant taking a 32-bit timespec */
SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds,
		struct old_timespec32 __user *, tsp, const sigset_t __user *, sigmask,
		size_t, sigsetsize)
{
	struct timespec64 ts, end_time, *to = NULL;
	int ret;

	if (tsp) {
		if (get_old_timespec32(&ts, tsp))
			return -EFAULT;

		to = &end_time;
		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
			return -EINVAL;
	}

	ret = set_user_sigmask(sigmask, sigsetsize);
	if (ret)
		return ret;

	ret = do_sys_poll(ufds, nfds, to);
	return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret);
}
#endif

#ifdef CONFIG_COMPAT
#define __COMPAT_NFDBITS       (8 * sizeof(compat_ulong_t))

/*
 * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
 * 64-bit unsigned longs.
 */
static
int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
			unsigned long *fdset)
{
	if (ufdset) {
		return compat_get_bitmap(fdset, ufdset, nr);
	} else {
		zero_fd_set(nr, fdset);
		return 0;
	}
}

/*
 * Write @nr bits of @fdset back to the compat bitmap @ufdset.  A NULL
 * @ufdset is a no-op returning 0; otherwise the compat_put_bitmap()
 * result is returned.
 */
static
int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
		      unsigned long *fdset)
{
	if (!ufdset)
		return 0;
	return compat_put_bitmap(ufdset, fdset, nr);
}


/*
 * This is a virtual copy of sys_select from fs/select.c and probably
 * should be compared to it from time to time
 */

/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
*
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND which restart only when you want to.
 */
static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
	struct timespec64 *end_time)
{
	fd_set_bits fds;
	void *bits;
	int size, max_fds, ret = -EINVAL;
	struct fdtable *fdt;
	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];

	if (n < 0)
		goto out_nofds;

	/* max_fds can increase, so grab it once to avoid race */
	rcu_read_lock();
	fdt = files_fdtable(current->files);
	max_fds = fdt->max_fds;
	rcu_read_unlock();
	/* silently clamp n to the size of the fd table */
	if (n > max_fds)
		n = max_fds;

	/*
	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
	 * since we used fdset we need to allocate memory in units of
	 * long-words.
	 */
	size = FDS_BYTES(n);
	bits = stack_fds;
	/* fall back to the heap when six bitmaps do not fit on the stack */
	if (size > sizeof(stack_fds) / 6) {
		bits = kmalloc_array(6, size, GFP_KERNEL);
		ret = -ENOMEM;
		if (!bits)
			goto out_nofds;
	}
	/* carve the buffer into six consecutive bitmaps of "size" bytes */
	fds.in      = (unsigned long *)  bits;
	fds.out     = (unsigned long *) (bits +   size);
	fds.ex      = (unsigned long *) (bits + 2*size);
	fds.res_in  = (unsigned long *) (bits + 3*size);
	fds.res_out = (unsigned long *) (bits + 4*size);
	fds.res_ex  = (unsigned long *) (bits + 5*size);

	if ((ret = compat_get_fd_set(n, inp, fds.in)) ||
	    (ret = compat_get_fd_set(n, outp, fds.out)) ||
	    (ret = compat_get_fd_set(n, exp, fds.ex)))
		goto out;
	zero_fd_set(n, fds.res_in);
	zero_fd_set(n, fds.res_out);
	zero_fd_set(n, fds.res_ex);

	ret = do_select(n, &fds, end_time);

	if (ret < 0)
		goto out;
	if (!ret) {
		/* nothing ready: restartable if a signal is pending, else 0 */
		ret = -ERESTARTNOHAND;
		if (signal_pending(current))
			goto out;
		ret = 0;
	}

	/* copy the result bitmaps back over the user's input sets */
	if (compat_set_fd_set(n, inp, fds.res_in) ||
	    compat_set_fd_set(n, outp, fds.res_out) ||
	    compat_set_fd_set(n, exp, fds.res_ex))
		ret = -EFAULT;
out:
	if (bits != stack_fds)
		kfree(bits);
out_nofds:
	return ret;
}

/*
 * Compat select() with a struct old_timeval32 timeout: convert *@tvp (if
 * given) into an absolute end time, run compat_core_sys_select() and let
 * poll_select_finish() write the remaining time back.
 */
static int do_compat_select(int n, compat_ulong_t __user *inp,
	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
	struct old_timeval32 __user *tvp)
{
	struct timespec64 end_time, *to = NULL;
	struct
old_timeval32 tv; int ret; if (tvp) { if (copy_from_user(&tv, tvp, sizeof(tv))) return -EFAULT; to = &end_time; if (poll_select_set_timeout(to, tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) return -EINVAL; } ret = compat_core_sys_select(n, inp, outp, exp, to); return poll_select_finish(&end_time, tvp, PT_OLD_TIMEVAL, ret); } COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, struct old_timeval32 __user *, tvp) { return do_compat_select(n, inp, outp, exp, tvp); } struct compat_sel_arg_struct { compat_ulong_t n; compat_uptr_t inp; compat_uptr_t outp; compat_uptr_t exp; compat_uptr_t tvp; }; COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) { struct compat_sel_arg_struct a; if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; return do_compat_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), compat_ptr(a.exp), compat_ptr(a.tvp)); } static long do_compat_pselect(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, void __user *tsp, compat_sigset_t __user *sigmask, compat_size_t sigsetsize, enum poll_time_type type) { struct timespec64 ts, end_time, *to = NULL; int ret; if (tsp) { switch (type) { case PT_OLD_TIMESPEC: if (get_old_timespec32(&ts, tsp)) return -EFAULT; break; case PT_TIMESPEC: if (get_timespec64(&ts, tsp)) return -EFAULT; break; default: BUG(); } to = &end_time; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } ret = set_compat_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = compat_core_sys_select(n, inp, outp, exp, to); return poll_select_finish(&end_time, tsp, type, ret); } struct compat_sigset_argpack { compat_uptr_t p; compat_size_t size; }; static inline int get_compat_sigset_argpack(struct compat_sigset_argpack *to, struct compat_sigset_argpack __user *from) { if (from) { if (!user_read_access_begin(from, sizeof(*from))) 
return -EFAULT; unsafe_get_user(to->p, &from->p, Efault); unsafe_get_user(to->size, &from->size, Efault); user_read_access_end(); } return 0; Efault: user_access_end(); return -EFAULT; } COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp, compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, struct __kernel_timespec __user *, tsp, void __user *, sig) { struct compat_sigset_argpack x = {0, 0}; if (get_compat_sigset_argpack(&x, sig)) return -EFAULT; return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(x.p), x.size, PT_TIMESPEC); } #if defined(CONFIG_COMPAT_32BIT_TIME) COMPAT_SYSCALL_DEFINE6(pselect6_time32, int, n, compat_ulong_t __user *, inp, compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, struct old_timespec32 __user *, tsp, void __user *, sig) { struct compat_sigset_argpack x = {0, 0}; if (get_compat_sigset_argpack(&x, sig)) return -EFAULT; return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(x.p), x.size, PT_OLD_TIMESPEC); } #endif #if defined(CONFIG_COMPAT_32BIT_TIME) COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, struct old_timespec32 __user *, tsp, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { struct timespec64 ts, end_time, *to = NULL; int ret; if (tsp) { if (get_old_timespec32(&ts, tsp)) return -EFAULT; to = &end_time; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } ret = set_compat_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret); } #endif /* New compat syscall for 64 bit time_t*/ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds, unsigned int, nfds, struct __kernel_timespec __user *, tsp, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { struct timespec64 ts, end_time, *to = NULL; int ret; if (tsp) { if (get_timespec64(&ts, tsp)) return -EFAULT; to = &end_time; if 
(poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } ret = set_compat_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret); } #endif
15 19 25 15 30 19 19 30 25 22 24 25 25 25 25 25 19 1 26 26 26 31 30 4 13 31 3 28 27 21 1 18 27 4 7 1 4 12 12 25 5 30 13 5 26 1 26 25 26 17 26 26 15 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 // 
SPDX-License-Identifier: GPL-2.0
/*
 * NETLINK      Policy advertisement to userspace
 *
 * 		Authors:	Johannes Berg <johannes@sipsolutions.net>
 *
 * Copyright 2019 Intel Corporation
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <net/netlink.h>

/* Number of policy slots allocated initially and added on each growth. */
#define INITIAL_POLICIES_ALLOC	10

/*
 * Dump state: a cursor (policy_idx/attr_idx) over a flat table of policies.
 * Unused table slots have a NULL policy pointer.
 */
struct netlink_policy_dump_state {
	unsigned int policy_idx;
	unsigned int attr_idx;
	unsigned int n_alloc;
	struct {
		const struct nla_policy *policy;
		unsigned int maxtype;
	} policies[] __counted_by(n_alloc);
};

/*
 * Record @policy/@maxtype in the table unless it is already present.
 *
 * A NULL @policy or zero @maxtype is ignored.  The first empty slot is
 * used; when the table is full it is grown by INITIAL_POLICIES_ALLOC
 * entries and *@statep is updated to the reallocated state.
 *
 * Returns 0 on success, -ENOMEM if growing fails (in which case *@statep
 * is left unchanged).
 */
static int add_policy(struct netlink_policy_dump_state **statep,
		      const struct nla_policy *policy,
		      unsigned int maxtype)
{
	struct netlink_policy_dump_state *state = *statep;
	unsigned int old_n_alloc, n_alloc, i;

	if (!policy || !maxtype)
		return 0;

	for (i = 0; i < state->n_alloc; i++) {
		/* already known - nothing to do */
		if (state->policies[i].policy == policy &&
		    state->policies[i].maxtype == maxtype)
			return 0;

		/* first free slot - take it */
		if (!state->policies[i].policy) {
			state->policies[i].policy = policy;
			state->policies[i].maxtype = maxtype;
			return 0;
		}
	}

	n_alloc = state->n_alloc + INITIAL_POLICIES_ALLOC;
	state = krealloc(state, struct_size(state, policies, n_alloc),
			 GFP_KERNEL);
	if (!state)
		return -ENOMEM;

	/* zero the newly added tail so its slots read as unused */
	old_n_alloc = state->n_alloc;
	state->n_alloc = n_alloc;
	memset(&state->policies[old_n_alloc], 0,
	       flex_array_size(state, policies, n_alloc - old_n_alloc));

	state->policies[old_n_alloc].policy = policy;
	state->policies[old_n_alloc].maxtype = maxtype;
	*statep = state;

	return 0;
}

/**
 * netlink_policy_dump_get_policy_idx - retrieve policy index
 * @state: the policy dump state
 * @policy: the policy to find
 * @maxtype: the policy's maxattr
 *
 * Returns: the index of the given policy in the dump state
 *
 * Call this to find a policy index when you've added multiple and e.g.
 * need to tell userspace which command has which policy (by index).
 *
 * Note: this will WARN and return 0 if the policy isn't found, which
 *	 means it wasn't added in the first place, which would be an
 *	 internal consistency bug.
 */
int netlink_policy_dump_get_policy_idx(struct netlink_policy_dump_state *state,
				       const struct nla_policy *policy,
				       unsigned int maxtype)
{
	unsigned int i;

	if (WARN_ON(!policy || !maxtype))
                return 0;

	for (i = 0; i < state->n_alloc; i++) {
		if (state->policies[i].policy == policy &&
		    state->policies[i].maxtype == maxtype)
			return i;
	}

	WARN_ON(1);
	return 0;
}

/*
 * Allocate a zeroed dump state with INITIAL_POLICIES_ALLOC slots.
 * Returns the state or ERR_PTR(-ENOMEM).
 */
static struct netlink_policy_dump_state *alloc_state(void)
{
	struct netlink_policy_dump_state *state;

	state = kzalloc(struct_size(state, policies, INITIAL_POLICIES_ALLOC),
			GFP_KERNEL);
	if (!state)
		return ERR_PTR(-ENOMEM);
	state->n_alloc = INITIAL_POLICIES_ALLOC;

	return state;
}

/**
 * netlink_policy_dump_add_policy - add a policy to the dump
 * @pstate: state to add to, may be reallocated, must be %NULL the first time
 * @policy: the new policy to add to the dump
 * @maxtype: the new policy's max attr type
 *
 * Returns: 0 on success, a negative error code otherwise.
 *
 * Call this to allocate a policy dump state, and to add policies to it. This
 * should be called from the dump start() callback.
 *
 * Note: on failures, any previously allocated state is freed.
 */
int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate,
				   const struct nla_policy *policy,
				   unsigned int maxtype)
{
	struct netlink_policy_dump_state *state = *pstate;
	unsigned int policy_idx;
	int err;

	if (!state) {
		state = alloc_state();
		if (IS_ERR(state))
			return PTR_ERR(state);
	}

	/*
	 * walk the policies and nested ones first, and build
	 * a linear list of them.
	 */

	err = add_policy(&state, policy, maxtype);
	if (err)
		goto err_try_undo;

	/*
	 * The table may grow while we iterate: nested policies found here are
	 * appended and then visited by later iterations of this same loop.
	 */
	for (policy_idx = 0;
	     policy_idx < state->n_alloc && state->policies[policy_idx].policy;
	     policy_idx++) {
		const struct nla_policy *policy;
		unsigned int type;

		policy = state->policies[policy_idx].policy;

		for (type = 0;
		     type <= state->policies[policy_idx].maxtype;
		     type++) {
			switch (policy[type].type) {
			case NLA_NESTED:
			case NLA_NESTED_ARRAY:
				/* for nested types, ->len carries the nested maxtype */
				err = add_policy(&state,
						 policy[type].nested_policy,
						 policy[type].len);
				if (err)
					goto err_try_undo;
				break;
			default:
				break;
			}
		}
	}

	*pstate = state;
	return 0;

err_try_undo:
	/* Try to preserve reasonable unwind semantics - if we're starting from
	 * scratch clean up fully, otherwise record what we got and caller will.
	 */
	if (!*pstate)
		netlink_policy_dump_free(state);
	else
		*pstate = state;
	return err;
}

/*
 * True once the cursor has moved past the last allocated slot or points
 * at an unused (NULL policy) slot.
 */
static bool
netlink_policy_dump_finished(struct netlink_policy_dump_state *state)
{
	return state->policy_idx >= state->n_alloc ||
	       !state->policies[state->policy_idx].policy;
}

/**
 * netlink_policy_dump_loop - dumping loop indicator
 * @state: the policy dump state
 *
 * Returns: %true if the dump continues, %false otherwise
 *
 * Note: this only inspects the cursor and does not release @state; the
 *	 state is released by netlink_policy_dump_free().
 */
bool netlink_policy_dump_loop(struct netlink_policy_dump_state *state)
{
	return !netlink_policy_dump_finished(state);
}

/*
 * Upper bound, in bytes, of what __netlink_policy_dump_write_attr() emits
 * for the policy entry @pt (it WARNs if the written nest exceeds this).
 * Returns 0 for types that are skipped or not listed here.
 */
int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt)
{
	/* nested + type */
	int common = 2 * nla_attr_size(sizeof(u32));

	switch (pt->type) {
	case NLA_UNSPEC:
	case NLA_REJECT:
		/* these actually don't need any space */
		return 0;
	case NLA_NESTED:
	case NLA_NESTED_ARRAY:
		/* common, policy idx, policy maxattr */
		return common + 2 * nla_attr_size(sizeof(u32));
	case NLA_U8:
	case NLA_U16:
	case NLA_U32:
	case NLA_U64:
	case NLA_MSECS:
	case NLA_S8:
	case NLA_S16:
	case NLA_S32:
	case NLA_S64:
	case NLA_SINT:
	case NLA_UINT:
		/* maximum is common, u64 min/max with padding */
		return common +
		       2 * (nla_attr_size(0) + nla_attr_size(sizeof(u64)));
	case NLA_BITFIELD32:
		return common + nla_attr_size(sizeof(u32));
	case NLA_STRING:
	case NLA_NUL_STRING:
	case NLA_BINARY:
		/* maximum is common, u32 min-length/max-length */
		return common + 2 * nla_attr_size(sizeof(u32));
	case NLA_FLAG:
		return common;
	}

	/* this should then cause a warning later */
	return 0;
}

/*
 * Write the description of policy entry @pt as a nest of type @nestattr.
 *
 * @state may be NULL; in that case nested-policy index/maxtype attributes
 * are not emitted.
 *
 * Returns 0 on success, -ENODATA if the entry is skipped (the nest is
 * cancelled), or -ENOBUFS if @skb runs out of room (the nest is cancelled).
 */
static int
__netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state,
				 struct sk_buff *skb,
				 const struct nla_policy *pt,
				 int nestattr)
{
	int estimate = netlink_policy_dump_attr_size_estimate(pt);
	enum netlink_attribute_type type;
	struct nlattr *attr;

	attr = nla_nest_start(skb, nestattr);
	if (!attr)
		return -ENOBUFS;

	switch (pt->type) {
	default:
	case NLA_UNSPEC:
	case NLA_REJECT:
		/* skip - use NLA_MIN_LEN to advertise such */
		nla_nest_cancel(skb, attr);
		return -ENODATA;
	case NLA_NESTED:
		type = NL_ATTR_TYPE_NESTED;
		fallthrough;
	case NLA_NESTED_ARRAY:
		if (pt->type == NLA_NESTED_ARRAY)
			type = NL_ATTR_TYPE_NESTED_ARRAY;
		if (state && pt->nested_policy && pt->len &&
		    (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_POLICY_IDX,
				 netlink_policy_dump_get_policy_idx(state,
								    pt->nested_policy,
								    pt->len)) ||
		     nla_put_u32(skb, NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE,
				 pt->len)))
			goto nla_put_failure;
		break;
	case NLA_U8:
	case NLA_U16:
	case NLA_U32:
	case NLA_U64:
	case NLA_UINT:
	case NLA_MSECS: {
		struct netlink_range_validation range;

		if (pt->type == NLA_U8)
			type = NL_ATTR_TYPE_U8;
		else if (pt->type == NLA_U16)
			type = NL_ATTR_TYPE_U16;
		else if (pt->type == NLA_U32)
			type = NL_ATTR_TYPE_U32;
		else if (pt->type == NLA_U64)
			type = NL_ATTR_TYPE_U64;
		else
			type = NL_ATTR_TYPE_UINT;

		/* a mask validation is advertised instead of a min/max range */
		if (pt->validation_type == NLA_VALIDATE_MASK) {
			if (nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MASK,
					      pt->mask,
					      NL_POLICY_TYPE_ATTR_PAD))
				goto nla_put_failure;
			break;
		}

		nla_get_range_unsigned(pt, &range);

		if (nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MIN_VALUE_U,
				      range.min, NL_POLICY_TYPE_ATTR_PAD) ||
		    nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MAX_VALUE_U,
				      range.max, NL_POLICY_TYPE_ATTR_PAD))
			goto nla_put_failure;
		break;
	}
	case NLA_S8:
	case NLA_S16:
	case NLA_S32:
	case NLA_S64:
	case NLA_SINT: {
		struct netlink_range_validation_signed range;

		if (pt->type == NLA_S8)
			type = NL_ATTR_TYPE_S8;
		else if (pt->type == NLA_S16)
			type = NL_ATTR_TYPE_S16;
		else if (pt->type == NLA_S32)
			type = NL_ATTR_TYPE_S32;
		else if (pt->type == NLA_S64)
			type = NL_ATTR_TYPE_S64;
		else
			type = NL_ATTR_TYPE_SINT;

		nla_get_range_signed(pt, &range);

		if (nla_put_s64(skb, NL_POLICY_TYPE_ATTR_MIN_VALUE_S,
				range.min, NL_POLICY_TYPE_ATTR_PAD) ||
		    nla_put_s64(skb, NL_POLICY_TYPE_ATTR_MAX_VALUE_S,
				range.max, NL_POLICY_TYPE_ATTR_PAD))
			goto nla_put_failure;
		break;
	}
	case NLA_BITFIELD32:
		type = NL_ATTR_TYPE_BITFIELD32;
		if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_BITFIELD32_MASK,
				pt->bitfield32_valid))
			goto nla_put_failure;
		break;
	case NLA_STRING:
	case NLA_NUL_STRING:
	case NLA_BINARY:
		if (pt->type == NLA_STRING)
			type = NL_ATTR_TYPE_STRING;
		else if (pt->type == NLA_NUL_STRING)
			type = NL_ATTR_TYPE_NUL_STRING;
		else
			type = NL_ATTR_TYPE_BINARY;

		if (pt->validation_type == NLA_VALIDATE_RANGE ||
		    pt->validation_type == NLA_VALIDATE_RANGE_WARN_TOO_LONG) {
			struct netlink_range_validation range;

			nla_get_range_unsigned(pt, &range);

			/* only advertise bounds that actually restrict the length */
			if (range.min &&
			    nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH,
					range.min))
				goto nla_put_failure;

			if (range.max < U16_MAX &&
			    nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH,
					range.max))
				goto nla_put_failure;
		} else if (pt->len &&
			   nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH,
				       pt->len)) {
			goto nla_put_failure;
		}
		break;
	case NLA_FLAG:
		type = NL_ATTR_TYPE_FLAG;
		break;
	}

	if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_TYPE, type))
		goto nla_put_failure;

	nla_nest_end(skb, attr);
	/* the size estimate must never be smaller than what was written */
	WARN_ON(attr->nla_len > estimate);

	return 0;
nla_put_failure:
	nla_nest_cancel(skb, attr);
	return -ENOBUFS;
}

/**
 * netlink_policy_dump_write_attr - write a given attribute policy
 * @skb: the message skb to write to
 * @pt: the attribute's policy
 * @nestattr: the nested attribute ID to use
 *
 * Returns: 0 on success, an error code otherwise; -%ENODATA is
 *	    special, indicating that there's no policy data and
 *	    the attribute is generally rejected.
 */
int netlink_policy_dump_write_attr(struct sk_buff *skb,
				   const struct nla_policy *pt,
				   int nestattr)
{
	return __netlink_policy_dump_write_attr(NULL, skb, pt, nestattr);
}

/**
 * netlink_policy_dump_write - write current policy dump attributes
 * @skb: the message skb to write to
 * @state: the policy dump state
 *
 * Returns: 0 on success, an error code otherwise
 */
int netlink_policy_dump_write(struct sk_buff *skb,
			      struct netlink_policy_dump_state *state)
{
	const struct nla_policy *pt;
	struct nlattr *policy;
	bool again;
	int err;

send_attribute:
	again = false;

	pt = &state->policies[state->policy_idx].policy[state->attr_idx];

	/* outer nest is keyed by policy index, inner nest by attribute index */
	policy = nla_nest_start(skb, state->policy_idx);
	if (!policy)
		return -ENOBUFS;

	err = __netlink_policy_dump_write_attr(state, skb, pt, state->attr_idx);
	if (err == -ENODATA) {
		/* nothing to report for this attribute: drop the nest, try the next */
		nla_nest_cancel(skb, policy);
		again = true;
		goto next;
	} else if (err) {
		goto nla_put_failure;
	}

	/* finish and move state to next attribute */
	nla_nest_end(skb, policy);

next:
	state->attr_idx += 1;
	if (state->attr_idx > state->policies[state->policy_idx].maxtype) {
		state->attr_idx = 0;
		state->policy_idx++;
	}

	if (again) {
		/* skipped entry was the last one: nothing was written */
		if (netlink_policy_dump_finished(state))
			return -ENODATA;
		goto send_attribute;
	}

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, policy);
	return -ENOBUFS;
}

/**
 * netlink_policy_dump_free - free policy dump state
 * @state: the policy dump state to free
 *
 * Call this from the done() method to ensure dump state is freed.
 */
void netlink_policy_dump_free(struct netlink_policy_dump_state *state)
{
	kfree(state);
}
77 10 26 3 9 25 4 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014 Patrick McHardy <kaber@trash.net> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_reject.h> #include <net/netfilter/ipv4/nf_reject.h> #include <net/netfilter/ipv6/nf_reject.h> static void nft_reject_inet_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); switch (nft_pf(pkt)) { case NFPROTO_IPV4: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nf_send_unreach(pkt->skb, priv->icmp_code, nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: nf_send_reset(nft_net(pkt), nft_sk(pkt), pkt->skb, nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach(pkt->skb, nft_reject_icmp_code(priv->icmp_code), nft_hook(pkt)); break; } break; case NFPROTO_IPV6: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nf_send_unreach6(nft_net(pkt), pkt->skb, priv->icmp_code, nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: nf_send_reset6(nft_net(pkt), nft_sk(pkt), pkt->skb, nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach6(nft_net(pkt), pkt->skb, nft_reject_icmpv6_code(priv->icmp_code), nft_hook(pkt)); break; } break; } regs->verdict.code = NF_DROP; } static int nft_reject_inet_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << 
NF_INET_PRE_ROUTING) | (1 << NF_INET_INGRESS)); } static struct nft_expr_type nft_reject_inet_type; static const struct nft_expr_ops nft_reject_inet_ops = { .type = &nft_reject_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), .eval = nft_reject_inet_eval, .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_inet_validate, .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_inet_type __read_mostly = { .family = NFPROTO_INET, .name = "reject", .ops = &nft_reject_inet_ops, .policy = nft_reject_policy, .maxattr = NFTA_REJECT_MAX, .owner = THIS_MODULE, }; static int __init nft_reject_inet_module_init(void) { return nft_register_expr(&nft_reject_inet_type); } static void __exit nft_reject_inet_module_exit(void) { nft_unregister_expr(&nft_reject_inet_type); } module_init(nft_reject_inet_module_init); module_exit(nft_reject_inet_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_AF_EXPR(1, "reject"); MODULE_DESCRIPTION("Netfilter nftables reject inet support");
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM spi #if !defined(_TRACE_SPI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SPI_H #include <linux/ktime.h> #include <linux/tracepoint.h> DECLARE_EVENT_CLASS(spi_controller, TP_PROTO(struct spi_controller *controller), TP_ARGS(controller), TP_STRUCT__entry( __field( int, bus_num ) ), TP_fast_assign( __entry->bus_num = controller->bus_num; ), TP_printk("spi%d", (int)__entry->bus_num) ); DEFINE_EVENT(spi_controller, spi_controller_idle, TP_PROTO(struct spi_controller *controller), TP_ARGS(controller) ); DEFINE_EVENT(spi_controller, spi_controller_busy, TP_PROTO(struct spi_controller *controller), TP_ARGS(controller) ); TRACE_EVENT(spi_setup, TP_PROTO(struct spi_device *spi, int status), TP_ARGS(spi, status), TP_STRUCT__entry( __field(int, bus_num) __field(int, chip_select) __field(unsigned long, mode) __field(unsigned int, bits_per_word) __field(unsigned int, max_speed_hz) __field(int, status) ), TP_fast_assign( __entry->bus_num = spi->controller->bus_num; __entry->chip_select = spi_get_chipselect(spi, 0); __entry->mode = spi->mode; 
__entry->bits_per_word = spi->bits_per_word; __entry->max_speed_hz = spi->max_speed_hz; __entry->status = status; ), TP_printk("spi%d.%d setup mode %lu, %s%s%s%s%u bits/w, %u Hz max --> %d", __entry->bus_num, __entry->chip_select, (__entry->mode & SPI_MODE_X_MASK), (__entry->mode & SPI_CS_HIGH) ? "cs_high, " : "", (__entry->mode & SPI_LSB_FIRST) ? "lsb, " : "", (__entry->mode & SPI_3WIRE) ? "3wire, " : "", (__entry->mode & SPI_LOOP) ? "loopback, " : "", __entry->bits_per_word, __entry->max_speed_hz, __entry->status) ); TRACE_EVENT(spi_set_cs, TP_PROTO(struct spi_device *spi, bool enable), TP_ARGS(spi, enable), TP_STRUCT__entry( __field(int, bus_num) __field(int, chip_select) __field(unsigned long, mode) __field(bool, enable) ), TP_fast_assign( __entry->bus_num = spi->controller->bus_num; __entry->chip_select = spi_get_chipselect(spi, 0); __entry->mode = spi->mode; __entry->enable = enable; ), TP_printk("spi%d.%d %s%s", __entry->bus_num, __entry->chip_select, __entry->enable ? "activate" : "deactivate", (__entry->mode & SPI_CS_HIGH) ? 
", cs_high" : "") ); DECLARE_EVENT_CLASS(spi_message, TP_PROTO(struct spi_message *msg), TP_ARGS(msg), TP_STRUCT__entry( __field( int, bus_num ) __field( int, chip_select ) __field( struct spi_message *, msg ) ), TP_fast_assign( __entry->bus_num = msg->spi->controller->bus_num; __entry->chip_select = spi_get_chipselect(msg->spi, 0); __entry->msg = msg; ), TP_printk("spi%d.%d %p", (int)__entry->bus_num, (int)__entry->chip_select, (struct spi_message *)__entry->msg) ); DEFINE_EVENT(spi_message, spi_message_submit, TP_PROTO(struct spi_message *msg), TP_ARGS(msg) ); DEFINE_EVENT(spi_message, spi_message_start, TP_PROTO(struct spi_message *msg), TP_ARGS(msg) ); TRACE_EVENT(spi_message_done, TP_PROTO(struct spi_message *msg), TP_ARGS(msg), TP_STRUCT__entry( __field( int, bus_num ) __field( int, chip_select ) __field( struct spi_message *, msg ) __field( unsigned, frame ) __field( unsigned, actual ) ), TP_fast_assign( __entry->bus_num = msg->spi->controller->bus_num; __entry->chip_select = spi_get_chipselect(msg->spi, 0); __entry->msg = msg; __entry->frame = msg->frame_length; __entry->actual = msg->actual_length; ), TP_printk("spi%d.%d %p len=%u/%u", (int)__entry->bus_num, (int)__entry->chip_select, (struct spi_message *)__entry->msg, (unsigned)__entry->actual, (unsigned)__entry->frame) ); /* * Consider a buffer valid if non-NULL and if it doesn't match the dummy buffer * that only exist to work with controllers that have SPI_CONTROLLER_MUST_TX or * SPI_CONTROLLER_MUST_RX. 
*/ #define spi_valid_txbuf(msg, xfer) \ (xfer->tx_buf && xfer->tx_buf != msg->spi->controller->dummy_tx) #define spi_valid_rxbuf(msg, xfer) \ (xfer->rx_buf && xfer->rx_buf != msg->spi->controller->dummy_rx) DECLARE_EVENT_CLASS(spi_transfer, TP_PROTO(struct spi_message *msg, struct spi_transfer *xfer), TP_ARGS(msg, xfer), TP_STRUCT__entry( __field( int, bus_num ) __field( int, chip_select ) __field( struct spi_transfer *, xfer ) __field( int, len ) __dynamic_array(u8, rx_buf, spi_valid_rxbuf(msg, xfer) ? (xfer->len < 64 ? xfer->len : 64) : 0) __dynamic_array(u8, tx_buf, spi_valid_txbuf(msg, xfer) ? (xfer->len < 64 ? xfer->len : 64) : 0) ), TP_fast_assign( __entry->bus_num = msg->spi->controller->bus_num; __entry->chip_select = spi_get_chipselect(msg->spi, 0); __entry->xfer = xfer; __entry->len = xfer->len; if (spi_valid_txbuf(msg, xfer)) memcpy(__get_dynamic_array(tx_buf), xfer->tx_buf, __get_dynamic_array_len(tx_buf)); if (spi_valid_rxbuf(msg, xfer)) memcpy(__get_dynamic_array(rx_buf), xfer->rx_buf, __get_dynamic_array_len(rx_buf)); ), TP_printk("spi%d.%d %p len=%d tx=[%*phD] rx=[%*phD]", __entry->bus_num, __entry->chip_select, __entry->xfer, __entry->len, __get_dynamic_array_len(tx_buf), __get_dynamic_array(tx_buf), __get_dynamic_array_len(rx_buf), __get_dynamic_array(rx_buf)) ); DEFINE_EVENT(spi_transfer, spi_transfer_start, TP_PROTO(struct spi_message *msg, struct spi_transfer *xfer), TP_ARGS(msg, xfer) ); DEFINE_EVENT(spi_transfer, spi_transfer_stop, TP_PROTO(struct spi_message *msg, struct spi_transfer *xfer), TP_ARGS(msg, xfer) ); #endif /* _TRACE_POWER_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
27 28 57 85 9 18 28 57 86 1 85 55 56 15 45 30 30 30 30 27 27 9 9 9 9 5 12 120 120 58 78 78 49 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 // SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Virtual Raw MIDI client on Sequencer
 *
 *  Copyright (c) 2000 by Takashi Iwai <tiwai@suse.de>,
 *                        Jaroslav Kysela <perex@perex.cz>
 */

/*
 * Virtual Raw MIDI client
 *
 * The virtual rawmidi client is a sequencer client which associate
 * a rawmidi device file.  The created rawmidi device file can be
 * accessed as a normal raw midi, but its MIDI source and destination
 * are arbitrary.  For example, a user-client software synth connected
 * to this port can be used as a normal midi device as well.
 *
 * The virtual rawmidi device accepts also multiple opens.  Each file
 * has its own input buffer, so that no conflict would occur.  The drain
 * of input/output buffer acts only to the local buffer.
 *
 */

#include <linux/init.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <sound/core.h>
#include <sound/rawmidi.h>
#include <sound/info.h>
#include <sound/control.h>
#include <sound/minors.h>
#include <sound/seq_kernel.h>
#include <sound/seq_midi_event.h>
#include <sound/seq_virmidi.h>

MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
MODULE_DESCRIPTION("Virtual Raw MIDI client on Sequencer");
MODULE_LICENSE("GPL");

/*
 * initialize an event record
 *
 * The record is zeroed, the source port is taken from @vmidi and the
 * destination is chosen by the sequencer mode; the type starts out as
 * SNDRV_SEQ_EVENT_NONE.
 */
static void snd_virmidi_init_event(struct snd_virmidi *vmidi,
				   struct snd_seq_event *ev)
{
	memset(ev, 0, sizeof(*ev));
	ev->source.port = vmidi->port;
	switch (vmidi->seq_mode) {
	case SNDRV_VIRMIDI_SEQ_DISPATCH:
		ev->dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS;
		break;
	case SNDRV_VIRMIDI_SEQ_ATTACH:
		/* FIXME: source and destination are same - not good.. */
		ev->dest.client = vmidi->client;
		ev->dest.port = vmidi->port;
		break;
	}
	ev->type = SNDRV_SEQ_EVENT_NONE;
}

/*
 * decode input event and put to read buffer of each opened file
 */

/* callback for snd_seq_dump_var_event(), bridging to snd_rawmidi_receive() */
static int dump_to_rawmidi(void *ptr, void *buf, int count)
{
	return snd_rawmidi_receive(ptr, buf, count);
}

/*
 * Deliver @ev to every opened input file of @rdev whose trigger is set.
 *
 * @atomic selects how the file list is protected while walking it: the
 * rwlock (filelist_lock) when true, the rwsem (filelist_sem) otherwise.
 * Always returns 0.
 */
static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev,
					 struct snd_seq_event *ev,
					 bool atomic)
{
	struct snd_virmidi *vmidi;
	unsigned char msg[4];
	int len;

	if (atomic)
		read_lock(&rdev->filelist_lock);
	else
		down_read(&rdev->filelist_sem);
	list_for_each_entry(vmidi, &rdev->filelist, list) {
		/* skip files whose input is not triggered */
		if (!READ_ONCE(vmidi->trigger))
			continue;
		if (ev->type == SNDRV_SEQ_EVENT_SYSEX) {
			/* only variable-length sysex events are forwarded */
			if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE)
				continue;
			snd_seq_dump_var_event(ev, dump_to_rawmidi, vmidi->substream);
			snd_midi_event_reset_decode(vmidi->parser);
		} else {
			/* decode into raw bytes and hand them to the substream */
			len = snd_midi_event_decode(vmidi->parser, msg, sizeof(msg), ev);
			if (len > 0)
				snd_rawmidi_receive(vmidi->substream, msg, len);
		}
	}
	if (atomic)
		read_unlock(&rdev->filelist_lock);
	else
		up_read(&rdev->filelist_sem);

	return 0;
}

/*
 * event handler of virmidi port
 *
 * Events are ignored (0 is returned) unless SNDRV_VIRMIDI_USE is set in
 * the device flags.
 */
static int snd_virmidi_event_input(struct snd_seq_event *ev, int direct,
				   void *private_data, int atomic, int hop)
{
	struct snd_virmidi_dev *rdev;

	rdev = private_data;
	if (!(rdev->flags & SNDRV_VIRMIDI_USE))
		return 0; /* ignored */
	return snd_virmidi_dev_receive_event(rdev, ev, atomic);
}

/*
 * trigger rawmidi stream for input
 */
static void snd_virmidi_input_trigger(struct snd_rawmidi_substream *substream, int up)
{
	struct snd_virmidi *vmidi = substream->runtime->private_data;

	/* paired with READ_ONCE() in snd_virmidi_dev_receive_event() */
	WRITE_ONCE(vmidi->trigger, !!up);
}

/* process rawmidi bytes and send events;
 * we need no lock here for vmidi->event since it's handled only in this work
 */
static void snd_vmidi_output_work(struct work_struct *work)
{
	struct snd_virmidi *vmidi;
	struct snd_rawmidi_substream *substream;
	unsigned char input;
	int ret;

	vmidi = container_of(work, struct snd_virmidi, output_work);
	substream = vmidi->substream;

	/* discard the outputs in dispatch mode unless subscribed */
	if (vmidi->seq_mode == SNDRV_VIRMIDI_SEQ_DISPATCH &&
	    !(vmidi->rdev->flags & SNDRV_VIRMIDI_SUBSCRIBE)) {
		snd_rawmidi_proceed(substream);
		return;
	}

	/* feed one byte at a time to the encoder while the stream is triggered */
	while (READ_ONCE(vmidi->trigger)) {
		if (snd_rawmidi_transmit(substream, &input, 1) != 1)
			break;
		if (!snd_midi_event_encode_byte(vmidi->parser, input,
						&vmidi->event))
			continue;
		if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
			ret = snd_seq_kernel_client_dispatch(vmidi->client,
							     &vmidi->event,
							     false, 0);
			/* mark the event record as consumed before checking ret */
			vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
			if (ret < 0)
				break;
		}
		/* rawmidi input might be huge, allow to have a break */
		cond_resched();
	}
}

/*
 * trigger rawmidi stream for output
 */
static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream, int up)
{
	struct snd_virmidi *vmidi = substream->runtime->private_data;

	WRITE_ONCE(vmidi->trigger, !!up);
	/* the actual transmission happens in snd_vmidi_output_work() */
	if (up)
		queue_work(system_highpri_wq, &vmidi->output_work);
}

/*
 * open rawmidi handle for input
 *
 * Allocates the per-open state and a MIDI event parser, then adds the
 * state to the device's file list.  Returns 0 or -ENOMEM.
 */
static int snd_virmidi_input_open(struct snd_rawmidi_substream *substream)
{
	struct snd_virmidi_dev *rdev = substream->rmidi->private_data;
	struct snd_rawmidi_runtime *runtime = substream->runtime;
	struct snd_virmidi *vmidi;

	vmidi = kzalloc(sizeof(*vmidi), GFP_KERNEL);
	if (vmidi == NULL)
		return -ENOMEM;
	vmidi->substream = substream;
	if (snd_midi_event_new(0, &vmidi->parser) < 0) {
		kfree(vmidi);
		return -ENOMEM;
	}
	vmidi->seq_mode = rdev->seq_mode;
	vmidi->client = rdev->client;
	vmidi->port = rdev->port;
	runtime->private_data = vmidi;
	/* both locks are taken: readers use either one (see receive_event) */
	scoped_guard(rwsem_write, &rdev->filelist_sem) {
		guard(write_lock_irq)(&rdev->filelist_lock);
		list_add_tail(&vmidi->list, &rdev->filelist);
	}
	vmidi->rdev = rdev;
	return 0;
}

/*
 * open rawmidi handle for output
 *
 * Allocates the per-open state, a parser with a MAX_MIDI_EVENT_BUF sized
 * buffer, the event record and the output work.  Returns 0 or -ENOMEM.
 */
static int snd_virmidi_output_open(struct snd_rawmidi_substream *substream)
{
	struct snd_virmidi_dev *rdev = substream->rmidi->private_data;
	struct snd_rawmidi_runtime *runtime = substream->runtime;
	struct snd_virmidi *vmidi;

	vmidi = kzalloc(sizeof(*vmidi), GFP_KERNEL);
	if (vmidi == NULL)
		return -ENOMEM;
	vmidi->substream = substream;
	if (snd_midi_event_new(MAX_MIDI_EVENT_BUF, &vmidi->parser) < 0) {
		kfree(vmidi);
		return -ENOMEM;
	}
	vmidi->seq_mode = rdev->seq_mode;
	vmidi->client = rdev->client;
	vmidi->port = rdev->port;
	snd_virmidi_init_event(vmidi, &vmidi->event);
	vmidi->rdev = rdev;
	INIT_WORK(&vmidi->output_work, snd_vmidi_output_work);
	runtime->private_data = vmidi;
	return 0;
}

/*
 * close rawmidi handle for input
 *
 * Unlinks the per-open state from the file list under both locks, then
 * frees the parser and the state.
 */
static int snd_virmidi_input_close(struct snd_rawmidi_substream *substream)
{
	struct snd_virmidi_dev *rdev = substream->rmidi->private_data;
	struct snd_virmidi *vmidi = substream->runtime->private_data;

	scoped_guard(rwsem_write, &rdev->filelist_sem) {
		guard(write_lock_irq)(&rdev->filelist_lock);
		list_del(&vmidi->list);
	}
	snd_midi_event_free(vmidi->parser);
	substream->runtime->private_data = NULL;
	kfree(vmidi);
	return 0;
}

/*
 * close rawmidi handle for output
 *
 * The trigger is cleared and the output work cancelled before the parser
 * and the per-open state are freed.
 */
static int snd_virmidi_output_close(struct snd_rawmidi_substream *substream)
{
	struct snd_virmidi *vmidi = substream->runtime->private_data;

	WRITE_ONCE(vmidi->trigger, false); /* to be sure */
	cancel_work_sync(&vmidi->output_work);
	snd_midi_event_free(vmidi->parser);
	substream->runtime->private_data = NULL;
	kfree(vmidi);
	return 0;
}

/*
 * drain output work queue
 */
static void snd_virmidi_output_drain(struct snd_rawmidi_substream *substream)
{
	struct snd_virmidi *vmidi = substream->runtime->private_data;

	flush_work(&vmidi->output_work);
}

/*
 * subscribe callback - allow output to rawmidi device
 *
 * Takes a reference on the card's module and sets SNDRV_VIRMIDI_SUBSCRIBE.
 * Returns -EFAULT if the module reference cannot be taken.
 */
static int snd_virmidi_subscribe(void *private_data,
				 struct snd_seq_port_subscribe *info)
{
	struct snd_virmidi_dev *rdev;

	rdev = private_data;
	if (!try_module_get(rdev->card->module))
		return -EFAULT;
	rdev->flags |= SNDRV_VIRMIDI_SUBSCRIBE;
	return 0;
}

/*
 * unsubscribe callback - disallow output to rawmidi device
 */
static int
snd_virmidi_unsubscribe(void *private_data, struct snd_seq_port_subscribe *info) { struct snd_virmidi_dev *rdev; rdev = private_data; rdev->flags &= ~SNDRV_VIRMIDI_SUBSCRIBE; module_put(rdev->card->module); return 0; } /* * use callback - allow input to rawmidi device */ static int snd_virmidi_use(void *private_data, struct snd_seq_port_subscribe *info) { struct snd_virmidi_dev *rdev; rdev = private_data; if (!try_module_get(rdev->card->module)) return -EFAULT; rdev->flags |= SNDRV_VIRMIDI_USE; return 0; } /* * unuse callback - disallow input to rawmidi device */ static int snd_virmidi_unuse(void *private_data, struct snd_seq_port_subscribe *info) { struct snd_virmidi_dev *rdev; rdev = private_data; rdev->flags &= ~SNDRV_VIRMIDI_USE; module_put(rdev->card->module); return 0; } /* * Register functions */ static const struct snd_rawmidi_ops snd_virmidi_input_ops = { .open = snd_virmidi_input_open, .close = snd_virmidi_input_close, .trigger = snd_virmidi_input_trigger, }; static const struct snd_rawmidi_ops snd_virmidi_output_ops = { .open = snd_virmidi_output_open, .close = snd_virmidi_output_close, .trigger = snd_virmidi_output_trigger, .drain = snd_virmidi_output_drain, }; /* * create a sequencer client and a port */ static int snd_virmidi_dev_attach_seq(struct snd_virmidi_dev *rdev) { int client; struct snd_seq_port_callback pcallbacks; struct snd_seq_port_info *pinfo __free(kfree) = NULL; int err; if (rdev->client >= 0) return 0; pinfo = kzalloc(sizeof(*pinfo), GFP_KERNEL); if (!pinfo) return -ENOMEM; client = snd_seq_create_kernel_client(rdev->card, rdev->device, "%s %d-%d", rdev->rmidi->name, rdev->card->number, rdev->device); if (client < 0) return client; rdev->client = client; /* create a port */ pinfo->addr.client = client; sprintf(pinfo->name, "VirMIDI %d-%d", rdev->card->number, rdev->device); /* set all capabilities */ pinfo->capability |= SNDRV_SEQ_PORT_CAP_WRITE | SNDRV_SEQ_PORT_CAP_SYNC_WRITE | SNDRV_SEQ_PORT_CAP_SUBS_WRITE; pinfo->capability |= 
SNDRV_SEQ_PORT_CAP_READ | SNDRV_SEQ_PORT_CAP_SYNC_READ | SNDRV_SEQ_PORT_CAP_SUBS_READ; pinfo->capability |= SNDRV_SEQ_PORT_CAP_DUPLEX; pinfo->direction = SNDRV_SEQ_PORT_DIR_BIDIRECTION; pinfo->type = SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | SNDRV_SEQ_PORT_TYPE_SOFTWARE | SNDRV_SEQ_PORT_TYPE_PORT; pinfo->midi_channels = 16; memset(&pcallbacks, 0, sizeof(pcallbacks)); pcallbacks.owner = THIS_MODULE; pcallbacks.private_data = rdev; pcallbacks.subscribe = snd_virmidi_subscribe; pcallbacks.unsubscribe = snd_virmidi_unsubscribe; pcallbacks.use = snd_virmidi_use; pcallbacks.unuse = snd_virmidi_unuse; pcallbacks.event_input = snd_virmidi_event_input; pinfo->kernel = &pcallbacks; err = snd_seq_kernel_client_ctl(client, SNDRV_SEQ_IOCTL_CREATE_PORT, pinfo); if (err < 0) { snd_seq_delete_kernel_client(client); rdev->client = -1; return err; } rdev->port = pinfo->addr.port; return 0; /* success */ } /* * release the sequencer client */ static void snd_virmidi_dev_detach_seq(struct snd_virmidi_dev *rdev) { if (rdev->client >= 0) { snd_seq_delete_kernel_client(rdev->client); rdev->client = -1; } } /* * register the device */ static int snd_virmidi_dev_register(struct snd_rawmidi *rmidi) { struct snd_virmidi_dev *rdev = rmidi->private_data; int err; switch (rdev->seq_mode) { case SNDRV_VIRMIDI_SEQ_DISPATCH: err = snd_virmidi_dev_attach_seq(rdev); if (err < 0) return err; break; case SNDRV_VIRMIDI_SEQ_ATTACH: if (rdev->client == 0) return -EINVAL; /* should check presence of port more strictly.. 
*/ break; default: pr_err("ALSA: seq_virmidi: seq_mode is not set: %d\n", rdev->seq_mode); return -EINVAL; } return 0; } /* * unregister the device */ static int snd_virmidi_dev_unregister(struct snd_rawmidi *rmidi) { struct snd_virmidi_dev *rdev = rmidi->private_data; if (rdev->seq_mode == SNDRV_VIRMIDI_SEQ_DISPATCH) snd_virmidi_dev_detach_seq(rdev); return 0; } /* * */ static const struct snd_rawmidi_global_ops snd_virmidi_global_ops = { .dev_register = snd_virmidi_dev_register, .dev_unregister = snd_virmidi_dev_unregister, }; /* * free device */ static void snd_virmidi_free(struct snd_rawmidi *rmidi) { struct snd_virmidi_dev *rdev = rmidi->private_data; kfree(rdev); } /* * create a new device * */ /* exported */ int snd_virmidi_new(struct snd_card *card, int device, struct snd_rawmidi **rrmidi) { struct snd_rawmidi *rmidi; struct snd_virmidi_dev *rdev; int err; *rrmidi = NULL; err = snd_rawmidi_new(card, "VirMidi", device, 16, /* may be configurable */ 16, /* may be configurable */ &rmidi); if (err < 0) return err; strcpy(rmidi->name, rmidi->id); rdev = kzalloc(sizeof(*rdev), GFP_KERNEL); if (rdev == NULL) { snd_device_free(card, rmidi); return -ENOMEM; } rdev->card = card; rdev->rmidi = rmidi; rdev->device = device; rdev->client = -1; init_rwsem(&rdev->filelist_sem); rwlock_init(&rdev->filelist_lock); INIT_LIST_HEAD(&rdev->filelist); rdev->seq_mode = SNDRV_VIRMIDI_SEQ_DISPATCH; rmidi->private_data = rdev; rmidi->private_free = snd_virmidi_free; rmidi->ops = &snd_virmidi_global_ops; snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, &snd_virmidi_input_ops); snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, &snd_virmidi_output_ops); rmidi->info_flags = SNDRV_RAWMIDI_INFO_INPUT | SNDRV_RAWMIDI_INFO_OUTPUT | SNDRV_RAWMIDI_INFO_DUPLEX; *rrmidi = rmidi; return 0; } EXPORT_SYMBOL(snd_virmidi_new);
169 169 3651 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TRACE_EVENT_H #define _LINUX_TRACE_EVENT_H #include <linux/ring_buffer.h> #include <linux/trace_seq.h> #include 
<linux/percpu.h> #include <linux/hardirq.h> #include <linux/perf_event.h> #include <linux/tracepoint.h> struct trace_array; struct array_buffer; struct tracer; struct dentry; struct bpf_prog; union bpf_attr; /* Used for event string fields when they are NULL */ #define EVENT_NULL_STR "(null)" const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, const struct trace_print_flags *flag_array); const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val, const struct trace_print_flags *symbol_array); #if BITS_PER_LONG == 32 const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim, unsigned long long flags, const struct trace_print_flags_u64 *flag_array); const char *trace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, const struct trace_print_flags_u64 *symbol_array); #endif const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, unsigned int bitmask_size); const char *trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int len, bool concatenate); const char *trace_print_array_seq(struct trace_seq *p, const void *buf, int count, size_t el_size); const char * trace_print_hex_dump_seq(struct trace_seq *p, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); struct trace_iterator; struct trace_event; int trace_raw_output_prep(struct trace_iterator *iter, struct trace_event *event); extern __printf(2, 3) void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...); /* Used to find the offset and length of dynamic fields in trace events */ struct trace_dynamic_info { #ifdef CONFIG_CPU_BIG_ENDIAN u16 len; u16 offset; #else u16 offset; u16 len; #endif } __packed; /* * The trace entry - the most basic unit of tracing. 
This is what * is printed in the end as a single line in the trace output, such as: * * bash-15816 [01] 235.197585: idle_cpu <- irq_enter */ struct trace_entry { unsigned short type; unsigned char flags; unsigned char preempt_count; int pid; }; #define TRACE_EVENT_TYPE_MAX \ ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1) /* * Trace iterator - used by printout routines who present trace * results to users and which routines might sleep, etc: */ struct trace_iterator { struct trace_array *tr; struct tracer *trace; struct array_buffer *array_buffer; void *private; int cpu_file; struct mutex mutex; struct ring_buffer_iter **buffer_iter; unsigned long iter_flags; void *temp; /* temp holder */ unsigned int temp_size; char *fmt; /* modified format holder */ unsigned int fmt_size; atomic_t wait_index; /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; cpumask_var_t started; /* Set when the file is closed to prevent new waiters */ bool closed; /* it's true when current open file is snapshot */ bool snapshot; /* The below is zeroed out in pipe_read */ struct trace_seq seq; struct trace_entry *ent; unsigned long lost_events; int leftover; int ent_size; int cpu; u64 ts; loff_t pos; long idx; /* All new field here will be zeroed out in pipe_read */ }; enum trace_iter_flags { TRACE_FILE_LAT_FMT = 1, TRACE_FILE_ANNOTATE = 2, TRACE_FILE_TIME_IN_NS = 4, }; typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, int flags, struct trace_event *event); struct trace_event_functions { trace_print_func trace; trace_print_func raw; trace_print_func hex; trace_print_func binary; }; struct trace_event { struct hlist_node node; int type; struct trace_event_functions *funcs; }; extern int register_trace_event(struct trace_event *event); extern int unregister_trace_event(struct trace_event *event); /* Return values for print_line callback */ enum print_line_t { TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ 
TRACE_TYPE_HANDLED = 1, TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ }; enum print_line_t trace_handle_return(struct trace_seq *s); static inline void tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, unsigned int trace_ctx) { entry->preempt_count = trace_ctx & 0xff; entry->pid = current->pid; entry->type = type; entry->flags = trace_ctx >> 16; } unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, TRACE_FLAG_NEED_RESCHED_LAZY = 0x02, TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, TRACE_FLAG_BH_OFF = 0x80, }; static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) { unsigned int irq_status = irqs_disabled_flags(irqflags) ? TRACE_FLAG_IRQS_OFF : 0; return tracing_gen_ctx_irq_test(irq_status); } static inline unsigned int tracing_gen_ctx(void) { unsigned long irqflags; local_save_flags(irqflags); return tracing_gen_ctx_flags(irqflags); } static inline unsigned int tracing_gen_ctx_dec(void) { unsigned int trace_ctx; trace_ctx = tracing_gen_ctx(); /* * Subtract one from the preemption counter if preemption is enabled, * see trace_event_buffer_reserve()for details. 
*/ if (IS_ENABLED(CONFIG_PREEMPTION)) trace_ctx--; return trace_ctx; } struct trace_event_file; struct ring_buffer_event * trace_event_buffer_lock_reserve(struct trace_buffer **current_buffer, struct trace_event_file *trace_file, int type, unsigned long len, unsigned int trace_ctx); #define TRACE_RECORD_CMDLINE BIT(0) #define TRACE_RECORD_TGID BIT(1) void tracing_record_taskinfo(struct task_struct *task, int flags); void tracing_record_taskinfo_sched_switch(struct task_struct *prev, struct task_struct *next, int flags); void tracing_record_cmdline(struct task_struct *task); void tracing_record_tgid(struct task_struct *task); int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) __printf(3, 4); struct event_filter; enum trace_reg { TRACE_REG_REGISTER, TRACE_REG_UNREGISTER, #ifdef CONFIG_PERF_EVENTS TRACE_REG_PERF_REGISTER, TRACE_REG_PERF_UNREGISTER, TRACE_REG_PERF_OPEN, TRACE_REG_PERF_CLOSE, /* * These (ADD/DEL) use a 'boolean' return value, where 1 (true) means a * custom action was taken and the default action is not to be * performed. 
*/ TRACE_REG_PERF_ADD, TRACE_REG_PERF_DEL, #endif }; struct trace_event_call; #define TRACE_FUNCTION_TYPE ((const char *)~0UL) struct trace_event_fields { const char *type; union { struct { const char *name; const int size; const int align; const unsigned int is_signed:1; unsigned int needs_test:1; const int filter_type; const int len; }; int (*define_fields)(struct trace_event_call *); }; }; struct trace_event_class { const char *system; void *probe; #ifdef CONFIG_PERF_EVENTS void *perf_probe; #endif int (*reg)(struct trace_event_call *event, enum trace_reg type, void *data); struct trace_event_fields *fields_array; struct list_head *(*get_fields)(struct trace_event_call *); struct list_head fields; int (*raw_init)(struct trace_event_call *); }; extern int trace_event_reg(struct trace_event_call *event, enum trace_reg type, void *data); struct trace_event_buffer { struct trace_buffer *buffer; struct ring_buffer_event *event; struct trace_event_file *trace_file; void *entry; unsigned int trace_ctx; struct pt_regs *regs; }; void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, struct trace_event_file *trace_file, unsigned long len); void trace_event_buffer_commit(struct trace_event_buffer *fbuffer); enum { TRACE_EVENT_FL_CAP_ANY_BIT, TRACE_EVENT_FL_NO_SET_FILTER_BIT, TRACE_EVENT_FL_IGNORE_ENABLE_BIT, TRACE_EVENT_FL_TRACEPOINT_BIT, TRACE_EVENT_FL_DYNAMIC_BIT, TRACE_EVENT_FL_KPROBE_BIT, TRACE_EVENT_FL_UPROBE_BIT, TRACE_EVENT_FL_EPROBE_BIT, TRACE_EVENT_FL_FPROBE_BIT, TRACE_EVENT_FL_CUSTOM_BIT, TRACE_EVENT_FL_TEST_STR_BIT, }; /* * Event flags: * CAP_ANY - Any user can enable for perf * NO_SET_FILTER - Set when filter has error and is to be ignored * IGNORE_ENABLE - For trace internal events, do not enable with debugfs file * TRACEPOINT - Event is a tracepoint * DYNAMIC - Event is a dynamic event (created at run time) * KPROBE - Event is a kprobe * UPROBE - Event is a uprobe * EPROBE - Event is an event probe * FPROBE - Event is an function probe * CUSTOM 
- Event is a custom event (to be attached to an exsiting tracepoint) * This is set when the custom event has not been attached * to a tracepoint yet, then it is cleared when it is. * TEST_STR - The event has a "%s" that points to a string outside the event */ enum { TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), TRACE_EVENT_FL_DYNAMIC = (1 << TRACE_EVENT_FL_DYNAMIC_BIT), TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT), TRACE_EVENT_FL_EPROBE = (1 << TRACE_EVENT_FL_EPROBE_BIT), TRACE_EVENT_FL_FPROBE = (1 << TRACE_EVENT_FL_FPROBE_BIT), TRACE_EVENT_FL_CUSTOM = (1 << TRACE_EVENT_FL_CUSTOM_BIT), TRACE_EVENT_FL_TEST_STR = (1 << TRACE_EVENT_FL_TEST_STR_BIT), }; #define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE) struct trace_event_call { struct list_head list; struct trace_event_class *class; union { const char *name; /* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */ struct tracepoint *tp; }; struct trace_event event; char *print_fmt; /* * Static events can disappear with modules, * where as dynamic ones need their own ref count. 
*/ union { void *module; atomic_t refcnt; }; void *data; /* See the TRACE_EVENT_FL_* flags above */ int flags; /* static flags of different events */ #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; struct bpf_prog_array __rcu *prog_array; int (*perf_perm)(struct trace_event_call *, struct perf_event *); #endif }; #ifdef CONFIG_DYNAMIC_EVENTS bool trace_event_dyn_try_get_ref(struct trace_event_call *call); void trace_event_dyn_put_ref(struct trace_event_call *call); bool trace_event_dyn_busy(struct trace_event_call *call); #else static inline bool trace_event_dyn_try_get_ref(struct trace_event_call *call) { /* Without DYNAMIC_EVENTS configured, nothing should be calling this */ return false; } static inline void trace_event_dyn_put_ref(struct trace_event_call *call) { } static inline bool trace_event_dyn_busy(struct trace_event_call *call) { /* Nothing should call this without DYNAIMIC_EVENTS configured. */ return true; } #endif static inline bool trace_event_try_get_ref(struct trace_event_call *call) { if (call->flags & TRACE_EVENT_FL_DYNAMIC) return trace_event_dyn_try_get_ref(call); else return try_module_get(call->module); } static inline void trace_event_put_ref(struct trace_event_call *call) { if (call->flags & TRACE_EVENT_FL_DYNAMIC) trace_event_dyn_put_ref(call); else module_put(call->module); } #ifdef CONFIG_PERF_EVENTS static inline bool bpf_prog_array_valid(struct trace_event_call *call) { /* * This inline function checks whether call->prog_array * is valid or not. The function is called in various places, * outside rcu_read_lock/unlock, as a heuristic to speed up execution. * * If this function returns true, and later call->prog_array * becomes false inside rcu_read_lock/unlock region, * we bail out then. If this function return false, * there is a risk that we might miss a few events if the checking * were delayed until inside rcu_read_lock/unlock region and * call->prog_array happened to become non-NULL then. 
* * Here, READ_ONCE() is used instead of rcu_access_pointer(). * rcu_access_pointer() requires the actual definition of * "struct bpf_prog_array" while READ_ONCE() only needs * a declaration of the same type. */ return !!READ_ONCE(call->prog_array); } #endif static inline const char * trace_event_name(struct trace_event_call *call) { if (call->flags & TRACE_EVENT_FL_CUSTOM) return call->name; else if (call->flags & TRACE_EVENT_FL_TRACEPOINT) return call->tp ? call->tp->name : NULL; else return call->name; } static inline struct list_head * trace_get_fields(struct trace_event_call *event_call) { if (!event_call->class->get_fields) return &event_call->class->fields; return event_call->class->get_fields(event_call); } struct trace_subsystem_dir; enum { EVENT_FILE_FL_ENABLED_BIT, EVENT_FILE_FL_RECORDED_CMD_BIT, EVENT_FILE_FL_RECORDED_TGID_BIT, EVENT_FILE_FL_FILTERED_BIT, EVENT_FILE_FL_NO_SET_FILTER_BIT, EVENT_FILE_FL_SOFT_MODE_BIT, EVENT_FILE_FL_SOFT_DISABLED_BIT, EVENT_FILE_FL_TRIGGER_MODE_BIT, EVENT_FILE_FL_TRIGGER_COND_BIT, EVENT_FILE_FL_PID_FILTER_BIT, EVENT_FILE_FL_WAS_ENABLED_BIT, EVENT_FILE_FL_FREED_BIT, }; extern struct trace_event_file *trace_get_event_file(const char *instance, const char *system, const char *event); extern void trace_put_event_file(struct trace_event_file *file); #define MAX_DYNEVENT_CMD_LEN (2048) enum dynevent_type { DYNEVENT_TYPE_SYNTH = 1, DYNEVENT_TYPE_KPROBE, DYNEVENT_TYPE_NONE, }; struct dynevent_cmd; typedef int (*dynevent_create_fn_t)(struct dynevent_cmd *cmd); struct dynevent_cmd { struct seq_buf seq; const char *event_name; unsigned int n_fields; enum dynevent_type type; dynevent_create_fn_t run_command; void *private_data; }; extern int dynevent_create(struct dynevent_cmd *cmd); extern int synth_event_delete(const char *name); extern void synth_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen); extern int __synth_event_gen_cmd_start(struct dynevent_cmd *cmd, const char *name, struct module *mod, ...); #define 
synth_event_gen_cmd_start(cmd, name, mod, ...) \ __synth_event_gen_cmd_start(cmd, name, mod, ## __VA_ARGS__, NULL) struct synth_field_desc { const char *type; const char *name; }; extern int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, const char *name, struct module *mod, struct synth_field_desc *fields, unsigned int n_fields); extern int synth_event_create(const char *name, struct synth_field_desc *fields, unsigned int n_fields, struct module *mod); extern int synth_event_add_field(struct dynevent_cmd *cmd, const char *type, const char *name); extern int synth_event_add_field_str(struct dynevent_cmd *cmd, const char *type_name); extern int synth_event_add_fields(struct dynevent_cmd *cmd, struct synth_field_desc *fields, unsigned int n_fields); #define synth_event_gen_cmd_end(cmd) \ dynevent_create(cmd) struct synth_event; struct synth_event_trace_state { struct trace_event_buffer fbuffer; struct synth_trace_event *entry; struct trace_buffer *buffer; struct synth_event *event; unsigned int cur_field; unsigned int n_u64; bool disabled; bool add_next; bool add_name; }; extern int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...); extern int synth_event_trace_array(struct trace_event_file *file, u64 *vals, unsigned int n_vals); extern int synth_event_trace_start(struct trace_event_file *file, struct synth_event_trace_state *trace_state); extern int synth_event_add_next_val(u64 val, struct synth_event_trace_state *trace_state); extern int synth_event_add_val(const char *field_name, u64 val, struct synth_event_trace_state *trace_state); extern int synth_event_trace_end(struct synth_event_trace_state *trace_state); extern int kprobe_event_delete(const char *name); extern void kprobe_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen); #define kprobe_event_gen_cmd_start(cmd, name, loc, ...) 
\ __kprobe_event_gen_cmd_start(cmd, false, name, loc, ## __VA_ARGS__, NULL) #define kretprobe_event_gen_cmd_start(cmd, name, loc, ...) \ __kprobe_event_gen_cmd_start(cmd, true, name, loc, ## __VA_ARGS__, NULL) extern int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, const char *name, const char *loc, ...); #define kprobe_event_add_fields(cmd, ...) \ __kprobe_event_add_fields(cmd, ## __VA_ARGS__, NULL) #define kprobe_event_add_field(cmd, field) \ __kprobe_event_add_fields(cmd, field, NULL) extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...); #define kprobe_event_gen_cmd_end(cmd) \ dynevent_create(cmd) #define kretprobe_event_gen_cmd_end(cmd) \ dynevent_create(cmd) /* * Event file flags: * ENABLED - The event is enabled * RECORDED_CMD - The comms should be recorded at sched_switch * RECORDED_TGID - The tgids should be recorded at sched_switch * FILTERED - The event has a filter attached * NO_SET_FILTER - Set when filter has error and is to be ignored * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED * SOFT_DISABLED - When set, do not trace the event (even though its * tracepoint may be enabled) * TRIGGER_MODE - When set, invoke the triggers associated with the event * TRIGGER_COND - When set, one or more triggers has an associated filter * PID_FILTER - When set, the event is filtered based on pid * WAS_ENABLED - Set when enabled to know to clear trace on module removal * FREED - File descriptor is freed, all fields should be considered invalid */ enum { EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), EVENT_FILE_FL_RECORDED_CMD = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT), EVENT_FILE_FL_RECORDED_TGID = (1 << EVENT_FILE_FL_RECORDED_TGID_BIT), EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT), EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT), EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT), EVENT_FILE_FL_SOFT_DISABLED = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT), 
EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT), EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT), EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT), EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT), }; struct trace_event_file { struct list_head list; struct trace_event_call *event_call; struct event_filter __rcu *filter; struct eventfs_inode *ei; struct trace_array *tr; struct trace_subsystem_dir *system; struct list_head triggers; /* * 32 bit flags: * bit 0: enabled * bit 1: enabled cmd record * bit 2: enable/disable with the soft disable bit * bit 3: soft disabled * bit 4: trigger enabled * * Note: The bits must be set atomically to prevent races * from other writers. Reads of flags do not need to be in * sync as they occur in critical sections. But the way flags * is currently used, these changes do not affect the code * except that when a change is made, it may have a slight * delay in propagating the changes to other CPUs due to * caching and such. Which is mostly OK ;-) */ unsigned long flags; refcount_t ref; /* ref count for opened files */ atomic_t sm_ref; /* soft-mode reference counter */ atomic_t tm_ref; /* trigger-mode reference counter */ }; #define __TRACE_EVENT_FLAGS(name, value) \ static int __init trace_init_flags_##name(void) \ { \ event_##name.flags |= value; \ return 0; \ } \ early_initcall(trace_init_flags_##name); #define __TRACE_EVENT_PERF_PERM(name, expr...) 
\ static int perf_perm_##name(struct trace_event_call *tp_event, \ struct perf_event *p_event) \ { \ return ({ expr; }); \ } \ static int __init trace_init_perf_perm_##name(void) \ { \ event_##name.perf_perm = &perf_perm_##name; \ return 0; \ } \ early_initcall(trace_init_perf_perm_##name); #define PERF_MAX_TRACE_SIZE 8192 #define MAX_FILTER_STR_VAL 256U /* Should handle KSYM_SYMBOL_LEN */ enum event_trigger_type { ETT_NONE = (0), ETT_TRACE_ONOFF = (1 << 0), ETT_SNAPSHOT = (1 << 1), ETT_STACKTRACE = (1 << 2), ETT_EVENT_ENABLE = (1 << 3), ETT_EVENT_HIST = (1 << 4), ETT_HIST_ENABLE = (1 << 5), ETT_EVENT_EPROBE = (1 << 6), }; extern int filter_match_preds(struct event_filter *filter, void *rec); extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event); extern void event_triggers_post_call(struct trace_event_file *file, enum event_trigger_type tt); bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); bool __trace_trigger_soft_disabled(struct trace_event_file *file); /** * trace_trigger_soft_disabled - do triggers and test if soft disabled * @file: The file pointer of the event to test * * If any triggers without filters are attached to this event, they * will be called here. If the event is soft disabled and has no * triggers that require testing the fields, it will return true, * otherwise false. 
*/ static __always_inline bool trace_trigger_soft_disabled(struct trace_event_file *file) { unsigned long eflags = file->flags; if (likely(!(eflags & (EVENT_FILE_FL_TRIGGER_MODE | EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_PID_FILTER)))) return false; if (likely(eflags & EVENT_FILE_FL_TRIGGER_COND)) return false; return __trace_trigger_soft_disabled(file); } #ifdef CONFIG_BPF_EVENTS unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie); void perf_event_detach_bpf_prog(struct perf_event *event); int perf_event_query_prog_array(struct perf_event *event, void __user *info); struct bpf_raw_tp_link; int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link); int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link); struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, u64 *probe_addr, unsigned long *missed); int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { return 1; } static inline int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie) { return -EOPNOTSUPP; } static inline void perf_event_detach_bpf_prog(struct perf_event *event) { } static inline int perf_event_query_prog_array(struct perf_event *event, void __user *info) { return -EOPNOTSUPP; } struct bpf_raw_tp_link; static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) { return -EOPNOTSUPP; } static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, 
/*
 * Field filter types, numbered consecutively from zero.
 * NOTE(review): presumably these are the values accepted by the
 * filter_type argument of trace_define_field() -- confirm.
 */
enum {
	FILTER_OTHER		= 0,
	FILTER_STATIC_STRING	= 1,
	FILTER_DYN_STRING	= 2,
	FILTER_RDYN_STRING	= 3,
	FILTER_PTR_STRING	= 4,
	FILTER_TRACE_FN		= 5,
	FILTER_CPUMASK		= 6,
	FILTER_COMM		= 7,
	FILTER_CPU		= 8,
	FILTER_STACKTRACE	= 9,
};
\ do { \ __trace_printk_check_format(fmt, ##args); \ tracing_record_cmdline(current); \ if (__builtin_constant_p(fmt)) { \ static const char *trace_printk_fmt \ __section("__trace_printk_fmt") = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ __trace_bprintk(ip, trace_printk_fmt, ##args); \ } else \ __trace_printk(ip, fmt, ##args); \ } while (0) #ifdef CONFIG_PERF_EVENTS struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); extern int perf_trace_add(struct perf_event *event, int flags); extern void perf_trace_del(struct perf_event *event, int flags); #ifdef CONFIG_KPROBE_EVENTS extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe); extern void perf_kprobe_destroy(struct perf_event *event); extern int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, u64 *probe_addr, unsigned long *missed, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS extern int perf_uprobe_init(struct perf_event *event, unsigned long ref_ctr_offset, bool is_retprobe); extern void perf_uprobe_destroy(struct perf_event *event); extern int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, const char **filename, u64 *probe_offset, u64 *probe_addr, bool perf_type_tracepoint); #endif extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); void perf_trace_buf_update(void *record, u16 type); void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie); void perf_event_free_bpf_prog(struct perf_event *event); void bpf_trace_run1(struct bpf_raw_tp_link *link, u64 arg1); void bpf_trace_run2(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2); void bpf_trace_run3(struct 
bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3); void bpf_trace_run4(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4); void bpf_trace_run5(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5); void bpf_trace_run6(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6); void bpf_trace_run7(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7); void bpf_trace_run8(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8); void bpf_trace_run9(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9); void bpf_trace_run10(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9, u64 arg10); void bpf_trace_run11(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9, u64 arg10, u64 arg11); void bpf_trace_run12(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12); void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, struct trace_event_call *call, u64 count, struct pt_regs *regs, struct hlist_head *head, struct task_struct *task); static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, struct task_struct *task) { perf_tp_event(type, count, raw_data, size, regs, head, rctx, task); } #endif #define TRACE_EVENT_STR_MAX 512 /* * gcc warns that you can not use a va_list in an inlined * function. 
/*
 * Evaluates to the buffer size (formatted length plus the terminating NUL)
 * needed to print @fmt with the arguments in the va_list that @va points
 * to, capped at TRACE_EVENT_STR_MAX.  The measurement runs on a copy of
 * *@va, so the caller's va_list is not consumed.
 */
#define __trace_event_vstr_len(fmt, va)				\
({								\
	va_list __args;						\
	int __len;						\
								\
	va_copy(__args, *(va));					\
	__len = vsnprintf(NULL, 0, fmt, __args) + 1;		\
	va_end(__args);						\
								\
	min(__len, TRACE_EVENT_STR_MAX);			\
})
5 10 13 6 13 13 2 8 1 4 1 3 13 7 2 4 3 5 2 7 5 2 4 3 6 1 2818 16 2807 2 1 1 1 1 13 7 7 1 1 10 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 
503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 
1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 
1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 
1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 
2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 
2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 
/*
 * Argument block of the USBTEST_REQUEST_32 ioctl.  The first five fields are
 * supplied by the caller; the two duration fields are filled in as outputs.
 * The layout is part of the ioctl interface, so do not reorder or resize
 * fields.
 */
struct usbtest_param_32 {
	/* inputs */
	__u32		test_num;	/* 0..(TEST_CASES-1) */
	__u32		iterations;	/* NOTE(review): presumably how many times the test is repeated -- confirm */
	__u32		length;		/* NOTE(review): presumably the transfer length in bytes -- confirm */
	__u32		vary;		/* NOTE(review): presumably the per-iteration length step -- confirm */
	__u32		sglen;		/* NOTE(review): presumably the scatterlist entry count -- confirm */

	/* outputs */
	__s32		duration_sec;	/* seconds part of the reported duration */
	__s32		duration_usec;	/* microseconds part of the reported duration */
};
/* State kept for one device under test; the pipes are set up by get_endpoints(). */
struct usbtest_dev {
	struct usb_interface	*intf;		/* interface this test instance is bound to */
	struct usbtest_info	*info;		/* capabilities/quirks entry for this device */
	int			in_pipe;	/* bulk IN pipe, set when a bulk IN endpoint was found */
	int			out_pipe;	/* bulk OUT pipe, set when a bulk OUT endpoint was found */
	int			in_iso_pipe;	/* isochronous IN pipe (only looked for when info->iso) */
	int			out_iso_pipe;	/* isochronous OUT pipe (only looked for when info->iso) */
	int			in_int_pipe;	/* interrupt IN pipe (only looked for when info->intr) */
	int			out_int_pipe;	/* interrupt OUT pipe (only looked for when info->intr) */
	/* descriptors of the endpoints behind the iso/interrupt pipes above */
	struct usb_endpoint_descriptor	*iso_in, *iso_out;
	struct usb_endpoint_descriptor	*int_in, *int_out;
	struct mutex		lock;		/* one lock per device */

#define TBUF_SIZE	256
	u8			*buf;		/* NOTE(review): presumably a TBUF_SIZE-byte scratch buffer -- confirm at the allocation site */
};
/*
 * Remember endpoint @e as the IN candidate (@edi non-zero) or the OUT
 * candidate (@edi zero), unless a candidate for that direction has
 * already been recorded -- the first endpoint seen wins.
 */
static inline void endpoint_update(int edi,
				   struct usb_host_endpoint **in,
				   struct usb_host_endpoint **out,
				   struct usb_host_endpoint *e)
{
	struct usb_host_endpoint **slot = edi ? in : out;

	if (!*slot)
		*slot = e;
}
USB_ENDPOINT_NUMBER_MASK); if (iso_in) { dev->iso_in = &iso_in->desc; dev->in_iso_pipe = usb_rcvisocpipe(udev, iso_in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); } if (iso_out) { dev->iso_out = &iso_out->desc; dev->out_iso_pipe = usb_sndisocpipe(udev, iso_out->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); } if (int_in) { dev->int_in = &int_in->desc; dev->in_int_pipe = usb_rcvintpipe(udev, int_in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); } if (int_out) { dev->int_out = &int_out->desc; dev->out_int_pipe = usb_sndintpipe(udev, int_out->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); } return 0; } /*-------------------------------------------------------------------------*/ /* Support for testing basic non-queued I/O streams. * * These just package urbs as requests that can be easily canceled. * Each urb's data buffer is dynamically allocated; callers can fill * them with non-zero test data (or test for it) when appropriate. */ static void simple_callback(struct urb *urb) { complete(urb->context); } static struct urb *usbtest_alloc_urb( struct usb_device *udev, int pipe, unsigned long bytes, unsigned transfer_flags, unsigned offset, u8 bInterval, usb_complete_t complete_fn) { struct urb *urb; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return urb; if (bInterval) usb_fill_int_urb(urb, udev, pipe, NULL, bytes, complete_fn, NULL, bInterval); else usb_fill_bulk_urb(urb, udev, pipe, NULL, bytes, complete_fn, NULL); urb->interval = (udev->speed == USB_SPEED_HIGH) ? 
(INTERRUPT_RATE << 3) : INTERRUPT_RATE; urb->transfer_flags = transfer_flags; if (usb_pipein(pipe)) urb->transfer_flags |= URB_SHORT_NOT_OK; if ((bytes + offset) == 0) return urb; if (urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP) urb->transfer_buffer = usb_alloc_coherent(udev, bytes + offset, GFP_KERNEL, &urb->transfer_dma); else urb->transfer_buffer = kmalloc(bytes + offset, GFP_KERNEL); if (!urb->transfer_buffer) { usb_free_urb(urb); return NULL; } /* To test unaligned transfers add an offset and fill the unused memory with a guard value */ if (offset) { memset(urb->transfer_buffer, GUARD_BYTE, offset); urb->transfer_buffer += offset; if (urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP) urb->transfer_dma += offset; } /* For inbound transfers use guard byte so that test fails if data not correctly copied */ memset(urb->transfer_buffer, usb_pipein(urb->pipe) ? GUARD_BYTE : 0, bytes); return urb; } static struct urb *simple_alloc_urb( struct usb_device *udev, int pipe, unsigned long bytes, u8 bInterval) { return usbtest_alloc_urb(udev, pipe, bytes, URB_NO_TRANSFER_DMA_MAP, 0, bInterval, simple_callback); } static struct urb *complicated_alloc_urb( struct usb_device *udev, int pipe, unsigned long bytes, u8 bInterval) { return usbtest_alloc_urb(udev, pipe, bytes, URB_NO_TRANSFER_DMA_MAP, 0, bInterval, complicated_callback); } static unsigned pattern; static unsigned mod_pattern; module_param_named(pattern, mod_pattern, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(mod_pattern, "i/o pattern (0 == zeroes)"); static unsigned get_maxpacket(struct usb_device *udev, int pipe) { struct usb_host_endpoint *ep; ep = usb_pipe_endpoint(udev, pipe); return le16_to_cpup(&ep->desc.wMaxPacketSize); } static int ss_isoc_get_packet_num(struct usb_device *udev, int pipe) { struct usb_host_endpoint *ep = usb_pipe_endpoint(udev, pipe); return USB_SS_MULT(ep->ss_ep_comp.bmAttributes) * (1 + ep->ss_ep_comp.bMaxBurst); } static void simple_fill_buf(struct urb *urb) { unsigned i; u8 *buf = 
urb->transfer_buffer; unsigned len = urb->transfer_buffer_length; unsigned maxpacket; switch (pattern) { default: fallthrough; case 0: memset(buf, 0, len); break; case 1: /* mod63 */ maxpacket = get_maxpacket(urb->dev, urb->pipe); for (i = 0; i < len; i++) *buf++ = (u8) ((i % maxpacket) % 63); break; } } static inline unsigned long buffer_offset(void *buf) { return (unsigned long)buf & (ARCH_KMALLOC_MINALIGN - 1); } static int check_guard_bytes(struct usbtest_dev *tdev, struct urb *urb) { u8 *buf = urb->transfer_buffer; u8 *guard = buf - buffer_offset(buf); unsigned i; for (i = 0; guard < buf; i++, guard++) { if (*guard != GUARD_BYTE) { ERROR(tdev, "guard byte[%d] %d (not %d)\n", i, *guard, GUARD_BYTE); return -EINVAL; } } return 0; } static int simple_check_buf(struct usbtest_dev *tdev, struct urb *urb) { unsigned i; u8 expected; u8 *buf = urb->transfer_buffer; unsigned len = urb->actual_length; unsigned maxpacket = get_maxpacket(urb->dev, urb->pipe); int ret = check_guard_bytes(tdev, urb); if (ret) return ret; for (i = 0; i < len; i++, buf++) { switch (pattern) { /* all-zeroes has no synchronization issues */ case 0: expected = 0; break; /* mod63 stays in sync with short-terminated transfers, * or otherwise when host and gadget agree on how large * each usb transfer request should be. resync is done * with set_interface or set_config. 
/*
 * Submit @urb synchronously up to @iterations times, one transfer at a time.
 *
 * @tdev:       test device, used for buffer verification error reporting
 * @urb:        pre-allocated urb; its context is pointed at an on-stack
 *              completion for the duration of the call
 * @iterations: maximum number of submissions; the loop stops at the first
 *              non-zero status
 * @vary:       when non-zero, the transfer length is advanced by this amount
 *              after every iteration, wrapping modulo the original length
 * @expected:   status the caller expects; anything else is logged
 * @label:      test name used in the log message
 *
 * Returns the status of the last transfer (0 on success), -ETIMEDOUT if a
 * transfer had to be killed after SIMPLE_IO_TIMEOUT milliseconds, or the
 * error from usb_submit_urb() / simple_check_buf().
 */
static int simple_io(
	struct usbtest_dev	*tdev,
	struct urb		*urb,
	int			iterations,
	int			vary,
	int			expected,
	const char		*label
)
{
	struct usb_device	*udev = urb->dev;
	int			max = urb->transfer_buffer_length;
	struct completion	completion;
	int			retval = 0;
	unsigned long		expire;

	urb->context = &completion;
	while (retval == 0 && iterations-- > 0) {
		init_completion(&completion);
		/* OUT transfers get a freshly generated pattern each time */
		if (usb_pipeout(urb->pipe)) {
			simple_fill_buf(urb);
			urb->transfer_flags |= URB_ZERO_PACKET;
		}
		retval = usb_submit_urb(urb, GFP_KERNEL);
		if (retval != 0)
			break;

		expire = msecs_to_jiffies(SIMPLE_IO_TIMEOUT);
		if (!wait_for_completion_timeout(&completion, expire)) {
			/*
			 * Timed out: cancel the urb.  A status of -ENOENT
			 * after the kill is reported as -ETIMEDOUT; any other
			 * status is passed through unchanged.
			 */
			usb_kill_urb(urb);
			retval = (urb->status == -ENOENT ?
				  -ETIMEDOUT : urb->status);
		} else {
			retval = urb->status;
		}

		/*
		 * NOTE(review): presumably restores the device pointer in
		 * case it was altered while the urb was in flight -- confirm.
		 */
		urb->dev = udev;
		/* IN data is verified against the expected pattern */
		if (retval == 0 && usb_pipein(urb->pipe))
			retval = simple_check_buf(tdev, urb);

		if (vary) {
			int	len = urb->transfer_buffer_length;

			/* step the length, wrapping within (0, max] */
			len += vary;
			len %= max;
			if (len == 0)
				len = (vary < max) ? vary : max;
			urb->transfer_buffer_length = len;
		}

		/* FIXME if endpoint halted, clear halt (and log) */
	}
	/* undo any length variation so the urb can be freed/reused as allocated */
	urb->transfer_buffer_length = max;

	if (expected != retval)
		dev_err(&udev->dev,
			"%s failed, iterations left %d, status %d (not %d)\n",
				label, iterations, retval, expected);
	return retval;
}
* Yes, this also tests the scatterlist primitives. */ static void free_sglist(struct scatterlist *sg, int nents) { unsigned i; if (!sg) return; for (i = 0; i < nents; i++) { if (!sg_page(&sg[i])) continue; kfree(sg_virt(&sg[i])); } kfree(sg); } static struct scatterlist * alloc_sglist(int nents, int max, int vary, struct usbtest_dev *dev, int pipe) { struct scatterlist *sg; unsigned int n_size = 0; unsigned i; unsigned size = max; unsigned maxpacket = get_maxpacket(interface_to_usbdev(dev->intf), pipe); if (max == 0) return NULL; sg = kmalloc_array(nents, sizeof(*sg), GFP_KERNEL); if (!sg) return NULL; sg_init_table(sg, nents); for (i = 0; i < nents; i++) { char *buf; unsigned j; buf = kzalloc(size, GFP_KERNEL); if (!buf) { free_sglist(sg, i); return NULL; } /* kmalloc pages are always physically contiguous! */ sg_set_buf(&sg[i], buf, size); switch (pattern) { case 0: /* already zeroed */ break; case 1: for (j = 0; j < size; j++) *buf++ = (u8) (((j + n_size) % maxpacket) % 63); n_size += size; break; } if (vary) { size += vary; size %= max; if (size == 0) size = (vary < max) ? vary : max; } } return sg; } struct sg_timeout { struct timer_list timer; struct usb_sg_request *req; }; static void sg_timeout(struct timer_list *t) { struct sg_timeout *timeout = from_timer(timeout, t, timer); usb_sg_cancel(timeout->req); } static int perform_sglist( struct usbtest_dev *tdev, unsigned iterations, int pipe, struct usb_sg_request *req, struct scatterlist *sg, int nents ) { struct usb_device *udev = testdev_to_usbdev(tdev); int retval = 0; struct sg_timeout timeout = { .req = req, }; timer_setup_on_stack(&timeout.timer, sg_timeout, 0); while (retval == 0 && iterations-- > 0) { retval = usb_sg_init(req, udev, pipe, (udev->speed == USB_SPEED_HIGH) ? 
(INTERRUPT_RATE << 3) : INTERRUPT_RATE, sg, nents, 0, GFP_KERNEL); if (retval) break; mod_timer(&timeout.timer, jiffies + msecs_to_jiffies(SIMPLE_IO_TIMEOUT)); usb_sg_wait(req); if (!del_timer_sync(&timeout.timer)) retval = -ETIMEDOUT; else retval = req->status; destroy_timer_on_stack(&timeout.timer); /* FIXME check resulting data pattern */ /* FIXME if endpoint halted, clear halt (and log) */ } /* FIXME for unlink or fault handling tests, don't report * failure if retval is as we expected ... */ if (retval) ERROR(tdev, "perform_sglist failed, " "iterations left %d, status %d\n", iterations, retval); return retval; } /*-------------------------------------------------------------------------*/ /* unqueued control message testing * * there's a nice set of device functional requirements in chapter 9 of the * usb 2.0 spec, which we can apply to ANY device, even ones that don't use * special test firmware. * * we know the device is configured (or suspended) by the time it's visible * through usbfs. we can't change that, so we won't test enumeration (which * worked 'well enough' to get here, this time), power management (ditto), * or remote wakeup (which needs human interaction). 
*/ static unsigned realworld = 1; module_param(realworld, uint, 0); MODULE_PARM_DESC(realworld, "clear to demand stricter spec compliance"); static int get_altsetting(struct usbtest_dev *dev) { struct usb_interface *iface = dev->intf; struct usb_device *udev = interface_to_usbdev(iface); int retval; retval = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), USB_REQ_GET_INTERFACE, USB_DIR_IN|USB_RECIP_INTERFACE, 0, iface->altsetting[0].desc.bInterfaceNumber, dev->buf, 1, USB_CTRL_GET_TIMEOUT); switch (retval) { case 1: return dev->buf[0]; case 0: retval = -ERANGE; fallthrough; default: return retval; } } static int set_altsetting(struct usbtest_dev *dev, int alternate) { struct usb_interface *iface = dev->intf; struct usb_device *udev; if (alternate < 0 || alternate >= 256) return -EINVAL; udev = interface_to_usbdev(iface); return usb_set_interface(udev, iface->altsetting[0].desc.bInterfaceNumber, alternate); } static int is_good_config(struct usbtest_dev *tdev, int len) { struct usb_config_descriptor *config; if (len < (int)sizeof(*config)) return 0; config = (struct usb_config_descriptor *) tdev->buf; switch (config->bDescriptorType) { case USB_DT_CONFIG: case USB_DT_OTHER_SPEED_CONFIG: if (config->bLength != 9) { ERROR(tdev, "bogus config descriptor length\n"); return 0; } /* this bit 'must be 1' but often isn't */ if (!realworld && !(config->bmAttributes & 0x80)) { ERROR(tdev, "high bit of config attributes not set\n"); return 0; } if (config->bmAttributes & 0x1f) { /* reserved == 0 */ ERROR(tdev, "reserved config bits set\n"); return 0; } break; default: return 0; } if (le16_to_cpu(config->wTotalLength) == len) /* read it all */ return 1; if (le16_to_cpu(config->wTotalLength) >= TBUF_SIZE) /* max partial read */ return 1; ERROR(tdev, "bogus config descriptor read size\n"); return 0; } static int is_good_ext(struct usbtest_dev *tdev, u8 *buf) { struct usb_ext_cap_descriptor *ext; u32 attr; ext = (struct usb_ext_cap_descriptor *) buf; if (ext->bLength != 
USB_DT_USB_EXT_CAP_SIZE) { ERROR(tdev, "bogus usb 2.0 extension descriptor length\n"); return 0; } attr = le32_to_cpu(ext->bmAttributes); /* bits[1:15] is used and others are reserved */ if (attr & ~0xfffe) { /* reserved == 0 */ ERROR(tdev, "reserved bits set\n"); return 0; } return 1; } static int is_good_ss_cap(struct usbtest_dev *tdev, u8 *buf) { struct usb_ss_cap_descriptor *ss; ss = (struct usb_ss_cap_descriptor *) buf; if (ss->bLength != USB_DT_USB_SS_CAP_SIZE) { ERROR(tdev, "bogus superspeed device capability descriptor length\n"); return 0; } /* * only bit[1] of bmAttributes is used for LTM and others are * reserved */ if (ss->bmAttributes & ~0x02) { /* reserved == 0 */ ERROR(tdev, "reserved bits set in bmAttributes\n"); return 0; } /* bits[0:3] of wSpeedSupported is used and others are reserved */ if (le16_to_cpu(ss->wSpeedSupported) & ~0x0f) { /* reserved == 0 */ ERROR(tdev, "reserved bits set in wSpeedSupported\n"); return 0; } return 1; } static int is_good_con_id(struct usbtest_dev *tdev, u8 *buf) { struct usb_ss_container_id_descriptor *con_id; con_id = (struct usb_ss_container_id_descriptor *) buf; if (con_id->bLength != USB_DT_USB_SS_CONTN_ID_SIZE) { ERROR(tdev, "bogus container id descriptor length\n"); return 0; } if (con_id->bReserved) { /* reserved == 0 */ ERROR(tdev, "reserved bits set\n"); return 0; } return 1; } /* sanity test for standard requests working with usb_control_mesg() and some * of the utility functions which use it. * * this doesn't test how endpoint halts behave or data toggles get set, since * we won't do I/O to bulk/interrupt endpoints here (which is how to change * halt or toggle). toggle testing is impractical without support from hcds. * * this avoids failing devices linux would normally work with, by not testing * config/altsetting operations for devices that only support their defaults. * such devices rarely support those needless operations. 
* * NOTE that since this is a sanity test, it's not examining boundary cases * to see if usbcore, hcd, and device all behave right. such testing would * involve varied read sizes and other operation sequences. */ static int ch9_postconfig(struct usbtest_dev *dev) { struct usb_interface *iface = dev->intf; struct usb_device *udev = interface_to_usbdev(iface); int i, alt, retval; /* [9.2.3] if there's more than one altsetting, we need to be able to * set and get each one. mostly trusts the descriptors from usbcore. */ for (i = 0; i < iface->num_altsetting; i++) { /* 9.2.3 constrains the range here */ alt = iface->altsetting[i].desc.bAlternateSetting; if (alt < 0 || alt >= iface->num_altsetting) { dev_err(&iface->dev, "invalid alt [%d].bAltSetting = %d\n", i, alt); } /* [real world] get/set unimplemented if there's only one */ if (realworld && iface->num_altsetting == 1) continue; /* [9.4.10] set_interface */ retval = set_altsetting(dev, alt); if (retval) { dev_err(&iface->dev, "can't set_interface = %d, %d\n", alt, retval); return retval; } /* [9.4.4] get_interface always works */ retval = get_altsetting(dev); if (retval != alt) { dev_err(&iface->dev, "get alt should be %d, was %d\n", alt, retval); return (retval < 0) ? retval : -EDOM; } } /* [real world] get_config unimplemented if there's only one */ if (!realworld || udev->descriptor.bNumConfigurations != 1) { int expected = udev->actconfig->desc.bConfigurationValue; /* [9.4.2] get_configuration always works * ... although some cheap devices (like one TI Hub I've got) * won't return config descriptors except before set_config. */ retval = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), USB_REQ_GET_CONFIGURATION, USB_DIR_IN | USB_RECIP_DEVICE, 0, 0, dev->buf, 1, USB_CTRL_GET_TIMEOUT); if (retval != 1 || dev->buf[0] != expected) { dev_err(&iface->dev, "get config --> %d %d (1 %d)\n", retval, dev->buf[0], expected); return (retval < 0) ? 
retval : -EDOM; } } /* there's always [9.4.3] a device descriptor [9.6.1] */ retval = usb_get_descriptor(udev, USB_DT_DEVICE, 0, dev->buf, sizeof(udev->descriptor)); if (retval != sizeof(udev->descriptor)) { dev_err(&iface->dev, "dev descriptor --> %d\n", retval); return (retval < 0) ? retval : -EDOM; } /* * there's always [9.4.3] a bos device descriptor [9.6.2] in USB * 3.0 spec */ if (le16_to_cpu(udev->descriptor.bcdUSB) >= 0x0210) { struct usb_bos_descriptor *bos = NULL; struct usb_dev_cap_header *header = NULL; unsigned total, num, length; u8 *buf; retval = usb_get_descriptor(udev, USB_DT_BOS, 0, dev->buf, sizeof(*udev->bos->desc)); if (retval != sizeof(*udev->bos->desc)) { dev_err(&iface->dev, "bos descriptor --> %d\n", retval); return (retval < 0) ? retval : -EDOM; } bos = (struct usb_bos_descriptor *)dev->buf; total = le16_to_cpu(bos->wTotalLength); num = bos->bNumDeviceCaps; if (total > TBUF_SIZE) total = TBUF_SIZE; /* * get generic device-level capability descriptors [9.6.2] * in USB 3.0 spec */ retval = usb_get_descriptor(udev, USB_DT_BOS, 0, dev->buf, total); if (retval != total) { dev_err(&iface->dev, "bos descriptor set --> %d\n", retval); return (retval < 0) ? 
retval : -EDOM; } length = sizeof(*udev->bos->desc); buf = dev->buf; for (i = 0; i < num; i++) { buf += length; if (buf + sizeof(struct usb_dev_cap_header) > dev->buf + total) break; header = (struct usb_dev_cap_header *)buf; length = header->bLength; if (header->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { dev_warn(&udev->dev, "not device capability descriptor, skip\n"); continue; } switch (header->bDevCapabilityType) { case USB_CAP_TYPE_EXT: if (buf + USB_DT_USB_EXT_CAP_SIZE > dev->buf + total || !is_good_ext(dev, buf)) { dev_err(&iface->dev, "bogus usb 2.0 extension descriptor\n"); return -EDOM; } break; case USB_SS_CAP_TYPE: if (buf + USB_DT_USB_SS_CAP_SIZE > dev->buf + total || !is_good_ss_cap(dev, buf)) { dev_err(&iface->dev, "bogus superspeed device capability descriptor\n"); return -EDOM; } break; case CONTAINER_ID_TYPE: if (buf + USB_DT_USB_SS_CONTN_ID_SIZE > dev->buf + total || !is_good_con_id(dev, buf)) { dev_err(&iface->dev, "bogus container id descriptor\n"); return -EDOM; } break; default: break; } } } /* there's always [9.4.3] at least one config descriptor [9.6.3] */ for (i = 0; i < udev->descriptor.bNumConfigurations; i++) { retval = usb_get_descriptor(udev, USB_DT_CONFIG, i, dev->buf, TBUF_SIZE); if (!is_good_config(dev, retval)) { dev_err(&iface->dev, "config [%d] descriptor --> %d\n", i, retval); return (retval < 0) ? 
retval : -EDOM; } /* FIXME cross-checking udev->config[i] to make sure usbcore * parsed it right (etc) would be good testing paranoia */ } /* and sometimes [9.2.6.6] speed dependent descriptors */ if (le16_to_cpu(udev->descriptor.bcdUSB) == 0x0200) { struct usb_qualifier_descriptor *d = NULL; /* device qualifier [9.6.2] */ retval = usb_get_descriptor(udev, USB_DT_DEVICE_QUALIFIER, 0, dev->buf, sizeof(struct usb_qualifier_descriptor)); if (retval == -EPIPE) { if (udev->speed == USB_SPEED_HIGH) { dev_err(&iface->dev, "hs dev qualifier --> %d\n", retval); return retval; } /* usb2.0 but not high-speed capable; fine */ } else if (retval != sizeof(struct usb_qualifier_descriptor)) { dev_err(&iface->dev, "dev qualifier --> %d\n", retval); return (retval < 0) ? retval : -EDOM; } else d = (struct usb_qualifier_descriptor *) dev->buf; /* might not have [9.6.2] any other-speed configs [9.6.4] */ if (d) { unsigned max = d->bNumConfigurations; for (i = 0; i < max; i++) { retval = usb_get_descriptor(udev, USB_DT_OTHER_SPEED_CONFIG, i, dev->buf, TBUF_SIZE); if (!is_good_config(dev, retval)) { dev_err(&iface->dev, "other speed config --> %d\n", retval); return (retval < 0) ? retval : -EDOM; } } } } /* FIXME fetch strings from at least the device descriptor */ /* [9.4.5] get_status always works */ retval = usb_get_std_status(udev, USB_RECIP_DEVICE, 0, dev->buf); if (retval) { dev_err(&iface->dev, "get dev status --> %d\n", retval); return retval; } /* FIXME configuration.bmAttributes says if we could try to set/clear * the device's remote wakeup feature ... 
if we can, test that here */ retval = usb_get_std_status(udev, USB_RECIP_INTERFACE, iface->altsetting[0].desc.bInterfaceNumber, dev->buf); if (retval) { dev_err(&iface->dev, "get interface status --> %d\n", retval); return retval; } /* FIXME get status for each endpoint in the interface */ return 0; } /*-------------------------------------------------------------------------*/ /* use ch9 requests to test whether: * (a) queues work for control, keeping N subtests queued and * active (auto-resubmit) for M loops through the queue. * (b) protocol stalls (control-only) will autorecover. * it's not like bulk/intr; no halt clearing. * (c) short control reads are reported and handled. * (d) queues are always processed in-order */ struct ctrl_ctx { spinlock_t lock; struct usbtest_dev *dev; struct completion complete; unsigned count; unsigned pending; int status; struct urb **urb; struct usbtest_param_32 *param; int last; }; #define NUM_SUBCASES 16 /* how many test subcases here? */ struct subcase { struct usb_ctrlrequest setup; int number; int expected; }; static void ctrl_complete(struct urb *urb) { struct ctrl_ctx *ctx = urb->context; struct usb_ctrlrequest *reqp; struct subcase *subcase; int status = urb->status; unsigned long flags; reqp = (struct usb_ctrlrequest *)urb->setup_packet; subcase = container_of(reqp, struct subcase, setup); spin_lock_irqsave(&ctx->lock, flags); ctx->count--; ctx->pending--; /* queue must transfer and complete in fifo order, unless * usb_unlink_urb() is used to unlink something not at the * physical queue head (not tested). */ if (subcase->number > 0) { if ((subcase->number - ctx->last) != 1) { ERROR(ctx->dev, "subcase %d completed out of order, last %d\n", subcase->number, ctx->last); status = -EDOM; ctx->last = subcase->number; goto error; } } ctx->last = subcase->number; /* succeed or fault in only one way? */ if (status == subcase->expected) status = 0; /* async unlink for cleanup? 
*/ else if (status != -ECONNRESET) { /* some faults are allowed, not required */ if (subcase->expected > 0 && ( ((status == -subcase->expected /* happened */ || status == 0)))) /* didn't */ status = 0; /* sometimes more than one fault is allowed */ else if (subcase->number == 12 && status == -EPIPE) status = 0; else ERROR(ctx->dev, "subtest %d error, status %d\n", subcase->number, status); } /* unexpected status codes mean errors; ideally, in hardware */ if (status) { error: if (ctx->status == 0) { int i; ctx->status = status; ERROR(ctx->dev, "control queue %02x.%02x, err %d, " "%d left, subcase %d, len %d/%d\n", reqp->bRequestType, reqp->bRequest, status, ctx->count, subcase->number, urb->actual_length, urb->transfer_buffer_length); /* FIXME this "unlink everything" exit route should * be a separate test case. */ /* unlink whatever's still pending */ for (i = 1; i < ctx->param->sglen; i++) { struct urb *u = ctx->urb[ (i + subcase->number) % ctx->param->sglen]; if (u == urb || !u->dev) continue; spin_unlock(&ctx->lock); status = usb_unlink_urb(u); spin_lock(&ctx->lock); switch (status) { case -EINPROGRESS: case -EBUSY: case -EIDRM: continue; default: ERROR(ctx->dev, "urb unlink --> %d\n", status); } } status = ctx->status; } } /* resubmit if we need to, else mark this as done */ if ((status == 0) && (ctx->pending < ctx->count)) { status = usb_submit_urb(urb, GFP_ATOMIC); if (status != 0) { ERROR(ctx->dev, "can't resubmit ctrl %02x.%02x, err %d\n", reqp->bRequestType, reqp->bRequest, status); urb->dev = NULL; } else ctx->pending++; } else urb->dev = NULL; /* signal completion when nothing's queued */ if (ctx->pending == 0) complete(&ctx->complete); spin_unlock_irqrestore(&ctx->lock, flags); } static int test_ctrl_queue(struct usbtest_dev *dev, struct usbtest_param_32 *param) { struct usb_device *udev = testdev_to_usbdev(dev); struct urb **urb; struct ctrl_ctx context; int i; if (param->sglen == 0 || param->iterations > UINT_MAX / param->sglen) return -EOPNOTSUPP; 
spin_lock_init(&context.lock); context.dev = dev; init_completion(&context.complete); context.count = param->sglen * param->iterations; context.pending = 0; context.status = -ENOMEM; context.param = param; context.last = -1; /* allocate and init the urbs we'll queue. * as with bulk/intr sglists, sglen is the queue depth; it also * controls which subtests run (more tests than sglen) or rerun. */ urb = kcalloc(param->sglen, sizeof(struct urb *), GFP_KERNEL); if (!urb) return -ENOMEM; for (i = 0; i < param->sglen; i++) { int pipe = usb_rcvctrlpipe(udev, 0); unsigned len; struct urb *u; struct usb_ctrlrequest req; struct subcase *reqp; /* sign of this variable means: * -: tested code must return this (negative) error code * +: tested code may return this (negative too) error code */ int expected = 0; /* requests here are mostly expected to succeed on any * device, but some are chosen to trigger protocol stalls * or short reads. */ memset(&req, 0, sizeof(req)); req.bRequest = USB_REQ_GET_DESCRIPTOR; req.bRequestType = USB_DIR_IN|USB_RECIP_DEVICE; switch (i % NUM_SUBCASES) { case 0: /* get device descriptor */ req.wValue = cpu_to_le16(USB_DT_DEVICE << 8); len = sizeof(struct usb_device_descriptor); break; case 1: /* get first config descriptor (only) */ req.wValue = cpu_to_le16((USB_DT_CONFIG << 8) | 0); len = sizeof(struct usb_config_descriptor); break; case 2: /* get altsetting (OFTEN STALLS) */ req.bRequest = USB_REQ_GET_INTERFACE; req.bRequestType = USB_DIR_IN|USB_RECIP_INTERFACE; /* index = 0 means first interface */ len = 1; expected = EPIPE; break; case 3: /* get interface status */ req.bRequest = USB_REQ_GET_STATUS; req.bRequestType = USB_DIR_IN|USB_RECIP_INTERFACE; /* interface 0 */ len = 2; break; case 4: /* get device status */ req.bRequest = USB_REQ_GET_STATUS; req.bRequestType = USB_DIR_IN|USB_RECIP_DEVICE; len = 2; break; case 5: /* get device qualifier (MAY STALL) */ req.wValue = cpu_to_le16 (USB_DT_DEVICE_QUALIFIER << 8); len = sizeof(struct 
usb_qualifier_descriptor); if (udev->speed != USB_SPEED_HIGH) expected = EPIPE; break; case 6: /* get first config descriptor, plus interface */ req.wValue = cpu_to_le16((USB_DT_CONFIG << 8) | 0); len = sizeof(struct usb_config_descriptor); len += sizeof(struct usb_interface_descriptor); break; case 7: /* get interface descriptor (ALWAYS STALLS) */ req.wValue = cpu_to_le16 (USB_DT_INTERFACE << 8); /* interface == 0 */ len = sizeof(struct usb_interface_descriptor); expected = -EPIPE; break; /* NOTE: two consecutive stalls in the queue here. * that tests fault recovery a bit more aggressively. */ case 8: /* clear endpoint halt (MAY STALL) */ req.bRequest = USB_REQ_CLEAR_FEATURE; req.bRequestType = USB_RECIP_ENDPOINT; /* wValue 0 == ep halt */ /* wIndex 0 == ep0 (shouldn't halt!) */ len = 0; pipe = usb_sndctrlpipe(udev, 0); expected = EPIPE; break; case 9: /* get endpoint status */ req.bRequest = USB_REQ_GET_STATUS; req.bRequestType = USB_DIR_IN|USB_RECIP_ENDPOINT; /* endpoint 0 */ len = 2; break; case 10: /* trigger short read (EREMOTEIO) */ req.wValue = cpu_to_le16((USB_DT_CONFIG << 8) | 0); len = 1024; expected = -EREMOTEIO; break; /* NOTE: two consecutive _different_ faults in the queue. */ case 11: /* get endpoint descriptor (ALWAYS STALLS) */ req.wValue = cpu_to_le16(USB_DT_ENDPOINT << 8); /* endpoint == 0 */ len = sizeof(struct usb_interface_descriptor); expected = EPIPE; break; /* NOTE: sometimes even a third fault in the queue! 
*/ case 12: /* get string 0 descriptor (MAY STALL) */ req.wValue = cpu_to_le16(USB_DT_STRING << 8); /* string == 0, for language IDs */ len = sizeof(struct usb_interface_descriptor); /* may succeed when > 4 languages */ expected = EREMOTEIO; /* or EPIPE, if no strings */ break; case 13: /* short read, resembling case 10 */ req.wValue = cpu_to_le16((USB_DT_CONFIG << 8) | 0); /* last data packet "should" be DATA1, not DATA0 */ if (udev->speed == USB_SPEED_SUPER) len = 1024 - 512; else len = 1024 - udev->descriptor.bMaxPacketSize0; expected = -EREMOTEIO; break; case 14: /* short read; try to fill the last packet */ req.wValue = cpu_to_le16((USB_DT_DEVICE << 8) | 0); /* device descriptor size == 18 bytes */ len = udev->descriptor.bMaxPacketSize0; if (udev->speed == USB_SPEED_SUPER) len = 512; switch (len) { case 8: len = 24; break; case 16: len = 32; break; } expected = -EREMOTEIO; break; case 15: req.wValue = cpu_to_le16(USB_DT_BOS << 8); if (udev->bos) len = le16_to_cpu(udev->bos->desc->wTotalLength); else len = sizeof(struct usb_bos_descriptor); if (le16_to_cpu(udev->descriptor.bcdUSB) < 0x0201) expected = -EPIPE; break; default: ERROR(dev, "bogus number of ctrl queue testcases!\n"); context.status = -EINVAL; goto cleanup; } req.wLength = cpu_to_le16(len); urb[i] = u = simple_alloc_urb(udev, pipe, len, 0); if (!u) goto cleanup; reqp = kmalloc(sizeof(*reqp), GFP_KERNEL); if (!reqp) goto cleanup; reqp->setup = req; reqp->number = i % NUM_SUBCASES; reqp->expected = expected; u->setup_packet = (char *) &reqp->setup; u->context = &context; u->complete = ctrl_complete; } /* queue the urbs */ context.urb = urb; spin_lock_irq(&context.lock); for (i = 0; i < param->sglen; i++) { context.status = usb_submit_urb(urb[i], GFP_ATOMIC); if (context.status != 0) { ERROR(dev, "can't submit urb[%d], status %d\n", i, context.status); context.count = context.pending; break; } context.pending++; } spin_unlock_irq(&context.lock); /* FIXME set timer and time out; provide a disconnect hook 
*/ /* wait for the last one to complete */ if (context.pending > 0) wait_for_completion(&context.complete); cleanup: for (i = 0; i < param->sglen; i++) { if (!urb[i]) continue; urb[i]->dev = udev; kfree(urb[i]->setup_packet); simple_free_urb(urb[i]); } kfree(urb); return context.status; } #undef NUM_SUBCASES /*-------------------------------------------------------------------------*/ static void unlink1_callback(struct urb *urb) { int status = urb->status; /* we "know" -EPIPE (stall) never happens */ if (!status) status = usb_submit_urb(urb, GFP_ATOMIC); if (status) { urb->status = status; complete(urb->context); } } static int unlink1(struct usbtest_dev *dev, int pipe, int size, int async) { struct urb *urb; struct completion completion; int retval = 0; init_completion(&completion); urb = simple_alloc_urb(testdev_to_usbdev(dev), pipe, size, 0); if (!urb) return -ENOMEM; urb->context = &completion; urb->complete = unlink1_callback; if (usb_pipeout(urb->pipe)) { simple_fill_buf(urb); urb->transfer_flags |= URB_ZERO_PACKET; } /* keep the endpoint busy. there are lots of hc/hcd-internal * states, and testing should get to all of them over time. * * FIXME want additional tests for when endpoint is STALLing * due to errors, or is just NAKing requests. */ retval = usb_submit_urb(urb, GFP_KERNEL); if (retval != 0) { dev_err(&dev->intf->dev, "submit fail %d\n", retval); return retval; } /* unlinking that should always work. variable delay tests more * hcd states and code paths, even with little other system load. */ msleep(jiffies % (2 * INTERRUPT_RATE)); if (async) { while (!completion_done(&completion)) { retval = usb_unlink_urb(urb); if (retval == 0 && usb_pipein(urb->pipe)) retval = simple_check_buf(dev, urb); switch (retval) { case -EBUSY: case -EIDRM: /* we can't unlink urbs while they're completing * or if they've completed, and we haven't * resubmitted. "normal" drivers would prevent * resubmission, but since we're testing unlink * paths, we can't. 
*/ ERROR(dev, "unlink retry\n"); continue; case 0: case -EINPROGRESS: break; default: dev_err(&dev->intf->dev, "unlink fail %d\n", retval); return retval; } break; } } else usb_kill_urb(urb); wait_for_completion(&completion); retval = urb->status; simple_free_urb(urb); if (async) return (retval == -ECONNRESET) ? 0 : retval - 1000; else return (retval == -ENOENT || retval == -EPERM) ? 0 : retval - 2000; } static int unlink_simple(struct usbtest_dev *dev, int pipe, int len) { int retval = 0; /* test sync and async paths */ retval = unlink1(dev, pipe, len, 1); if (!retval) retval = unlink1(dev, pipe, len, 0); return retval; } /*-------------------------------------------------------------------------*/ struct queued_ctx { struct completion complete; atomic_t pending; unsigned num; int status; struct urb **urbs; }; static void unlink_queued_callback(struct urb *urb) { int status = urb->status; struct queued_ctx *ctx = urb->context; if (ctx->status) goto done; if (urb == ctx->urbs[ctx->num - 4] || urb == ctx->urbs[ctx->num - 2]) { if (status == -ECONNRESET) goto done; /* What error should we report if the URB completed normally? 
*/ } if (status != 0) ctx->status = status; done: if (atomic_dec_and_test(&ctx->pending)) complete(&ctx->complete); } static int unlink_queued(struct usbtest_dev *dev, int pipe, unsigned num, unsigned size) { struct queued_ctx ctx; struct usb_device *udev = testdev_to_usbdev(dev); void *buf; dma_addr_t buf_dma; int i; int retval = -ENOMEM; init_completion(&ctx.complete); atomic_set(&ctx.pending, 1); /* One more than the actual value */ ctx.num = num; ctx.status = 0; buf = usb_alloc_coherent(udev, size, GFP_KERNEL, &buf_dma); if (!buf) return retval; memset(buf, 0, size); /* Allocate and init the urbs we'll queue */ ctx.urbs = kcalloc(num, sizeof(struct urb *), GFP_KERNEL); if (!ctx.urbs) goto free_buf; for (i = 0; i < num; i++) { ctx.urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (!ctx.urbs[i]) goto free_urbs; usb_fill_bulk_urb(ctx.urbs[i], udev, pipe, buf, size, unlink_queued_callback, &ctx); ctx.urbs[i]->transfer_dma = buf_dma; ctx.urbs[i]->transfer_flags = URB_NO_TRANSFER_DMA_MAP; if (usb_pipeout(ctx.urbs[i]->pipe)) { simple_fill_buf(ctx.urbs[i]); ctx.urbs[i]->transfer_flags |= URB_ZERO_PACKET; } } /* Submit all the URBs and then unlink URBs num - 4 and num - 2. 
*/ for (i = 0; i < num; i++) { atomic_inc(&ctx.pending); retval = usb_submit_urb(ctx.urbs[i], GFP_KERNEL); if (retval != 0) { dev_err(&dev->intf->dev, "submit urbs[%d] fail %d\n", i, retval); atomic_dec(&ctx.pending); ctx.status = retval; break; } } if (i == num) { usb_unlink_urb(ctx.urbs[num - 4]); usb_unlink_urb(ctx.urbs[num - 2]); } else { while (--i >= 0) usb_unlink_urb(ctx.urbs[i]); } if (atomic_dec_and_test(&ctx.pending)) /* The extra count */ complete(&ctx.complete); wait_for_completion(&ctx.complete); retval = ctx.status; free_urbs: for (i = 0; i < num; i++) usb_free_urb(ctx.urbs[i]); kfree(ctx.urbs); free_buf: usb_free_coherent(udev, size, buf, buf_dma); return retval; } /*-------------------------------------------------------------------------*/ static int verify_not_halted(struct usbtest_dev *tdev, int ep, struct urb *urb) { int retval; u16 status; /* shouldn't look or act halted */ retval = usb_get_std_status(urb->dev, USB_RECIP_ENDPOINT, ep, &status); if (retval < 0) { ERROR(tdev, "ep %02x couldn't get no-halt status, %d\n", ep, retval); return retval; } if (status != 0) { ERROR(tdev, "ep %02x bogus status: %04x != 0\n", ep, status); return -EINVAL; } retval = simple_io(tdev, urb, 1, 0, 0, __func__); if (retval != 0) return -EINVAL; return 0; } static int verify_halted(struct usbtest_dev *tdev, int ep, struct urb *urb) { int retval; u16 status; /* should look and act halted */ retval = usb_get_std_status(urb->dev, USB_RECIP_ENDPOINT, ep, &status); if (retval < 0) { ERROR(tdev, "ep %02x couldn't get halt status, %d\n", ep, retval); return retval; } if (status != 1) { ERROR(tdev, "ep %02x bogus status: %04x != 1\n", ep, status); return -EINVAL; } retval = simple_io(tdev, urb, 1, 0, -EPIPE, __func__); if (retval != -EPIPE) return -EINVAL; retval = simple_io(tdev, urb, 1, 0, -EPIPE, "verify_still_halted"); if (retval != -EPIPE) return -EINVAL; return 0; } static int test_halt(struct usbtest_dev *tdev, int ep, struct urb *urb) { int retval; /* shouldn't 
look or act halted now */ retval = verify_not_halted(tdev, ep, urb); if (retval < 0) return retval; /* set halt (protocol test only), verify it worked */ retval = usb_control_msg(urb->dev, usb_sndctrlpipe(urb->dev, 0), USB_REQ_SET_FEATURE, USB_RECIP_ENDPOINT, USB_ENDPOINT_HALT, ep, NULL, 0, USB_CTRL_SET_TIMEOUT); if (retval < 0) { ERROR(tdev, "ep %02x couldn't set halt, %d\n", ep, retval); return retval; } retval = verify_halted(tdev, ep, urb); if (retval < 0) { int ret; /* clear halt anyways, else further tests will fail */ ret = usb_clear_halt(urb->dev, urb->pipe); if (ret) ERROR(tdev, "ep %02x couldn't clear halt, %d\n", ep, ret); return retval; } /* clear halt (tests API + protocol), verify it worked */ retval = usb_clear_halt(urb->dev, urb->pipe); if (retval < 0) { ERROR(tdev, "ep %02x couldn't clear halt, %d\n", ep, retval); return retval; } retval = verify_not_halted(tdev, ep, urb); if (retval < 0) return retval; /* NOTE: could also verify SET_INTERFACE clear halts ... */ return 0; } static int test_toggle_sync(struct usbtest_dev *tdev, int ep, struct urb *urb) { int retval; /* clear initial data toggle to DATA0 */ retval = usb_clear_halt(urb->dev, urb->pipe); if (retval < 0) { ERROR(tdev, "ep %02x couldn't clear halt, %d\n", ep, retval); return retval; } /* transfer 3 data packets, should be DATA0, DATA1, DATA0 */ retval = simple_io(tdev, urb, 1, 0, 0, __func__); if (retval != 0) return -EINVAL; /* clear halt resets device side data toggle, host should react to it */ retval = usb_clear_halt(urb->dev, urb->pipe); if (retval < 0) { ERROR(tdev, "ep %02x couldn't clear halt, %d\n", ep, retval); return retval; } /* host should use DATA0 again after clear halt */ retval = simple_io(tdev, urb, 1, 0, 0, __func__); return retval; } static int halt_simple(struct usbtest_dev *dev) { int ep; int retval = 0; struct urb *urb; struct usb_device *udev = testdev_to_usbdev(dev); if (udev->speed == USB_SPEED_SUPER) urb = simple_alloc_urb(udev, 0, 1024, 0); else urb = 
simple_alloc_urb(udev, 0, 512, 0); if (urb == NULL) return -ENOMEM; if (dev->in_pipe) { ep = usb_pipeendpoint(dev->in_pipe) | USB_DIR_IN; urb->pipe = dev->in_pipe; retval = test_halt(dev, ep, urb); if (retval < 0) goto done; } if (dev->out_pipe) { ep = usb_pipeendpoint(dev->out_pipe); urb->pipe = dev->out_pipe; retval = test_halt(dev, ep, urb); } done: simple_free_urb(urb); return retval; } static int toggle_sync_simple(struct usbtest_dev *dev) { int ep; int retval = 0; struct urb *urb; struct usb_device *udev = testdev_to_usbdev(dev); unsigned maxp = get_maxpacket(udev, dev->out_pipe); /* * Create a URB that causes a transfer of uneven amount of data packets * This way the clear toggle has an impact on the data toggle sequence. * Use 2 maxpacket length packets and one zero packet. */ urb = simple_alloc_urb(udev, 0, 2 * maxp, 0); if (urb == NULL) return -ENOMEM; urb->transfer_flags |= URB_ZERO_PACKET; ep = usb_pipeendpoint(dev->out_pipe); urb->pipe = dev->out_pipe; retval = test_toggle_sync(dev, ep, urb); simple_free_urb(urb); return retval; } /*-------------------------------------------------------------------------*/ /* Control OUT tests use the vendor control requests from Intel's * USB 2.0 compliance test device: write a buffer, read it back. * * Intel's spec only _requires_ that it work for one packet, which * is pretty weak. Some HCDs place limits here; most devices will * need to be able to handle more than one OUT data packet. We'll * try whatever we're told to try. 
*/
static int ctrl_out(struct usbtest_dev *dev,
		unsigned count, unsigned length, unsigned vary, unsigned offset)
{
	struct usb_device	*udev;
	const char		*stage = "?";
	unsigned		pass, pos, cur;
	u8			*raw, *data;
	int			rc = 0;

	/*
	 * @count:  number of write/read-back rounds
	 * @length: largest transfer size, 1..0xffff
	 * @vary:   amount the size grows each round; must be below @length
	 * @offset: bytes to skip at the start of the allocation
	 *
	 * Returns -EINVAL or -ENOMEM before any I/O, a negative status on
	 * the first failed round, and otherwise whatever the last
	 * usb_control_msg() returned (0 if @count is 0).
	 */
	if (length == 0 || length > 0xffff || length <= vary)
		return -EINVAL;

	raw = kmalloc(length + offset, GFP_KERNEL);
	if (raw == NULL)
		return -ENOMEM;

	data = raw + offset;
	udev = testdev_to_usbdev(dev);
	cur = length;

	/* NOTE: hardware might well act differently if we pushed it
	 * with lots back-to-back queued requests.
	 */
	for (pass = 0; pass < count; pass++) {
		/* write patterned data */
		pos = 0;
		while (pos < cur) {
			data[pos] = (u8)(pass + pos);
			pos++;
		}

		rc = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
				0x5b, USB_DIR_OUT | USB_TYPE_VENDOR,
				0, 0, data, cur, USB_CTRL_SET_TIMEOUT);
		if (rc != cur) {
			stage = "write";
			if (rc >= 0) {
				ERROR(dev, "ctrl_out, wlen %d (expected %d)\n",
						rc, cur);
				rc = -EBADMSG;
			}
			break;
		}

		/* read it back -- assuming nothing intervened!! */
		rc = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
				0x5c, USB_DIR_IN | USB_TYPE_VENDOR,
				0, 0, data, cur, USB_CTRL_GET_TIMEOUT);
		if (rc != cur) {
			stage = "read";
			if (rc >= 0) {
				ERROR(dev, "ctrl_out, rlen %d (expected %d)\n",
						rc, cur);
				rc = -EBADMSG;
			}
			break;
		}

		/* fail if we can't verify */
		for (pos = 0; pos < cur; pos++) {
			u8 want = (u8)(pass + pos);

			if (data[pos] == want)
				continue;
			ERROR(dev, "ctrl_out, byte %d is %d not %d\n",
					pos, data[pos], want);
			rc = -EBADMSG;
			break;
		}
		if (rc < 0) {
			stage = "verify";
			break;
		}

		cur += vary;

		/* [real world] the "zero bytes IN" case isn't really used.
		 * hardware can easily trip up in this weird case, since its
		 * status stage is IN, not OUT like other ep0in transfers.
		 */
		if (cur > length) {
			if (realworld)
				cur = 1;
			else
				cur = 0;
		}
	}

	if (rc < 0)
		ERROR(dev, "ctrl_out %s failed, code %d, count %d\n",
			stage, rc, pass);

	kfree(raw);
	return rc;
}

/*-------------------------------------------------------------------------*/

/* ISO/BULK tests ...
mimics common usage
 *  - buffer length is split into N packets (mostly maxpacket sized)
 *  - multi-buffers according to sglen
 */

/*
 * State shared between test_queue() and complicated_callback() for one
 * queued test run.  Fields are updated under @lock.
 */
struct transfer_context {
	unsigned		count;		/* urb completions still expected */
	unsigned		pending;	/* urbs currently submitted */
	spinlock_t		lock;
	struct completion	done;		/* completed when pending reaches 0 */
	int			submit_error;	/* set once a (re)submit has failed */
	unsigned long		errors;		/* accumulated error count */
	unsigned long		packet_count;	/* sum of number_of_packets seen */
	struct usbtest_dev	*dev;
	bool			is_iso;		/* test_queue() sets this from !!desc */
};

/*
 * Completion handler for urbs queued by test_queue().
 *
 * Accounts the finished urb in the shared transfer_context, resubmits it
 * while more completions are expected than urbs are pending, and signals
 * ctx->done once the last pending urb has finished.
 */
static void complicated_callback(struct urb *urb)
{
	struct transfer_context	*ctx = urb->context;
	unsigned long flags;

	spin_lock_irqsave(&ctx->lock, flags);
	ctx->count--;

	/* only the first matching condition is counted for this urb */
	ctx->packet_count += urb->number_of_packets;
	if (urb->error_count > 0)
		ctx->errors += urb->error_count;
	else if (urb->status != 0)
		ctx->errors += (ctx->is_iso ? urb->number_of_packets : 1);
	else if (urb->actual_length != urb->transfer_buffer_length)
		ctx->errors++;
	else if (check_guard_bytes(ctx->dev, urb) != 0)
		ctx->errors++;

	if (urb->status == 0 && ctx->count > (ctx->pending - 1)
			&& !ctx->submit_error) {
		int status = usb_submit_urb(urb, GFP_ATOMIC);
		switch (status) {
		case 0:
			/* resubmitted: this urb stays pending */
			goto done;
		default:
			dev_err(&ctx->dev->intf->dev,
					"resubmit err %d\n",
					status);
			fallthrough;
		case -ENODEV:			/* disconnected */
		case -ESHUTDOWN:		/* endpoint disabled */
			ctx->submit_error = 1;
			break;
		}
	}

	/* not resubmitted: retire this urb and wake the waiter on the last one */
	ctx->pending--;
	if (ctx->pending == 0) {
		if (ctx->errors)
			dev_err(&ctx->dev->intf->dev,
				"during the test, %lu errors out of %lu\n",
				ctx->errors, ctx->packet_count);
		complete(&ctx->done);
	}
done:
	spin_unlock_irqrestore(&ctx->lock, flags);
}

/*
 * Allocate and set up an isochronous urb carrying @bytes of data on @pipe.
 *
 * The buffer comes from usb_alloc_coherent() with @offset extra leading
 * bytes; those are filled with GUARD_BYTE and the urb's buffer pointer and
 * DMA address are advanced past them.  The completion handler is
 * complicated_callback(); urb->context is left for the caller to set.
 *
 * Returns NULL if @bytes is negative, @desc is NULL, or an allocation fails.
 */
static struct urb *iso_alloc_urb(
	struct usb_device	*udev,
	int			pipe,
	struct usb_endpoint_descriptor	*desc,
	long			bytes,
	unsigned offset
)
{
	struct urb		*urb;
	unsigned		i, maxp, packets;

	if (bytes < 0 || !desc)
		return NULL;

	/* maxp becomes the per-packet byte limit used for the frame layout */
	maxp = usb_endpoint_maxp(desc);
	if (udev->speed >= USB_SPEED_SUPER)
		maxp *= ss_isoc_get_packet_num(udev, pipe);
	else
		maxp *= usb_endpoint_maxp_mult(desc);

	packets = DIV_ROUND_UP(bytes, maxp);

	urb = usb_alloc_urb(packets, GFP_KERNEL);
	if (!urb)
		return urb;
	urb->dev = udev;
	urb->pipe = pipe;

	urb->number_of_packets = packets;
	urb->transfer_buffer_length = bytes;
	urb->transfer_buffer = usb_alloc_coherent(udev, bytes + offset,
							GFP_KERNEL,
							&urb->transfer_dma);
	if (!urb->transfer_buffer) {
		usb_free_urb(urb);
		return NULL;
	}
	if (offset) {
		memset(urb->transfer_buffer, GUARD_BYTE, offset);
		urb->transfer_buffer += offset;
		urb->transfer_dma += offset;
	}
	/* For inbound transfers use guard byte so that test fails if
	 * data not correctly copied
	 */
	memset(urb->transfer_buffer,
			usb_pipein(urb->pipe) ? GUARD_BYTE : 0,
			bytes);

	for (i = 0; i < packets; i++) {
		/* here, only the last packet will be short */
		urb->iso_frame_desc[i].length = min_t(unsigned int,
							bytes, maxp);
		bytes -= urb->iso_frame_desc[i].length;

		urb->iso_frame_desc[i].offset = maxp * i;
	}

	urb->complete = complicated_callback;
	/* urb->context = SET BY CALLER */
	urb->interval = 1 << (desc->bInterval - 1);
	urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP;
	return urb;
}

/*
 * Queue param->sglen urbs on @pipe and keep them cycling until
 * param->iterations * param->sglen completions have been counted.
 *
 * A non-NULL @desc selects the isochronous path (iso_alloc_urb() with
 * @offset); a NULL @desc uses complicated_alloc_urb() instead.
 *
 * Returns -EINVAL for a zero or too-large sglen or an iterations * sglen
 * product above UINT_MAX, -ENOMEM on allocation failure, the
 * usb_submit_urb() error if an initial submit fails, -EACCES if a resubmit
 * failed, -EIO if too many errors were counted, else 0.
 */
static int
test_queue(struct usbtest_dev *dev, struct usbtest_param_32 *param,
		int pipe, struct usb_endpoint_descriptor *desc, unsigned offset)
{
	struct transfer_context	context;
	struct usb_device	*udev;
	unsigned		i;
	unsigned long		packets = 0;
	int			status = 0;
	struct urb		**urbs;

	if (!param->sglen || param->iterations > UINT_MAX / param->sglen)
		return -EINVAL;

	if (param->sglen > MAX_SGLEN)
		return -EINVAL;

	urbs = kcalloc(param->sglen, sizeof(*urbs), GFP_KERNEL);
	if (!urbs)
		return -ENOMEM;

	memset(&context, 0, sizeof(context));
	context.count = param->iterations * param->sglen;
	context.dev = dev;
	context.is_iso = !!desc;
	init_completion(&context.done);
	spin_lock_init(&context.lock);

	udev = testdev_to_usbdev(dev);

	for (i = 0; i < param->sglen; i++) {
		if (context.is_iso)
			urbs[i] = iso_alloc_urb(udev, pipe, desc,
					param->length, offset);
		else
			urbs[i] = complicated_alloc_urb(udev, pipe,
					param->length, 0);

		if (!urbs[i]) {
			status = -ENOMEM;
			goto fail;
		}
		packets += urbs[i]->number_of_packets;
		urbs[i]->context = &context;
	}
	packets *= param->iterations;

	if (context.is_iso) {
		int transaction_num;

		if (udev->speed >= USB_SPEED_SUPER)
			transaction_num = ss_isoc_get_packet_num(udev, pipe);
		else
			transaction_num = usb_endpoint_maxp_mult(desc);

		dev_info(&dev->intf->dev,
			"iso period %d %sframes, wMaxPacket %d, transactions: %d\n",
			1 << (desc->bInterval - 1),
			(udev->speed >= USB_SPEED_HIGH) ? "micro" : "",
			usb_endpoint_maxp(desc),
			transaction_num);

		dev_info(&dev->intf->dev,
			"total %lu msec (%lu packets)\n",
			(packets * (1 << (desc->bInterval - 1)))
				/ ((udev->speed >= USB_SPEED_HIGH) ? 8 : 1),
			packets);
	}

	/* hold the lock so completions cannot run the accounting mid-submit */
	spin_lock_irq(&context.lock);
	for (i = 0; i < param->sglen; i++) {
		++context.pending;
		status = usb_submit_urb(urbs[i], GFP_ATOMIC);
		if (status < 0) {
			ERROR(dev, "submit iso[%d], error %d\n", i, status);
			/* nothing in flight yet: bail out without waiting */
			if (i == 0) {
				spin_unlock_irq(&context.lock);
				goto fail;
			}

			/* earlier urbs are in flight: stop submitting, wait below */
			simple_free_urb(urbs[i]);
			urbs[i] = NULL;
			context.pending--;
			context.submit_error = 1;
			break;
		}
	}
	spin_unlock_irq(&context.lock);

	wait_for_completion(&context.done);

	for (i = 0; i < param->sglen; i++) {
		if (urbs[i])
			simple_free_urb(urbs[i]);
	}
	/*
	 * Isochronous transfers are expected to fail sometimes. As an
	 * arbitrary limit, we will report an error if any submissions
	 * fail or if the transfer failure rate is > 10%.
	 */
	if (status != 0)
		;	/* keep the submit error from the loop above */
	else if (context.submit_error)
		status = -EACCES;
	else if (context.errors >
			(context.is_iso ? context.packet_count / 10 : 0))
		status = -EIO;

	kfree(urbs);
	return status;

fail:
	for (i = 0; i < param->sglen; i++) {
		if (urbs[i])
			simple_free_urb(urbs[i]);
	}

	kfree(urbs);
	return status;
}

/*
 * Allocate one urb with usbtest_alloc_urb() (offset argument 1), run
 * simple_io() on it @iterations times under @label, and free it.
 *
 * Returns -ENOMEM if the urb cannot be allocated, else the simple_io()
 * status.
 */
static int test_unaligned_bulk(
	struct usbtest_dev *tdev,
	int pipe,
	unsigned length,
	int iterations,
	unsigned transfer_flags,
	const char *label)
{
	int retval;
	struct urb *urb = usbtest_alloc_urb(testdev_to_usbdev(tdev),
			pipe, length, transfer_flags, 1, 0, simple_callback);

	if (!urb)
		return -ENOMEM;

	retval = simple_io(tdev, urb, iterations, 0, 0, label);
	simple_free_urb(urb);
	return retval;
}

/* Run tests.
*/ static int usbtest_do_ioctl(struct usb_interface *intf, struct usbtest_param_32 *param) { struct usbtest_dev *dev = usb_get_intfdata(intf); struct usb_device *udev = testdev_to_usbdev(dev); struct urb *urb; struct scatterlist *sg; struct usb_sg_request req; unsigned i; int retval = -EOPNOTSUPP; if (param->iterations <= 0) return -EINVAL; if (param->sglen > MAX_SGLEN) return -EINVAL; /* * Just a bunch of test cases that every HCD is expected to handle. * * Some may need specific firmware, though it'd be good to have * one firmware image to handle all the test cases. * * FIXME add more tests! cancel requests, verify the data, control * queueing, concurrent read+write threads, and so on. */ switch (param->test_num) { case 0: dev_info(&intf->dev, "TEST 0: NOP\n"); retval = 0; break; /* Simple non-queued bulk I/O tests */ case 1: if (dev->out_pipe == 0) break; dev_info(&intf->dev, "TEST 1: write %d bytes %u times\n", param->length, param->iterations); urb = simple_alloc_urb(udev, dev->out_pipe, param->length, 0); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: bulk sink (maybe accepts short writes) */ retval = simple_io(dev, urb, param->iterations, 0, 0, "test1"); simple_free_urb(urb); break; case 2: if (dev->in_pipe == 0) break; dev_info(&intf->dev, "TEST 2: read %d bytes %u times\n", param->length, param->iterations); urb = simple_alloc_urb(udev, dev->in_pipe, param->length, 0); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: bulk source (maybe generates short writes) */ retval = simple_io(dev, urb, param->iterations, 0, 0, "test2"); simple_free_urb(urb); break; case 3: if (dev->out_pipe == 0 || param->vary == 0) break; dev_info(&intf->dev, "TEST 3: write/%d 0..%d bytes %u times\n", param->vary, param->length, param->iterations); urb = simple_alloc_urb(udev, dev->out_pipe, param->length, 0); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: bulk sink (maybe accepts short writes) */ retval = simple_io(dev, urb, param->iterations, param->vary, 0, "test3"); 
simple_free_urb(urb); break; case 4: if (dev->in_pipe == 0 || param->vary == 0) break; dev_info(&intf->dev, "TEST 4: read/%d 0..%d bytes %u times\n", param->vary, param->length, param->iterations); urb = simple_alloc_urb(udev, dev->in_pipe, param->length, 0); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: bulk source (maybe generates short writes) */ retval = simple_io(dev, urb, param->iterations, param->vary, 0, "test4"); simple_free_urb(urb); break; /* Queued bulk I/O tests */ case 5: if (dev->out_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 5: write %d sglists %d entries of %d bytes\n", param->iterations, param->sglen, param->length); sg = alloc_sglist(param->sglen, param->length, 0, dev, dev->out_pipe); if (!sg) { retval = -ENOMEM; break; } /* FIRMWARE: bulk sink (maybe accepts short writes) */ retval = perform_sglist(dev, param->iterations, dev->out_pipe, &req, sg, param->sglen); free_sglist(sg, param->sglen); break; case 6: if (dev->in_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 6: read %d sglists %d entries of %d bytes\n", param->iterations, param->sglen, param->length); sg = alloc_sglist(param->sglen, param->length, 0, dev, dev->in_pipe); if (!sg) { retval = -ENOMEM; break; } /* FIRMWARE: bulk source (maybe generates short writes) */ retval = perform_sglist(dev, param->iterations, dev->in_pipe, &req, sg, param->sglen); free_sglist(sg, param->sglen); break; case 7: if (dev->out_pipe == 0 || param->sglen == 0 || param->vary == 0) break; dev_info(&intf->dev, "TEST 7: write/%d %d sglists %d entries 0..%d bytes\n", param->vary, param->iterations, param->sglen, param->length); sg = alloc_sglist(param->sglen, param->length, param->vary, dev, dev->out_pipe); if (!sg) { retval = -ENOMEM; break; } /* FIRMWARE: bulk sink (maybe accepts short writes) */ retval = perform_sglist(dev, param->iterations, dev->out_pipe, &req, sg, param->sglen); free_sglist(sg, param->sglen); break; case 8: if (dev->in_pipe == 0 || param->sglen 
== 0 || param->vary == 0) break; dev_info(&intf->dev, "TEST 8: read/%d %d sglists %d entries 0..%d bytes\n", param->vary, param->iterations, param->sglen, param->length); sg = alloc_sglist(param->sglen, param->length, param->vary, dev, dev->in_pipe); if (!sg) { retval = -ENOMEM; break; } /* FIRMWARE: bulk source (maybe generates short writes) */ retval = perform_sglist(dev, param->iterations, dev->in_pipe, &req, sg, param->sglen); free_sglist(sg, param->sglen); break; /* non-queued sanity tests for control (chapter 9 subset) */ case 9: retval = 0; dev_info(&intf->dev, "TEST 9: ch9 (subset) control tests, %d times\n", param->iterations); for (i = param->iterations; retval == 0 && i--; /* NOP */) retval = ch9_postconfig(dev); if (retval) dev_err(&intf->dev, "ch9 subset failed, " "iterations left %d\n", i); break; /* queued control messaging */ case 10: retval = 0; dev_info(&intf->dev, "TEST 10: queue %d control calls, %d times\n", param->sglen, param->iterations); retval = test_ctrl_queue(dev, param); break; /* simple non-queued unlinks (ring with one urb) */ case 11: if (dev->in_pipe == 0 || !param->length) break; retval = 0; dev_info(&intf->dev, "TEST 11: unlink %d reads of %d\n", param->iterations, param->length); for (i = param->iterations; retval == 0 && i--; /* NOP */) retval = unlink_simple(dev, dev->in_pipe, param->length); if (retval) dev_err(&intf->dev, "unlink reads failed %d, " "iterations left %d\n", retval, i); break; case 12: if (dev->out_pipe == 0 || !param->length) break; retval = 0; dev_info(&intf->dev, "TEST 12: unlink %d writes of %d\n", param->iterations, param->length); for (i = param->iterations; retval == 0 && i--; /* NOP */) retval = unlink_simple(dev, dev->out_pipe, param->length); if (retval) dev_err(&intf->dev, "unlink writes failed %d, " "iterations left %d\n", retval, i); break; /* ep halt tests */ case 13: if (dev->out_pipe == 0 && dev->in_pipe == 0) break; retval = 0; dev_info(&intf->dev, "TEST 13: set/clear %d halts\n", 
param->iterations); for (i = param->iterations; retval == 0 && i--; /* NOP */) retval = halt_simple(dev); if (retval) ERROR(dev, "halts failed, iterations left %d\n", i); break; /* control write tests */ case 14: if (!dev->info->ctrl_out) break; dev_info(&intf->dev, "TEST 14: %d ep0out, %d..%d vary %d\n", param->iterations, realworld ? 1 : 0, param->length, param->vary); retval = ctrl_out(dev, param->iterations, param->length, param->vary, 0); break; /* iso write tests */ case 15: if (dev->out_iso_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 15: write %d iso, %d entries of %d bytes\n", param->iterations, param->sglen, param->length); /* FIRMWARE: iso sink */ retval = test_queue(dev, param, dev->out_iso_pipe, dev->iso_out, 0); break; /* iso read tests */ case 16: if (dev->in_iso_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 16: read %d iso, %d entries of %d bytes\n", param->iterations, param->sglen, param->length); /* FIRMWARE: iso source */ retval = test_queue(dev, param, dev->in_iso_pipe, dev->iso_in, 0); break; /* FIXME scatterlist cancel (needs helper thread) */ /* Tests for bulk I/O using DMA mapping by core and odd address */ case 17: if (dev->out_pipe == 0) break; dev_info(&intf->dev, "TEST 17: write odd addr %d bytes %u times core map\n", param->length, param->iterations); retval = test_unaligned_bulk( dev, dev->out_pipe, param->length, param->iterations, 0, "test17"); break; case 18: if (dev->in_pipe == 0) break; dev_info(&intf->dev, "TEST 18: read odd addr %d bytes %u times core map\n", param->length, param->iterations); retval = test_unaligned_bulk( dev, dev->in_pipe, param->length, param->iterations, 0, "test18"); break; /* Tests for bulk I/O using premapped coherent buffer and odd address */ case 19: if (dev->out_pipe == 0) break; dev_info(&intf->dev, "TEST 19: write odd addr %d bytes %u times premapped\n", param->length, param->iterations); retval = test_unaligned_bulk( dev, dev->out_pipe, param->length, 
param->iterations, URB_NO_TRANSFER_DMA_MAP, "test19"); break; case 20: if (dev->in_pipe == 0) break; dev_info(&intf->dev, "TEST 20: read odd addr %d bytes %u times premapped\n", param->length, param->iterations); retval = test_unaligned_bulk( dev, dev->in_pipe, param->length, param->iterations, URB_NO_TRANSFER_DMA_MAP, "test20"); break; /* control write tests with unaligned buffer */ case 21: if (!dev->info->ctrl_out) break; dev_info(&intf->dev, "TEST 21: %d ep0out odd addr, %d..%d vary %d\n", param->iterations, realworld ? 1 : 0, param->length, param->vary); retval = ctrl_out(dev, param->iterations, param->length, param->vary, 1); break; /* unaligned iso tests */ case 22: if (dev->out_iso_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 22: write %d iso odd, %d entries of %d bytes\n", param->iterations, param->sglen, param->length); retval = test_queue(dev, param, dev->out_iso_pipe, dev->iso_out, 1); break; case 23: if (dev->in_iso_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 23: read %d iso odd, %d entries of %d bytes\n", param->iterations, param->sglen, param->length); retval = test_queue(dev, param, dev->in_iso_pipe, dev->iso_in, 1); break; /* unlink URBs from a bulk-OUT queue */ case 24: if (dev->out_pipe == 0 || !param->length || param->sglen < 4) break; retval = 0; dev_info(&intf->dev, "TEST 24: unlink from %d queues of " "%d %d-byte writes\n", param->iterations, param->sglen, param->length); for (i = param->iterations; retval == 0 && i > 0; --i) { retval = unlink_queued(dev, dev->out_pipe, param->sglen, param->length); if (retval) { dev_err(&intf->dev, "unlink queued writes failed %d, " "iterations left %d\n", retval, i); break; } } break; /* Simple non-queued interrupt I/O tests */ case 25: if (dev->out_int_pipe == 0) break; dev_info(&intf->dev, "TEST 25: write %d bytes %u times\n", param->length, param->iterations); urb = simple_alloc_urb(udev, dev->out_int_pipe, param->length, dev->int_out->bInterval); if (!urb) { 
retval = -ENOMEM; break; } /* FIRMWARE: interrupt sink (maybe accepts short writes) */ retval = simple_io(dev, urb, param->iterations, 0, 0, "test25"); simple_free_urb(urb); break; case 26: if (dev->in_int_pipe == 0) break; dev_info(&intf->dev, "TEST 26: read %d bytes %u times\n", param->length, param->iterations); urb = simple_alloc_urb(udev, dev->in_int_pipe, param->length, dev->int_in->bInterval); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: interrupt source (maybe generates short writes) */ retval = simple_io(dev, urb, param->iterations, 0, 0, "test26"); simple_free_urb(urb); break; case 27: /* We do performance test, so ignore data compare */ if (dev->out_pipe == 0 || param->sglen == 0 || pattern != 0) break; dev_info(&intf->dev, "TEST 27: bulk write %dMbytes\n", (param->iterations * param->sglen * param->length) / (1024 * 1024)); retval = test_queue(dev, param, dev->out_pipe, NULL, 0); break; case 28: if (dev->in_pipe == 0 || param->sglen == 0 || pattern != 0) break; dev_info(&intf->dev, "TEST 28: bulk read %dMbytes\n", (param->iterations * param->sglen * param->length) / (1024 * 1024)); retval = test_queue(dev, param, dev->in_pipe, NULL, 0); break; /* Test data Toggle/seq_nr clear between bulk out transfers */ case 29: if (dev->out_pipe == 0) break; retval = 0; dev_info(&intf->dev, "TEST 29: Clear toggle between bulk writes %d times\n", param->iterations); for (i = param->iterations; retval == 0 && i > 0; --i) retval = toggle_sync_simple(dev); if (retval) ERROR(dev, "toggle sync failed, iterations left %d\n", i); break; } return retval; } /*-------------------------------------------------------------------------*/ /* We only have this one interface to user space, through usbfs. * User mode code can scan usbfs to find N different devices (maybe on * different busses) to use when testing, and allocate one thread per * test. So discovery is simplified, and we have no device naming issues. * * Don't use these only as stress/load tests. 
Use them along with * other USB bus activity: plugging, unplugging, mousing, mp3 playback, * video capture, and so on. Run different tests at different times, in * different sequences. Nothing here should interact with other devices, * except indirectly by consuming USB bandwidth and CPU resources for test * threads and request completion. But the only way to know that for sure * is to test when HC queues are in use by many devices. * * WARNING: Because usbfs grabs udev->dev.sem before calling this ioctl(), * it locks out usbcore in certain code paths. Notably, if you disconnect * the device-under-test, hub_wq will wait block forever waiting for the * ioctl to complete ... so that usb_disconnect() can abort the pending * urbs and then call usbtest_disconnect(). To abort a test, you're best * off just killing the userspace task and waiting for it to exit. */ static int usbtest_ioctl(struct usb_interface *intf, unsigned int code, void *buf) { struct usbtest_dev *dev = usb_get_intfdata(intf); struct usbtest_param_64 *param_64 = buf; struct usbtest_param_32 temp; struct usbtest_param_32 *param_32 = buf; struct timespec64 start; struct timespec64 end; struct timespec64 duration; int retval = -EOPNOTSUPP; /* FIXME USBDEVFS_CONNECTINFO doesn't say how fast the device is. */ pattern = mod_pattern; if (mutex_lock_interruptible(&dev->lock)) return -ERESTARTSYS; /* FIXME: What if a system sleep starts while a test is running? */ /* some devices, like ez-usb default devices, need a non-default * altsetting to have any active endpoints. some tests change * altsettings; force a default so most tests don't need to check. 
*/ if (dev->info->alt >= 0) { if (intf->altsetting->desc.bInterfaceNumber) { retval = -ENODEV; goto free_mutex; } retval = set_altsetting(dev, dev->info->alt); if (retval) { dev_err(&intf->dev, "set altsetting to %d failed, %d\n", dev->info->alt, retval); goto free_mutex; } } switch (code) { case USBTEST_REQUEST_64: temp.test_num = param_64->test_num; temp.iterations = param_64->iterations; temp.length = param_64->length; temp.sglen = param_64->sglen; temp.vary = param_64->vary; param_32 = &temp; break; case USBTEST_REQUEST_32: break; default: retval = -EOPNOTSUPP; goto free_mutex; } ktime_get_ts64(&start); retval = usbtest_do_ioctl(intf, param_32); if (retval < 0) goto free_mutex; ktime_get_ts64(&end); duration = timespec64_sub(end, start); temp.duration_sec = duration.tv_sec; temp.duration_usec = duration.tv_nsec/NSEC_PER_USEC; switch (code) { case USBTEST_REQUEST_32: param_32->duration_sec = temp.duration_sec; param_32->duration_usec = temp.duration_usec; break; case USBTEST_REQUEST_64: param_64->duration_sec = temp.duration_sec; param_64->duration_usec = temp.duration_usec; break; } free_mutex: mutex_unlock(&dev->lock); return retval; } /*-------------------------------------------------------------------------*/ static unsigned force_interrupt; module_param(force_interrupt, uint, 0); MODULE_PARM_DESC(force_interrupt, "0 = test default; else interrupt"); #ifdef GENERIC static unsigned short vendor; module_param(vendor, ushort, 0); MODULE_PARM_DESC(vendor, "vendor code (from usb-if)"); static unsigned short product; module_param(product, ushort, 0); MODULE_PARM_DESC(product, "product code (from vendor)"); #endif static int usbtest_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev; struct usbtest_dev *dev; struct usbtest_info *info; char *rtest, *wtest; char *irtest, *iwtest; char *intrtest, *intwtest; udev = interface_to_usbdev(intf); #ifdef GENERIC /* specify devices by module parameters? 
*/ if (id->match_flags == 0) { /* vendor match required, product match optional */ if (!vendor || le16_to_cpu(udev->descriptor.idVendor) != (u16)vendor) return -ENODEV; if (product && le16_to_cpu(udev->descriptor.idProduct) != (u16)product) return -ENODEV; dev_info(&intf->dev, "matched module params, " "vend=0x%04x prod=0x%04x\n", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); } #endif dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; info = (struct usbtest_info *) id->driver_info; dev->info = info; mutex_init(&dev->lock); dev->intf = intf; /* cacheline-aligned scratch for i/o */ dev->buf = kmalloc(TBUF_SIZE, GFP_KERNEL); if (dev->buf == NULL) { kfree(dev); return -ENOMEM; } /* NOTE this doesn't yet test the handful of difference that are * visible with high speed interrupts: bigger maxpacket (1K) and * "high bandwidth" modes (up to 3 packets/uframe). */ rtest = wtest = ""; irtest = iwtest = ""; intrtest = intwtest = ""; if (force_interrupt || udev->speed == USB_SPEED_LOW) { if (info->ep_in) { dev->in_pipe = usb_rcvintpipe(udev, info->ep_in); rtest = " intr-in"; } if (info->ep_out) { dev->out_pipe = usb_sndintpipe(udev, info->ep_out); wtest = " intr-out"; } } else { if (override_alt >= 0 || info->autoconf) { int status; status = get_endpoints(dev, intf); if (status < 0) { WARNING(dev, "couldn't get endpoints, %d\n", status); kfree(dev->buf); kfree(dev); return status; } /* may find bulk or ISO pipes */ } else { if (info->ep_in) dev->in_pipe = usb_rcvbulkpipe(udev, info->ep_in); if (info->ep_out) dev->out_pipe = usb_sndbulkpipe(udev, info->ep_out); } if (dev->in_pipe) rtest = " bulk-in"; if (dev->out_pipe) wtest = " bulk-out"; if (dev->in_iso_pipe) irtest = " iso-in"; if (dev->out_iso_pipe) iwtest = " iso-out"; if (dev->in_int_pipe) intrtest = " int-in"; if (dev->out_int_pipe) intwtest = " int-out"; } usb_set_intfdata(intf, dev); dev_info(&intf->dev, "%s\n", info->name); dev_info(&intf->dev, "%s 
{control%s%s%s%s%s%s%s} tests%s\n", usb_speed_string(udev->speed), info->ctrl_out ? " in/out" : "", rtest, wtest, irtest, iwtest, intrtest, intwtest, info->alt >= 0 ? " (+alt)" : ""); return 0; } static int usbtest_suspend(struct usb_interface *intf, pm_message_t message) { return 0; } static int usbtest_resume(struct usb_interface *intf) { return 0; } static void usbtest_disconnect(struct usb_interface *intf) { struct usbtest_dev *dev = usb_get_intfdata(intf); usb_set_intfdata(intf, NULL); dev_dbg(&intf->dev, "disconnect\n"); kfree(dev->buf); kfree(dev); } /* Basic testing only needs a device that can source or sink bulk traffic. * Any device can test control transfers (default with GENERIC binding). * * Several entries work with the default EP0 implementation that's built * into EZ-USB chips. There's a default vendor ID which can be overridden * by (very) small config EEPROMS, but otherwise all these devices act * identically until firmware is loaded: only EP0 works. It turns out * to be easy to make other endpoints work, without modifying that EP0 * behavior. For now, we expect that kind of firmware. */ /* an21xx or fx versions of ez-usb */ static struct usbtest_info ez1_info = { .name = "EZ-USB device", .ep_in = 2, .ep_out = 2, .alt = 1, }; /* fx2 version of ez-usb */ static struct usbtest_info ez2_info = { .name = "FX2 device", .ep_in = 6, .ep_out = 2, .alt = 1, }; /* ezusb family device with dedicated usb test firmware, */ static struct usbtest_info fw_info = { .name = "usb test device", .ep_in = 2, .ep_out = 2, .alt = 1, .autoconf = 1, /* iso and ctrl_out need autoconf */ .ctrl_out = 1, .iso = 1, /* iso_ep's are #8 in/out */ }; /* peripheral running Linux and 'zero.c' test firmware, or * its user-mode cousin. different versions of this use * different hardware with the same vendor/product codes. * host side MUST rely on the endpoint descriptors. 
*/ static struct usbtest_info gz_info = { .name = "Linux gadget zero", .autoconf = 1, .ctrl_out = 1, .iso = 1, .intr = 1, .alt = 0, }; static struct usbtest_info um_info = { .name = "Linux user mode test driver", .autoconf = 1, .alt = -1, }; static struct usbtest_info um2_info = { .name = "Linux user mode ISO test driver", .autoconf = 1, .iso = 1, .alt = -1, }; #ifdef IBOT2 /* this is a nice source of high speed bulk data; * uses an FX2, with firmware provided in the device */ static struct usbtest_info ibot2_info = { .name = "iBOT2 webcam", .ep_in = 2, .alt = -1, }; #endif #ifdef GENERIC /* we can use any device to test control traffic */ static struct usbtest_info generic_info = { .name = "Generic USB device", .alt = -1, }; #endif static const struct usb_device_id id_table[] = { /*-------------------------------------------------------------*/ /* EZ-USB devices which download firmware to replace (or in our * case augment) the default device implementation. */ /* generic EZ-USB FX controller */ { USB_DEVICE(0x0547, 0x2235), .driver_info = (unsigned long) &ez1_info, }, /* CY3671 development board with EZ-USB FX */ { USB_DEVICE(0x0547, 0x0080), .driver_info = (unsigned long) &ez1_info, }, /* generic EZ-USB FX2 controller (or development board) */ { USB_DEVICE(0x04b4, 0x8613), .driver_info = (unsigned long) &ez2_info, }, /* re-enumerated usb test device firmware */ { USB_DEVICE(0xfff0, 0xfff0), .driver_info = (unsigned long) &fw_info, }, /* "Gadget Zero" firmware runs under Linux */ { USB_DEVICE(0x0525, 0xa4a0), .driver_info = (unsigned long) &gz_info, }, /* so does a user-mode variant */ { USB_DEVICE(0x0525, 0xa4a4), .driver_info = (unsigned long) &um_info, }, /* ... and a user-mode variant that talks iso */ { USB_DEVICE(0x0525, 0xa4a3), .driver_info = (unsigned long) &um2_info, }, #ifdef KEYSPAN_19Qi /* Keyspan 19qi uses an21xx (original EZ-USB) */ /* this does not coexist with the real Keyspan 19qi driver! 
*/ { USB_DEVICE(0x06cd, 0x010b), .driver_info = (unsigned long) &ez1_info, }, #endif /*-------------------------------------------------------------*/ #ifdef IBOT2 /* iBOT2 makes a nice source of high speed bulk-in data */ /* this does not coexist with a real iBOT2 driver! */ { USB_DEVICE(0x0b62, 0x0059), .driver_info = (unsigned long) &ibot2_info, }, #endif /*-------------------------------------------------------------*/ #ifdef GENERIC /* module params can specify devices to use for control tests */ { .driver_info = (unsigned long) &generic_info, }, #endif /*-------------------------------------------------------------*/ { } }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_driver usbtest_driver = { .name = "usbtest", .id_table = id_table, .probe = usbtest_probe, .unlocked_ioctl = usbtest_ioctl, .disconnect = usbtest_disconnect, .suspend = usbtest_suspend, .resume = usbtest_resume, }; /*-------------------------------------------------------------------------*/ static int __init usbtest_init(void) { #ifdef GENERIC if (vendor) pr_debug("params: vend=0x%04x prod=0x%04x\n", vendor, product); #endif return usb_register(&usbtest_driver); } module_init(usbtest_init); static void __exit usbtest_exit(void) { usb_deregister(&usbtest_driver); } module_exit(usbtest_exit); MODULE_DESCRIPTION("USB Core/HCD Testing Driver"); MODULE_LICENSE("GPL");
7 1 7 7 7 5 1 7 7 7 7 7 7 7 7 7 7 7 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 
1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 
1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 // SPDX-License-Identifier: GPL-2.0-or-later /* * SPCA508 chip based cameras subdriver * * Copyright (C) 2009 Jean-Francois Moine <http://moinejf.free.fr> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "spca508" #include "gspca.h" MODULE_AUTHOR("Michel Xhaard <mxhaard@users.sourceforge.net>"); MODULE_DESCRIPTION("GSPCA/SPCA508 USB Camera Driver"); MODULE_LICENSE("GPL"); /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! 
must be the first item */

	/* camera model; presumably one of the constants defined below */
	u8 subtype;
#define CreativeVista 0
#define HamaUSBSightcam 1
#define HamaUSBSightcam2 2
#define IntelEasyPCCamera 3
#define MicroInnovationIC200 4
#define ViewQuestVQ110 5
};

/*
 * Capture formats offered by the driver, ordered from smallest to largest.
 * Every mode reserves width * height * 3 / 2 bytes per image.
 * NOTE(review): the four leading positional values are presumably width,
 * height, pixelformat and field of struct v4l2_pix_format -- confirm against
 * the V4L2 header.
 * NOTE(review): .priv runs from 3 (160x120) down to 0 (352x288); the entry
 * {0x0003, 0x8500} in spca508_init_data is commented "Video mode: 160x120",
 * so .priv looks like the value for that register -- confirm against the
 * code that consumes it.
 */
static const struct v4l2_pix_format sif_mode[] = {
	{160, 120, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE,
		.bytesperline = 160,
		.sizeimage = 160 * 120 * 3 / 2,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = 3},
	{176, 144, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE,
		.bytesperline = 176,
		.sizeimage = 176 * 144 * 3 / 2,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = 2},
	{320, 240, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE,
		.bytesperline = 320,
		.sizeimage = 320 * 240 * 3 / 2,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = 1},
	{352, 288, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE,
		.bytesperline = 352,
		.sizeimage = 352 * 288 * 3 / 2,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = 0},
};

/* Frame packet header offsets for the spca508 */
#define SPCA508_OFFSET_DATA 37

/*
 * Initialization data: this is the first set-up data written to the
 * device (before the open data).
 *
 * Each entry is a pair of 16-bit words and the table ends with an empty
 * {} entry.
 * NOTE(review): judging by the per-entry comments the second word is the
 * register and the first word the value written to it -- confirm against
 * the code that walks the table.
 */
static const u16 spca508_init_data[][2] = {
	{0x0000, 0x870b},

	{0x0020, 0x8112},	/* Video drop enable, ISO streaming disable */
	{0x0003, 0x8111},	/* Reset compression & memory */
	{0x0000, 0x8110},	/* Disable all outputs */
	/* READ {0x0000, 0x8114} -> 0000: 00 */
	{0x0000, 0x8114},	/* SW GPIO data */
	{0x0008, 0x8110},	/* Enable charge pump output */
	{0x0002, 0x8116},	/* 200 kHz pump clock */
	/* UNKNOWN DIRECTION (URB_FUNCTION_SELECT_INTERFACE:) */
	{0x0003, 0x8111},	/* Reset compression & memory */
	{0x0000, 0x8111},	/* Normal mode (not reset) */
	{0x0098, 0x8110},
		/* Enable charge pump output, sync.serial,external 2x clock */
	{0x000d, 0x8114},	/* SW GPIO data */
	{0x0002, 0x8116},	/* 200 kHz pump clock */
	{0x0020, 0x8112},	/* Video drop enable, ISO streaming disable */
/* --------------------------------------- */
	{0x000f, 0x8402},	/* memory bank */
	{0x0000, 0x8403},	/* ...
address */ /* --------------------------------------- */ /* 0x88__ is Synchronous Serial Interface. */ /* TBD: This table could be expressed more compactly */ /* using spca508_write_i2c_vector(). */ /* TBD: Should see if the values in spca50x_i2c_data */ /* would work with the VQ110 instead of the values */ /* below. */ {0x00c0, 0x8804}, /* SSI slave addr */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ {0x0012, 0x8801}, /* SSI reg addr */ {0x0080, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ {0x0012, 0x8801}, /* SSI reg addr */ {0x0000, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ {0x0011, 0x8801}, /* SSI reg addr */ {0x0040, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0013, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0014, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0015, 0x8801}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0016, 0x8801}, {0x0003, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0017, 0x8801}, {0x0036, 
0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0018, 0x8801}, {0x00ec, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x001a, 0x8801}, {0x0094, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x001b, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0027, 0x8801}, {0x00a2, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0028, 0x8801}, {0x0040, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002a, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002b, 0x8801}, {0x00a8, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002c, 0x8801}, {0x00fe, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002d, 0x8801}, {0x0003, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0038, 0x8801}, {0x0083, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0033, 0x8801}, {0x0081, 0x8800}, /* READ { 
0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0034, 0x8801}, {0x004a, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0039, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0010, 0x8801}, {0x00a8, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0006, 0x8801}, {0x0058, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0000, 0x8801}, {0x0004, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0040, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0041, 0x8801}, {0x000c, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0042, 0x8801}, {0x000c, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0043, 0x8801}, {0x0028, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0044, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0045, 0x8801}, {0x0020, 0x8800}, /* READ { 0x0001, 0x8803 } -> 
0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0046, 0x8801}, {0x0020, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0047, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0048, 0x8801}, {0x004c, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0049, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x004a, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x004b, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* --------------------------------------- */ {0x0012, 0x8700}, /* Clock speed 48Mhz/(2+2)/2= 6 Mhz */ {0x0000, 0x8701}, /* CKx1 clock delay adj */ {0x0000, 0x8701}, /* CKx1 clock delay adj */ {0x0001, 0x870c}, /* CKOx2 output */ /* --------------------------------------- */ {0x0080, 0x8600}, /* Line memory read counter (L) */ {0x0001, 0x8606}, /* reserved */ {0x0064, 0x8607}, /* Line memory read counter (H) 0x6480=25,728 */ {0x002a, 0x8601}, /* CDSP sharp interpolation mode, * line sel for color sep, edge enhance enab */ {0x0000, 0x8602}, /* optical black level for user settng = 0 */ {0x0080, 0x8600}, /* Line memory read counter (L) */ {0x000a, 0x8603}, /* optical black level calc mode: * auto; optical black offset = 10 */ {0x00df, 0x865b}, /* Horiz offset for valid pixels (L)=0xdf */ {0x0012, 0x865c}, /* Vert offset for valid lines (L)=0x12 */ /* The following two lines seem to be the "wrong" 
resolution. */ /* But perhaps these indicate the actual size of the sensor */ /* rather than the size of the current video mode. */ {0x0058, 0x865d}, /* Horiz valid pixels (*4) (L) = 352 */ {0x0048, 0x865e}, /* Vert valid lines (*4) (L) = 288 */ {0x0015, 0x8608}, /* A11 Coef ... */ {0x0030, 0x8609}, {0x00fb, 0x860a}, {0x003e, 0x860b}, {0x00ce, 0x860c}, {0x00f4, 0x860d}, {0x00eb, 0x860e}, {0x00dc, 0x860f}, {0x0039, 0x8610}, {0x0001, 0x8611}, /* R offset for white balance ... */ {0x0000, 0x8612}, {0x0001, 0x8613}, {0x0000, 0x8614}, {0x005b, 0x8651}, /* R gain for white balance ... */ {0x0040, 0x8652}, {0x0060, 0x8653}, {0x0040, 0x8654}, {0x0000, 0x8655}, {0x0001, 0x863f}, /* Fixed gamma correction enable, USB control, * lum filter disable, lum noise clip disable */ {0x00a1, 0x8656}, /* Window1 size 256x256, Windows2 size 64x64, * gamma look-up disable, * new edge enhancement enable */ {0x0018, 0x8657}, /* Edge gain high thresh */ {0x0020, 0x8658}, /* Edge gain low thresh */ {0x000a, 0x8659}, /* Edge bandwidth high threshold */ {0x0005, 0x865a}, /* Edge bandwidth low threshold */ /* -------------------------------- */ {0x0030, 0x8112}, /* Video drop enable, ISO streaming enable */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0xa908, 0x8802}, {0x0034, 0x8801}, /* SSI reg addr */ {0x00ca, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x1f08, 0x8802}, {0x0006, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* ----- Read back coefs we wrote earlier. 
*/ /* READ { 0x0000, 0x8608 } -> 0000: 15 */ /* READ { 0x0000, 0x8609 } -> 0000: 30 */ /* READ { 0x0000, 0x860a } -> 0000: fb */ /* READ { 0x0000, 0x860b } -> 0000: 3e */ /* READ { 0x0000, 0x860c } -> 0000: ce */ /* READ { 0x0000, 0x860d } -> 0000: f4 */ /* READ { 0x0000, 0x860e } -> 0000: eb */ /* READ { 0x0000, 0x860f } -> 0000: dc */ /* READ { 0x0000, 0x8610 } -> 0000: 39 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0xb008, 0x8802}, {0x0006, 0x8801}, {0x007d, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* This chunk is seemingly redundant with */ /* earlier commands (A11 Coef...), but if I disable it, */ /* the image appears too dark. Maybe there was some kind of */ /* reset since the earlier commands, so this is necessary again. */ {0x0015, 0x8608}, {0x0030, 0x8609}, {0xfffb, 0x860a}, {0x003e, 0x860b}, {0xffce, 0x860c}, {0xfff4, 0x860d}, {0xffeb, 0x860e}, {0xffdc, 0x860f}, {0x0039, 0x8610}, {0x0018, 0x8657}, {0x0000, 0x8508}, /* Disable compression. */ /* Previous line was: {0x0021, 0x8508}, * Enable compression. 
*/
	{0x0032, 0x850b},	/* compression stuff */
	{0x0003, 0x8509},	/* compression stuff */
	{0x0011, 0x850a},	/* compression stuff */
	{0x0021, 0x850d},	/* compression stuff */
	{0x0010, 0x850c},	/* compression stuff */
	{0x0003, 0x8500},	/* *** Video mode: 160x120 */
	{0x0001, 0x8501},	/* Hardware-dominated snap control */
	{0x0061, 0x8656},	/* Window1 size 128x128, Window2 size 128x128,
				 * gamma look-up disable,
				 * new edge enhancement enable */
	{0x0018, 0x8617},	/* Window1 start X (*2) */
	{0x0008, 0x8618},	/* Window1 start Y (*2) */
	{0x0061, 0x8656},	/* Window1 size 128x128, Window2 size 128x128,
				 * gamma look-up disable,
				 * new edge enhancement enable */
	{0x0058, 0x8619},	/* Window2 start X (*2) */
	{0x0008, 0x861a},	/* Window2 start Y (*2) */
	{0x00ff, 0x8615},	/* High lum thresh for white balance */
	{0x0000, 0x8616},	/* Low lum thresh for white balance */
	{0x0012, 0x8700},	/* Clock speed 48 MHz/(2+2)/2 = 6 MHz */
	{0x0012, 0x8700},	/* Clock speed 48 MHz/(2+2)/2 = 6 MHz */
	/* READ { 0x0000, 0x8656 } -> 0000: 61 */
	{0x0028, 0x8802},    /* 375 kHz SSI clock, SSI r/w sync with VSYNC */
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 28 */
	{0x1f28, 0x8802},    /* 375 kHz SSI clock, SSI r/w sync with VSYNC */
	{0x0010, 0x8801},	/* SSI reg addr */
	{0x003e, 0x8800},	/* SSI data to write */
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	{0x0028, 0x8802},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 28 */
	{0x1f28, 0x8802},
	{0x0000, 0x8801},
	{0x001f, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	{0x0001, 0x8602},    /* optical black level for user setting = 1 */

	/* Original: */
	{0x0023, 0x8700},	/* Clock speed 48 MHz/(3+2)/4 = 2.4 MHz */
	{0x000f, 0x8602},    /* optical black level for user setting = 15 */

	{0x0028, 0x8802},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 28 */
	{0x1f28, 0x8802},
	{0x0010, 0x8801},
	{0x007b, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	{0x002f, 0x8651},	/* R gain for white
balance ... */
	{0x0080, 0x8653},
	/* READ { 0x0000, 0x8655 } -> 0000: 00 */
	{0x0000, 0x8655},

	{0x0030, 0x8112},	/* Video drop enable, ISO streaming enable */
	{0x0020, 0x8112},	/* Video drop enable, ISO streaming disable */
	/* UNKNOWN DIRECTION (URB_FUNCTION_SELECT_INTERFACE: (ALT=0) ) */
	{}
};

/*
 * Initialization data for Intel EasyPC Camera CS110
 *
 * Each entry is a pair of 16-bit words and the table ends with an empty
 * {} entry.
 * NOTE(review): judging by the per-entry comments the second word is the
 * register and the first word the value written to it -- confirm against
 * the code that walks the table.
 */
static const u16 spca508cs110_init_data[][2] = {
	{0x0000, 0x870b},	/* Reset CTL3 */
	{0x0003, 0x8111},	/* Soft Reset compression, memory, TG & CDSP */
	{0x0000, 0x8111},	/* Normal operation on reset */
	{0x0090, 0x8110},
		 /* External Clock 2x & Synchronous Serial Interface Output */
	{0x0020, 0x8112},	/* Video Drop packet enable */
	{0x0000, 0x8114},	/* Software GPIO output data */
	{0x0001, 0x8114},
	{0x0001, 0x8114},
	{0x0001, 0x8114},
	{0x0003, 0x8114},

	/* Initial sequence Synchronous Serial Interface */
	{0x000f, 0x8402},	/* Memory bank Address */
	{0x0000, 0x8403},	/* Memory bank Address */
	{0x00ba, 0x8804},	/* SSI Slave address */
	{0x0010, 0x8802},	/* 93.75kHz SSI Clock Two DataByte */
	{0x0010, 0x8802},	/* 93.75kHz SSI Clock two DataByte */

	/*
	 * The groups below repeat the pattern 0x8801 / 0x8805 / 0x8800 /
	 * 0x8802.  The Creative Vista table in this file labels 0x8801 as the
	 * SSI register address, 0x8805 as DATA2 and 0x8800 as DATA1 (which
	 * triggers the write).
	 */
	{0x0001, 0x8801},
	{0x000a, 0x8805},	/* a - NWG: Dunno what this is about */
	{0x0000, 0x8800},
	{0x0010, 0x8802},

	{0x0002, 0x8801},
	{0x0000, 0x8805},
	{0x0000, 0x8800},
	{0x0010, 0x8802},

	{0x0003, 0x8801},
	{0x0027, 0x8805},
	{0x0001, 0x8800},
	{0x0010, 0x8802},

	{0x0004, 0x8801},
	{0x0065, 0x8805},
	{0x0001, 0x8800},
	{0x0010, 0x8802},

	{0x0005, 0x8801},
	{0x0003, 0x8805},
	{0x0000, 0x8800},
	{0x0010, 0x8802},

	{0x0006, 0x8801},
	{0x001c, 0x8805},
	{0x0000, 0x8800},
	{0x0010, 0x8802},

	{0x0007, 0x8801},
	{0x002a, 0x8805},
	{0x0000, 0x8800},
	{0x0010, 0x8802},

	{0x0002, 0x8704},	/* External input CKIx1 */
	{0x0001, 0x8606},    /* 1 Line memory Read Counter (H) Result: (d)410 */
	{0x009a, 0x8600},	/* Line memory Read Counter (L) */
	{0x0001, 0x865b},	/* 1 Horizontal Offset for Valid Pixel(L) */
	{0x0003, 0x865c},	/* 3 Vertical Offset for Valid Lines(L) */
	{0x0058, 0x865d},	/* 58 Horizontal Valid Pixel Window(L) */

	{0x0006, 0x8660},	/* Nibble data + input order */

	{0x000a, 0x8602},	/* Optical black level set to 0x0a */
	{0x0000, 0x8603},	/* Optical black level Offset */

/*	{0x0000, 0x8611},	 * 0 R  Offset for white Balance */
/*	{0x0000, 0x8612},	 * 1 Gr Offset for white Balance */
/*	{0x0000, 0x8613},	 * 1f B  Offset for white Balance */
/*	{0x0000, 0x8614},	 * f0 Gb Offset for white Balance */

	/*
	 * NOTE(review): the comments here call 0x8651 the BLUE gain and 0x8653
	 * the RED gain, whereas spca508_init_data and spca508_vista_init_data
	 * label 0x8651 as the R gain (and the Vista table labels 0x8653 as the
	 * B gain).  One set of comments is presumably wrong -- confirm against
	 * the chip documentation.
	 */
	{0x0040, 0x8651},   /* 2b BLUE gain for white balance  good at all 60 */
	{0x0030, 0x8652},	/* 41 Gr Gain for white Balance (L) */
	{0x0035, 0x8653},	/* 26 RED gain for white balance */
	{0x0035, 0x8654},	/* 40 Gb Gain for white Balance (L) */
	{0x0041, 0x863f},
	      /* Fixed Gamma correction enabled (makes colours look better) */

	{0x0000, 0x8655},
		/* High bits for white balance*****brightness control*** */
	{}
};

/*
 * Initialization data for the "sightcam" table.
 * NOTE(review): presumably used for the HamaUSBSightcam subtype -- confirm
 * against the code that selects the table.
 * Same entry layout as the other init tables in this file: pairs of 16-bit
 * words ending with an empty {} entry.
 */
static const u16 spca508_sightcam_init_data[][2] = {
/* This line seems to setup the frame/canvas */
	{0x000f, 0x8402},

/* These 6 lines are needed to startup the webcam */
	{0x0090, 0x8110},
	{0x0001, 0x8114},
	{0x0001, 0x8114},
	{0x0001, 0x8114},
	{0x0003, 0x8114},
	{0x0080, 0x8804},

/* This part seems to make the pictures darker? (autobrightness?) */
	{0x0001, 0x8801},
	{0x0004, 0x8800},
	{0x0003, 0x8801},
	{0x00e0, 0x8800},
	{0x0004, 0x8801},
	{0x00b4, 0x8800},
	{0x0005, 0x8801},
	{0x0000, 0x8800},

	{0x0006, 0x8801},
	{0x00e0, 0x8800},
	{0x0007, 0x8801},
	{0x000c, 0x8800},

/* This section is just needed, it probably
 * does something like the previous section,
 * but the cam won't start if it's not included.
*/ {0x0014, 0x8801}, {0x0008, 0x8800}, {0x0015, 0x8801}, {0x0067, 0x8800}, {0x0016, 0x8801}, {0x0000, 0x8800}, {0x0017, 0x8801}, {0x0020, 0x8800}, {0x0018, 0x8801}, {0x0044, 0x8800}, /* Makes the picture darker - and the * cam won't start if not included */ {0x001e, 0x8801}, {0x00ea, 0x8800}, {0x001f, 0x8801}, {0x0001, 0x8800}, {0x0003, 0x8801}, {0x00e0, 0x8800}, /* seems to place the colors ontop of each other #1 */ {0x0006, 0x8704}, {0x0001, 0x870c}, {0x0016, 0x8600}, {0x0002, 0x8606}, /* if not included the pictures becomes _very_ dark */ {0x0064, 0x8607}, {0x003a, 0x8601}, {0x0000, 0x8602}, /* seems to place the colors ontop of each other #2 */ {0x0016, 0x8600}, {0x0018, 0x8617}, {0x0008, 0x8618}, {0x00a1, 0x8656}, /* webcam won't start if not included */ {0x0007, 0x865b}, {0x0001, 0x865c}, {0x0058, 0x865d}, {0x0048, 0x865e}, /* adjusts the colors */ {0x0049, 0x8651}, {0x0040, 0x8652}, {0x004c, 0x8653}, {0x0040, 0x8654}, {} }; static const u16 spca508_sightcam2_init_data[][2] = { {0x0020, 0x8112}, {0x000f, 0x8402}, {0x0000, 0x8403}, {0x0008, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0009, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000a, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000b, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000c, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000d, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000e, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0007, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000f, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0018, 0x8660}, {0x0010, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0011, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0000, 0x86b0}, {0x0034, 0x86b1}, {0x0000, 0x86b2}, {0x0049, 0x86b3}, {0x0000, 0x86b4}, {0x0000, 0x86b4}, {0x0012, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0013, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0001, 0x86b0}, {0x00aa, 0x86b1}, {0x0000, 0x86b2}, {0x00e4, 0x86b3}, {0x0000, 0x86b4}, {0x0000, 0x86b4}, {0x0018, 
0x8660}, {0x0090, 0x8110}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x0080, 0x8804}, {0x0003, 0x8801}, {0x0012, 0x8800}, {0x0004, 0x8801}, {0x0005, 0x8800}, {0x0005, 0x8801}, {0x0000, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x0000, 0x8800}, {0x0008, 0x8801}, {0x0005, 0x8800}, {0x000a, 0x8700}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x0005, 0x8801}, {0x0047, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x00c0, 0x8800}, {0x0008, 0x8801}, {0x0003, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x0009, 0x8801}, {0x0000, 0x8800}, {0x000a, 0x8801}, {0x0000, 0x8800}, {0x000b, 0x8801}, {0x0000, 0x8800}, {0x000c, 0x8801}, {0x0000, 0x8800}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x000f, 0x8801}, {0x0000, 0x8800}, {0x0010, 0x8801}, {0x0006, 0x8800}, {0x0011, 0x8801}, {0x0006, 0x8800}, {0x0012, 0x8801}, {0x0000, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x000a, 0x8700}, {0x0000, 0x8702}, {0x0000, 0x8703}, {0x00c2, 0x8704}, {0x0001, 0x870c}, {0x0044, 0x8600}, {0x0002, 0x8606}, {0x0064, 0x8607}, {0x003a, 0x8601}, {0x0008, 0x8602}, {0x0044, 0x8600}, {0x0018, 0x8617}, {0x0008, 0x8618}, {0x00a1, 0x8656}, {0x0004, 0x865b}, {0x0002, 0x865c}, {0x0058, 0x865d}, {0x0048, 0x865e}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x002c, 0x860b}, {0x00db, 0x860c}, {0x00f9, 0x860d}, {0x00f1, 0x860e}, {0x00e3, 0x860f}, {0x002c, 0x8610}, {0x006c, 0x8651}, {0x0041, 0x8652}, {0x0059, 0x8653}, {0x0040, 0x8654}, {0x00fa, 0x8611}, {0x00ff, 0x8612}, {0x00f8, 0x8613}, {0x0000, 0x8614}, {0x0001, 0x863f}, {0x0000, 0x8640}, {0x0026, 0x8641}, {0x0045, 0x8642}, {0x0060, 0x8643}, {0x0075, 0x8644}, {0x0088, 0x8645}, {0x009b, 0x8646}, {0x00b0, 0x8647}, {0x00c5, 0x8648}, {0x00d2, 0x8649}, {0x00dc, 0x864a}, {0x00e5, 0x864b}, {0x00eb, 0x864c}, {0x00f0, 0x864d}, {0x00f6, 0x864e}, {0x00fa, 0x864f}, {0x00ff, 0x8650}, {0x0060, 0x8657}, {0x0010, 0x8658}, {0x0018, 0x8659}, {0x0005, 0x865a}, {0x0018, 0x8660}, {0x0003, 0x8509}, {0x0011, 
0x850a}, {0x0032, 0x850b}, {0x0010, 0x850c}, {0x0021, 0x850d}, {0x0001, 0x8500}, {0x0000, 0x8508}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x0039, 0x860b}, {0x00d0, 0x860c}, {0x00f7, 0x860d}, {0x00ed, 0x860e}, {0x00db, 0x860f}, {0x0039, 0x8610}, {0x0012, 0x8657}, {0x000c, 0x8619}, {0x0004, 0x861a}, {0x00a1, 0x8656}, {0x00c8, 0x8615}, {0x0032, 0x8616}, {0x0030, 0x8112}, {0x0020, 0x8112}, {0x0020, 0x8112}, {0x000f, 0x8402}, {0x0000, 0x8403}, {0x0090, 0x8110}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x0080, 0x8804}, {0x0003, 0x8801}, {0x0012, 0x8800}, {0x0004, 0x8801}, {0x0005, 0x8800}, {0x0005, 0x8801}, {0x0047, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x00c0, 0x8800}, {0x0008, 0x8801}, {0x0003, 0x8800}, {0x000a, 0x8700}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x0005, 0x8801}, {0x0047, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x00c0, 0x8800}, {0x0008, 0x8801}, {0x0003, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x0009, 0x8801}, {0x0000, 0x8800}, {0x000a, 0x8801}, {0x0000, 0x8800}, {0x000b, 0x8801}, {0x0000, 0x8800}, {0x000c, 0x8801}, {0x0000, 0x8800}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x000f, 0x8801}, {0x0000, 0x8800}, {0x0010, 0x8801}, {0x0006, 0x8800}, {0x0011, 0x8801}, {0x0006, 0x8800}, {0x0012, 0x8801}, {0x0000, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x000a, 0x8700}, {0x0000, 0x8702}, {0x0000, 0x8703}, {0x00c2, 0x8704}, {0x0001, 0x870c}, {0x0044, 0x8600}, {0x0002, 0x8606}, {0x0064, 0x8607}, {0x003a, 0x8601}, {0x0008, 0x8602}, {0x0044, 0x8600}, {0x0018, 0x8617}, {0x0008, 0x8618}, {0x00a1, 0x8656}, {0x0004, 0x865b}, {0x0002, 0x865c}, {0x0058, 0x865d}, {0x0048, 0x865e}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x002c, 0x860b}, {0x00db, 0x860c}, {0x00f9, 0x860d}, {0x00f1, 0x860e}, {0x00e3, 0x860f}, {0x002c, 0x8610}, {0x006c, 0x8651}, {0x0041, 0x8652}, {0x0059, 0x8653}, {0x0040, 0x8654}, {0x00fa, 0x8611}, {0x00ff, 0x8612}, {0x00f8, 0x8613}, {0x0000, 
0x8614},
	{0x0001, 0x863f},
	{0x0000, 0x8640},
	{0x0026, 0x8641},
	{0x0045, 0x8642},
	{0x0060, 0x8643},
	{0x0075, 0x8644},
	{0x0088, 0x8645},
	{0x009b, 0x8646},
	{0x00b0, 0x8647},
	{0x00c5, 0x8648},
	{0x00d2, 0x8649},
	{0x00dc, 0x864a},
	{0x00e5, 0x864b},
	{0x00eb, 0x864c},
	{0x00f0, 0x864d},
	{0x00f6, 0x864e},
	{0x00fa, 0x864f},
	{0x00ff, 0x8650},
	{0x0060, 0x8657},
	{0x0010, 0x8658},
	{0x0018, 0x8659},
	{0x0005, 0x865a},
	{0x0018, 0x8660},
	{0x0003, 0x8509},
	{0x0011, 0x850a},
	{0x0032, 0x850b},
	{0x0010, 0x850c},
	{0x0021, 0x850d},
	{0x0001, 0x8500},
	{0x0000, 0x8508},

	{0x0012, 0x8608},
	{0x002c, 0x8609},
	{0x0002, 0x860a},
	{0x0039, 0x860b},
	{0x00d0, 0x860c},
	{0x00f7, 0x860d},
	{0x00ed, 0x860e},
	{0x00db, 0x860f},
	{0x0039, 0x8610},
	{0x0012, 0x8657},
	{0x0064, 0x8619},

/* This line starts it all, it is not needed here */
/* since it has been built into the driver */
/* jfm: don't start now */
/*	{0x0030, 0x8112}, */
	{}
};

/*
 * Initialization data for Creative Webcam Vista
 *
 * Each entry is a pair of 16-bit words and the table ends with an empty
 * {} entry.
 * NOTE(review): judging by the per-entry comments the second word is the
 * register and the first word the value written to it -- confirm against
 * the code that walks the table.
 * The "READ" comments record register reads seen between the writes; they
 * are comments only and are not part of the table.
 */
static const u16 spca508_vista_init_data[][2] = {
	{0x0008, 0x8200},	/* Clear register */
	{0x0000, 0x870b},	/* Reset CTL3 */
	{0x0020, 0x8112},	/* Video Drop packet enable */
	{0x0003, 0x8111},	/* Soft Reset compression, memory, TG & CDSP */
	{0x0000, 0x8110},	/* Disable everything */
	{0x0000, 0x8114},	/* Software GPIO output data */
	{0x0000, 0x8114},

	{0x0003, 0x8111},
	{0x0000, 0x8111},
	{0x0090, 0x8110},    /* Enable: SSI output, External 2X clock output */
	{0x0020, 0x8112},
	{0x0000, 0x8114},
	{0x0001, 0x8114},
	{0x0001, 0x8114},
	{0x0001, 0x8114},
	{0x0003, 0x8114},

	{0x000f, 0x8402},	/* Memory bank Address */
	{0x0000, 0x8403},	/* Memory bank Address */
	{0x00ba, 0x8804},	/* SSI Slave address */
	{0x0010, 0x8802},	/* 93.75kHz SSI Clock Two DataByte */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},	/* Will write 2 bytes (DATA1+DATA2) */
	{0x0020, 0x8801},	/* Register address for SSI read/write */
	{0x0044, 0x8805},	/* DATA2 */
	{0x0004, 0x8800},	/* DATA1 -> write triggered */
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x0009, 0x8801},
	{0x0042, 0x8805},
	{0x0001, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x003c, 0x8801},
	{0x0001, 0x8805},
	{0x0000, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x0001, 0x8801},
	{0x000a, 0x8805},
	{0x0000, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x0002, 0x8801},
	{0x0000, 0x8805},
	{0x0000, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x0003, 0x8801},
	{0x0027, 0x8805},
	{0x0001, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x0004, 0x8801},
	{0x0065, 0x8805},
	{0x0001, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x0005, 0x8801},
	{0x0003, 0x8805},
	{0x0000, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x0006, 0x8801},
	{0x001c, 0x8805},
	{0x0000, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x0007, 0x8801},
	{0x002a, 0x8805},
	{0x0000, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x000e, 0x8801},
	{0x0000, 0x8805},
	{0x0000, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x0028, 0x8801},
	{0x002e, 0x8805},
	{0x0000, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x0039, 0x8801},
	{0x0013, 0x8805},
	{0x0000, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x003b, 0x8801},
	{0x000c, 0x8805},
	{0x0000, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x0035, 0x8801},
	{0x0028, 0x8805},
	{0x0000, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	/* READ { 0x0001, 0x8803 } -> 0000: 00 */
	/* READ { 0x0001, 0x8802 } -> 0000: 10 */
	{0x0010, 0x8802},
	{0x0009, 0x8801},
	{0x0042, 0x8805},
	{0x0001, 0x8800},
	/* READ { 0x0001, 0x8803 } -> 0000: 00 */

	{0x0050, 0x8703},
	{0x0002, 0x8704},	/* External input CKIx1 */
	{0x0001, 0x870c},	/* Select CKOx2 output */
	{0x009a, 0x8600},	/* Line memory Read Counter (L) */
	{0x0001, 0x8606},    /* 1 Line memory Read Counter (H) Result: (d)410 */
	{0x0023, 0x8601},
	{0x0010, 0x8602},
	{0x000a, 0x8603},
	{0x009a, 0x8600},
	{0x0001, 0x865b},	/* 1 Horizontal Offset for Valid Pixel(L) */
	{0x0003, 0x865c},	/* Vertical offset for valid lines (L) */
	{0x0058, 0x865d},	/* Horizontal valid pixels window (L) */
	{0x0048, 0x865e},	/* Vertical valid lines window (L) */
	{0x0000, 0x865f},

	{0x0006, 0x8660},
		    /* Enable nibble data input, select nibble input order */

	{0x0013, 0x8608},	/* A11 Coefficients for color correction */
	{0x0028, 0x8609},
		    /* Note: these values are confirmed at the end of array */
	{0x0005, 0x860a},	/* ... */
	{0x0025, 0x860b},
	{0x00e1, 0x860c},
	{0x00fa, 0x860d},
	{0x00f4, 0x860e},
	{0x00e8, 0x860f},
	{0x0025, 0x8610},	/* A33 Coef.
*/
	{0x00fc, 0x8611},	/* White balance offset: R */
	{0x0001, 0x8612},	/* White balance offset: Gr */
	{0x00fe, 0x8613},	/* White balance offset: B */
	{0x0000, 0x8614},	/* White balance offset: Gb */

	{0x0064, 0x8651},	/* R gain for white balance (L) */
	{0x0040, 0x8652},	/* Gr gain for white balance (L) */
	{0x0066, 0x8653},	/* B gain for white balance (L) */
	{0x0040, 0x8654},	/* Gb gain for white balance (L) */
	{0x0001, 0x863f},	/* Enable fixed gamma correction */

	{0x00a1, 0x8656},	/* Size - Window1: 256x256, Window2: 128x128,
				 * UV division: UV no change,
				 * Enable New edge enhancement */
	{0x0018, 0x8657},	/* Edge gain high threshold */
	{0x0020, 0x8658},	/* Edge gain low threshold */
	{0x000a, 0x8659},	/* Edge bandwidth high threshold */
	{0x0005, 0x865a},	/* Edge bandwidth low threshold */
	{0x0064, 0x8607},	/* UV filter enable */

	{0x0016, 0x8660},
	{0x0000, 0x86b0},	/* Bad pixels compensation address */
	{0x00dc, 0x86b1},	/* X coord for bad pixels compensation (L) */
	{0x0000, 0x86b2},
	{0x0009, 0x86b3},	/* Y coord for bad pixels compensation (L) */
	{0x0000, 0x86b4},

	{0x0001, 0x86b0},
	{0x00f5, 0x86b1},
	{0x0000, 0x86b2},
	{0x00c6, 0x86b3},
	{0x0000, 0x86b4},

	{0x0002, 0x86b0},
	{0x001c, 0x86b1},
	{0x0001, 0x86b2},
	{0x00d7, 0x86b3},
	{0x0000, 0x86b4},

	{0x0003, 0x86b0},
	{0x001c, 0x86b1},
	{0x0001, 0x86b2},
	{0x00d8, 0x86b3},
	{0x0000, 0x86b4},

	{0x0004, 0x86b0},
	{0x001d, 0x86b1},
	{0x0001, 0x86b2},
	{0x00d8, 0x86b3},
	{0x0000, 0x86b4},
	{0x001e, 0x8660},

	/*
	 * NOTE(review): the values logged below (13 28 05 25 e1 fa f4 e8 25)
	 * equal the nine colour-correction coefficients written above to
	 * 0x8608..0x8610, but from the third line on the logged addresses read
	 * 0x8610..0x8616 instead of 0x860a..0x8610.  The addresses in this log
	 * are presumably mis-transcribed -- confirm before relying on them.
	 */
	/* READ { 0x0000, 0x8608 } -> 0000: 13 */
	/* READ { 0x0000, 0x8609 } -> 0000: 28 */
	/* READ { 0x0000, 0x8610 } -> 0000: 05 */
	/* READ { 0x0000, 0x8611 } -> 0000: 25 */
	/* READ { 0x0000, 0x8612 } -> 0000: e1 */
	/* READ { 0x0000, 0x8613 } -> 0000: fa */
	/* READ { 0x0000, 0x8614 } -> 0000: f4 */
	/* READ { 0x0000, 0x8615 } -> 0000: e8 */
	/* READ { 0x0000, 0x8616 } -> 0000: 25 */
	{}
};

static int reg_write(struct gspca_dev *gspca_dev, u16 index, u16 value)
{
	int ret;
	struct usb_device *dev = gspca_dev->dev;

	ret = usb_control_msg(dev,
usb_sndctrlpipe(dev, 0), 0, /* request */ USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); gspca_dbg(gspca_dev, D_USBO, "reg write i:0x%04x = 0x%02x\n", index, value); if (ret < 0) pr_err("reg write: error %d\n", ret); return ret; } /* read 1 byte */ /* returns: negative is error, pos or zero is data */ static int reg_read(struct gspca_dev *gspca_dev, u16 index) /* wIndex */ { int ret; ret = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), 0, /* register */ USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, /* value */ index, gspca_dev->usb_buf, 1, 500); /* timeout */ gspca_dbg(gspca_dev, D_USBI, "reg read i:%04x --> %02x\n", index, gspca_dev->usb_buf[0]); if (ret < 0) { pr_err("reg_read err %d\n", ret); return ret; } return gspca_dev->usb_buf[0]; } /* send 1 or 2 bytes to the sensor via the Synchronous Serial Interface */ static int ssi_w(struct gspca_dev *gspca_dev, u16 reg, u16 val) { int ret, retry; ret = reg_write(gspca_dev, 0x8802, reg >> 8); if (ret < 0) goto out; ret = reg_write(gspca_dev, 0x8801, reg & 0x00ff); if (ret < 0) goto out; if ((reg & 0xff00) == 0x1000) { /* if 2 bytes */ ret = reg_write(gspca_dev, 0x8805, val & 0x00ff); if (ret < 0) goto out; val >>= 8; } ret = reg_write(gspca_dev, 0x8800, val); if (ret < 0) goto out; /* poll until not busy */ retry = 10; for (;;) { ret = reg_read(gspca_dev, 0x8803); if (ret < 0) break; if (gspca_dev->usb_buf[0] == 0) break; if (--retry <= 0) { gspca_err(gspca_dev, "ssi_w busy %02x\n", gspca_dev->usb_buf[0]); ret = -1; break; } msleep(8); } out: return ret; } static int write_vector(struct gspca_dev *gspca_dev, const u16 (*data)[2]) { int ret = 0; while ((*data)[1] != 0) { if ((*data)[1] & 0x8000) { if ((*data)[1] == 0xdd00) /* delay */ msleep((*data)[0]); else ret = reg_write(gspca_dev, (*data)[1], (*data)[0]); } else { ret = ssi_w(gspca_dev, (*data)[1], (*data)[0]); } if (ret < 0) break; data++; } return ret; } /* this function is called at probe time */ static int 
sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; const u16 (*init_data)[2]; static const u16 (*(init_data_tb[]))[2] = { spca508_vista_init_data, /* CreativeVista 0 */ spca508_sightcam_init_data, /* HamaUSBSightcam 1 */ spca508_sightcam2_init_data, /* HamaUSBSightcam2 2 */ spca508cs110_init_data, /* IntelEasyPCCamera 3 */ spca508cs110_init_data, /* MicroInnovationIC200 4 */ spca508_init_data, /* ViewQuestVQ110 5 */ }; int data1, data2; /* Read from global register the USB product and vendor IDs, just to * prove that we can communicate with the device. This works, which * confirms at we are communicating properly and that the device * is a 508. */ data1 = reg_read(gspca_dev, 0x8104); data2 = reg_read(gspca_dev, 0x8105); gspca_dbg(gspca_dev, D_PROBE, "Webcam Vendor ID: 0x%02x%02x\n", data2, data1); data1 = reg_read(gspca_dev, 0x8106); data2 = reg_read(gspca_dev, 0x8107); gspca_dbg(gspca_dev, D_PROBE, "Webcam Product ID: 0x%02x%02x\n", data2, data1); data1 = reg_read(gspca_dev, 0x8621); gspca_dbg(gspca_dev, D_PROBE, "Window 1 average luminance: %d\n", data1); cam = &gspca_dev->cam; cam->cam_mode = sif_mode; cam->nmodes = ARRAY_SIZE(sif_mode); sd->subtype = id->driver_info; init_data = init_data_tb[sd->subtype]; return write_vector(gspca_dev, init_data); } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { return 0; } static int sd_start(struct gspca_dev *gspca_dev) { int mode; mode = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; reg_write(gspca_dev, 0x8500, mode); switch (mode) { case 0: case 1: reg_write(gspca_dev, 0x8700, 0x28); /* clock */ break; default: /* case 2: */ /* case 3: */ reg_write(gspca_dev, 0x8700, 0x23); /* clock */ break; } reg_write(gspca_dev, 0x8112, 0x10 | 0x20); return 0; } static void sd_stopN(struct gspca_dev *gspca_dev) { /* Video ISO disable, Video Drop Packet enable: */ reg_write(gspca_dev, 0x8112, 
0x20); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { switch (data[0]) { case 0: /* start of frame */ gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); data += SPCA508_OFFSET_DATA; len -= SPCA508_OFFSET_DATA; gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); break; case 0xff: /* drop */ break; default: data += 1; len -= 1; gspca_frame_add(gspca_dev, INTER_PACKET, data, len); break; } } static void setbrightness(struct gspca_dev *gspca_dev, s32 brightness) { /* MX seem contrast */ reg_write(gspca_dev, 0x8651, brightness); reg_write(gspca_dev, 0x8652, brightness); reg_write(gspca_dev, 0x8653, brightness); reg_write(gspca_dev, 0x8654, brightness); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 5); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x0130, 0x0130), .driver_info = HamaUSBSightcam}, {USB_DEVICE(0x041e, 0x4018), .driver_info = CreativeVista}, {USB_DEVICE(0x0733, 0x0110), .driver_info = ViewQuestVQ110}, {USB_DEVICE(0x0af9, 
0x0010), .driver_info = HamaUSBSightcam}, {USB_DEVICE(0x0af9, 0x0011), .driver_info = HamaUSBSightcam2}, {USB_DEVICE(0x8086, 0x0110), .driver_info = IntelEasyPCCamera}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
4 1 3 1 4 2 3 1 1 13 10 8 7 6 4 2 2 2 2 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 
1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 // SPDX-License-Identifier: GPL-2.0-only /* * stack_user.c * * Code which interfaces ocfs2 with fs/dlm and a userspace stack. * * Copyright (C) 2007 Oracle. All rights reserved. */ #include <linux/module.h> #include <linux/fs.h> #include <linux/filelock.h> #include <linux/miscdevice.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/reboot.h> #include <linux/sched.h> #include <linux/uaccess.h> #include "stackglue.h" #include <linux/dlm_plock.h> /* * The control protocol starts with a handshake. Until the handshake * is complete, the control device will fail all write(2)s. * * The handshake is simple. First, the client reads until EOF. Each line * of output is a supported protocol tag. All protocol tags are a single * character followed by a two hex digit version number. Currently the * only things supported is T01, for "Text-base version 0x01". Next, the * client writes the version they would like to use, including the newline. * Thus, the protocol tag is 'T01\n'. If the version tag written is * unknown, -EINVAL is returned. Once the negotiation is complete, the * client can start sending messages. * * The T01 protocol has three messages. First is the "SETN" message. * It has the following syntax: * * SETN<space><8-char-hex-nodenum><newline> * * This is 14 characters. * * The "SETN" message must be the first message following the protocol. * It tells ocfs2_control the local node number. * * Next comes the "SETV" message. 
It has the following syntax:
 *
 *  SETV<space><2-char-hex-major><space><2-char-hex-minor><newline>
 *
 * This is 11 characters.
 *
 * The "SETV" message sets the filesystem locking protocol version as
 * negotiated by the client.  The client negotiates based on the maximum
 * version advertised in /sys/fs/ocfs2/max_locking_protocol.  The major
 * number from the "SETV" message must match
 * ocfs2_user_plugin.sp_max_proto.pv_major, and the minor number
 * must be less than or equal to ...sp_max_proto.pv_minor.
 *
 * Once this information has been set, mounts will be allowed.  From this
 * point on, the "DOWN" message can be sent for node down notification.
 * It has the following syntax:
 *
 *  DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline>
 *
 * eg:
 *
 *  DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n
 *
 * This is 47 characters.
 */

/*
 * Whether or not the client has done the handshake.
 * For now, we have just one protocol version.
 */
#define OCFS2_CONTROL_PROTO "T01\n"
#define OCFS2_CONTROL_PROTO_LEN 4

/*
 * Handshake states, in the order a control file moves through them:
 * INVALID until the protocol list has been read, READ once it has,
 * PROTOCOL after the client wrote back a supported protocol tag, and
 * VALID once the node number and version have been installed.
 */
#define OCFS2_CONTROL_HANDSHAKE_INVALID (0)
#define OCFS2_CONTROL_HANDSHAKE_READ (1)
#define OCFS2_CONTROL_HANDSHAKE_PROTOCOL (2)
#define OCFS2_CONTROL_HANDSHAKE_VALID (3)

/* Messages: 4-character op tags and the exact length of each message */
#define OCFS2_CONTROL_MESSAGE_OP_LEN 4
#define OCFS2_CONTROL_MESSAGE_SETNODE_OP "SETN"
#define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14
#define OCFS2_CONTROL_MESSAGE_SETVERSION_OP "SETV"
#define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN 11
#define OCFS2_CONTROL_MESSAGE_DOWN_OP "DOWN"
#define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN 47
/* Field widths inside the messages, in characters */
#define OCFS2_TEXT_UUID_LEN 32
#define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2
#define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8

/* Name of the dlm lock used by version_lock()/version_unlock() */
#define VERSION_LOCK "version_lock"

enum ocfs2_connection_type {
	WITH_CONTROLD,
	NO_CONTROLD
};

/*
 * ocfs2_live_connection is refcounted because the filesystem and
 * miscdevice sides can detach in different order.  Let's just be safe.
*/ struct ocfs2_live_connection { struct list_head oc_list; struct ocfs2_cluster_connection *oc_conn; enum ocfs2_connection_type oc_type; atomic_t oc_this_node; int oc_our_slot; struct dlm_lksb oc_version_lksb; char oc_lvb[DLM_LVB_LEN]; struct completion oc_sync_wait; wait_queue_head_t oc_wait; }; struct ocfs2_control_private { struct list_head op_list; int op_state; int op_this_node; struct ocfs2_protocol_version op_proto; }; /* SETN<space><8-char-hex-nodenum><newline> */ struct ocfs2_control_message_setn { char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; char space; char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; char newline; }; /* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */ struct ocfs2_control_message_setv { char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; char space1; char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; char space2; char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; char newline; }; /* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */ struct ocfs2_control_message_down { char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; char space1; char uuid[OCFS2_TEXT_UUID_LEN]; char space2; char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; char newline; }; union ocfs2_control_message { char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; struct ocfs2_control_message_setn u_setn; struct ocfs2_control_message_setv u_setv; struct ocfs2_control_message_down u_down; }; static struct ocfs2_stack_plugin ocfs2_user_plugin; static atomic_t ocfs2_control_opened; static int ocfs2_control_this_node = -1; static struct ocfs2_protocol_version running_proto; static LIST_HEAD(ocfs2_live_connection_list); static LIST_HEAD(ocfs2_control_private_list); static DEFINE_MUTEX(ocfs2_control_lock); static inline void ocfs2_control_set_handshake_state(struct file *file, int state) { struct ocfs2_control_private *p = file->private_data; p->op_state = state; } static inline int ocfs2_control_get_handshake_state(struct file *file) { struct ocfs2_control_private *p = file->private_data; return 
p->op_state; } static struct ocfs2_live_connection *ocfs2_connection_find(const char *name) { size_t len = strlen(name); struct ocfs2_live_connection *c; BUG_ON(!mutex_is_locked(&ocfs2_control_lock)); list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) { if ((c->oc_conn->cc_namelen == len) && !strncmp(c->oc_conn->cc_name, name, len)) return c; } return NULL; } /* * ocfs2_live_connection structures are created underneath the ocfs2 * mount path. Since the VFS prevents multiple calls to * fill_super(), we can't get dupes here. */ static int ocfs2_live_connection_attach(struct ocfs2_cluster_connection *conn, struct ocfs2_live_connection *c) { int rc = 0; mutex_lock(&ocfs2_control_lock); c->oc_conn = conn; if ((c->oc_type == NO_CONTROLD) || atomic_read(&ocfs2_control_opened)) list_add(&c->oc_list, &ocfs2_live_connection_list); else { printk(KERN_ERR "ocfs2: Userspace control daemon is not present\n"); rc = -ESRCH; } mutex_unlock(&ocfs2_control_lock); return rc; } /* * This function disconnects the cluster connection from ocfs2_control. * Afterwards, userspace can't affect the cluster connection. 
*/ static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c) { mutex_lock(&ocfs2_control_lock); list_del_init(&c->oc_list); c->oc_conn = NULL; mutex_unlock(&ocfs2_control_lock); kfree(c); } static int ocfs2_control_cfu(void *target, size_t target_len, const char __user *buf, size_t count) { /* The T01 expects write(2) calls to have exactly one command */ if ((count != target_len) || (count > sizeof(union ocfs2_control_message))) return -EINVAL; if (copy_from_user(target, buf, target_len)) return -EFAULT; return 0; } static ssize_t ocfs2_control_validate_protocol(struct file *file, const char __user *buf, size_t count) { ssize_t ret; char kbuf[OCFS2_CONTROL_PROTO_LEN]; ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN, buf, count); if (ret) return ret; if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN)) return -EINVAL; ocfs2_control_set_handshake_state(file, OCFS2_CONTROL_HANDSHAKE_PROTOCOL); return count; } static void ocfs2_control_send_down(const char *uuid, int nodenum) { struct ocfs2_live_connection *c; mutex_lock(&ocfs2_control_lock); c = ocfs2_connection_find(uuid); if (c) { BUG_ON(c->oc_conn == NULL); c->oc_conn->cc_recovery_handler(nodenum, c->oc_conn->cc_recovery_data); } mutex_unlock(&ocfs2_control_lock); } /* * Called whenever configuration elements are sent to /dev/ocfs2_control. * If all configuration elements are present, try to set the global * values. If there is a problem, return an error. Skip any missing * elements, and only bump ocfs2_control_opened when we have all elements * and are successful. 
*/ static int ocfs2_control_install_private(struct file *file) { int rc = 0; int set_p = 1; struct ocfs2_control_private *p = file->private_data; BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL); mutex_lock(&ocfs2_control_lock); if (p->op_this_node < 0) { set_p = 0; } else if ((ocfs2_control_this_node >= 0) && (ocfs2_control_this_node != p->op_this_node)) { rc = -EINVAL; goto out_unlock; } if (!p->op_proto.pv_major) { set_p = 0; } else if (!list_empty(&ocfs2_live_connection_list) && ((running_proto.pv_major != p->op_proto.pv_major) || (running_proto.pv_minor != p->op_proto.pv_minor))) { rc = -EINVAL; goto out_unlock; } if (set_p) { ocfs2_control_this_node = p->op_this_node; running_proto.pv_major = p->op_proto.pv_major; running_proto.pv_minor = p->op_proto.pv_minor; } out_unlock: mutex_unlock(&ocfs2_control_lock); if (!rc && set_p) { /* We set the global values successfully */ atomic_inc(&ocfs2_control_opened); ocfs2_control_set_handshake_state(file, OCFS2_CONTROL_HANDSHAKE_VALID); } return rc; } static int ocfs2_control_get_this_node(void) { int rc; mutex_lock(&ocfs2_control_lock); if (ocfs2_control_this_node < 0) rc = -EINVAL; else rc = ocfs2_control_this_node; mutex_unlock(&ocfs2_control_lock); return rc; } static int ocfs2_control_do_setnode_msg(struct file *file, struct ocfs2_control_message_setn *msg) { long nodenum; char *ptr = NULL; struct ocfs2_control_private *p = file->private_data; if (ocfs2_control_get_handshake_state(file) != OCFS2_CONTROL_HANDSHAKE_PROTOCOL) return -EINVAL; if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP, OCFS2_CONTROL_MESSAGE_OP_LEN)) return -EINVAL; if ((msg->space != ' ') || (msg->newline != '\n')) return -EINVAL; msg->space = msg->newline = '\0'; nodenum = simple_strtol(msg->nodestr, &ptr, 16); if (!ptr || *ptr) return -EINVAL; if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || (nodenum > INT_MAX) || (nodenum < 0)) return -ERANGE; p->op_this_node = nodenum; return ocfs2_control_install_private(file); } static int 
ocfs2_control_do_setversion_msg(struct file *file, struct ocfs2_control_message_setv *msg) { long major, minor; char *ptr = NULL; struct ocfs2_control_private *p = file->private_data; struct ocfs2_protocol_version *max = &ocfs2_user_plugin.sp_max_proto; if (ocfs2_control_get_handshake_state(file) != OCFS2_CONTROL_HANDSHAKE_PROTOCOL) return -EINVAL; if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP, OCFS2_CONTROL_MESSAGE_OP_LEN)) return -EINVAL; if ((msg->space1 != ' ') || (msg->space2 != ' ') || (msg->newline != '\n')) return -EINVAL; msg->space1 = msg->space2 = msg->newline = '\0'; major = simple_strtol(msg->major, &ptr, 16); if (!ptr || *ptr) return -EINVAL; minor = simple_strtol(msg->minor, &ptr, 16); if (!ptr || *ptr) return -EINVAL; /* * The major must be between 1 and 255, inclusive. The minor * must be between 0 and 255, inclusive. The version passed in * must be within the maximum version supported by the filesystem. */ if ((major == LONG_MIN) || (major == LONG_MAX) || (major > (u8)-1) || (major < 1)) return -ERANGE; if ((minor == LONG_MIN) || (minor == LONG_MAX) || (minor > (u8)-1) || (minor < 0)) return -ERANGE; if ((major != max->pv_major) || (minor > max->pv_minor)) return -EINVAL; p->op_proto.pv_major = major; p->op_proto.pv_minor = minor; return ocfs2_control_install_private(file); } static int ocfs2_control_do_down_msg(struct file *file, struct ocfs2_control_message_down *msg) { long nodenum; char *p = NULL; if (ocfs2_control_get_handshake_state(file) != OCFS2_CONTROL_HANDSHAKE_VALID) return -EINVAL; if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP, OCFS2_CONTROL_MESSAGE_OP_LEN)) return -EINVAL; if ((msg->space1 != ' ') || (msg->space2 != ' ') || (msg->newline != '\n')) return -EINVAL; msg->space1 = msg->space2 = msg->newline = '\0'; nodenum = simple_strtol(msg->nodestr, &p, 16); if (!p || *p) return -EINVAL; if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || (nodenum > INT_MAX) || (nodenum < 0)) return -ERANGE; 
ocfs2_control_send_down(msg->uuid, nodenum); return 0; } static ssize_t ocfs2_control_message(struct file *file, const char __user *buf, size_t count) { ssize_t ret; union ocfs2_control_message msg; /* Try to catch padding issues */ WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) != (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1))); memset(&msg, 0, sizeof(union ocfs2_control_message)); ret = ocfs2_control_cfu(&msg, count, buf, count); if (ret) goto out; if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) && !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP, OCFS2_CONTROL_MESSAGE_OP_LEN)) ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn); else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) && !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP, OCFS2_CONTROL_MESSAGE_OP_LEN)) ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv); else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) && !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP, OCFS2_CONTROL_MESSAGE_OP_LEN)) ret = ocfs2_control_do_down_msg(file, &msg.u_down); else ret = -EINVAL; out: return ret ? ret : count; } static ssize_t ocfs2_control_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { ssize_t ret; switch (ocfs2_control_get_handshake_state(file)) { case OCFS2_CONTROL_HANDSHAKE_INVALID: ret = -EINVAL; break; case OCFS2_CONTROL_HANDSHAKE_READ: ret = ocfs2_control_validate_protocol(file, buf, count); break; case OCFS2_CONTROL_HANDSHAKE_PROTOCOL: case OCFS2_CONTROL_HANDSHAKE_VALID: ret = ocfs2_control_message(file, buf, count); break; default: BUG(); ret = -EIO; break; } return ret; } /* * This is a naive version. If we ever have a new protocol, we'll expand * it. Probably using seq_file. */ static ssize_t ocfs2_control_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { ssize_t ret; ret = simple_read_from_buffer(buf, count, ppos, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN); /* Have we read the whole protocol list? 
*/ if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN) ocfs2_control_set_handshake_state(file, OCFS2_CONTROL_HANDSHAKE_READ); return ret; } static int ocfs2_control_release(struct inode *inode, struct file *file) { struct ocfs2_control_private *p = file->private_data; mutex_lock(&ocfs2_control_lock); if (ocfs2_control_get_handshake_state(file) != OCFS2_CONTROL_HANDSHAKE_VALID) goto out; if (atomic_dec_and_test(&ocfs2_control_opened)) { if (!list_empty(&ocfs2_live_connection_list)) { /* XXX: Do bad things! */ printk(KERN_ERR "ocfs2: Unexpected release of ocfs2_control!\n" " Loss of cluster connection requires " "an emergency restart!\n"); emergency_restart(); } /* * Last valid close clears the node number and resets * the locking protocol version */ ocfs2_control_this_node = -1; running_proto.pv_major = 0; running_proto.pv_minor = 0; } out: list_del_init(&p->op_list); file->private_data = NULL; mutex_unlock(&ocfs2_control_lock); kfree(p); return 0; } static int ocfs2_control_open(struct inode *inode, struct file *file) { struct ocfs2_control_private *p; p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL); if (!p) return -ENOMEM; p->op_this_node = -1; mutex_lock(&ocfs2_control_lock); file->private_data = p; list_add(&p->op_list, &ocfs2_control_private_list); mutex_unlock(&ocfs2_control_lock); return 0; } static const struct file_operations ocfs2_control_fops = { .open = ocfs2_control_open, .release = ocfs2_control_release, .read = ocfs2_control_read, .write = ocfs2_control_write, .owner = THIS_MODULE, .llseek = default_llseek, }; static struct miscdevice ocfs2_control_device = { .minor = MISC_DYNAMIC_MINOR, .name = "ocfs2_control", .fops = &ocfs2_control_fops, }; static int ocfs2_control_init(void) { int rc; atomic_set(&ocfs2_control_opened, 0); rc = misc_register(&ocfs2_control_device); if (rc) printk(KERN_ERR "ocfs2: Unable to register ocfs2_control device " "(errno %d)\n", -rc); return rc; } static void ocfs2_control_exit(void) { 
misc_deregister(&ocfs2_control_device); } static void fsdlm_lock_ast_wrapper(void *astarg) { struct ocfs2_dlm_lksb *lksb = astarg; int status = lksb->lksb_fsdlm.sb_status; /* * For now we're punting on the issue of other non-standard errors * where we can't tell if the unlock_ast or lock_ast should be called. * The main "other error" that's possible is EINVAL which means the * function was called with invalid args, which shouldn't be possible * since the caller here is under our control. Other non-standard * errors probably fall into the same category, or otherwise are fatal * which means we can't carry on anyway. */ if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) lksb->lksb_conn->cc_proto->lp_unlock_ast(lksb, 0); else lksb->lksb_conn->cc_proto->lp_lock_ast(lksb); } static void fsdlm_blocking_ast_wrapper(void *astarg, int level) { struct ocfs2_dlm_lksb *lksb = astarg; lksb->lksb_conn->cc_proto->lp_blocking_ast(lksb, level); } static int user_dlm_lock(struct ocfs2_cluster_connection *conn, int mode, struct ocfs2_dlm_lksb *lksb, u32 flags, void *name, unsigned int namelen) { if (!lksb->lksb_fsdlm.sb_lvbptr) lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + sizeof(struct dlm_lksb); return dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm, flags|DLM_LKF_NODLCKWT, name, namelen, 0, fsdlm_lock_ast_wrapper, lksb, fsdlm_blocking_ast_wrapper); } static int user_dlm_unlock(struct ocfs2_cluster_connection *conn, struct ocfs2_dlm_lksb *lksb, u32 flags) { return dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid, flags, &lksb->lksb_fsdlm, lksb); } static int user_dlm_lock_status(struct ocfs2_dlm_lksb *lksb) { return lksb->lksb_fsdlm.sb_status; } static int user_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb) { int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID; return !invalid; } static void *user_dlm_lvb(struct ocfs2_dlm_lksb *lksb) { if (!lksb->lksb_fsdlm.sb_lvbptr) lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + sizeof(struct dlm_lksb); return (void 
*)(lksb->lksb_fsdlm.sb_lvbptr); } static void user_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb) { } static int user_plock(struct ocfs2_cluster_connection *conn, u64 ino, struct file *file, int cmd, struct file_lock *fl) { /* * This more or less just demuxes the plock request into any * one of three dlm calls. * * Internally, fs/dlm will pass these to a misc device, which * a userspace daemon will read and write to. */ if (cmd == F_CANCELLK) return dlm_posix_cancel(conn->cc_lockspace, ino, file, fl); else if (IS_GETLK(cmd)) return dlm_posix_get(conn->cc_lockspace, ino, file, fl); else if (lock_is_unlock(fl)) return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl); else return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl); } /* * Compare a requested locking protocol version against the current one. * * If the major numbers are different, they are incompatible. * If the current minor is greater than the request, they are incompatible. * If the current minor is less than or equal to the request, they are * compatible, and the requester should run at the current minor version. */ static int fs_protocol_compare(struct ocfs2_protocol_version *existing, struct ocfs2_protocol_version *request) { if (existing->pv_major != request->pv_major) return 1; if (existing->pv_minor > request->pv_minor) return 1; if (existing->pv_minor < request->pv_minor) request->pv_minor = existing->pv_minor; return 0; } static void lvb_to_version(char *lvb, struct ocfs2_protocol_version *ver) { struct ocfs2_protocol_version *pv = (struct ocfs2_protocol_version *)lvb; /* * ocfs2_protocol_version has two u8 variables, so we don't * need any endian conversion. */ ver->pv_major = pv->pv_major; ver->pv_minor = pv->pv_minor; } static void version_to_lvb(struct ocfs2_protocol_version *ver, char *lvb) { struct ocfs2_protocol_version *pv = (struct ocfs2_protocol_version *)lvb; /* * ocfs2_protocol_version has two u8 variables, so we don't * need any endian conversion. 
*/ pv->pv_major = ver->pv_major; pv->pv_minor = ver->pv_minor; } static void sync_wait_cb(void *arg) { struct ocfs2_cluster_connection *conn = arg; struct ocfs2_live_connection *lc = conn->cc_private; complete(&lc->oc_sync_wait); } static int sync_unlock(struct ocfs2_cluster_connection *conn, struct dlm_lksb *lksb, char *name) { int error; struct ocfs2_live_connection *lc = conn->cc_private; error = dlm_unlock(conn->cc_lockspace, lksb->sb_lkid, 0, lksb, conn); if (error) { printk(KERN_ERR "%s lkid %x error %d\n", name, lksb->sb_lkid, error); return error; } wait_for_completion(&lc->oc_sync_wait); if (lksb->sb_status != -DLM_EUNLOCK) { printk(KERN_ERR "%s lkid %x status %d\n", name, lksb->sb_lkid, lksb->sb_status); return -1; } return 0; } static int sync_lock(struct ocfs2_cluster_connection *conn, int mode, uint32_t flags, struct dlm_lksb *lksb, char *name) { int error, status; struct ocfs2_live_connection *lc = conn->cc_private; error = dlm_lock(conn->cc_lockspace, mode, lksb, flags, name, strlen(name), 0, sync_wait_cb, conn, NULL); if (error) { printk(KERN_ERR "%s lkid %x flags %x mode %d error %d\n", name, lksb->sb_lkid, flags, mode, error); return error; } wait_for_completion(&lc->oc_sync_wait); status = lksb->sb_status; if (status && status != -EAGAIN) { printk(KERN_ERR "%s lkid %x flags %x mode %d status %d\n", name, lksb->sb_lkid, flags, mode, status); } return status; } static int version_lock(struct ocfs2_cluster_connection *conn, int mode, int flags) { struct ocfs2_live_connection *lc = conn->cc_private; return sync_lock(conn, mode, flags, &lc->oc_version_lksb, VERSION_LOCK); } static int version_unlock(struct ocfs2_cluster_connection *conn) { struct ocfs2_live_connection *lc = conn->cc_private; return sync_unlock(conn, &lc->oc_version_lksb, VERSION_LOCK); } /* get_protocol_version() * * To exchange ocfs2 versioning, we use the LVB of the version dlm lock. * The algorithm is: * 1. Attempt to take the lock in EX mode (non-blocking). * 2. 
If successful (which means it is the first mount), write the * version number and downconvert to PR lock. * 3. If unsuccessful (returns -EAGAIN), read the version from the LVB after * taking the PR lock. */ static int get_protocol_version(struct ocfs2_cluster_connection *conn) { int ret; struct ocfs2_live_connection *lc = conn->cc_private; struct ocfs2_protocol_version pv; running_proto.pv_major = ocfs2_user_plugin.sp_max_proto.pv_major; running_proto.pv_minor = ocfs2_user_plugin.sp_max_proto.pv_minor; lc->oc_version_lksb.sb_lvbptr = lc->oc_lvb; ret = version_lock(conn, DLM_LOCK_EX, DLM_LKF_VALBLK|DLM_LKF_NOQUEUE); if (!ret) { conn->cc_version.pv_major = running_proto.pv_major; conn->cc_version.pv_minor = running_proto.pv_minor; version_to_lvb(&running_proto, lc->oc_lvb); version_lock(conn, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_VALBLK); } else if (ret == -EAGAIN) { ret = version_lock(conn, DLM_LOCK_PR, DLM_LKF_VALBLK); if (ret) goto out; lvb_to_version(lc->oc_lvb, &pv); if ((pv.pv_major != running_proto.pv_major) || (pv.pv_minor > running_proto.pv_minor)) { ret = -EINVAL; goto out; } conn->cc_version.pv_major = pv.pv_major; conn->cc_version.pv_minor = pv.pv_minor; } out: return ret; } static void user_recover_prep(void *arg) { } static void user_recover_slot(void *arg, struct dlm_slot *slot) { struct ocfs2_cluster_connection *conn = arg; printk(KERN_INFO "ocfs2: Node %d/%d down. 
Initiating recovery.\n", slot->nodeid, slot->slot); conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data); } static void user_recover_done(void *arg, struct dlm_slot *slots, int num_slots, int our_slot, uint32_t generation) { struct ocfs2_cluster_connection *conn = arg; struct ocfs2_live_connection *lc = conn->cc_private; int i; for (i = 0; i < num_slots; i++) if (slots[i].slot == our_slot) { atomic_set(&lc->oc_this_node, slots[i].nodeid); break; } lc->oc_our_slot = our_slot; wake_up(&lc->oc_wait); } static const struct dlm_lockspace_ops ocfs2_ls_ops = { .recover_prep = user_recover_prep, .recover_slot = user_recover_slot, .recover_done = user_recover_done, }; static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn) { version_unlock(conn); dlm_release_lockspace(conn->cc_lockspace, 2); conn->cc_lockspace = NULL; ocfs2_live_connection_drop(conn->cc_private); conn->cc_private = NULL; return 0; } static int user_cluster_connect(struct ocfs2_cluster_connection *conn) { dlm_lockspace_t *fsdlm; struct ocfs2_live_connection *lc; int rc, ops_rv; BUG_ON(conn == NULL); lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); if (!lc) return -ENOMEM; init_waitqueue_head(&lc->oc_wait); init_completion(&lc->oc_sync_wait); atomic_set(&lc->oc_this_node, 0); conn->cc_private = lc; lc->oc_type = NO_CONTROLD; rc = dlm_new_lockspace(conn->cc_name, conn->cc_cluster_name, DLM_LSFL_NEWEXCL, DLM_LVB_LEN, &ocfs2_ls_ops, conn, &ops_rv, &fsdlm); if (rc) { if (rc == -EEXIST || rc == -EPROTO) printk(KERN_ERR "ocfs2: Unable to create the " "lockspace %s (%d), because a ocfs2-tools " "program is running on this file system " "with the same name lockspace\n", conn->cc_name, rc); goto out; } if (ops_rv == -EOPNOTSUPP) { lc->oc_type = WITH_CONTROLD; printk(KERN_NOTICE "ocfs2: You seem to be using an older " "version of dlm_controld and/or ocfs2-tools." 
" Please consider upgrading.\n"); } else if (ops_rv) { rc = ops_rv; goto out; } conn->cc_lockspace = fsdlm; rc = ocfs2_live_connection_attach(conn, lc); if (rc) goto out; if (lc->oc_type == NO_CONTROLD) { rc = get_protocol_version(conn); if (rc) { printk(KERN_ERR "ocfs2: Could not determine" " locking version\n"); user_cluster_disconnect(conn); goto out; } wait_event(lc->oc_wait, (atomic_read(&lc->oc_this_node) > 0)); } /* * running_proto must have been set before we allowed any mounts * to proceed. */ if (fs_protocol_compare(&running_proto, &conn->cc_version)) { printk(KERN_ERR "Unable to mount with fs locking protocol version " "%u.%u because negotiated protocol is %u.%u\n", conn->cc_version.pv_major, conn->cc_version.pv_minor, running_proto.pv_major, running_proto.pv_minor); rc = -EPROTO; ocfs2_live_connection_drop(lc); lc = NULL; } out: if (rc) kfree(lc); return rc; } static int user_cluster_this_node(struct ocfs2_cluster_connection *conn, unsigned int *this_node) { int rc; struct ocfs2_live_connection *lc = conn->cc_private; if (lc->oc_type == WITH_CONTROLD) rc = ocfs2_control_get_this_node(); else if (lc->oc_type == NO_CONTROLD) rc = atomic_read(&lc->oc_this_node); else rc = -EINVAL; if (rc < 0) return rc; *this_node = rc; return 0; } static const struct ocfs2_stack_operations ocfs2_user_plugin_ops = { .connect = user_cluster_connect, .disconnect = user_cluster_disconnect, .this_node = user_cluster_this_node, .dlm_lock = user_dlm_lock, .dlm_unlock = user_dlm_unlock, .lock_status = user_dlm_lock_status, .lvb_valid = user_dlm_lvb_valid, .lock_lvb = user_dlm_lvb, .plock = user_plock, .dump_lksb = user_dlm_dump_lksb, }; static struct ocfs2_stack_plugin ocfs2_user_plugin = { .sp_name = "user", .sp_ops = &ocfs2_user_plugin_ops, .sp_owner = THIS_MODULE, }; static int __init ocfs2_user_plugin_init(void) { int rc; rc = ocfs2_control_init(); if (!rc) { rc = ocfs2_stack_glue_register(&ocfs2_user_plugin); if (rc) ocfs2_control_exit(); } return rc; } static void __exit 
ocfs2_user_plugin_exit(void) { ocfs2_stack_glue_unregister(&ocfs2_user_plugin); ocfs2_control_exit(); } MODULE_AUTHOR("Oracle"); MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks"); MODULE_LICENSE("GPL"); module_init(ocfs2_user_plugin_init); module_exit(ocfs2_user_plugin_exit);
2 2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for zydacron remote control * * Copyright (c) 2010 Don Prince <dhprince.devel@yahoo.co.uk> */ /* */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" struct zc_device { struct input_dev *input_ep81; unsigned short last_key[4]; }; /* * Zydacron remote control has an invalid HID report descriptor, * that needs fixing before we can parse it. 
*/ static const __u8 *zc_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { if (*rsize >= 253 && rdesc[0x96] == 0xbc && rdesc[0x97] == 0xff && rdesc[0xca] == 0xbc && rdesc[0xcb] == 0xff && rdesc[0xe1] == 0xbc && rdesc[0xe2] == 0xff) { hid_info(hdev, "fixing up zydacron remote control report descriptor\n"); rdesc[0x96] = rdesc[0xca] = rdesc[0xe1] = 0x0c; rdesc[0x97] = rdesc[0xcb] = rdesc[0xe2] = 0x00; } return rdesc; } #define zc_map_key_clear(c) \ hid_map_usage_clear(hi, usage, bit, max, EV_KEY, (c)) static int zc_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { int i; struct zc_device *zc = hid_get_drvdata(hdev); zc->input_ep81 = hi->input; if ((usage->hid & HID_USAGE_PAGE) != HID_UP_CONSUMER) return 0; dbg_hid("zynacron input mapping event [0x%x]\n", usage->hid & HID_USAGE); switch (usage->hid & HID_USAGE) { /* report 2 */ case 0x10: zc_map_key_clear(KEY_MODE); break; case 0x30: zc_map_key_clear(KEY_SCREEN); break; case 0x70: zc_map_key_clear(KEY_INFO); break; /* report 3 */ case 0x04: zc_map_key_clear(KEY_RADIO); break; /* report 4 */ case 0x0d: zc_map_key_clear(KEY_PVR); break; case 0x25: zc_map_key_clear(KEY_TV); break; case 0x47: zc_map_key_clear(KEY_AUDIO); break; case 0x49: zc_map_key_clear(KEY_AUX); break; case 0x4a: zc_map_key_clear(KEY_VIDEO); break; case 0x48: zc_map_key_clear(KEY_DVD); break; case 0x24: zc_map_key_clear(KEY_MENU); break; case 0x32: zc_map_key_clear(KEY_TEXT); break; default: return 0; } for (i = 0; i < 4; i++) zc->last_key[i] = 0; return 1; } static int zc_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct zc_device *zc = hid_get_drvdata(hdev); int ret = 0; unsigned key; unsigned short index; if (report->id == data[0]) { /* break keys */ for (index = 0; index < 4; index++) { key = zc->last_key[index]; if (key) { input_event(zc->input_ep81, EV_KEY, key, 0); zc->last_key[index] = 0; 
} } key = 0; switch (report->id) { case 0x02: case 0x03: switch (data[1]) { case 0x10: key = KEY_MODE; index = 0; break; case 0x30: key = KEY_SCREEN; index = 1; break; case 0x70: key = KEY_INFO; index = 2; break; case 0x04: key = KEY_RADIO; index = 3; break; } if (key) { input_event(zc->input_ep81, EV_KEY, key, 1); zc->last_key[index] = key; } ret = 1; break; } } return ret; } static int zc_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; struct zc_device *zc; zc = devm_kzalloc(&hdev->dev, sizeof(*zc), GFP_KERNEL); if (zc == NULL) { hid_err(hdev, "can't alloc descriptor\n"); return -ENOMEM; } hid_set_drvdata(hdev, zc); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } return 0; } static const struct hid_device_id zc_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) }, { } }; MODULE_DEVICE_TABLE(hid, zc_devices); static struct hid_driver zc_driver = { .name = "zydacron", .id_table = zc_devices, .report_fixup = zc_report_fixup, .input_mapping = zc_input_mapping, .raw_event = zc_raw_event, .probe = zc_probe, }; module_hid_driver(zc_driver); MODULE_DESCRIPTION("HID driver for zydacron remote control"); MODULE_LICENSE("GPL");
3 2 2 1 1 7 7 1 1 3 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 // SPDX-License-Identifier: GPL-2.0-only /* * Optimized MPEG FS - inode and super operations. * Copyright (C) 2006 Bob Copeland <me@bobcopeland.com> */ #include <linux/module.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/vfs.h> #include <linux/cred.h> #include <linux/parser.h> #include <linux/buffer_head.h> #include <linux/vmalloc.h> #include <linux/writeback.h> #include <linux/seq_file.h> #include <linux/crc-itu-t.h> #include "omfs.h" MODULE_AUTHOR("Bob Copeland <me@bobcopeland.com>"); MODULE_DESCRIPTION("OMFS (ReplayTV/Karma) Filesystem for Linux"); MODULE_LICENSE("GPL"); struct buffer_head *omfs_bread(struct super_block *sb, sector_t block) { struct omfs_sb_info *sbi = OMFS_SB(sb); if (block >= sbi->s_num_blocks) return NULL; return sb_bread(sb, clus_to_blk(sbi, block)); } struct inode *omfs_new_inode(struct inode *dir, umode_t mode) { struct inode *inode; u64 new_block; int err; int len; struct omfs_sb_info *sbi = OMFS_SB(dir->i_sb); inode = new_inode(dir->i_sb); if (!inode) return ERR_PTR(-ENOMEM); err = omfs_allocate_range(dir->i_sb, sbi->s_mirrors, sbi->s_mirrors, &new_block, &len); if (err) goto fail; inode->i_ino = new_block; inode_init_owner(&nop_mnt_idmap, inode, NULL, mode); inode->i_mapping->a_ops = &omfs_aops; simple_inode_init_ts(inode); switch (mode & S_IFMT) { case S_IFDIR: inode->i_op = &omfs_dir_inops; inode->i_fop = &omfs_dir_operations; inode->i_size = sbi->s_sys_blocksize; inc_nlink(inode); break; case S_IFREG: inode->i_op = &omfs_file_inops; inode->i_fop = &omfs_file_operations; inode->i_size = 0; 
break; } insert_inode_hash(inode); mark_inode_dirty(inode); return inode; fail: make_bad_inode(inode); iput(inode); return ERR_PTR(err); } /* * Update the header checksums for a dirty inode based on its contents. * Caller is expected to hold the buffer head underlying oi and mark it * dirty. */ static void omfs_update_checksums(struct omfs_inode *oi) { int xor, i, ofs = 0, count; u16 crc = 0; unsigned char *ptr = (unsigned char *) oi; count = be32_to_cpu(oi->i_head.h_body_size); ofs = sizeof(struct omfs_header); crc = crc_itu_t(crc, ptr + ofs, count); oi->i_head.h_crc = cpu_to_be16(crc); xor = ptr[0]; for (i = 1; i < OMFS_XOR_COUNT; i++) xor ^= ptr[i]; oi->i_head.h_check_xor = xor; } static int __omfs_write_inode(struct inode *inode, int wait) { struct omfs_inode *oi; struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); struct buffer_head *bh, *bh2; u64 ctime; int i; int ret = -EIO; int sync_failed = 0; /* get current inode since we may have written sibling ptrs etc. */ bh = omfs_bread(inode->i_sb, inode->i_ino); if (!bh) goto out; oi = (struct omfs_inode *) bh->b_data; oi->i_head.h_self = cpu_to_be64(inode->i_ino); if (S_ISDIR(inode->i_mode)) oi->i_type = OMFS_DIR; else if (S_ISREG(inode->i_mode)) oi->i_type = OMFS_FILE; else { printk(KERN_WARNING "omfs: unknown file type: %d\n", inode->i_mode); goto out_brelse; } oi->i_head.h_body_size = cpu_to_be32(sbi->s_sys_blocksize - sizeof(struct omfs_header)); oi->i_head.h_version = 1; oi->i_head.h_type = OMFS_INODE_NORMAL; oi->i_head.h_magic = OMFS_IMAGIC; oi->i_size = cpu_to_be64(inode->i_size); ctime = inode_get_ctime_sec(inode) * 1000LL + ((inode_get_ctime_nsec(inode) + 999)/1000); oi->i_ctime = cpu_to_be64(ctime); omfs_update_checksums(oi); mark_buffer_dirty(bh); if (wait) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) sync_failed = 1; } /* if mirroring writes, copy to next fsblock */ for (i = 1; i < sbi->s_mirrors; i++) { bh2 = omfs_bread(inode->i_sb, inode->i_ino + i); if (!bh2) goto out_brelse; 
memcpy(bh2->b_data, bh->b_data, bh->b_size); mark_buffer_dirty(bh2); if (wait) { sync_dirty_buffer(bh2); if (buffer_req(bh2) && !buffer_uptodate(bh2)) sync_failed = 1; } brelse(bh2); } ret = (sync_failed) ? -EIO : 0; out_brelse: brelse(bh); out: return ret; } static int omfs_write_inode(struct inode *inode, struct writeback_control *wbc) { return __omfs_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); } int omfs_sync_inode(struct inode *inode) { return __omfs_write_inode(inode, 1); } /* * called when an entry is deleted, need to clear the bits in the * bitmaps. */ static void omfs_evict_inode(struct inode *inode) { truncate_inode_pages_final(&inode->i_data); clear_inode(inode); if (inode->i_nlink) return; if (S_ISREG(inode->i_mode)) { inode->i_size = 0; omfs_shrink_inode(inode); } omfs_clear_range(inode->i_sb, inode->i_ino, 2); } struct inode *omfs_iget(struct super_block *sb, ino_t ino) { struct omfs_sb_info *sbi = OMFS_SB(sb); struct omfs_inode *oi; struct buffer_head *bh; u64 ctime; unsigned long nsecs; struct inode *inode; inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; bh = omfs_bread(inode->i_sb, ino); if (!bh) goto iget_failed; oi = (struct omfs_inode *)bh->b_data; /* check self */ if (ino != be64_to_cpu(oi->i_head.h_self)) goto fail_bh; inode->i_uid = sbi->s_uid; inode->i_gid = sbi->s_gid; ctime = be64_to_cpu(oi->i_ctime); nsecs = do_div(ctime, 1000) * 1000L; inode_set_atime(inode, ctime, nsecs); inode_set_mtime(inode, ctime, nsecs); inode_set_ctime(inode, ctime, nsecs); inode->i_mapping->a_ops = &omfs_aops; switch (oi->i_type) { case OMFS_DIR: inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask); inode->i_op = &omfs_dir_inops; inode->i_fop = &omfs_dir_operations; inode->i_size = sbi->s_sys_blocksize; inc_nlink(inode); break; case OMFS_FILE: inode->i_mode = S_IFREG | (S_IRWXUGO & ~sbi->s_fmask); inode->i_fop = &omfs_file_operations; inode->i_size = be64_to_cpu(oi->i_size); break; } 
brelse(bh); unlock_new_inode(inode); return inode; fail_bh: brelse(bh); iget_failed: iget_failed(inode); return ERR_PTR(-EIO); } static void omfs_put_super(struct super_block *sb) { struct omfs_sb_info *sbi = OMFS_SB(sb); kfree(sbi->s_imap); kfree(sbi); sb->s_fs_info = NULL; } static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *s = dentry->d_sb; struct omfs_sb_info *sbi = OMFS_SB(s); u64 id = huge_encode_dev(s->s_bdev->bd_dev); buf->f_type = OMFS_MAGIC; buf->f_bsize = sbi->s_blocksize; buf->f_blocks = sbi->s_num_blocks; buf->f_files = sbi->s_num_blocks; buf->f_namelen = OMFS_NAMELEN; buf->f_fsid = u64_to_fsid(id); buf->f_bfree = buf->f_bavail = buf->f_ffree = omfs_count_free(s); return 0; } /* * Display the mount options in /proc/mounts. */ static int omfs_show_options(struct seq_file *m, struct dentry *root) { struct omfs_sb_info *sbi = OMFS_SB(root->d_sb); umode_t cur_umask = current_umask(); if (!uid_eq(sbi->s_uid, current_uid())) seq_printf(m, ",uid=%u", from_kuid_munged(&init_user_ns, sbi->s_uid)); if (!gid_eq(sbi->s_gid, current_gid())) seq_printf(m, ",gid=%u", from_kgid_munged(&init_user_ns, sbi->s_gid)); if (sbi->s_dmask == sbi->s_fmask) { if (sbi->s_fmask != cur_umask) seq_printf(m, ",umask=%o", sbi->s_fmask); } else { if (sbi->s_dmask != cur_umask) seq_printf(m, ",dmask=%o", sbi->s_dmask); if (sbi->s_fmask != cur_umask) seq_printf(m, ",fmask=%o", sbi->s_fmask); } return 0; } static const struct super_operations omfs_sops = { .write_inode = omfs_write_inode, .evict_inode = omfs_evict_inode, .put_super = omfs_put_super, .statfs = omfs_statfs, .show_options = omfs_show_options, }; /* * For Rio Karma, there is an on-disk free bitmap whose location is * stored in the root block. For ReplayTV, there is no such free bitmap * so we have to walk the tree. Both inodes and file data are allocated * from the same map. This array can be big (300k) so we allocate * in units of the blocksize. 
*/ static int omfs_get_imap(struct super_block *sb) { unsigned int bitmap_size, array_size; int count; struct omfs_sb_info *sbi = OMFS_SB(sb); struct buffer_head *bh; unsigned long **ptr; sector_t block; bitmap_size = DIV_ROUND_UP(sbi->s_num_blocks, 8); array_size = DIV_ROUND_UP(bitmap_size, sb->s_blocksize); if (sbi->s_bitmap_ino == ~0ULL) goto out; sbi->s_imap_size = array_size; sbi->s_imap = kcalloc(array_size, sizeof(unsigned long *), GFP_KERNEL); if (!sbi->s_imap) goto nomem; block = clus_to_blk(sbi, sbi->s_bitmap_ino); if (block >= sbi->s_num_blocks) goto nomem; ptr = sbi->s_imap; for (count = bitmap_size; count > 0; count -= sb->s_blocksize) { bh = sb_bread(sb, block++); if (!bh) goto nomem_free; *ptr = kmemdup(bh->b_data, sb->s_blocksize, GFP_KERNEL); if (!*ptr) { brelse(bh); goto nomem_free; } if (count < sb->s_blocksize) memset((void *)*ptr + count, 0xff, sb->s_blocksize - count); brelse(bh); ptr++; } out: return 0; nomem_free: for (count = 0; count < array_size; count++) kfree(sbi->s_imap[count]); kfree(sbi->s_imap); nomem: sbi->s_imap = NULL; sbi->s_imap_size = 0; return -ENOMEM; } enum { Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask, Opt_err }; static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_umask, "umask=%o"}, {Opt_dmask, "dmask=%o"}, {Opt_fmask, "fmask=%o"}, {Opt_err, NULL}, }; static int parse_options(char *options, struct omfs_sb_info *sbi) { char *p; substring_t args[MAX_OPT_ARGS]; int option; if (!options) return 1; while ((p = strsep(&options, ",")) != NULL) { int token; if (!*p) continue; token = match_token(p, tokens, args); switch (token) { case Opt_uid: if (match_int(&args[0], &option)) return 0; sbi->s_uid = make_kuid(current_user_ns(), option); if (!uid_valid(sbi->s_uid)) return 0; break; case Opt_gid: if (match_int(&args[0], &option)) return 0; sbi->s_gid = make_kgid(current_user_ns(), option); if (!gid_valid(sbi->s_gid)) return 0; break; case Opt_umask: if (match_octal(&args[0], &option)) return 
0; sbi->s_fmask = sbi->s_dmask = option; break; case Opt_dmask: if (match_octal(&args[0], &option)) return 0; sbi->s_dmask = option; break; case Opt_fmask: if (match_octal(&args[0], &option)) return 0; sbi->s_fmask = option; break; default: return 0; } } return 1; } static int omfs_fill_super(struct super_block *sb, void *data, int silent) { struct buffer_head *bh, *bh2; struct omfs_super_block *omfs_sb; struct omfs_root_block *omfs_rb; struct omfs_sb_info *sbi; struct inode *root; int ret = -EINVAL; sbi = kzalloc(sizeof(struct omfs_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; sb->s_fs_info = sbi; sbi->s_uid = current_uid(); sbi->s_gid = current_gid(); sbi->s_dmask = sbi->s_fmask = current_umask(); if (!parse_options((char *) data, sbi)) goto end; sb->s_maxbytes = 0xffffffff; sb->s_time_gran = NSEC_PER_MSEC; sb->s_time_min = 0; sb->s_time_max = U64_MAX / MSEC_PER_SEC; sb_set_blocksize(sb, 0x200); bh = sb_bread(sb, 0); if (!bh) goto end; omfs_sb = (struct omfs_super_block *)bh->b_data; if (omfs_sb->s_magic != cpu_to_be32(OMFS_MAGIC)) { if (!silent) printk(KERN_ERR "omfs: Invalid superblock (%x)\n", omfs_sb->s_magic); goto out_brelse_bh; } sb->s_magic = OMFS_MAGIC; sbi->s_num_blocks = be64_to_cpu(omfs_sb->s_num_blocks); sbi->s_blocksize = be32_to_cpu(omfs_sb->s_blocksize); sbi->s_mirrors = be32_to_cpu(omfs_sb->s_mirrors); sbi->s_root_ino = be64_to_cpu(omfs_sb->s_root_block); sbi->s_sys_blocksize = be32_to_cpu(omfs_sb->s_sys_blocksize); mutex_init(&sbi->s_bitmap_lock); if (sbi->s_num_blocks > OMFS_MAX_BLOCKS) { printk(KERN_ERR "omfs: sysblock number (%llx) is out of range\n", (unsigned long long)sbi->s_num_blocks); goto out_brelse_bh; } if (sbi->s_sys_blocksize > PAGE_SIZE) { printk(KERN_ERR "omfs: sysblock size (%d) is out of range\n", sbi->s_sys_blocksize); goto out_brelse_bh; } if (sbi->s_blocksize < sbi->s_sys_blocksize || sbi->s_blocksize > OMFS_MAX_BLOCK_SIZE) { printk(KERN_ERR "omfs: block size (%d) is out of range\n", sbi->s_blocksize); goto out_brelse_bh; 
} /* * Use sys_blocksize as the fs block since it is smaller than a * page while the fs blocksize can be larger. */ sb_set_blocksize(sb, sbi->s_sys_blocksize); /* * ...and the difference goes into a shift. sys_blocksize is always * a power of two factor of blocksize. */ sbi->s_block_shift = get_bitmask_order(sbi->s_blocksize) - get_bitmask_order(sbi->s_sys_blocksize); bh2 = omfs_bread(sb, be64_to_cpu(omfs_sb->s_root_block)); if (!bh2) goto out_brelse_bh; omfs_rb = (struct omfs_root_block *)bh2->b_data; sbi->s_bitmap_ino = be64_to_cpu(omfs_rb->r_bitmap); sbi->s_clustersize = be32_to_cpu(omfs_rb->r_clustersize); if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) { printk(KERN_ERR "omfs: block count discrepancy between " "super and root blocks (%llx, %llx)\n", (unsigned long long)sbi->s_num_blocks, (unsigned long long)be64_to_cpu(omfs_rb->r_num_blocks)); goto out_brelse_bh2; } if (sbi->s_bitmap_ino != ~0ULL && sbi->s_bitmap_ino > sbi->s_num_blocks) { printk(KERN_ERR "omfs: free space bitmap location is corrupt " "(%llx, total blocks %llx)\n", (unsigned long long) sbi->s_bitmap_ino, (unsigned long long) sbi->s_num_blocks); goto out_brelse_bh2; } if (sbi->s_clustersize < 1 || sbi->s_clustersize > OMFS_MAX_CLUSTER_SIZE) { printk(KERN_ERR "omfs: cluster size out of range (%d)", sbi->s_clustersize); goto out_brelse_bh2; } ret = omfs_get_imap(sb); if (ret) goto out_brelse_bh2; sb->s_op = &omfs_sops; root = omfs_iget(sb, be64_to_cpu(omfs_rb->r_root_dir)); if (IS_ERR(root)) { ret = PTR_ERR(root); goto out_brelse_bh2; } sb->s_root = d_make_root(root); if (!sb->s_root) { ret = -ENOMEM; goto out_brelse_bh2; } printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name); ret = 0; out_brelse_bh2: brelse(bh2); out_brelse_bh: brelse(bh); end: if (ret) kfree(sbi); return ret; } static struct dentry *omfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_bdev(fs_type, flags, dev_name, data, omfs_fill_super); } static 
struct file_system_type omfs_fs_type = { .owner = THIS_MODULE, .name = "omfs", .mount = omfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("omfs"); static int __init init_omfs_fs(void) { return register_filesystem(&omfs_fs_type); } static void __exit exit_omfs_fs(void) { unregister_filesystem(&omfs_fs_type); } module_init(init_omfs_fs); module_exit(exit_omfs_fs);
17 5 200 211 51 260 4 1 1 4 23 16 13 29 29 23 4 23 19 18 17 8 8 8 8 6 8 8 7 8 1 14 17 14 11 11 21 3 3 1 3 3 7 23 2 27 23 13 27 29 29 6 6 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 // SPDX-License-Identifier: GPL-2.0 /* * This module exports the functions: * * 'int set_selection_user(struct tiocl_selection __user *, * struct tty_struct *)' * 'int set_selection_kernel(struct tiocl_selection *, struct tty_struct *)' * 'void 
clear_selection(void)' * 'int paste_selection(struct tty_struct *)' * 'int sel_loadlut(u32 __user *)' * * Now that /dev/vcs exists, most of this can disappear again. */ #include <linux/module.h> #include <linux/tty.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/uaccess.h> #include <linux/kbd_kern.h> #include <linux/vt_kern.h> #include <linux/consolemap.h> #include <linux/selection.h> #include <linux/tiocl.h> #include <linux/console.h> #include <linux/tty_flip.h> #include <linux/sched/signal.h> /* Don't take this from <ctype.h>: 011-015 on the screen aren't spaces */ #define is_space_on_vt(c) ((c) == ' ') /* FIXME: all this needs locking */ static struct vc_selection { struct mutex lock; struct vc_data *cons; /* must not be deallocated */ char *buffer; unsigned int buf_len; volatile int start; /* cleared by clear_selection */ int end; } vc_sel = { .lock = __MUTEX_INITIALIZER(vc_sel.lock), .start = -1, }; /* clear_selection, highlight and highlight_pointer can be called from interrupt (via scrollback/front) */ /* set reverse video on characters s-e of console with selection. */ static inline void highlight(const int s, const int e) { invert_screen(vc_sel.cons, s, e-s+2, true); } /* use complementary color to show the pointer */ static inline void highlight_pointer(const int where) { complement_pos(vc_sel.cons, where); } static u32 sel_pos(int n, bool unicode) { if (unicode) return screen_glyph_unicode(vc_sel.cons, n / 2); return inverse_translate(vc_sel.cons, screen_glyph(vc_sel.cons, n), false); } /** * clear_selection - remove current selection * * Remove the current selection highlight, if any from the console holding the * selection. * * Locking: The caller must hold the console lock. 
*/ void clear_selection(void) { highlight_pointer(-1); /* hide the pointer */ if (vc_sel.start != -1) { highlight(vc_sel.start, vc_sel.end); vc_sel.start = -1; } } EXPORT_SYMBOL_GPL(clear_selection); bool vc_is_sel(const struct vc_data *vc) { return vc == vc_sel.cons; } /* * User settable table: what characters are to be considered alphabetic? * 128 bits. Locked by the console lock. */ static u32 inwordLut[]={ 0x00000000, /* control chars */ 0x03FFE000, /* digits and "-./" */ 0x87FFFFFE, /* uppercase and '_' */ 0x07FFFFFE, /* lowercase */ }; static inline int inword(const u32 c) { return c > 0x7f || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1); } /** * sel_loadlut() - load the LUT table * @lut: user table * * Load the LUT table from user space. Make a temporary copy so a partial * update doesn't make a mess. * * Locking: The console lock is acquired. */ int sel_loadlut(u32 __user *lut) { u32 tmplut[ARRAY_SIZE(inwordLut)]; if (copy_from_user(tmplut, lut, sizeof(inwordLut))) return -EFAULT; console_lock(); memcpy(inwordLut, tmplut, sizeof(inwordLut)); console_unlock(); return 0; } /* does screen address p correspond to character at LH/RH edge of screen? 
*/
/*
 * Non-zero when p is a multiple of size_row (first cell of a row) or when
 * p + 2 is (last cell of a row); callers step screen offsets by 2.
 */
static inline int atedge(const int p, int size_row)
{
	return (!(p % size_row) || !((p + 2) % size_row));
}

/*
 * stores the char in UTF8 and returns the number of bytes used (1-4)
 *
 * Up to four bytes are written through p.  Values of 0x110000 and above are
 * replaced by the three-byte encoding of U+FFFD.
 */
static int store_utf8(u32 c, char *p)
{
	if (c < 0x80) {
		/* 0******* */
		p[0] = c;
		return 1;
	} else if (c < 0x800) {
		/* 110***** 10****** */
		p[0] = 0xc0 | (c >> 6);
		p[1] = 0x80 | (c & 0x3f);
		return 2;
	} else if (c < 0x10000) {
		/* 1110**** 10****** 10****** */
		p[0] = 0xe0 | (c >> 12);
		p[1] = 0x80 | ((c >> 6) & 0x3f);
		p[2] = 0x80 | (c & 0x3f);
		return 3;
	} else if (c < 0x110000) {
		/* 11110*** 10****** 10****** 10****** */
		p[0] = 0xf0 | (c >> 18);
		p[1] = 0x80 | ((c >> 12) & 0x3f);
		p[2] = 0x80 | ((c >> 6) & 0x3f);
		p[3] = 0x80 | (c & 0x3f);
		return 4;
	} else {
		/* outside Unicode, replace with U+FFFD */
		p[0] = 0xef;
		p[1] = 0xbf;
		p[2] = 0xbd;
		return 3;
	}
}

/**
 * set_selection_user - set the current selection.
 * @sel: user selection info
 * @tty: the console tty
 *
 * Invoked by the ioctl handle for the vt layer.
 *
 * Copies the request in from user space and hands it to
 * set_selection_kernel().
 *
 * Locking: The entire selection process is managed under the console_lock.
 * It's a lot under the lock but its hardly a performance path.
 *
 * Return: -EFAULT if the user copy fails, otherwise the result of
 * set_selection_kernel().
 */
int set_selection_user(const struct tiocl_selection __user *sel,
		       struct tty_struct *tty)
{
	struct tiocl_selection v;

	if (copy_from_user(&v, sel, sizeof(*sel)))
		return -EFAULT;

	return set_selection_kernel(&v, tty);
}

/*
 * Copy the characters between vc_sel.start and vc_sel.end into a freshly
 * allocated vc_sel.buffer and record the length in vc_sel.buf_len.
 *
 * Returns 0 on success.  If the allocation fails the selection is cleared,
 * the previous buffer is left in place and -ENOMEM is returned.
 */
static int vc_selection_store_chars(struct vc_data *vc, bool unicode)
{
	char *bp, *obp;
	unsigned int i;

	/* Allocate a new buffer before freeing the old one ... */
	/* chars can take up to 4 bytes with unicode */
	bp = kmalloc_array((vc_sel.end - vc_sel.start) / 2 + 1, unicode ? 4 : 1,
			   GFP_KERNEL | __GFP_NOWARN);
	if (!bp) {
		printk(KERN_WARNING "selection: kmalloc() failed\n");
		clear_selection();
		return -ENOMEM;
	}
	kfree(vc_sel.buffer);
	vc_sel.buffer = bp;

	/* obp trails bp: it marks the position just past the last non-blank */
	obp = bp;
	for (i = vc_sel.start; i <= vc_sel.end; i += 2) {
		u32 c = sel_pos(i, unicode);

		if (unicode)
			bp += store_utf8(c, bp);
		else
			*bp++ = c;
		if (!is_space_on_vt(c))
			obp = bp;
		/* i is the last cell of a screen row */
		if (!((i + 2) % vc->vc_size_row)) {
			/* strip trailing blanks from line and add newline,
			 * unless non-space at end of line.
			 */
			if (obp != bp) {
				bp = obp;
				*bp++ = '\r';
			}
			obp = bp;
		}
	}
	vc_sel.buf_len = bp - vc_sel.buffer;

	return 0;
}

/*
 * Work out the new selection range for mode from the screen offsets ps..pe,
 * update the on-screen highlight relative to the previous selection and
 * store the selected text.
 *
 * TIOCL_SELPOINTER only moves the pointer and returns 0; an unknown mode
 * returns -EINVAL.  Otherwise the result of vc_selection_store_chars() is
 * returned, or 0 straight away when the range did not change.
 */
static int vc_do_selection(struct vc_data *vc, unsigned short mode, int ps,
		int pe)
{
	int new_sel_start, new_sel_end, spc;
	bool unicode = vt_do_kdgkbmode(fg_console) == K_UNICODE;

	switch (mode) {
	case TIOCL_SELCHAR:	/* character-by-character selection */
		new_sel_start = ps;
		new_sel_end = pe;
		break;
	case TIOCL_SELWORD:	/* word-by-word selection */
		/*
		 * Walk left from ps while the character class (blank vs.
		 * word character) stays the same, stopping at the start of
		 * the row.
		 */
		spc = is_space_on_vt(sel_pos(ps, unicode));
		for (new_sel_start = ps; ; ps -= 2) {
			if ((spc && !is_space_on_vt(sel_pos(ps, unicode))) ||
			    (!spc && !inword(sel_pos(ps, unicode))))
				break;
			new_sel_start = ps;
			if (!(ps % vc->vc_size_row))
				break;
		}

		/* Same walk to the right from pe, stopping at the row end. */
		spc = is_space_on_vt(sel_pos(pe, unicode));
		for (new_sel_end = pe; ; pe += 2) {
			if ((spc && !is_space_on_vt(sel_pos(pe, unicode))) ||
			    (!spc && !inword(sel_pos(pe, unicode))))
				break;
			new_sel_end = pe;
			if (!((pe + 2) % vc->vc_size_row))
				break;
		}
		break;
	case TIOCL_SELLINE:	/* line-by-line selection */
		new_sel_start = rounddown(ps, vc->vc_size_row);
		new_sel_end = rounddown(pe, vc->vc_size_row) +
			vc->vc_size_row - 2;
		break;
	case TIOCL_SELPOINTER:
		highlight_pointer(pe);
		return 0;
	default:
		return -EINVAL;
	}

	/* remove the pointer */
	highlight_pointer(-1);

	/* select to end of line if on trailing space */
	if (new_sel_end > new_sel_start &&
		!atedge(new_sel_end, vc->vc_size_row) &&
		is_space_on_vt(sel_pos(new_sel_end, unicode))) {
		for (pe = new_sel_end + 2; ; pe += 2)
			if (!is_space_on_vt(sel_pos(pe, unicode)) ||
			    atedge(pe, vc->vc_size_row))
				break;
		/* only extend if the scan stopped on a blank edge cell */
		if (is_space_on_vt(sel_pos(pe, unicode)))
			new_sel_end = pe;
	}

	/*
	 * highlight() inverts cells, so only the difference between the old
	 * and the new range is touched when they share an endpoint.
	 */
	if (vc_sel.start == -1)	/* no current selection */
		highlight(new_sel_start, new_sel_end);
	else if (new_sel_start == vc_sel.start)
	{
		if (new_sel_end == vc_sel.end)	/* no action required */
			return 0;
		else if (new_sel_end > vc_sel.end)	/* extend to right */
			highlight(vc_sel.end + 2, new_sel_end);
		else				/* contract from right */
			highlight(new_sel_end + 2, vc_sel.end);
	}
	else if (new_sel_end == vc_sel.end)
	{
		if (new_sel_start < vc_sel.start) /* extend to left */
			highlight(new_sel_start, vc_sel.start - 2);
		else				/* contract from left */
			highlight(vc_sel.start, new_sel_start - 2);
	}
	else	/* some other case; start selection from scratch */
	{
		clear_selection();
		highlight(new_sel_start, new_sel_end);
	}
	vc_sel.start = new_sel_start;
	vc_sel.end = new_sel_end;

	return vc_selection_store_chars(vc, unicode);
}

/*
 * Handle one selection request @v for console @vc: clear the selection,
 * forward the event as a mouse report, or turn the clamped coordinates into
 * screen offsets and hand them to vc_do_selection().  The coordinates in @v
 * are modified in place.
 */
static int vc_selection(struct vc_data *vc, struct tiocl_selection *v,
		struct tty_struct *tty)
{
	int ps, pe;

	poke_blanked_console();

	if (v->sel_mode == TIOCL_SELCLEAR) {
		/* useful for screendump without selection highlights */
		clear_selection();
		return 0;
	}

	/*
	 * Subtract one from each coordinate and clamp it to the screen.  The
	 * comparison is done as u16, so a coordinate of 0 wraps to the
	 * maximum and ends up clamped to the last column/row.
	 */
	v->xs = min_t(u16, v->xs - 1, vc->vc_cols - 1);
	v->ys = min_t(u16, v->ys - 1, vc->vc_rows - 1);
	v->xe = min_t(u16, v->xe - 1, vc->vc_cols - 1);
	v->ye = min_t(u16, v->ye - 1, vc->vc_rows - 1);

	if (mouse_reporting() && (v->sel_mode & TIOCL_SELMOUSEREPORT)) {
		mouse_report(tty, v->sel_mode & TIOCL_SELBUTTONMASK, v->xs,
			     v->ys);
		return 0;
	}

	/* screen offsets: row * vc_size_row plus two per column */
	ps = v->ys * vc->vc_size_row + (v->xs << 1);
	pe = v->ye * vc->vc_size_row + (v->xe << 1);
	if (ps > pe)	/* make vc_sel.start <= vc_sel.end */
		swap(ps, pe);

	/* selection moved to another console: drop the old highlight first */
	if (vc_sel.cons != vc) {
		clear_selection();
		vc_sel.cons = vc;
	}

	return vc_do_selection(vc, v->sel_mode, ps, pe);
}

/*
 * Apply selection request @v to the foreground console.  vc_sel.lock is
 * taken first, then the console lock; both are dropped before returning the
 * result of vc_selection().
 */
int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty)
{
	int ret;

	mutex_lock(&vc_sel.lock);
	console_lock();
	ret = vc_selection(vc_cons[fg_console].d, v, tty);
	console_unlock();
	mutex_unlock(&vc_sel.lock);

	return ret;
}
EXPORT_SYMBOL_GPL(set_selection_kernel);

/* Insert the contents of the selection buffer into the
 * queue of the tty associated with the current console.
 * Invoked by ioctl().
 *
 * Locking: called without locks. Calls the ldisc wrongly with
 * unsafe methods,
 *
 * Returns 0 once the loop ends normally, -EIO if no line discipline
 * reference could be obtained, or -EINTR if a signal became pending while
 * pasting.
 */
int paste_selection(struct tty_struct *tty)
{
	struct vc_data *vc = tty->driver_data;
	int	pasted = 0;
	size_t count;
	struct  tty_ldisc *ld;
	DECLARE_WAITQUEUE(wait, current);
	int ret = 0;

	console_lock();
	poke_blanked_console();
	console_unlock();

	ld = tty_ldisc_ref_wait(tty);
	if (!ld)
		return -EIO;	/* ldisc was hung up */
	tty_buffer_lock_exclusive(&vc->port);

	add_wait_queue(&vc->paste_wait, &wait);
	mutex_lock(&vc_sel.lock);
	/* feed the buffer to the ldisc until every byte has been accepted */
	while (vc_sel.buffer && vc_sel.buf_len > pasted) {
		/* set before the checks below so a wakeup is not missed */
		set_current_state(TASK_INTERRUPTIBLE);
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
		if (tty_throttled(tty)) {
			/* do not hold vc_sel.lock while sleeping */
			mutex_unlock(&vc_sel.lock);
			schedule();
			mutex_lock(&vc_sel.lock);
			continue;
		}
		__set_current_state(TASK_RUNNING);
		count = vc_sel.buf_len - pasted;
		/* the ldisc may take fewer bytes than offered */
		count = tty_ldisc_receive_buf(ld, vc_sel.buffer + pasted, NULL,
					      count);
		pasted += count;
	}
	mutex_unlock(&vc_sel.lock);
	remove_wait_queue(&vc->paste_wait, &wait);
	/* the loop can exit via break with the state still TASK_INTERRUPTIBLE */
	__set_current_state(TASK_RUNNING);

	tty_buffer_unlock_exclusive(&vc->port);
	tty_ldisc_deref(ld);
	return ret;
}
EXPORT_SYMBOL_GPL(paste_selection);
7 8 10 5 8 2350 2320 2319 1101 1101 1099 92 16 273 272 2 5 3 379 235 269 236 102 16 385 5 382 268 3 266 270 387 387 57 386 82 87 386 384 386 19 385 20 1 1 21 1 20 15 1 3 13 5 13 5 18 3 3 3 7 7 6 5 3 1 3 3 25 1 24 24 7 20 21 21 21 74 74 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 
469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 // SPDX-License-Identifier: GPL-2.0-only /* * net/ipv6/fib6_rules.c IPv6 Routing Policy Rules * * Copyright (C)2003-2006 Helsinki University of Technology * Copyright (C)2003-2006 USAGI/WIDE Project * * Authors * Thomas Graf <tgraf@suug.ch> * Ville Nuorvala <vnuorval@tcs.hut.fi> */ #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/export.h> #include <linux/indirect_call_wrapper.h> #include <net/fib_rules.h> #include <net/inet_dscp.h> #include <net/ipv6.h> #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/netlink.h> struct fib6_rule { struct fib_rule common; struct rt6key src; struct rt6key dst; dscp_t dscp; u8 dscp_full:1; /* DSCP or TOS selector */ }; static bool fib6_rule_matchall(const struct fib_rule *rule) { struct fib6_rule *r = container_of(rule, struct fib6_rule, common); if (r->dst.plen || r->src.plen || r->dscp) return false; return fib_rule_matchall(rule); } bool fib6_rule_default(const struct fib_rule *rule) { if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL || rule->l3mdev) return false; if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN) return false; return true; } EXPORT_SYMBOL_GPL(fib6_rule_default); int fib6_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return fib_rules_dump(net, nb, AF_INET6, extack); } unsigned int fib6_rules_seq_read(const struct net *net) { return fib_rules_seq_read(net, AF_INET6); } /* called with rcu lock held; no reference taken on fib6_info */ int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, struct 
fib6_result *res, int flags)
{
	int err;

	if (net->ipv6.fib6_has_custom_rules) {
		/* oif reaches the rule action through lookup_data */
		struct fib_lookup_arg arg = {
			.lookup_ptr = fib6_table_lookup,
			.lookup_data = &oif,
			.result = res,
			.flags = FIB_LOOKUP_NOREF,
		};

		l3mdev_update_flow(net, flowi6_to_flowi(fl6));

		err = fib_rules_lookup(net->ipv6.fib6_rules_ops,
				       flowi6_to_flowi(fl6), flags, &arg);
	} else {
		/* no custom rules: try the local table, then the main table */
		err = fib6_table_lookup(net, net->ipv6.fib6_local_tbl, oif,
					fl6, res, flags);
		if (err || res->f6i == net->ipv6.fib6_null_entry)
			err = fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
						oif, fl6, res, flags);
	}

	return err;
}

/*
 * Resolve @fl6 to a dst entry using @lookup.
 *
 * With custom rules installed the policy rules are walked; otherwise the
 * local table and then the main table are queried directly.  When no usable
 * route comes back, the namespace's ip6_null_entry dst is returned, with a
 * reference taken on it unless RT6_LOOKUP_F_DST_NOREF is set in @flags.
 */
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
				   const struct sk_buff *skb,
				   int flags, pol_lookup_t lookup)
{
	if (net->ipv6.fib6_has_custom_rules) {
		struct fib6_result res = {};
		struct fib_lookup_arg arg = {
			.lookup_ptr = lookup,
			.lookup_data = skb,
			.result = &res,
			.flags = FIB_LOOKUP_NOREF,
		};

		/* update flow if oif or iif point to device enslaved to l3mdev */
		l3mdev_update_flow(net, flowi6_to_flowi(fl6));

		/* return value is ignored; only res.rt6 is examined */
		fib_rules_lookup(net->ipv6.fib6_rules_ops,
				 flowi6_to_flowi(fl6), flags, &arg);

		if (res.rt6)
			return &res.rt6->dst;
	} else {
		struct rt6_info *rt;

		rt = pol_lookup_func(lookup,
			     net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
		if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
			return &rt->dst;
		/* not usable: drop it before trying the main table */
		ip6_rt_put_flags(rt, flags);
		rt = pol_lookup_func(lookup,
			     net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
		if (rt->dst.error != -EAGAIN)
			return &rt->dst;
		ip6_rt_put_flags(rt, flags);
	}

	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
		dst_hold(&net->ipv6.ip6_null_entry->dst);
	return &net->ipv6.ip6_null_entry->dst;
}

static int fib6_rule_saddr(struct net *net, struct fib_rule *rule, int flags,
			   struct flowi6 *flp6, const struct net_device *dev)
{
	struct fib6_rule *r = (struct fib6_rule *)rule;

	/* If we need to find a source address for this traffic,
	 * we check the result if it meets requirement of the rule.
*/
	if ((rule->flags & FIB_RULE_FIND_SADDR) &&
	    r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
		struct in6_addr saddr;

		if (ipv6_dev_get_saddr(net, dev, &flp6->daddr,
				       rt6_flags2srcprefs(flags), &saddr))
			return -EAGAIN;

		if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen))
			return -EAGAIN;

		/* the chosen address satisfies the rule: record it in the flow */
		flp6->saddr = saddr;
	}

	return 0;
}

/*
 * Rule action used when the lookup function is fib6_table_lookup (see
 * fib6_rule_action()): arg->lookup_data points at the oif and the result
 * lands in the fib6_result at arg->result.
 *
 * Returns 0 when the rule's table produced a usable entry, -EAGAIN when the
 * table is missing, the lookup found nothing or the source address check
 * failed, and a fixed errno for the non-table actions.
 */
static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
				int flags, struct fib_lookup_arg *arg)
{
	struct fib6_result *res = arg->result;
	struct flowi6 *flp6 = &flp->u.ip6;
	struct net *net = rule->fr_net;
	struct fib6_table *table;
	int err, *oif;
	u32 tb_id;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	tb_id = fib_rule_get_table(rule, arg);
	table = fib6_get_table(net, tb_id);
	if (!table)
		return -EAGAIN;

	oif = (int *)arg->lookup_data;
	err = fib6_table_lookup(net, table, *oif, flp6, res, flags);
	if (!err && res->f6i != net->ipv6.fib6_null_entry)
		err = fib6_rule_saddr(net, rule, flags, flp6,
				      res->nh->fib_nh_dev);
	else
		err = -EAGAIN;

	return err;
}

/*
 * Rule action for every other lookup function: the result is an rt6_info
 * stored in res->rt6.
 */
static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
			      int flags, struct fib_lookup_arg *arg)
{
	struct fib6_result *res = arg->result;
	struct flowi6 *flp6 = &flp->u.ip6;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;
	struct net *net = rule->fr_net;
	pol_lookup_t lookup = arg->lookup_ptr;
	int err = 0;
	u32 tb_id;

	/* non-table actions resolve to one of the per-netns special entries */
	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		err = -ENETUNREACH;
		rt = net->ipv6.ip6_null_entry;
		goto discard_pkt;
	default:
	case FR_ACT_BLACKHOLE:
		err = -EINVAL;
		rt = net->ipv6.ip6_blk_hole_entry;
		goto discard_pkt;
	case FR_ACT_PROHIBIT:
		err = -EACCES;
		rt = net->ipv6.ip6_prohibit_entry;
		goto discard_pkt;
	}

	tb_id = fib_rule_get_table(rule, arg);
	table = fib6_get_table(net, tb_id);
	if (!table) {
		err = -EAGAIN;
		goto out;
	}

	rt = pol_lookup_func(lookup,
			     net, table, flp6, arg->lookup_data, flags);
/* (continuation of __fib6_rule_action(); its head precedes this span) */
	if (rt != net->ipv6.ip6_null_entry) {
		struct inet6_dev *idev = ip6_dst_idev(&rt->dst);

		if (!idev)
			goto again;
		err = fib6_rule_saddr(net, rule, flags, flp6,
				      idev->dev);

		if (err == -EAGAIN)
			goto again;

		/* the route's own error decides whether the walk stops here */
		err = rt->dst.error;
		if (err != -EAGAIN)
			goto out;
	}
again:
	/* this rule did not produce a usable route: release it, report -EAGAIN */
	ip6_rt_put_flags(rt, flags);
	err = -EAGAIN;
	rt = NULL;
	goto out;

discard_pkt:
	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
		dst_hold(&rt->dst);
out:
	res->rt6 = rt;
	return err;
}

/*
 * .action hook: pick the fib6_result flavour when the caller is doing a
 * fib6_table_lookup, the rt6_info flavour otherwise.
 */
INDIRECT_CALLABLE_SCOPE int fib6_rule_action(struct fib_rule *rule,
					     struct flowi *flp, int flags,
					     struct fib_lookup_arg *arg)
{
	if (arg->lookup_ptr == fib6_table_lookup)
		return fib6_rule_action_alt(rule, flp, flags, arg);

	return __fib6_rule_action(rule, flp, flags, arg);
}

/*
 * .suppress hook: returns true, after releasing the route, when the result
 * in arg->result must be rejected because of the rule's suppress_prefixlen
 * or suppress_ifgroup setting.  Returns false when there is no rt6 result
 * or the route is acceptable.
 */
INDIRECT_CALLABLE_SCOPE bool fib6_rule_suppress(struct fib_rule *rule,
						int flags,
						struct fib_lookup_arg *arg)
{
	struct fib6_result *res = arg->result;
	struct rt6_info *rt = res->rt6;
	struct net_device *dev = NULL;

	if (!rt)
		return false;

	if (rt->rt6i_idev)
		dev = rt->rt6i_idev->dev;

	/* do not accept result if the route does
	 * not meet the required prefix length
	 */
	if (rt->rt6i_dst.plen <= rule->suppress_prefixlen)
		goto suppress_route;

	/* do not accept result if the route uses a device
	 * belonging to a forbidden interface group
	 */
	if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup)
		goto suppress_route;

	return false;

suppress_route:
	ip6_rt_put_flags(rt, flags);
	return true;
}

/*
 * .match hook: returns 1 when the flow satisfies every selector of the
 * rule, 0 otherwise.
 */
INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
					    struct flowi *fl, int flags)
{
	struct fib6_rule *r = (struct fib6_rule *) rule;
	struct flowi6 *fl6 = &fl->u.ip6;

	if (r->dst.plen &&
	    !ipv6_prefix_equal(&fl6->daddr, &r->dst.addr, r->dst.plen))
		return 0;

	/*
	 * If FIB_RULE_FIND_SADDR is set and we do not have a
	 * source address for the traffic, we defer check for
	 * source address.
*/ if (r->src.plen) { if (flags & RT6_LOOKUP_F_HAS_SADDR) { if (!ipv6_prefix_equal(&fl6->saddr, &r->src.addr, r->src.plen)) return 0; } else if (!(r->common.flags & FIB_RULE_FIND_SADDR)) return 0; } if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel)) return 0; if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto)) return 0; if (fib_rule_port_range_set(&rule->sport_range) && !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport)) return 0; if (fib_rule_port_range_set(&rule->dport_range) && !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport)) return 0; return 1; } static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6, struct netlink_ext_ack *extack) { if (rule6->dscp) { NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP"); return -EINVAL; } rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); rule6->dscp_full = true; return 0; } static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, struct nlattr **tb, struct netlink_ext_ack *extack) { int err = -EINVAL; struct net *net = sock_net(skb->sk); struct fib6_rule *rule6 = (struct fib6_rule *) rule; if (!inet_validate_dscp(frh->tos)) { NL_SET_ERR_MSG(extack, "Invalid dsfield (tos): ECN bits must be 0"); goto errout; } rule6->dscp = inet_dsfield_to_dscp(frh->tos); if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0) goto errout; if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) { if (rule->table == RT6_TABLE_UNSPEC) { NL_SET_ERR_MSG(extack, "Invalid table"); goto errout; } if (fib6_new_table(net, rule->table) == NULL) { err = -ENOBUFS; goto errout; } } if (frh->src_len) rule6->src.addr = nla_get_in6_addr(tb[FRA_SRC]); if (frh->dst_len) rule6->dst.addr = nla_get_in6_addr(tb[FRA_DST]); rule6->src.plen = frh->src_len; rule6->dst.plen = frh->dst_len; if (fib_rule_requires_fldissect(rule)) net->ipv6.fib6_rules_require_fldissect++; net->ipv6.fib6_has_custom_rules = true; err = 0; errout: return err; } static int 
fib6_rule_delete(struct fib_rule *rule) { struct net *net = rule->fr_net; if (net->ipv6.fib6_rules_require_fldissect && fib_rule_requires_fldissect(rule)) net->ipv6.fib6_rules_require_fldissect--; return 0; } static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, struct nlattr **tb) { struct fib6_rule *rule6 = (struct fib6_rule *) rule; if (frh->src_len && (rule6->src.plen != frh->src_len)) return 0; if (frh->dst_len && (rule6->dst.plen != frh->dst_len)) return 0; if (frh->tos && (rule6->dscp_full || inet_dscp_to_dsfield(rule6->dscp) != frh->tos)) return 0; if (tb[FRA_DSCP]) { dscp_t dscp; dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2); if (!rule6->dscp_full || rule6->dscp != dscp) return 0; } if (frh->src_len && nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr))) return 0; if (frh->dst_len && nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr))) return 0; return 1; } static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh) { struct fib6_rule *rule6 = (struct fib6_rule *) rule; frh->dst_len = rule6->dst.plen; frh->src_len = rule6->src.plen; if (rule6->dscp_full) { frh->tos = 0; if (nla_put_u8(skb, FRA_DSCP, inet_dscp_to_dsfield(rule6->dscp) >> 2)) goto nla_put_failure; } else { frh->tos = inet_dscp_to_dsfield(rule6->dscp); } if ((rule6->dst.plen && nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) || (rule6->src.plen && nla_put_in6_addr(skb, FRA_SRC, &rule6->src.addr))) goto nla_put_failure; return 0; nla_put_failure: return -ENOBUFS; } static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) { return nla_total_size(16) /* dst */ + nla_total_size(16) /* src */ + nla_total_size(1); /* dscp */ } static void fib6_rule_flush_cache(struct fib_rules_ops *ops) { rt_genid_bump_ipv6(ops->fro_net); } static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), .addr_size = sizeof(struct 
in6_addr), .action = fib6_rule_action, .match = fib6_rule_match, .suppress = fib6_rule_suppress, .configure = fib6_rule_configure, .delete = fib6_rule_delete, .compare = fib6_rule_compare, .fill = fib6_rule_fill, .nlmsg_payload = fib6_rule_nlmsg_payload, .flush_cache = fib6_rule_flush_cache, .nlgroup = RTNLGRP_IPV6_RULE, .owner = THIS_MODULE, .fro_net = &init_net, }; static int __net_init fib6_rules_net_init(struct net *net) { struct fib_rules_ops *ops; int err; ops = fib_rules_register(&fib6_rules_ops_template, net); if (IS_ERR(ops)) return PTR_ERR(ops); err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL); if (err) goto out_fib6_rules_ops; err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN); if (err) goto out_fib6_rules_ops; net->ipv6.fib6_rules_ops = ops; net->ipv6.fib6_rules_require_fldissect = 0; out: return err; out_fib6_rules_ops: fib_rules_unregister(ops); goto out; } static void __net_exit fib6_rules_net_exit_batch(struct list_head *net_list) { struct net *net; rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { fib_rules_unregister(net->ipv6.fib6_rules_ops); cond_resched(); } rtnl_unlock(); } static struct pernet_operations fib6_rules_net_ops = { .init = fib6_rules_net_init, .exit_batch = fib6_rules_net_exit_batch, }; int __init fib6_rules_init(void) { return register_pernet_subsys(&fib6_rules_net_ops); } void fib6_rules_cleanup(void) { unregister_pernet_subsys(&fib6_rules_net_ops); }
23 1 2 21 22 23 14 11 3 8 10 2 2 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 /* * Routines to compress and uncompress tcp packets (for transmission * over low speed serial lines). * * Copyright (c) 1989 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms are permitted * provided that the above copyright notice and this paragraph are * duplicated in all such forms and that any documentation, * advertising materials, and other materials related to such * distribution and use acknowledge that the software was developed * by the University of California, Berkeley. The name of the * University may not be used to endorse or promote products derived * from this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * Van Jacobson (van@helios.ee.lbl.gov), Dec 31, 1989: * - Initial distribution. 
* * * modified for KA9Q Internet Software Package by * Katie Stevens (dkstevens@ucdavis.edu) * University of California, Davis * Computing Services * - 01-31-90 initial adaptation (from 1.19) * PPP.05 02-15-90 [ks] * PPP.08 05-02-90 [ks] use PPP protocol field to signal compression * PPP.15 09-90 [ks] improve mbuf handling * PPP.16 11-02 [karn] substantially rewritten to use NOS facilities * * - Feb 1991 Bill_Simpson@um.cc.umich.edu * variable number of conversation slots * allow zero or one slots * separate routines * status display * - Jul 1994 Dmitry Gorodchanin * Fixes for memory leaks. * - Oct 1994 Dmitry Gorodchanin * Modularization. * - Jan 1995 Bjorn Ekwall * Use ip_fast_csum from ip.h * - July 1995 Christos A. Polyzols * Spotted bug in tcp option checking * * * This module is a difficult issue. It's clearly inet code but it's also clearly * driver code belonging close to PPP and SLIP */ #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/kernel.h> #include <net/slhc_vj.h> #ifdef CONFIG_INET /* Entire module is for IP only */ #include <linux/mm.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/termios.h> #include <linux/in.h> #include <linux/fcntl.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <net/ip.h> #include <net/protocol.h> #include <net/icmp.h> #include <net/tcp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/timer.h> #include <linux/uaccess.h> #include <net/checksum.h> #include <linux/unaligned.h> static unsigned char *encode(unsigned char *cp, unsigned short n); static long decode(unsigned char **cpp); static unsigned char * put16(unsigned char *cp, unsigned short x); static unsigned short pull16(unsigned char **cpp); /* Allocate compression data structure * slots must be in range 0 to 255 (zero meaning no compression) * Returns pointer to structure or ERR_PTR() on error. 
*/ struct slcompress * slhc_init(int rslots, int tslots) { short i; struct cstate *ts; struct slcompress *comp; if (rslots < 0 || rslots > 255 || tslots < 0 || tslots > 255) return ERR_PTR(-EINVAL); comp = kzalloc(sizeof(struct slcompress), GFP_KERNEL); if (! comp) goto out_fail; if (rslots > 0) { size_t rsize = rslots * sizeof(struct cstate); comp->rstate = kzalloc(rsize, GFP_KERNEL); if (! comp->rstate) goto out_free; comp->rslot_limit = rslots - 1; } if (tslots > 0) { size_t tsize = tslots * sizeof(struct cstate); comp->tstate = kzalloc(tsize, GFP_KERNEL); if (! comp->tstate) goto out_free2; comp->tslot_limit = tslots - 1; } comp->xmit_oldest = 0; comp->xmit_current = 255; comp->recv_current = 255; /* * don't accept any packets with implicit index until we get * one with an explicit index. Otherwise the uncompress code * will try to use connection 255, which is almost certainly * out of range */ comp->flags |= SLF_TOSS; if ( tslots > 0 ) { ts = comp->tstate; for(i = comp->tslot_limit; i > 0; --i){ ts[i].cs_this = i; ts[i].next = &(ts[i - 1]); } ts[0].next = &(ts[comp->tslot_limit]); ts[0].cs_this = 0; } return comp; out_free2: kfree(comp->rstate); out_free: kfree(comp); out_fail: return ERR_PTR(-ENOMEM); } /* Free a compression data structure */ void slhc_free(struct slcompress *comp) { if ( IS_ERR_OR_NULL(comp) ) return; if ( comp->tstate != NULLSLSTATE ) kfree( comp->tstate ); if ( comp->rstate != NULLSLSTATE ) kfree( comp->rstate ); kfree( comp ); } /* Put a short in host order into a char array in network order */ static inline unsigned char * put16(unsigned char *cp, unsigned short x) { *cp++ = x >> 8; *cp++ = x; return cp; } /* Encode a number */ static unsigned char * encode(unsigned char *cp, unsigned short n) { if(n >= 256 || n == 0){ *cp++ = 0; cp = put16(cp,n); } else { *cp++ = n; } return cp; } /* Pull a 16-bit integer in host order from buffer in network byte order */ static unsigned short pull16(unsigned char **cpp) { short rval; rval = *(*cpp)++; 
rval <<= 8; rval |= *(*cpp)++; return rval; } /* Decode a number */ static long decode(unsigned char **cpp) { int x; x = *(*cpp)++; if(x == 0){ return pull16(cpp) & 0xffff; /* pull16 returns -1 on error */ } else { return x & 0xff; /* -1 if PULLCHAR returned error */ } } /* * icp and isize are the original packet. * ocp is a place to put a copy if necessary. * cpp is initially a pointer to icp. If the copy is used, * change it to ocp. */ int slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, unsigned char *ocp, unsigned char **cpp, int compress_cid) { struct cstate *ocs = &(comp->tstate[comp->xmit_oldest]); struct cstate *lcs = ocs; struct cstate *cs = lcs->next; unsigned long deltaS, deltaA; short changes = 0; int nlen, hlen; unsigned char new_seq[16]; unsigned char *cp = new_seq; struct iphdr *ip; struct tcphdr *th, *oth; __sum16 csum; /* * Don't play with runt packets. */ if(isize<sizeof(struct iphdr)) return isize; ip = (struct iphdr *) icp; if (ip->version != 4 || ip->ihl < 5) return isize; /* Bail if this packet isn't TCP, or is an IP fragment */ if (ip->protocol != IPPROTO_TCP || (ntohs(ip->frag_off) & 0x3fff)) { /* Send as regular IP */ if(ip->protocol != IPPROTO_TCP) comp->sls_o_nontcp++; else comp->sls_o_tcp++; return isize; } nlen = ip->ihl * 4; if (isize < nlen + sizeof(*th)) return isize; th = (struct tcphdr *)(icp + nlen); if (th->doff < sizeof(struct tcphdr) / 4) return isize; hlen = nlen + th->doff * 4; /* Bail if the TCP packet isn't `compressible' (i.e., ACK isn't set or * some other control bit is set). Also uncompressible if * it's a runt. */ if(hlen > isize || th->syn || th->fin || th->rst || ! (th->ack)){ /* TCP connection stuff; send as regular IP */ comp->sls_o_tcp++; return isize; } /* * Packet is compressible -- we're going to send either a * COMPRESSED_TCP or UNCOMPRESSED_TCP packet. Either way, * we need to locate (or create) the connection state. 
* * States are kept in a circularly linked list with * xmit_oldest pointing to the end of the list. The * list is kept in lru order by moving a state to the * head of the list whenever it is referenced. Since * the list is short and, empirically, the connection * we want is almost always near the front, we locate * states via linear search. If we don't find a state * for the datagram, the oldest state is (re-)used. */ for ( ; ; ) { if( ip->saddr == cs->cs_ip.saddr && ip->daddr == cs->cs_ip.daddr && th->source == cs->cs_tcp.source && th->dest == cs->cs_tcp.dest) goto found; /* if current equal oldest, at end of list */ if ( cs == ocs ) break; lcs = cs; cs = cs->next; comp->sls_o_searches++; } /* * Didn't find it -- re-use oldest cstate. Send an * uncompressed packet that tells the other side what * connection number we're using for this conversation. * * Note that since the state list is circular, the oldest * state points to the newest and we only need to set * xmit_oldest to update the lru linkage. */ comp->sls_o_misses++; comp->xmit_oldest = lcs->cs_this; goto uncompressed; found: /* * Found it -- move to the front on the connection list. */ if(lcs == ocs) { /* found at most recently used */ } else if (cs == ocs) { /* found at least recently used */ comp->xmit_oldest = lcs->cs_this; } else { /* more than 2 elements */ lcs->next = cs->next; cs->next = ocs->next; ocs->next = cs; } /* * Make sure that only what we expect to change changed. * Check the following: * IP protocol version, header length & type of service. * The "Don't fragment" bit. * The time-to-live field. * The TCP header length. * IP options, if any. * TCP options, if any. * If any of these things are different between the previous & * current datagram, we send the current datagram `uncompressed'. 
*/ oth = &cs->cs_tcp; if(ip->version != cs->cs_ip.version || ip->ihl != cs->cs_ip.ihl || ip->tos != cs->cs_ip.tos || (ip->frag_off & htons(0x4000)) != (cs->cs_ip.frag_off & htons(0x4000)) || ip->ttl != cs->cs_ip.ttl || th->doff != cs->cs_tcp.doff || (ip->ihl > 5 && memcmp(ip+1,cs->cs_ipopt,((ip->ihl)-5)*4) != 0) || (th->doff > 5 && memcmp(th+1,cs->cs_tcpopt,((th->doff)-5)*4) != 0)){ goto uncompressed; } /* * Figure out which of the changing fields changed. The * receiver expects changes in the order: urgent, window, * ack, seq (the order minimizes the number of temporaries * needed in this section of code). */ if(th->urg){ deltaS = ntohs(th->urg_ptr); cp = encode(cp,deltaS); changes |= NEW_U; } else if(th->urg_ptr != oth->urg_ptr){ /* argh! URG not set but urp changed -- a sensible * implementation should never do this but RFC793 * doesn't prohibit the change so we have to deal * with it. */ goto uncompressed; } if((deltaS = ntohs(th->window) - ntohs(oth->window)) != 0){ cp = encode(cp,deltaS); changes |= NEW_W; } if((deltaA = ntohl(th->ack_seq) - ntohl(oth->ack_seq)) != 0L){ if(deltaA > 0x0000ffff) goto uncompressed; cp = encode(cp,deltaA); changes |= NEW_A; } if((deltaS = ntohl(th->seq) - ntohl(oth->seq)) != 0L){ if(deltaS > 0x0000ffff) goto uncompressed; cp = encode(cp,deltaS); changes |= NEW_S; } switch(changes){ case 0: /* Nothing changed. If this packet contains data and the * last one didn't, this is probably a data packet following * an ack (normal on an interactive connection) and we send * it compressed. Otherwise it's probably a retransmit, * retransmitted ack or window probe. Send it uncompressed * in case the other side missed the compressed version. */ if(ip->tot_len != cs->cs_ip.tot_len && ntohs(cs->cs_ip.tot_len) == hlen) break; goto uncompressed; case SPECIAL_I: case SPECIAL_D: /* actual changes match one of our special case encodings -- * send packet uncompressed. 
*/ goto uncompressed; case NEW_S|NEW_A: if(deltaS == deltaA && deltaS == ntohs(cs->cs_ip.tot_len) - hlen){ /* special case for echoed terminal traffic */ changes = SPECIAL_I; cp = new_seq; } break; case NEW_S: if(deltaS == ntohs(cs->cs_ip.tot_len) - hlen){ /* special case for data xfer */ changes = SPECIAL_D; cp = new_seq; } break; } deltaS = ntohs(ip->id) - ntohs(cs->cs_ip.id); if(deltaS != 1){ cp = encode(cp,deltaS); changes |= NEW_I; } if(th->psh) changes |= TCP_PUSH_BIT; /* Grab the cksum before we overwrite it below. Then update our * state with this packet's header. */ csum = th->check; memcpy(&cs->cs_ip,ip,20); memcpy(&cs->cs_tcp,th,20); /* We want to use the original packet as our compressed packet. * (cp - new_seq) is the number of bytes we need for compressed * sequence numbers. In addition we need one byte for the change * mask, one for the connection id and two for the tcp checksum. * So, (cp - new_seq) + 4 bytes of header are needed. */ deltaS = cp - new_seq; if(compress_cid == 0 || comp->xmit_current != cs->cs_this){ cp = ocp; *cpp = ocp; *cp++ = changes | NEW_C; *cp++ = cs->cs_this; comp->xmit_current = cs->cs_this; } else { cp = ocp; *cpp = ocp; *cp++ = changes; } *(__sum16 *)cp = csum; cp += 2; /* deltaS is now the size of the change section of the compressed header */ memcpy(cp,new_seq,deltaS); /* Write list of deltas */ memcpy(cp+deltaS,icp+hlen,isize-hlen); comp->sls_o_compressed++; ocp[0] |= SL_TYPE_COMPRESSED_TCP; return isize - hlen + deltaS + (cp - ocp); /* Update connection state cs & send uncompressed packet (i.e., * a regular ip/tcp packet but with the 'conversation id' we hope * to use on future compressed packets in the protocol field). 
*/ uncompressed: memcpy(&cs->cs_ip,ip,20); memcpy(&cs->cs_tcp,th,20); if (ip->ihl > 5) memcpy(cs->cs_ipopt, ip+1, ((ip->ihl) - 5) * 4); if (th->doff > 5) memcpy(cs->cs_tcpopt, th+1, ((th->doff) - 5) * 4); comp->xmit_current = cs->cs_this; comp->sls_o_uncompressed++; memcpy(ocp, icp, isize); *cpp = ocp; ocp[9] = cs->cs_this; ocp[0] |= SL_TYPE_UNCOMPRESSED_TCP; return isize; } int slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize) { int changes; long x; struct tcphdr *thp; struct iphdr *ip; struct cstate *cs; int len, hdrlen; unsigned char *cp = icp; /* We've got a compressed packet; read the change byte */ comp->sls_i_compressed++; if(isize < 3){ comp->sls_i_error++; return 0; } changes = *cp++; if(changes & NEW_C){ /* Make sure the state index is in range, then grab the state. * If we have a good state index, clear the 'discard' flag. */ x = *cp++; /* Read conn index */ if(x < 0 || x > comp->rslot_limit) goto bad; /* Check if the cstate is initialized */ if (!comp->rstate[x].initialized) goto bad; comp->flags &=~ SLF_TOSS; comp->recv_current = x; } else { /* this packet has an implicit state index. If we've * had a line error since the last time we got an * explicit state index, we have to toss the packet. */ if(comp->flags & SLF_TOSS){ comp->sls_i_tossed++; return 0; } } cs = &comp->rstate[comp->recv_current]; thp = &cs->cs_tcp; ip = &cs->cs_ip; thp->check = *(__sum16 *)cp; cp += 2; thp->psh = (changes & TCP_PUSH_BIT) ? 
1 : 0; /* * we can use the same number for the length of the saved header and * the current one, because the packet wouldn't have been sent * as compressed unless the options were the same as the previous one */ hdrlen = ip->ihl * 4 + thp->doff * 4; switch(changes & SPECIALS_MASK){ case SPECIAL_I: /* Echoed terminal traffic */ { short i; i = ntohs(ip->tot_len) - hdrlen; thp->ack_seq = htonl( ntohl(thp->ack_seq) + i); thp->seq = htonl( ntohl(thp->seq) + i); } break; case SPECIAL_D: /* Unidirectional data */ thp->seq = htonl( ntohl(thp->seq) + ntohs(ip->tot_len) - hdrlen); break; default: if(changes & NEW_U){ thp->urg = 1; if((x = decode(&cp)) == -1) { goto bad; } thp->urg_ptr = htons(x); } else thp->urg = 0; if(changes & NEW_W){ if((x = decode(&cp)) == -1) { goto bad; } thp->window = htons( ntohs(thp->window) + x); } if(changes & NEW_A){ if((x = decode(&cp)) == -1) { goto bad; } thp->ack_seq = htonl( ntohl(thp->ack_seq) + x); } if(changes & NEW_S){ if((x = decode(&cp)) == -1) { goto bad; } thp->seq = htonl( ntohl(thp->seq) + x); } break; } if(changes & NEW_I){ if((x = decode(&cp)) == -1) { goto bad; } ip->id = htons (ntohs (ip->id) + x); } else ip->id = htons (ntohs (ip->id) + 1); /* * At this point, cp points to the first byte of data in the * packet. Put the reconstructed TCP and IP headers back on the * packet. Recalculate IP checksum (but not TCP checksum). 
*/ len = isize - (cp - icp); if (len < 0) goto bad; len += hdrlen; ip->tot_len = htons(len); ip->check = 0; memmove(icp + hdrlen, cp, len - hdrlen); cp = icp; memcpy(cp, ip, 20); cp += 20; if (ip->ihl > 5) { memcpy(cp, cs->cs_ipopt, (ip->ihl - 5) * 4); cp += (ip->ihl - 5) * 4; } put_unaligned(ip_fast_csum(icp, ip->ihl), &((struct iphdr *)icp)->check); memcpy(cp, thp, 20); cp += 20; if (thp->doff > 5) { memcpy(cp, cs->cs_tcpopt, ((thp->doff) - 5) * 4); cp += ((thp->doff) - 5) * 4; } return len; bad: comp->sls_i_error++; return slhc_toss( comp ); } int slhc_remember(struct slcompress *comp, unsigned char *icp, int isize) { const struct tcphdr *th; unsigned char index; struct iphdr *iph; struct cstate *cs; unsigned int ihl; /* The packet is shorter than a legal IP header. * Also make sure isize is positive. */ if (isize < (int)sizeof(struct iphdr)) { runt: comp->sls_i_runt++; return slhc_toss(comp); } iph = (struct iphdr *)icp; /* Peek at the IP header's IHL field to find its length */ ihl = iph->ihl; /* The IP header length field is too small, * or packet is shorter than the IP header followed * by minimal tcp header. 
*/ if (ihl < 5 || isize < ihl * 4 + sizeof(struct tcphdr)) goto runt; index = iph->protocol; iph->protocol = IPPROTO_TCP; if (ip_fast_csum(icp, ihl)) { /* Bad IP header checksum; discard */ comp->sls_i_badcheck++; return slhc_toss(comp); } if (index > comp->rslot_limit) { comp->sls_i_error++; return slhc_toss(comp); } th = (struct tcphdr *)(icp + ihl * 4); if (th->doff < sizeof(struct tcphdr) / 4) goto runt; if (isize < ihl * 4 + th->doff * 4) goto runt; /* Update local state */ cs = &comp->rstate[comp->recv_current = index]; comp->flags &=~ SLF_TOSS; memcpy(&cs->cs_ip, iph, sizeof(*iph)); memcpy(&cs->cs_tcp, th, sizeof(*th)); if (ihl > 5) memcpy(cs->cs_ipopt, &iph[1], (ihl - 5) * 4); if (th->doff > 5) memcpy(cs->cs_tcpopt, &th[1], (th->doff - 5) * 4); cs->cs_hsize = ihl*2 + th->doff*2; cs->initialized = true; /* Put headers back on packet * Neither header checksum is recalculated */ comp->sls_i_uncompressed++; return isize; } int slhc_toss(struct slcompress *comp) { if ( comp == NULLSLCOMPR ) return 0; comp->flags |= SLF_TOSS; return 0; } #else /* CONFIG_INET */ int slhc_toss(struct slcompress *comp) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_toss"); return -EINVAL; } int slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_uncompress"); return -EINVAL; } int slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, unsigned char *ocp, unsigned char **cpp, int compress_cid) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_compress"); return -EINVAL; } int slhc_remember(struct slcompress *comp, unsigned char *icp, int isize) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_remember"); return -EINVAL; } void slhc_free(struct slcompress *comp) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_free"); } struct slcompress * slhc_init(int rslots, int tslots) { printk(KERN_DEBUG "Called IP function on non IP-system: 
slhc_init"); return NULL; } #endif /* CONFIG_INET */ /* VJ header compression */ EXPORT_SYMBOL(slhc_init); EXPORT_SYMBOL(slhc_free); EXPORT_SYMBOL(slhc_remember); EXPORT_SYMBOL(slhc_compress); EXPORT_SYMBOL(slhc_uncompress); EXPORT_SYMBOL(slhc_toss); MODULE_DESCRIPTION("Compression helpers for SLIP (serial line)"); MODULE_LICENSE("Dual BSD/GPL");
13 13 13 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 
2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 
2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 
3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 /* * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. * Copyright (c) 2014,2018 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/security.h> #include <linux/xarray.h> #include <rdma/ib_cache.h> #include "mad_priv.h" #include "core_priv.h" #include "mad_rmpp.h" #include "smi.h" #include "opa_smi.h" #include "agent.h" #define CREATE_TRACE_POINTS #include <trace/events/ib_mad.h> #ifdef CONFIG_TRACEPOINTS static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_qp_info *qp_info, struct trace_event_raw_ib_mad_send_template *entry) { struct ib_ud_wr *wr = &mad_send_wr->send_wr; struct rdma_ah_attr attr = {}; rdma_query_ah(wr->ah, &attr); /* These are common */ entry->sl = attr.sl; entry->rqpn = wr->remote_qpn; entry->rqkey = wr->remote_qkey; entry->dlid = rdma_ah_get_dlid(&attr); } #endif static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; module_param_named(send_queue_size, mad_sendq_size, int, 0444); MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests"); module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); static DEFINE_XARRAY_ALLOC1(ib_mad_clients); static u32 ib_mad_client_next; static struct list_head ib_mad_port_list; /* Port list lock */ static DEFINE_SPINLOCK(ib_mad_port_list_lock); /* Forward declarations */ static int method_in_use(struct ib_mad_mgmt_method_table 
**method, struct ib_mad_reg_req *mad_reg_req); static void remove_mad_reg_req(struct ib_mad_agent_private *priv); static struct ib_mad_agent_private *find_mad_agent( struct ib_mad_port_private *port_priv, const struct ib_mad_hdr *mad); static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad); static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); static void timeout_sends(struct work_struct *work); static void local_completions(struct work_struct *work); static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv, u8 mgmt_class); static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv); static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, struct ib_wc *wc); static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc); /* * Returns a ib_mad_port_private structure or NULL for a device/port * Assumes ib_mad_port_list_lock is being held */ static inline struct ib_mad_port_private * __ib_get_mad_port(struct ib_device *device, u32 port_num) { struct ib_mad_port_private *entry; list_for_each_entry(entry, &ib_mad_port_list, port_list) { if (entry->device == device && entry->port_num == port_num) return entry; } return NULL; } /* * Wrapper function to return a ib_mad_port_private structure or NULL * for a device/port */ static inline struct ib_mad_port_private * ib_get_mad_port(struct ib_device *device, u32 port_num) { struct ib_mad_port_private *entry; unsigned long flags; spin_lock_irqsave(&ib_mad_port_list_lock, flags); entry = __ib_get_mad_port(device, port_num); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); return entry; } static inline u8 convert_mgmt_class(u8 mgmt_class) { /* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */ return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ? 
0 : mgmt_class; } static int get_spl_qp_index(enum ib_qp_type qp_type) { switch (qp_type) { case IB_QPT_SMI: return 0; case IB_QPT_GSI: return 1; default: return -1; } } static int vendor_class_index(u8 mgmt_class) { return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START; } static int is_vendor_class(u8 mgmt_class) { if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) || (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END)) return 0; return 1; } static int is_vendor_oui(char *oui) { if (oui[0] || oui[1] || oui[2]) return 1; return 0; } static int is_vendor_method_in_use( struct ib_mad_mgmt_vendor_class *vendor_class, struct ib_mad_reg_req *mad_reg_req) { struct ib_mad_mgmt_method_table *method; int i; for (i = 0; i < MAX_MGMT_OUI; i++) { if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) { method = vendor_class->method_table[i]; if (method) { if (method_in_use(&method, mad_reg_req)) return 1; else break; } } } return 0; } int ib_response_mad(const struct ib_mad_hdr *hdr) { return ((hdr->method & IB_MGMT_METHOD_RESP) || (hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) || ((hdr->mgmt_class == IB_MGMT_CLASS_BM) && (hdr->attr_mod & IB_BM_ATTR_MOD_RESP))); } EXPORT_SYMBOL(ib_response_mad); /* * ib_register_mad_agent - Register to send/receive MADs * * Context: Process context. 
*/
/*
 * Validates the QP type, RMPP version and optional registration request,
 * allocates and initialises the private agent, assigns it a hi_tid from
 * ib_mad_clients and, when a registration request is supplied, records the
 * requested methods in the port's class/vendor tables under reg_lock.
 *
 * Returns a pointer to the embedded struct ib_mad_agent on success, or an
 * ERR_PTR() value on failure (-EINVAL unless a more specific code is set
 * below).
 */
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
					   u32 port_num,
					   enum ib_qp_type qp_type,
					   struct ib_mad_reg_req *mad_reg_req,
					   u8 rmpp_version,
					   ib_mad_send_handler send_handler,
					   ib_mad_recv_handler recv_handler,
					   void *context,
					   u32 registration_flags)
{
	struct ib_mad_port_private *port_priv;
	/* Default error for every validation failure below */
	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_reg_req *reg_req = NULL;
	struct ib_mad_mgmt_class_table *class;
	struct ib_mad_mgmt_vendor_class_table *vendor;
	struct ib_mad_mgmt_vendor_class *vendor_class;
	struct ib_mad_mgmt_method_table *method;
	int ret2, qpn;
	u8 mgmt_class, vclass;

	if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
	    (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
		return ERR_PTR(-EPROTONOSUPPORT);

	/* Validate parameters */
	qpn = get_spl_qp_index(qp_type);
	if (qpn == -1) {
		dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
				    __func__, qp_type);
		goto error1;
	}

	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
		dev_dbg_ratelimited(&device->dev,
				    "%s: invalid RMPP Version %u\n",
				    __func__, rmpp_version);
		goto error1;
	}

	/* Validate MAD registration request if supplied */
	if (mad_reg_req) {
		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
			dev_dbg_ratelimited(&device->dev,
					    "%s: invalid Class Version %u\n",
					    __func__,
					    mad_reg_req->mgmt_class_version);
			goto error1;
		}
		if (!recv_handler) {
			dev_dbg_ratelimited(&device->dev,
					    "%s: no recv_handler\n", __func__);
			goto error1;
		}
		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
			/*
			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
			 * one in this range currently allowed
			 */
			if (mad_reg_req->mgmt_class !=
			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
				dev_dbg_ratelimited(&device->dev,
					"%s: Invalid Mgmt Class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		} else if (mad_reg_req->mgmt_class == 0) {
			/*
			 * Class 0 is reserved in IBA and is used for
			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
			 */
			dev_dbg_ratelimited(&device->dev,
					    "%s: Invalid Mgmt Class 0\n",
					    __func__);
			goto error1;
		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
			/*
			 * If class is in "new" vendor range,
			 * ensure supplied OUI is not zero
			 */
			if (!is_vendor_oui(mad_reg_req->oui)) {
				dev_dbg_ratelimited(&device->dev,
					"%s: No OUI specified for class 0x%x\n",
					__func__,
					mad_reg_req->mgmt_class);
				goto error1;
			}
		}
		/* Make sure class supplied is consistent with RMPP */
		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
			if (rmpp_version) {
				dev_dbg_ratelimited(&device->dev,
					"%s: RMPP version for non-RMPP class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		}

		/* Make sure class supplied is consistent with QP type */
		if (qp_type == IB_QPT_SMI) {
			if ((mad_reg_req->mgmt_class !=
					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
			    (mad_reg_req->mgmt_class !=
					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
				dev_dbg_ratelimited(&device->dev,
					"%s: Invalid SM QP type: class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		} else {
			if ((mad_reg_req->mgmt_class ==
					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
			    (mad_reg_req->mgmt_class ==
					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
				dev_dbg_ratelimited(&device->dev,
					"%s: Invalid GS QP type: class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		}
	} else {
		/* No registration request supplied */
		if (!send_handler)
			goto error1;
		if (registration_flags & IB_MAD_USER_RMPP)
			goto error1;
	}

	/* Validate device and port */
	port_priv = ib_get_mad_port(device, port_num);
	if (!port_priv) {
		dev_dbg_ratelimited(&device->dev, "%s: Invalid port %u\n",
				    __func__, port_num);
		ret = ERR_PTR(-ENODEV);
		goto error1;
	}

	/* Verify the QP requested is supported. For example, Ethernet devices
	 * will not have QP0.
	 */
	if (!port_priv->qp_info[qpn].qp) {
		dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
				    __func__, qpn);
		ret = ERR_PTR(-EPROTONOSUPPORT);
		goto error1;
	}

	/* Allocate structures */
	mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
	if (!mad_agent_priv) {
		ret = ERR_PTR(-ENOMEM);
		goto error1;
	}

	if (mad_reg_req) {
		/* Keep a private copy; the caller's request is not retained */
		reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL);
		if (!reg_req) {
			ret = ERR_PTR(-ENOMEM);
			goto error3;
		}
	}

	/* Now, fill in the various structures */
	mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
	mad_agent_priv->reg_req = reg_req;
	mad_agent_priv->agent.rmpp_version = rmpp_version;
	mad_agent_priv->agent.device = device;
	mad_agent_priv->agent.recv_handler = recv_handler;
	mad_agent_priv->agent.send_handler = send_handler;
	mad_agent_priv->agent.context = context;
	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
	mad_agent_priv->agent.port_num = port_num;
	mad_agent_priv->agent.flags = registration_flags;
	spin_lock_init(&mad_agent_priv->lock);
	INIT_LIST_HEAD(&mad_agent_priv->send_list);
	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
	INIT_LIST_HEAD(&mad_agent_priv->done_list);
	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
	INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
	INIT_LIST_HEAD(&mad_agent_priv->local_list);
	INIT_WORK(&mad_agent_priv->local_work, local_completions);
	/* Initial reference; dropped in unregister_mad_agent() */
	refcount_set(&mad_agent_priv->refcount, 1);
	init_completion(&mad_agent_priv->comp);

	ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
	if (ret2) {
		ret = ERR_PTR(ret2);
		goto error4;
	}

	/*
	 * The mlx4 driver uses the top byte to distinguish which virtual
	 * function generated the MAD, so we must avoid using it.
	 */
	ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid,
			mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1),
			&ib_mad_client_next, GFP_KERNEL);
	if (ret2 < 0) {
		ret = ERR_PTR(ret2);
		goto error5;
	}

	/*
	 * Make sure MAD registration (if supplied)
	 * is non overlapping with any existing ones
	 */
	spin_lock_irq(&port_priv->reg_lock);
	if (mad_reg_req) {
		mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
		if (!is_vendor_class(mgmt_class)) {
			class = port_priv->version[mad_reg_req->
						   mgmt_class_version].class;
			if (class) {
				method = class->method_table[mgmt_class];
				if (method) {
					/* ret still holds ERR_PTR(-EINVAL) here */
					if (method_in_use(&method,
							   mad_reg_req))
						goto error6;
				}
			}
			ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
						  mgmt_class);
		} else {
			/* "New" vendor class range */
			vendor = port_priv->version[mad_reg_req->
						    mgmt_class_version].vendor;
			if (vendor) {
				vclass = vendor_class_index(mgmt_class);
				vendor_class = vendor->vendor_class[vclass];
				if (vendor_class) {
					if (is_vendor_method_in_use(
							vendor_class,
							mad_reg_req))
						goto error6;
				}
			}
			ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
		}
		if (ret2) {
			ret = ERR_PTR(ret2);
			goto error6;
		}
	}
	spin_unlock_irq(&port_priv->reg_lock);

	trace_ib_mad_create_agent(mad_agent_priv);
	return &mad_agent_priv->agent;
	/* Error unwinding: each label undoes one setup step, newest first */
error6:
	spin_unlock_irq(&port_priv->reg_lock);
	xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
error5:
	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
error4:
	kfree(reg_req);
error3:
	kfree(mad_agent_priv);
error1:
	return ret;
}
EXPORT_SYMBOL(ib_register_mad_agent);

/*
 * Drop one reference on the agent; the final put signals the completion
 * that unregister_mad_agent() waits on.
 */
static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
	if (refcount_dec_and_test(&mad_agent_priv->refcount))
		complete(&mad_agent_priv->comp);
}

static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
	struct ib_mad_port_private *port_priv;

	/* Note that we could still be handling received MADs */
	trace_ib_mad_unregister_agent(mad_agent_priv);

	/*
	 * Canceling all sends results in dropping received response
	 * MADs, preventing us from
queuing additional work
	 */
	cancel_mads(mad_agent_priv);
	port_priv = mad_agent_priv->qp_info->port_priv;
	cancel_delayed_work(&mad_agent_priv->timed_work);

	/* Remove the agent from the method tables and the client xarray */
	spin_lock_irq(&port_priv->reg_lock);
	remove_mad_reg_req(mad_agent_priv);
	spin_unlock_irq(&port_priv->reg_lock);
	xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);

	flush_workqueue(port_priv->wq);

	/* Drop our own reference, then wait until every other one is gone */
	deref_mad_agent(mad_agent_priv);
	wait_for_completion(&mad_agent_priv->comp);
	ib_cancel_rmpp_recvs(mad_agent_priv);

	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);

	kfree(mad_agent_priv->reg_req);
	kfree_rcu(mad_agent_priv, rcu);
}

/*
 * ib_unregister_mad_agent - Unregisters a client from using MAD services
 *
 * Context: Process context.
 */
void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
	struct ib_mad_agent_private *mad_agent_priv;

	mad_agent_priv = container_of(mad_agent,
				      struct ib_mad_agent_private,
				      agent);
	unregister_mad_agent(mad_agent_priv);
}
EXPORT_SYMBOL(ib_unregister_mad_agent);

/*
 * Unlink @mad_list from its owning queue and decrement that queue's
 * count, both under the queue lock.
 */
static void dequeue_mad(struct ib_mad_list_head *mad_list)
{
	struct ib_mad_queue *mad_queue;
	unsigned long flags;

	mad_queue = mad_list->mad_queue;
	spin_lock_irqsave(&mad_queue->lock, flags);
	list_del(&mad_list->list);
	mad_queue->count--;
	spin_unlock_irqrestore(&mad_queue->lock, flags);
}

/*
 * Zero @wc and fill it in as a successful receive completion sourced from
 * QP0, using the supplied QP, CQE, SLID, pkey index and port number.
 */
static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
		u16 pkey_index, u32 port_num, struct ib_wc *wc)
{
	memset(wc, 0, sizeof *wc);
	wc->wr_cqe = cqe;
	wc->status = IB_WC_SUCCESS;
	wc->opcode = IB_WC_RECV;
	wc->pkey_index = pkey_index;
	wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
	wc->src_qp = IB_QP0;
	wc->qp = qp;
	wc->slid = slid;
	wc->sl = 0;
	wc->dlid_path_bits = 0;
	wc->port_num = port_num;
}

/* Total size of a private MAD: the fixed structure plus mp->mad_size */
static size_t mad_priv_size(const struct ib_mad_private *mp)
{
	return sizeof(struct ib_mad_private) + mp->mad_size;
}

/*
 * Allocate a zeroed private MAD with room for @mad_size trailing bytes and
 * record that size in the structure. Returns NULL on allocation failure.
 */
static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags)
{
	size_t size = sizeof(struct ib_mad_private) + mad_size;
	struct ib_mad_private *ret = kzalloc(size, flags);

	if (ret)
		ret->mad_size = mad_size;

	return ret;
}

/* Maximum MAD size reported by rdma_max_mad_size() for this port */
static size_t port_mad_size(const struct ib_mad_port_private *port_priv)
{
	return rdma_max_mad_size(port_priv->device, port_priv->port_num);
}

/* Size used for DMA of a private MAD: a GRH plus mp->mad_size */
static size_t mad_priv_dma_size(const struct ib_mad_private *mp)
{
	return sizeof(struct ib_grh) + mp->mad_size;
}

/*
 * Return 0 if SMP is to be sent
 * Return 1 if SMP was consumed locally (whether or not solicited)
 * Return < 0 if error
 */
static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
				  struct ib_mad_send_wr_private *mad_send_wr)
{
	int ret = 0;
	struct ib_smp *smp = mad_send_wr->send_buf.mad;
	/* Same buffer viewed through the OPA SMP layout */
	struct opa_smp *opa_smp = (struct opa_smp *)smp;
	unsigned long flags;
	struct ib_mad_local_private *local;
	struct ib_mad_private *mad_priv;
	struct ib_mad_port_private *port_priv;
	struct ib_mad_agent_private *recv_mad_agent = NULL;
	struct ib_device *device = mad_agent_priv->agent.device;
	u32 port_num;
	struct ib_wc mad_wc;
	struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
	size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
	u16 out_mad_pkey_index = 0;
	u16 drslid;
	bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
				    mad_agent_priv->qp_info->port_priv->port_num);

	/* On a switch the port comes from the work request, not the agent */
	if (rdma_cap_ib_switch(device) &&
	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
		port_num = send_wr->port_num;
	else
		port_num = mad_agent_priv->agent.port_num;

	/*
	 * Directed route handling starts if the initial LID routed part of
	 * a request or the ending LID routed part of a response is empty.
	 * If we are at the start of the LID routed part, don't update the
	 * hop_ptr or hop_cnt.  See section 14.2.2, Vol 1 IB spec.
	 */
	if (opa && smp->class_version == OPA_SM_CLASS_VERSION) {
		u32 opa_drslid;

		trace_ib_mad_handle_out_opa_smi(opa_smp);

		if ((opa_get_smp_direction(opa_smp)
		     ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) ==
		     OPA_LID_PERMISSIVE &&
		     opa_smi_handle_dr_smp_send(opa_smp,
						rdma_cap_ib_switch(device),
						port_num) == IB_SMI_DISCARD) {
			ret = -EINVAL;
			dev_err(&device->dev, "OPA Invalid directed route\n");
			goto out;
		}
		opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid);
		/* Apart from the permissive LID, only 16-bit values are accepted */
		if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) &&
		    opa_drslid & 0xffff0000) {
			ret = -EINVAL;
			dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n",
			       opa_drslid);
			goto out;
		}
		drslid = (u16)(opa_drslid & 0x0000ffff);

		/* Check to post send on QP or process locally */
		if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD &&
		    opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD)
			goto out;
	} else {
		trace_ib_mad_handle_out_ib_smi(smp);

		if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
		     IB_LID_PERMISSIVE &&
		     smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) ==
		     IB_SMI_DISCARD) {
			ret = -EINVAL;
			dev_err(&device->dev, "Invalid directed route\n");
			goto out;
		}
		drslid = be16_to_cpu(smp->dr_slid);

		/* Check to post send on QP or process locally */
		if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
		    smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
			goto out;
	}

	/* From here on the SMP is handled locally */
	local = kmalloc(sizeof *local, GFP_ATOMIC);
	if (!local) {
		ret = -ENOMEM;
		goto out;
	}
	local->mad_priv = NULL;
	local->recv_mad_agent = NULL;
	mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC);
	if (!mad_priv) {
		ret = -ENOMEM;
		kfree(local);
		goto out;
	}

	build_smp_wc(mad_agent_priv->agent.qp,
		     send_wr->wr.wr_cqe, drslid,
		     send_wr->pkey_index,
		     send_wr->port_num, &mad_wc);

	if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) {
		mad_wc.byte_len = mad_send_wr->send_buf.hdr_len
					+ mad_send_wr->send_buf.data_len
					+ sizeof(struct ib_grh);
	}

	/* No GRH for DR SMP */
	ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL,
				      (const struct ib_mad *)smp,
				      (struct ib_mad *)mad_priv->mad, &mad_size,
				      &out_mad_pkey_index);
	switch (ret) {
	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
		if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) &&
		    mad_agent_priv->agent.recv_handler) {
			local->mad_priv = mad_priv;
			local->recv_mad_agent = mad_agent_priv;
			/*
			 * Reference MAD agent until receive
			 * side of local completion handled
			 */
			refcount_inc(&mad_agent_priv->refcount);
		} else
			kfree(mad_priv);
		break;
	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
		kfree(mad_priv);
		break;
	case IB_MAD_RESULT_SUCCESS:
		/* Treat like an incoming receive MAD */
		port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
					    mad_agent_priv->agent.port_num);
		if (port_priv) {
			memcpy(mad_priv->mad, smp, mad_priv->mad_size);
			recv_mad_agent = find_mad_agent(port_priv,
							(const struct ib_mad_hdr *)mad_priv->mad);
		}
		if (!port_priv || !recv_mad_agent) {
			/*
			 * No receiving agent so drop packet and
			 * generate send completion.
			 */
			kfree(mad_priv);
			break;
		}
		local->mad_priv = mad_priv;
		local->recv_mad_agent = recv_mad_agent;
		break;
	default:
		kfree(mad_priv);
		kfree(local);
		ret = -EINVAL;
		goto out;
	}

	local->mad_send_wr = mad_send_wr;
	if (opa) {
		local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index;
		local->return_wc_byte_len = mad_size;
	}
	/* Reference MAD agent until send side of local completion handled */
	refcount_inc(&mad_agent_priv->refcount);
	/* Queue local completion to local list */
	spin_lock_irqsave(&mad_agent_priv->lock, flags);
	list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
	queue_work(mad_agent_priv->qp_info->port_priv->wq,
		   &mad_agent_priv->local_work);
	ret = 1;
out:
	return ret;
}

/*
 * Number of pad bytes needed so that @data_len fills a whole number of
 * segments of (mad_size - hdr_len) bytes. When @data_len or the segment
 * size is zero, the segment size itself is returned.
 */
static int get_pad_size(int hdr_len, int data_len, size_t mad_size)
{
	int seg_size, pad;

	seg_size = mad_size - hdr_len;
	if (data_len && seg_size) {
		pad = seg_size - data_len % seg_size;
		return pad == seg_size ?
0 : pad; } else return seg_size; } static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_segment *s, *t; list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) { list_del(&s->list); kfree(s); } } static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, size_t mad_size, gfp_t gfp_mask) { struct ib_mad_send_buf *send_buf = &send_wr->send_buf; struct ib_rmpp_mad *rmpp_mad = send_buf->mad; struct ib_rmpp_segment *seg = NULL; int left, seg_size, pad; send_buf->seg_size = mad_size - send_buf->hdr_len; send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR; seg_size = send_buf->seg_size; pad = send_wr->pad; /* Allocate data segments. */ for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { seg = kmalloc(sizeof(*seg) + seg_size, gfp_mask); if (!seg) { free_send_rmpp_list(send_wr); return -ENOMEM; } seg->num = ++send_buf->seg_count; list_add_tail(&seg->list, &send_wr->rmpp_list); } /* Zero any padding */ if (pad) memset(seg->data + seg_size - pad, 0, pad); rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv-> agent.rmpp_version; rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); send_wr->cur_seg = container_of(send_wr->rmpp_list.next, struct ib_rmpp_segment, list); send_wr->last_ack_seg = send_wr->cur_seg; return 0; } int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent) { return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP); } EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent); struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, int rmpp_active, int hdr_len, int data_len, gfp_t gfp_mask, u8 base_version) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; int pad, message_size, ret, size; void *buf; size_t mad_size; bool opa; mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); opa = 
rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num); if (opa && base_version == OPA_MGMT_BASE_VERSION) mad_size = sizeof(struct opa_mad); else mad_size = sizeof(struct ib_mad); pad = get_pad_size(hdr_len, data_len, mad_size); message_size = hdr_len + data_len + pad; if (ib_mad_kernel_rmpp_agent(mad_agent)) { if (!rmpp_active && message_size > mad_size) return ERR_PTR(-EINVAL); } else if (rmpp_active || message_size > mad_size) return ERR_PTR(-EINVAL); size = rmpp_active ? hdr_len : mad_size; buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); mad_send_wr = buf + size; INIT_LIST_HEAD(&mad_send_wr->rmpp_list); mad_send_wr->send_buf.mad = buf; mad_send_wr->send_buf.hdr_len = hdr_len; mad_send_wr->send_buf.data_len = data_len; mad_send_wr->pad = pad; mad_send_wr->mad_agent_priv = mad_agent_priv; mad_send_wr->sg_list[0].length = hdr_len; mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey; /* OPA MADs don't have to be the full 2048 bytes */ if (opa && base_version == OPA_MGMT_BASE_VERSION && data_len < mad_size - hdr_len) mad_send_wr->sg_list[1].length = data_len; else mad_send_wr->sg_list[1].length = mad_size - hdr_len; mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey; mad_send_wr->mad_list.cqe.done = ib_mad_send_done; mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe; mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list; mad_send_wr->send_wr.wr.num_sge = 2; mad_send_wr->send_wr.wr.opcode = IB_WR_SEND; mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED; mad_send_wr->send_wr.remote_qpn = remote_qpn; mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY; mad_send_wr->send_wr.pkey_index = pkey_index; if (rmpp_active) { ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask); if (ret) { kfree(buf); return ERR_PTR(ret); } } mad_send_wr->send_buf.mad_agent = mad_agent; refcount_inc(&mad_agent_priv->refcount); return &mad_send_wr->send_buf; } EXPORT_SYMBOL(ib_create_send_mad); int 
ib_get_mad_data_offset(u8 mgmt_class) { if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) return IB_MGMT_SA_HDR; else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || (mgmt_class == IB_MGMT_CLASS_BIS)) return IB_MGMT_DEVICE_HDR; else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) return IB_MGMT_VENDOR_HDR; else return IB_MGMT_MAD_HDR; } EXPORT_SYMBOL(ib_get_mad_data_offset); int ib_is_mad_class_rmpp(u8 mgmt_class) { if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) || (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || (mgmt_class == IB_MGMT_CLASS_BIS) || ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))) return 1; return 0; } EXPORT_SYMBOL(ib_is_mad_class_rmpp); void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num) { struct ib_mad_send_wr_private *mad_send_wr; struct list_head *list; mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); list = &mad_send_wr->cur_seg->list; if (mad_send_wr->cur_seg->num < seg_num) { list_for_each_entry(mad_send_wr->cur_seg, list, list) if (mad_send_wr->cur_seg->num == seg_num) break; } else if (mad_send_wr->cur_seg->num > seg_num) { list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list) if (mad_send_wr->cur_seg->num == seg_num) break; } return mad_send_wr->cur_seg->data; } EXPORT_SYMBOL(ib_get_rmpp_segment); static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr) { if (mad_send_wr->send_buf.seg_count) return ib_get_rmpp_segment(&mad_send_wr->send_buf, mad_send_wr->seg_num); else return mad_send_wr->send_buf.mad + mad_send_wr->send_buf.hdr_len; } void ib_free_send_mad(struct ib_mad_send_buf *send_buf) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; mad_agent_priv = container_of(send_buf->mad_agent, struct ib_mad_agent_private, agent); 
mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
				   send_buf);

	free_send_rmpp_list(mad_send_wr);
	kfree(send_buf->mad);
	deref_mad_agent(mad_agent_priv);
}
EXPORT_SYMBOL(ib_free_send_mad);

/*
 * DMA-map the header and payload of @mad_send_wr and hand it to the QP.
 * When the send queue already holds max_active entries the request is
 * placed on the overflow list instead of being posted. Returns 0 on
 * success, -ENOMEM on a mapping failure, or the ib_post_send() error, in
 * which case both mappings are released again.
 */
int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
{
	struct ib_mad_qp_info *qp_info;
	struct list_head *list;
	struct ib_mad_agent *mad_agent;
	struct ib_sge *sge;
	unsigned long flags;
	int ret;

	/* Set WR ID to find mad_send_wr upon completion */
	qp_info = mad_send_wr->mad_agent_priv->qp_info;
	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;

	mad_agent = mad_send_wr->send_buf.mad_agent;
	sge = mad_send_wr->sg_list;
	/* sge[0] maps the MAD header */
	sge[0].addr = ib_dma_map_single(mad_agent->device,
					mad_send_wr->send_buf.mad,
					sge[0].length,
					DMA_TO_DEVICE);
	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
		return -ENOMEM;

	mad_send_wr->header_mapping = sge[0].addr;

	/* sge[1] maps the payload (or the current RMPP segment) */
	sge[1].addr = ib_dma_map_single(mad_agent->device,
					ib_get_payload(mad_send_wr),
					sge[1].length,
					DMA_TO_DEVICE);
	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
		ib_dma_unmap_single(mad_agent->device,
				    mad_send_wr->header_mapping,
				    sge[0].length, DMA_TO_DEVICE);
		return -ENOMEM;
	}
	mad_send_wr->payload_mapping = sge[1].addr;

	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
		trace_ib_mad_ib_send_mad(mad_send_wr, qp_info);
		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
				   NULL);
		list = &qp_info->send_queue.list;
	} else {
		/* Queue is full: park the request on the overflow list */
		ret = 0;
		list = &qp_info->overflow_list;
	}

	if (!ret) {
		/* count is incremented for overflow entries as well */
		qp_info->send_queue.count++;
		list_add_tail(&mad_send_wr->mad_list.list, list);
	}
	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
	if (ret) {
		ib_dma_unmap_single(mad_agent->device,
				    mad_send_wr->header_mapping,
				    sge[0].length, DMA_TO_DEVICE);
		ib_dma_unmap_single(mad_agent->device,
				    mad_send_wr->payload_mapping,
				    sge[1].length, DMA_TO_DEVICE);
	}
	return ret;
}

/*
 * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
 *  with the registered client
 *
 * Walks the chain starting at @send_buf. Returns 0 when every buffer was
 * accepted; otherwise returns a negative error and, if @bad_send_buf is
 * non-NULL, stores the buffer that failed there.
 */
int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
		     struct ib_mad_send_buf **bad_send_buf)
{
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_send_buf *next_send_buf;
	struct ib_mad_send_wr_private *mad_send_wr;
	unsigned long flags;
	int ret = -EINVAL;

	/* Walk list of send WRs and post each on send list */
	for (; send_buf; send_buf = next_send_buf) {
		mad_send_wr = container_of(send_buf,
					   struct ib_mad_send_wr_private,
					   send_buf);
		mad_agent_priv = mad_send_wr->mad_agent_priv;

		ret = ib_mad_enforce_security(mad_agent_priv,
					      mad_send_wr->send_wr.pkey_index);
		if (ret)
			goto error;

		/* A timeout implies a response is expected, so it needs a recv_handler */
		if (!send_buf->mad_agent->send_handler ||
		    (send_buf->timeout_ms &&
		     !send_buf->mad_agent->recv_handler)) {
			ret = -EINVAL;
			goto error;
		}

		/* An RMPP agent may only send classes that support RMPP */
		if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) {
			if (mad_agent_priv->agent.rmpp_version) {
				ret = -EINVAL;
				goto error;
			}
		}

		/*
		 * Save pointer to next work request to post in case the
		 * current one completes, and the user modifies the work
		 * request associated with the completion
		 */
		next_send_buf = send_buf->next;
		mad_send_wr->send_wr.ah = send_buf->ah;

		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
		    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
			ret = handle_outgoing_dr_smp(mad_agent_priv,
						     mad_send_wr);
			if (ret < 0)		/* error */
				goto error;
			else if (ret == 1)	/* locally consumed */
				continue;
		}

		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
		/* Timeout will be updated after send completes */
		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
		mad_send_wr->max_retries = send_buf->retries;
		mad_send_wr->retries_left = send_buf->retries;
		send_buf->retries = 0;
		/* Reference for work request to QP + response */
		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
		mad_send_wr->status = IB_WC_SUCCESS;

		/* Reference MAD agent until send completes */
		refcount_inc(&mad_agent_priv->refcount);
		spin_lock_irqsave(&mad_agent_priv->lock, flags);
		list_add_tail(&mad_send_wr->agent_list,
			      &mad_agent_priv->send_list);
		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);

		if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
			ret = ib_send_rmpp_mad(mad_send_wr);
			if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
				ret = ib_send_mad(mad_send_wr);
		} else
			ret = ib_send_mad(mad_send_wr);
		if (ret < 0) {
			/* Fail send request */
			spin_lock_irqsave(&mad_agent_priv->lock, flags);
			list_del(&mad_send_wr->agent_list);
			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
			deref_mad_agent(mad_agent_priv);
			goto error;
		}
	}
	return 0;
error:
	if (bad_send_buf)
		*bad_send_buf = send_buf;
	return ret;
}
EXPORT_SYMBOL(ib_post_send_mad);

/*
 * ib_free_recv_mad - Returns data buffers used to receive
 *  a MAD to the access layer
 */
void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
{
	struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf;
	struct ib_mad_private_header *mad_priv_hdr;
	struct ib_mad_private *priv;
	struct list_head free_list;

	/* Detach the whole buffer list first, then free entry by entry */
	INIT_LIST_HEAD(&free_list);
	list_splice_init(&mad_recv_wc->rmpp_list, &free_list);

	list_for_each_entry_safe(mad_recv_buf, temp_recv_buf,
					&free_list, list) {
		/* Walk back from the buffer to the enclosing private MAD */
		mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
					   recv_buf);
		mad_priv_hdr = container_of(mad_recv_wc,
					    struct ib_mad_private_header,
					    recv_wc);
		priv = container_of(mad_priv_hdr, struct ib_mad_private,
				    header);
		kfree(priv);
	}
}
EXPORT_SYMBOL(ib_free_recv_mad);

/*
 * Return -EINVAL (after logging) when any method set in the request's
 * method_mask already has an agent in *@method; 0 otherwise. Callers in
 * this file use the result only as a boolean.
 */
static int method_in_use(struct ib_mad_mgmt_method_table **method,
			 struct ib_mad_reg_req *mad_reg_req)
{
	int i;

	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
		if ((*method)->agent[i]) {
			pr_err("Method %d already in use\n", i);
			return -EINVAL;
		}
	}
	return 0;
}

static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
{
	/* Allocate management method table */
	*method = kzalloc(sizeof **method, GFP_ATOMIC);
	return (*method) ?
0 : (-ENOMEM); } /* * Check to see if there are any methods still in use */ static int check_method_table(struct ib_mad_mgmt_method_table *method) { int i; for (i = 0; i < IB_MGMT_MAX_METHODS; i++) if (method->agent[i]) return 1; return 0; } /* * Check to see if there are any method tables for this class still in use */ static int check_class_table(struct ib_mad_mgmt_class_table *class) { int i; for (i = 0; i < MAX_MGMT_CLASS; i++) if (class->method_table[i]) return 1; return 0; } static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class) { int i; for (i = 0; i < MAX_MGMT_OUI; i++) if (vendor_class->method_table[i]) return 1; return 0; } static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, const char *oui) { int i; for (i = 0; i < MAX_MGMT_OUI; i++) /* Is there matching OUI for this vendor class ? */ if (!memcmp(vendor_class->oui[i], oui, 3)) return i; return -1; } static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor) { int i; for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++) if (vendor->vendor_class[i]) return 1; return 0; } static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method, struct ib_mad_agent_private *agent) { int i; /* Remove any methods for this mad agent */ for (i = 0; i < IB_MGMT_MAX_METHODS; i++) if (method->agent[i] == agent) method->agent[i] = NULL; } static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv, u8 mgmt_class) { struct ib_mad_port_private *port_priv; struct ib_mad_mgmt_class_table **class; struct ib_mad_mgmt_method_table **method; int i, ret; port_priv = agent_priv->qp_info->port_priv; class = &port_priv->version[mad_reg_req->mgmt_class_version].class; if (!*class) { /* Allocate management class table for "new" class version */ *class = kzalloc(sizeof **class, GFP_ATOMIC); if (!*class) { ret = -ENOMEM; goto error1; } /* Allocate method table for this management class */ method = 
&(*class)->method_table[mgmt_class]; if ((ret = allocate_method_table(method))) goto error2; } else { method = &(*class)->method_table[mgmt_class]; if (!*method) { /* Allocate method table for this management class */ if ((ret = allocate_method_table(method))) goto error1; } } /* Now, make sure methods are not already in use */ if (method_in_use(method, mad_reg_req)) goto error3; /* Finally, add in methods being registered */ for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) (*method)->agent[i] = agent_priv; return 0; error3: /* Remove any methods for this mad agent */ remove_methods_mad_agent(*method, agent_priv); /* Now, check to see if there are any methods in use */ if (!check_method_table(*method)) { /* If not, release management method table */ kfree(*method); *method = NULL; } ret = -EINVAL; goto error1; error2: kfree(*class); *class = NULL; error1: return ret; } static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv) { struct ib_mad_port_private *port_priv; struct ib_mad_mgmt_vendor_class_table **vendor_table; struct ib_mad_mgmt_vendor_class_table *vendor = NULL; struct ib_mad_mgmt_vendor_class *vendor_class = NULL; struct ib_mad_mgmt_method_table **method; int i, ret = -ENOMEM; u8 vclass; /* "New" vendor (with OUI) class */ vclass = vendor_class_index(mad_reg_req->mgmt_class); port_priv = agent_priv->qp_info->port_priv; vendor_table = &port_priv->version[ mad_reg_req->mgmt_class_version].vendor; if (!*vendor_table) { /* Allocate mgmt vendor class table for "new" class version */ vendor = kzalloc(sizeof *vendor, GFP_ATOMIC); if (!vendor) goto error1; *vendor_table = vendor; } if (!(*vendor_table)->vendor_class[vclass]) { /* Allocate table for this management vendor class */ vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC); if (!vendor_class) goto error2; (*vendor_table)->vendor_class[vclass] = vendor_class; } for (i = 0; i < MAX_MGMT_OUI; i++) { /* Is there matching OUI for this vendor 
class ? */ if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i], mad_reg_req->oui, 3)) { method = &(*vendor_table)->vendor_class[ vclass]->method_table[i]; if (!*method) goto error3; goto check_in_use; } } for (i = 0; i < MAX_MGMT_OUI; i++) { /* OUI slot available ? */ if (!is_vendor_oui((*vendor_table)->vendor_class[ vclass]->oui[i])) { method = &(*vendor_table)->vendor_class[ vclass]->method_table[i]; /* Allocate method table for this OUI */ if (!*method) { ret = allocate_method_table(method); if (ret) goto error3; } memcpy((*vendor_table)->vendor_class[vclass]->oui[i], mad_reg_req->oui, 3); goto check_in_use; } } dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n"); goto error3; check_in_use: /* Now, make sure methods are not already in use */ if (method_in_use(method, mad_reg_req)) goto error4; /* Finally, add in methods being registered */ for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) (*method)->agent[i] = agent_priv; return 0; error4: /* Remove any methods for this mad agent */ remove_methods_mad_agent(*method, agent_priv); /* Now, check to see if there are any methods in use */ if (!check_method_table(*method)) { /* If not, release management method table */ kfree(*method); *method = NULL; } ret = -EINVAL; error3: if (vendor_class) { (*vendor_table)->vendor_class[vclass] = NULL; kfree(vendor_class); } error2: if (vendor) { *vendor_table = NULL; kfree(vendor); } error1: return ret; } static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv) { struct ib_mad_port_private *port_priv; struct ib_mad_mgmt_class_table *class; struct ib_mad_mgmt_method_table *method; struct ib_mad_mgmt_vendor_class_table *vendor; struct ib_mad_mgmt_vendor_class *vendor_class; int index; u8 mgmt_class; /* * Was MAD registration request supplied * with original registration ? 
*/ if (!agent_priv->reg_req) goto out; port_priv = agent_priv->qp_info->port_priv; mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class); class = port_priv->version[ agent_priv->reg_req->mgmt_class_version].class; if (!class) goto vendor_check; method = class->method_table[mgmt_class]; if (method) { /* Remove any methods for this mad agent */ remove_methods_mad_agent(method, agent_priv); /* Now, check to see if there are any methods still in use */ if (!check_method_table(method)) { /* If not, release management method table */ kfree(method); class->method_table[mgmt_class] = NULL; /* Any management classes left ? */ if (!check_class_table(class)) { /* If not, release management class table */ kfree(class); port_priv->version[ agent_priv->reg_req-> mgmt_class_version].class = NULL; } } } vendor_check: if (!is_vendor_class(mgmt_class)) goto out; /* normalize mgmt_class to vendor range 2 */ mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class); vendor = port_priv->version[ agent_priv->reg_req->mgmt_class_version].vendor; if (!vendor) goto out; vendor_class = vendor->vendor_class[mgmt_class]; if (vendor_class) { index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui); if (index < 0) goto out; method = vendor_class->method_table[index]; if (method) { /* Remove any methods for this mad agent */ remove_methods_mad_agent(method, agent_priv); /* * Now, check to see if there are * any methods still in use */ if (!check_method_table(method)) { /* If not, release management method table */ kfree(method); vendor_class->method_table[index] = NULL; memset(vendor_class->oui[index], 0, 3); /* Any OUIs left ? */ if (!check_vendor_class(vendor_class)) { /* If not, release vendor class table */ kfree(vendor_class); vendor->vendor_class[mgmt_class] = NULL; /* Any other vendor classes left ? */ if (!check_vendor_table(vendor)) { kfree(vendor); port_priv->version[ agent_priv->reg_req-> mgmt_class_version]. 
vendor = NULL; } } } } } out: return; } static struct ib_mad_agent_private * find_mad_agent(struct ib_mad_port_private *port_priv, const struct ib_mad_hdr *mad_hdr) { struct ib_mad_agent_private *mad_agent = NULL; unsigned long flags; if (ib_response_mad(mad_hdr)) { u32 hi_tid; /* * Routing is based on high 32 bits of transaction ID * of MAD. */ hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; rcu_read_lock(); mad_agent = xa_load(&ib_mad_clients, hi_tid); if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount)) mad_agent = NULL; rcu_read_unlock(); } else { struct ib_mad_mgmt_class_table *class; struct ib_mad_mgmt_method_table *method; struct ib_mad_mgmt_vendor_class_table *vendor; struct ib_mad_mgmt_vendor_class *vendor_class; const struct ib_vendor_mad *vendor_mad; int index; spin_lock_irqsave(&port_priv->reg_lock, flags); /* * Routing is based on version, class, and method * For "newer" vendor MADs, also based on OUI */ if (mad_hdr->class_version >= MAX_MGMT_VERSION) goto out; if (!is_vendor_class(mad_hdr->mgmt_class)) { class = port_priv->version[ mad_hdr->class_version].class; if (!class) goto out; if (convert_mgmt_class(mad_hdr->mgmt_class) >= ARRAY_SIZE(class->method_table)) goto out; method = class->method_table[convert_mgmt_class( mad_hdr->mgmt_class)]; if (method) mad_agent = method->agent[mad_hdr->method & ~IB_MGMT_METHOD_RESP]; } else { vendor = port_priv->version[ mad_hdr->class_version].vendor; if (!vendor) goto out; vendor_class = vendor->vendor_class[vendor_class_index( mad_hdr->mgmt_class)]; if (!vendor_class) goto out; /* Find matching OUI */ vendor_mad = (const struct ib_vendor_mad *)mad_hdr; index = find_vendor_oui(vendor_class, vendor_mad->oui); if (index == -1) goto out; method = vendor_class->method_table[index]; if (method) { mad_agent = method->agent[mad_hdr->method & ~IB_MGMT_METHOD_RESP]; } } if (mad_agent) refcount_inc(&mad_agent->refcount); out: spin_unlock_irqrestore(&port_priv->reg_lock, flags); } if (mad_agent && 
!mad_agent->agent.recv_handler) { dev_notice(&port_priv->device->dev, "No receive handler for client %p on port %u\n", &mad_agent->agent, port_priv->port_num); deref_mad_agent(mad_agent); mad_agent = NULL; } return mad_agent; } static int validate_mad(const struct ib_mad_hdr *mad_hdr, const struct ib_mad_qp_info *qp_info, bool opa) { int valid = 0; u32 qp_num = qp_info->qp->qp_num; /* Make sure MAD base version is understood */ if (mad_hdr->base_version != IB_MGMT_BASE_VERSION && (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) { pr_err("MAD received with unsupported base version %u %s\n", mad_hdr->base_version, opa ? "(opa)" : ""); goto out; } /* Filter SMI packets sent to other than QP0 */ if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { if (qp_num == 0) valid = 1; } else { /* CM attributes other than ClassPortInfo only use Send method */ if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) && (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) && (mad_hdr->method != IB_MGMT_METHOD_SEND)) goto out; /* Filter GSI packets sent to QP0 */ if (qp_num != 0) valid = 1; } out: return valid; } static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv, const struct ib_mad_hdr *mad_hdr) { struct ib_rmpp_mad *rmpp_mad; rmpp_mad = (struct ib_rmpp_mad *)mad_hdr; return !mad_agent_priv->agent.rmpp_version || !ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) || !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE) || (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); } static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr, const struct ib_mad_recv_wc *rwc) { return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class == rwc->recv_buf.mad->mad_hdr.mgmt_class; } static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv, const struct ib_mad_send_wr_private *wr, const struct ib_mad_recv_wc *rwc) { struct 
rdma_ah_attr attr; u8 send_resp, rcv_resp; union ib_gid sgid; struct ib_device *device = mad_agent_priv->agent.device; u32 port_num = mad_agent_priv->agent.port_num; u8 lmc; bool has_grh; send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad); rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr); if (send_resp == rcv_resp) /* both requests, or both responses. GIDs different */ return 0; if (rdma_query_ah(wr->send_buf.ah, &attr)) /* Assume not equal, to avoid false positives. */ return 0; has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH); if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH)) /* one has GID, other does not. Assume different */ return 0; if (!send_resp && rcv_resp) { /* is request/response. */ if (!has_grh) { if (ib_get_cached_lmc(device, port_num, &lmc)) return 0; return (!lmc || !((rdma_ah_get_path_bits(&attr) ^ rwc->wc->dlid_path_bits) & ((1 << lmc) - 1))); } else { const struct ib_global_route *grh = rdma_ah_read_grh(&attr); if (rdma_query_gid(device, port_num, grh->sgid_index, &sgid)) return 0; return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, 16); } } if (!has_grh) return rdma_ah_get_dlid(&attr) == rwc->wc->slid; else return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw, rwc->recv_buf.grh->sgid.raw, 16); } static inline int is_direct(u8 class) { return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE); } struct ib_mad_send_wr_private* ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, const struct ib_mad_recv_wc *wc) { struct ib_mad_send_wr_private *wr; const struct ib_mad_hdr *mad_hdr; mad_hdr = &wc->recv_buf.mad->mad_hdr; list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) { if ((wr->tid == mad_hdr->tid) && rcv_has_same_class(wr, wc) && /* * Don't check GID for direct routed MADs. * These might have permissive LIDs. */ (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) return (wr->status == IB_WC_SUCCESS) ? 
wr : NULL; } /* * It's possible to receive the response before we've * been notified that the send has completed */ list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) { if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) && wr->tid == mad_hdr->tid && wr->timeout && rcv_has_same_class(wr, wc) && /* * Don't check GID for direct routed MADs. * These might have permissive LIDs. */ (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) /* Verify request has not been canceled */ return (wr->status == IB_WC_SUCCESS) ? wr : NULL; } return NULL; } void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr) { mad_send_wr->timeout = 0; if (mad_send_wr->refcount == 1) list_move_tail(&mad_send_wr->agent_list, &mad_send_wr->mad_agent_priv->done_list); } static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags; int ret; INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); ret = ib_mad_enforce_security(mad_agent_priv, mad_recv_wc->wc->pkey_index); if (ret) { ib_free_recv_mad(mad_recv_wc); deref_mad_agent(mad_agent_priv); return; } list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, mad_recv_wc); if (!mad_recv_wc) { deref_mad_agent(mad_agent_priv); return; } } /* Complete corresponding request */ if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) { spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); if (!mad_send_wr) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class) && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { /* 
user rmpp is in effect * and this is an active RMPP MAD */ mad_agent_priv->agent.recv_handler( &mad_agent_priv->agent, NULL, mad_recv_wc); deref_mad_agent(mad_agent_priv); } else { /* not user rmpp, revert to normal behavior and * drop the mad */ ib_free_recv_mad(mad_recv_wc); deref_mad_agent(mad_agent_priv); return; } } else { ib_mark_mad_done(mad_send_wr); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Defined behavior is to complete response before request */ mad_agent_priv->agent.recv_handler( &mad_agent_priv->agent, &mad_send_wr->send_buf, mad_recv_wc); deref_mad_agent(mad_agent_priv); mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; mad_send_wc.send_buf = &mad_send_wr->send_buf; ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); } } else { mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL, mad_recv_wc); deref_mad_agent(mad_agent_priv); } } static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv, const struct ib_mad_qp_info *qp_info, const struct ib_wc *wc, u32 port_num, struct ib_mad_private *recv, struct ib_mad_private *response) { enum smi_forward_action retsmi; struct ib_smp *smp = (struct ib_smp *)recv->mad; trace_ib_mad_handle_ib_smi(smp); if (smi_handle_dr_smp_recv(smp, rdma_cap_ib_switch(port_priv->device), port_num, port_priv->device->phys_port_cnt) == IB_SMI_DISCARD) return IB_SMI_DISCARD; retsmi = smi_check_forward_dr_smp(smp); if (retsmi == IB_SMI_LOCAL) return IB_SMI_HANDLE; if (retsmi == IB_SMI_SEND) { /* don't forward */ if (smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(port_priv->device), port_num) == IB_SMI_DISCARD) return IB_SMI_DISCARD; if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) return IB_SMI_DISCARD; } else if (rdma_cap_ib_switch(port_priv->device)) { /* forward case for switches */ memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; 
response->header.recv_wc.recv_buf.grh = &response->grh; agent_send_response((const struct ib_mad_hdr *)response->mad, &response->grh, wc, port_priv->device, smi_get_fwd_port(smp), qp_info->qp->qp_num, response->mad_size, false); return IB_SMI_DISCARD; } return IB_SMI_HANDLE; } static bool generate_unmatched_resp(const struct ib_mad_private *recv, struct ib_mad_private *response, size_t *resp_len, bool opa) { const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad; struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad; if (recv_hdr->method == IB_MGMT_METHOD_GET || recv_hdr->method == IB_MGMT_METHOD_SET) { memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; response->header.recv_wc.recv_buf.grh = &response->grh; resp_hdr->method = IB_MGMT_METHOD_GET_RESP; resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) resp_hdr->status |= IB_SMP_DIRECTION; if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) { if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) *resp_len = opa_get_smp_header_size( (struct opa_smp *)recv->mad); else *resp_len = sizeof(struct ib_mad_hdr); } return true; } else { return false; } } static enum smi_action handle_opa_smi(struct ib_mad_port_private *port_priv, struct ib_mad_qp_info *qp_info, struct ib_wc *wc, u32 port_num, struct ib_mad_private *recv, struct ib_mad_private *response) { enum smi_forward_action retsmi; struct opa_smp *smp = (struct opa_smp *)recv->mad; trace_ib_mad_handle_opa_smi(smp); if (opa_smi_handle_dr_smp_recv(smp, rdma_cap_ib_switch(port_priv->device), port_num, port_priv->device->phys_port_cnt) == IB_SMI_DISCARD) return IB_SMI_DISCARD; retsmi = opa_smi_check_forward_dr_smp(smp); if (retsmi == IB_SMI_LOCAL) return 
IB_SMI_HANDLE; if (retsmi == IB_SMI_SEND) { /* don't forward */ if (opa_smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(port_priv->device), port_num) == IB_SMI_DISCARD) return IB_SMI_DISCARD; if (opa_smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) return IB_SMI_DISCARD; } else if (rdma_cap_ib_switch(port_priv->device)) { /* forward case for switches */ memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; response->header.recv_wc.recv_buf.opa_mad = (struct opa_mad *)response->mad; response->header.recv_wc.recv_buf.grh = &response->grh; agent_send_response((const struct ib_mad_hdr *)response->mad, &response->grh, wc, port_priv->device, opa_smi_get_fwd_port(smp), qp_info->qp->qp_num, recv->header.wc.byte_len, true); return IB_SMI_DISCARD; } return IB_SMI_HANDLE; } static enum smi_action handle_smi(struct ib_mad_port_private *port_priv, struct ib_mad_qp_info *qp_info, struct ib_wc *wc, u32 port_num, struct ib_mad_private *recv, struct ib_mad_private *response, bool opa) { struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad; if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION && mad_hdr->class_version == OPA_SM_CLASS_VERSION) return handle_opa_smi(port_priv, qp_info, wc, port_num, recv, response); return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response); } static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) { struct ib_mad_port_private *port_priv = cq->cq_context; struct ib_mad_list_head *mad_list = container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); struct ib_mad_qp_info *qp_info; struct ib_mad_private_header *mad_priv_hdr; struct ib_mad_private *recv, *response = NULL; struct ib_mad_agent_private *mad_agent; u32 port_num; int ret = IB_MAD_RESULT_SUCCESS; size_t mad_size; u16 resp_mad_pkey_index = 0; bool opa; if (list_empty_careful(&port_priv->port_list)) return; if (wc->status != IB_WC_SUCCESS) { /* * Receive errors indicate that the QP has entered the error * 
state - error handling/shutdown code will cleanup */ return; } qp_info = mad_list->mad_queue->qp_info; dequeue_mad(mad_list); opa = rdma_cap_opa_mad(qp_info->port_priv->device, qp_info->port_priv->port_num); mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, mad_list); recv = container_of(mad_priv_hdr, struct ib_mad_private, header); ib_dma_unmap_single(port_priv->device, recv->header.mapping, mad_priv_dma_size(recv), DMA_FROM_DEVICE); /* Setup MAD receive work completion from "normal" work completion */ recv->header.wc = *wc; recv->header.recv_wc.wc = &recv->header.wc; if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) { recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh); recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); } else { recv->header.recv_wc.mad_len = sizeof(struct ib_mad); recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); } recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad; recv->header.recv_wc.recv_buf.grh = &recv->grh; /* Validate MAD */ if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) goto out; trace_ib_mad_recv_done_handler(qp_info, wc, (struct ib_mad_hdr *)recv->mad); mad_size = recv->mad_size; response = alloc_mad_private(mad_size, GFP_KERNEL); if (!response) goto out; if (rdma_cap_ib_switch(port_priv->device)) port_num = wc->port_num; else port_num = port_priv->port_num; if (((struct ib_mad_hdr *)recv->mad)->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { if (handle_smi(port_priv, qp_info, wc, port_num, recv, response, opa) == IB_SMI_DISCARD) goto out; } /* Give driver "right of first refusal" on incoming MAD */ if (port_priv->device->ops.process_mad) { ret = port_priv->device->ops.process_mad( port_priv->device, 0, port_priv->port_num, wc, &recv->grh, (const struct ib_mad *)recv->mad, (struct ib_mad *)response->mad, &mad_size, &resp_mad_pkey_index); if (opa) wc->pkey_index = resp_mad_pkey_index; if (ret & 
IB_MAD_RESULT_SUCCESS) { if (ret & IB_MAD_RESULT_CONSUMED) goto out; if (ret & IB_MAD_RESULT_REPLY) { agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, port_priv->device, port_num, qp_info->qp->qp_num, mad_size, opa); goto out; } } } mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad); if (mad_agent) { trace_ib_mad_recv_done_agent(mad_agent); ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); /* * recv is freed up in error cases in ib_mad_complete_recv * or via recv_handler in ib_mad_complete_recv() */ recv = NULL; } else if ((ret & IB_MAD_RESULT_SUCCESS) && generate_unmatched_resp(recv, response, &mad_size, opa)) { agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, port_priv->device, port_num, qp_info->qp->qp_num, mad_size, opa); } out: /* Post another receive request for this QP */ if (response) { ib_mad_post_receive_mads(qp_info, response); kfree(recv); } else ib_mad_post_receive_mads(qp_info, recv); } static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_send_wr_private *mad_send_wr; unsigned long delay; if (list_empty(&mad_agent_priv->wait_list)) { cancel_delayed_work(&mad_agent_priv->timed_work); } else { mad_send_wr = list_entry(mad_agent_priv->wait_list.next, struct ib_mad_send_wr_private, agent_list); if (time_after(mad_agent_priv->timeout, mad_send_wr->timeout)) { mad_agent_priv->timeout = mad_send_wr->timeout; delay = mad_send_wr->timeout - jiffies; if ((long)delay <= 0) delay = 1; mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, &mad_agent_priv->timed_work, delay); } } } static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *temp_mad_send_wr; struct list_head *list_item; unsigned long delay; mad_agent_priv = mad_send_wr->mad_agent_priv; list_del(&mad_send_wr->agent_list); delay = mad_send_wr->timeout; mad_send_wr->timeout += jiffies; 
if (delay) { list_for_each_prev(list_item, &mad_agent_priv->wait_list) { temp_mad_send_wr = list_entry(list_item, struct ib_mad_send_wr_private, agent_list); if (time_after(mad_send_wr->timeout, temp_mad_send_wr->timeout)) break; } } else { list_item = &mad_agent_priv->wait_list; } list_add(&mad_send_wr->agent_list, list_item); /* Reschedule a work item if we have a shorter timeout */ if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, &mad_agent_priv->timed_work, delay); } void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, unsigned long timeout_ms) { mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); wait_for_response(mad_send_wr); } /* * Process a send work completion */ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_agent_private *mad_agent_priv; unsigned long flags; int ret; mad_agent_priv = mad_send_wr->mad_agent_priv; spin_lock_irqsave(&mad_agent_priv->lock, flags); if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc); if (ret == IB_RMPP_RESULT_CONSUMED) goto done; } else ret = IB_RMPP_RESULT_UNHANDLED; if (mad_send_wc->status != IB_WC_SUCCESS && mad_send_wr->status == IB_WC_SUCCESS) { mad_send_wr->status = mad_send_wc->status; mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } if (--mad_send_wr->refcount > 0) { if (mad_send_wr->refcount == 1 && mad_send_wr->timeout && mad_send_wr->status == IB_WC_SUCCESS) { wait_for_response(mad_send_wr); } goto done; } /* Remove send from MAD agent and notify client of completion */ list_del(&mad_send_wr->agent_list); adjust_timeout(mad_agent_priv); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); if (mad_send_wr->status != IB_WC_SUCCESS) mad_send_wc->status = mad_send_wr->status; if (ret == IB_RMPP_RESULT_INTERNAL) ib_rmpp_send_handler(mad_send_wc); else 
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, mad_send_wc); /* Release reference on agent taken when sending */ deref_mad_agent(mad_agent_priv); return; done: spin_unlock_irqrestore(&mad_agent_priv->lock, flags); } static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) { struct ib_mad_port_private *port_priv = cq->cq_context; struct ib_mad_list_head *mad_list = container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr; struct ib_mad_qp_info *qp_info; struct ib_mad_queue *send_queue; struct ib_mad_send_wc mad_send_wc; unsigned long flags; int ret; if (list_empty_careful(&port_priv->port_list)) return; if (wc->status != IB_WC_SUCCESS) { if (!ib_mad_send_error(port_priv, wc)) return; } mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, mad_list); send_queue = mad_list->mad_queue; qp_info = send_queue->qp_info; trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv); trace_ib_mad_send_done_handler(mad_send_wr, wc); retry: ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, mad_send_wr->header_mapping, mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, mad_send_wr->payload_mapping, mad_send_wr->sg_list[1].length, DMA_TO_DEVICE); queued_send_wr = NULL; spin_lock_irqsave(&send_queue->lock, flags); list_del(&mad_list->list); /* Move queued send to the send queue */ if (send_queue->count-- > send_queue->max_active) { mad_list = container_of(qp_info->overflow_list.next, struct ib_mad_list_head, list); queued_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, mad_list); list_move_tail(&mad_list->list, &send_queue->list); } spin_unlock_irqrestore(&send_queue->lock, flags); mad_send_wc.send_buf = &mad_send_wr->send_buf; mad_send_wc.status = wc->status; mad_send_wc.vendor_err = wc->vendor_err; ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { 
trace_ib_mad_send_done_resend(queued_send_wr, qp_info); ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, NULL); if (ret) { dev_err(&port_priv->device->dev, "ib_post_send failed: %d\n", ret); mad_send_wr = queued_send_wr; wc->status = IB_WC_LOC_QP_OP_ERR; goto retry; } } } static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info) { struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_list_head *mad_list; unsigned long flags; spin_lock_irqsave(&qp_info->send_queue.lock, flags); list_for_each_entry(mad_list, &qp_info->send_queue.list, list) { mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, mad_list); mad_send_wr->retry = 1; } spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); } static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, struct ib_wc *wc) { struct ib_mad_list_head *mad_list = container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info; struct ib_mad_send_wr_private *mad_send_wr; int ret; /* * Send errors will transition the QP to SQE - move * QP to RTS and repost flushed work requests */ mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, mad_list); if (wc->status == IB_WC_WR_FLUSH_ERR) { if (mad_send_wr->retry) { /* Repost send */ mad_send_wr->retry = 0; trace_ib_mad_error_handler(mad_send_wr, qp_info); ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, NULL); if (!ret) return false; } } else { struct ib_qp_attr *attr; /* Transition QP to RTS and fail offending send */ attr = kmalloc(sizeof *attr, GFP_KERNEL); if (attr) { attr->qp_state = IB_QPS_RTS; attr->cur_qp_state = IB_QPS_SQE; ret = ib_modify_qp(qp_info->qp, attr, IB_QP_STATE | IB_QP_CUR_STATE); kfree(attr); if (ret) dev_err(&port_priv->device->dev, "%s - ib_modify_qp to RTS: %d\n", __func__, ret); else mark_sends_for_retry(qp_info); } } return true; } static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) { unsigned long flags; struct 
ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr; struct ib_mad_send_wc mad_send_wc; struct list_head cancel_list; INIT_LIST_HEAD(&cancel_list); spin_lock_irqsave(&mad_agent_priv->lock, flags); list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, &mad_agent_priv->send_list, agent_list) { if (mad_send_wr->status == IB_WC_SUCCESS) { mad_send_wr->status = IB_WC_WR_FLUSH_ERR; mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } } /* Empty wait list to prevent receives from finding a request */ list_splice_init(&mad_agent_priv->wait_list, &cancel_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Report all cancelled requests */ mad_send_wc.status = IB_WC_WR_FLUSH_ERR; mad_send_wc.vendor_err = 0; list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, &cancel_list, agent_list) { mad_send_wc.send_buf = &mad_send_wr->send_buf; list_del(&mad_send_wr->agent_list); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); deref_mad_agent(mad_agent_priv); } } static struct ib_mad_send_wr_private* find_send_wr(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_send_buf *send_buf) { struct ib_mad_send_wr_private *mad_send_wr; list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, agent_list) { if (&mad_send_wr->send_buf == send_buf) return mad_send_wr; } list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { if (is_rmpp_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && &mad_send_wr->send_buf == send_buf) return mad_send_wr; } return NULL; } int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; unsigned long flags; int active; if (!send_buf) return -EINVAL; mad_agent_priv = container_of(send_buf->mad_agent, struct ib_mad_agent_private, agent); spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = find_send_wr(mad_agent_priv, send_buf); if (!mad_send_wr || mad_send_wr->status != 
IB_WC_SUCCESS) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); return -EINVAL; } active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1); if (!timeout_ms) { mad_send_wr->status = IB_WC_WR_FLUSH_ERR; mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } mad_send_wr->send_buf.timeout_ms = timeout_ms; if (active) mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); else ib_reset_mad_timeout(mad_send_wr, timeout_ms); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); return 0; } EXPORT_SYMBOL(ib_modify_mad); static void local_completions(struct work_struct *work) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_local_private *local; struct ib_mad_agent_private *recv_mad_agent; unsigned long flags; int free_mad; struct ib_wc wc; struct ib_mad_send_wc mad_send_wc; bool opa; mad_agent_priv = container_of(work, struct ib_mad_agent_private, local_work); opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, mad_agent_priv->qp_info->port_priv->port_num); spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->local_list)) { local = list_entry(mad_agent_priv->local_list.next, struct ib_mad_local_private, completion_list); list_del(&local->completion_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); free_mad = 0; if (local->mad_priv) { u8 base_version; recv_mad_agent = local->recv_mad_agent; if (!recv_mad_agent) { dev_err(&mad_agent_priv->agent.device->dev, "No receive MAD agent for local completion\n"); free_mad = 1; goto local_send_completion; } /* * Defined behavior is to complete response * before request */ build_smp_wc(recv_mad_agent->agent.qp, local->mad_send_wr->send_wr.wr.wr_cqe, be16_to_cpu(IB_LID_PERMISSIVE), local->mad_send_wr->send_wr.pkey_index, recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version; if (opa && base_version == OPA_MGMT_BASE_VERSION) { 
local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len; local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); } else { local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad); local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); } INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list); list_add(&local->mad_priv->header.recv_wc.recv_buf.list, &local->mad_priv->header.recv_wc.rmpp_list); local->mad_priv->header.recv_wc.recv_buf.grh = NULL; local->mad_priv->header.recv_wc.recv_buf.mad = (struct ib_mad *)local->mad_priv->mad; recv_mad_agent->agent.recv_handler( &recv_mad_agent->agent, &local->mad_send_wr->send_buf, &local->mad_priv->header.recv_wc); spin_lock_irqsave(&recv_mad_agent->lock, flags); deref_mad_agent(recv_mad_agent); spin_unlock_irqrestore(&recv_mad_agent->lock, flags); } local_send_completion: /* Complete send */ mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; mad_send_wc.send_buf = &local->mad_send_wr->send_buf; mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); spin_lock_irqsave(&mad_agent_priv->lock, flags); deref_mad_agent(mad_agent_priv); if (free_mad) kfree(local->mad_priv); kfree(local); } spin_unlock_irqrestore(&mad_agent_priv->lock, flags); } static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) { int ret; if (!mad_send_wr->retries_left) return -ETIMEDOUT; mad_send_wr->retries_left--; mad_send_wr->send_buf.retries++; mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) { ret = ib_retry_rmpp(mad_send_wr); switch (ret) { case IB_RMPP_RESULT_UNHANDLED: ret = ib_send_mad(mad_send_wr); break; case IB_RMPP_RESULT_CONSUMED: ret = 0; break; default: ret = -ECOMM; break; } } else ret = ib_send_mad(mad_send_wr); if (!ret) { mad_send_wr->refcount++; list_add_tail(&mad_send_wr->agent_list, &mad_send_wr->mad_agent_priv->send_list); } return ret; } static void 
timeout_sends(struct work_struct *work) { struct ib_mad_send_wr_private *mad_send_wr, *n; struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wc mad_send_wc; struct list_head local_list; unsigned long flags, delay; mad_agent_priv = container_of(work, struct ib_mad_agent_private, timed_work.work); mad_send_wc.vendor_err = 0; INIT_LIST_HEAD(&local_list); spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->wait_list)) { mad_send_wr = list_entry(mad_agent_priv->wait_list.next, struct ib_mad_send_wr_private, agent_list); if (time_after(mad_send_wr->timeout, jiffies)) { delay = mad_send_wr->timeout - jiffies; if ((long)delay <= 0) delay = 1; queue_delayed_work(mad_agent_priv->qp_info-> port_priv->wq, &mad_agent_priv->timed_work, delay); break; } list_del_init(&mad_send_wr->agent_list); if (mad_send_wr->status == IB_WC_SUCCESS && !retry_send(mad_send_wr)) continue; list_add_tail(&mad_send_wr->agent_list, &local_list); } spin_unlock_irqrestore(&mad_agent_priv->lock, flags); list_for_each_entry_safe(mad_send_wr, n, &local_list, agent_list) { if (mad_send_wr->status == IB_WC_SUCCESS) mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; else mad_send_wc.status = mad_send_wr->status; mad_send_wc.send_buf = &mad_send_wr->send_buf; mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); deref_mad_agent(mad_agent_priv); } } /* * Allocate receive MADs and post receive WRs for them */ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad) { unsigned long flags; int post, ret; struct ib_mad_private *mad_priv; struct ib_sge sg_list; struct ib_recv_wr recv_wr; struct ib_mad_queue *recv_queue = &qp_info->recv_queue; /* Initialize common scatter list fields */ sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey; /* Initialize common receive WR fields */ recv_wr.next = NULL; recv_wr.sg_list = &sg_list; recv_wr.num_sge = 1; do { /* Allocate and map receive buffer */ if (mad) { mad_priv = mad; 
mad = NULL; } else { mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv), GFP_ATOMIC); if (!mad_priv) { ret = -ENOMEM; break; } } sg_list.length = mad_priv_dma_size(mad_priv); sg_list.addr = ib_dma_map_single(qp_info->port_priv->device, &mad_priv->grh, mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, sg_list.addr))) { kfree(mad_priv); ret = -ENOMEM; break; } mad_priv->header.mapping = sg_list.addr; mad_priv->header.mad_list.mad_queue = recv_queue; mad_priv->header.mad_list.cqe.done = ib_mad_recv_done; recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe; /* Post receive WR */ spin_lock_irqsave(&recv_queue->lock, flags); post = (++recv_queue->count < recv_queue->max_active); list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list); spin_unlock_irqrestore(&recv_queue->lock, flags); ret = ib_post_recv(qp_info->qp, &recv_wr, NULL); if (ret) { spin_lock_irqsave(&recv_queue->lock, flags); list_del(&mad_priv->header.mad_list.list); recv_queue->count--; spin_unlock_irqrestore(&recv_queue->lock, flags); ib_dma_unmap_single(qp_info->port_priv->device, mad_priv->header.mapping, mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE); kfree(mad_priv); dev_err(&qp_info->port_priv->device->dev, "ib_post_recv failed: %d\n", ret); break; } } while (post); return ret; } /* * Return all the posted receive MADs */ static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) { struct ib_mad_private_header *mad_priv_hdr; struct ib_mad_private *recv; struct ib_mad_list_head *mad_list; if (!qp_info->qp) return; while (!list_empty(&qp_info->recv_queue.list)) { mad_list = list_entry(qp_info->recv_queue.list.next, struct ib_mad_list_head, list); mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, mad_list); recv = container_of(mad_priv_hdr, struct ib_mad_private, header); /* Remove from posted receive MAD list */ list_del(&mad_list->list); ib_dma_unmap_single(qp_info->port_priv->device, recv->header.mapping, 
mad_priv_dma_size(recv), DMA_FROM_DEVICE); kfree(recv); } qp_info->recv_queue.count = 0; } /* * Start the port */ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) { int ret, i; struct ib_qp_attr *attr; struct ib_qp *qp; u16 pkey_index; attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) return -ENOMEM; ret = ib_find_pkey(port_priv->device, port_priv->port_num, IB_DEFAULT_PKEY_FULL, &pkey_index); if (ret) pkey_index = 0; for (i = 0; i < IB_MAD_QPS_CORE; i++) { qp = port_priv->qp_info[i].qp; if (!qp) continue; /* * PKey index for QP1 is irrelevant but * one is needed for the Reset to Init transition */ attr->qp_state = IB_QPS_INIT; attr->pkey_index = pkey_index; attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY; ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY); if (ret) { dev_err(&port_priv->device->dev, "Couldn't change QP%d state to INIT: %d\n", i, ret); goto out; } attr->qp_state = IB_QPS_RTR; ret = ib_modify_qp(qp, attr, IB_QP_STATE); if (ret) { dev_err(&port_priv->device->dev, "Couldn't change QP%d state to RTR: %d\n", i, ret); goto out; } attr->qp_state = IB_QPS_RTS; attr->sq_psn = IB_MAD_SEND_Q_PSN; ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN); if (ret) { dev_err(&port_priv->device->dev, "Couldn't change QP%d state to RTS: %d\n", i, ret); goto out; } } ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP); if (ret) { dev_err(&port_priv->device->dev, "Failed to request completion notification: %d\n", ret); goto out; } for (i = 0; i < IB_MAD_QPS_CORE; i++) { if (!port_priv->qp_info[i].qp) continue; ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL); if (ret) { dev_err(&port_priv->device->dev, "Couldn't post receive WRs\n"); goto out; } } out: kfree(attr); return ret; } static void qp_event_handler(struct ib_event *event, void *qp_context) { struct ib_mad_qp_info *qp_info = qp_context; /* It's worse than that! He's dead, Jim! 
*/ dev_err(&qp_info->port_priv->device->dev, "Fatal error (%d) on MAD QP (%u)\n", event->event, qp_info->qp->qp_num); } static void init_mad_queue(struct ib_mad_qp_info *qp_info, struct ib_mad_queue *mad_queue) { mad_queue->qp_info = qp_info; mad_queue->count = 0; spin_lock_init(&mad_queue->lock); INIT_LIST_HEAD(&mad_queue->list); } static void init_mad_qp(struct ib_mad_port_private *port_priv, struct ib_mad_qp_info *qp_info) { qp_info->port_priv = port_priv; init_mad_queue(qp_info, &qp_info->send_queue); init_mad_queue(qp_info, &qp_info->recv_queue); INIT_LIST_HEAD(&qp_info->overflow_list); } static int create_mad_qp(struct ib_mad_qp_info *qp_info, enum ib_qp_type qp_type) { struct ib_qp_init_attr qp_init_attr; int ret; memset(&qp_init_attr, 0, sizeof qp_init_attr); qp_init_attr.send_cq = qp_info->port_priv->cq; qp_init_attr.recv_cq = qp_info->port_priv->cq; qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; qp_init_attr.cap.max_send_wr = mad_sendq_size; qp_init_attr.cap.max_recv_wr = mad_recvq_size; qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG; qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG; qp_init_attr.qp_type = qp_type; qp_init_attr.port_num = qp_info->port_priv->port_num; qp_init_attr.qp_context = qp_info; qp_init_attr.event_handler = qp_event_handler; qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr); if (IS_ERR(qp_info->qp)) { dev_err(&qp_info->port_priv->device->dev, "Couldn't create ib_mad QP%d\n", get_spl_qp_index(qp_type)); ret = PTR_ERR(qp_info->qp); goto error; } /* Use minimum queue sizes unless the CQ is resized */ qp_info->send_queue.max_active = mad_sendq_size; qp_info->recv_queue.max_active = mad_recvq_size; return 0; error: return ret; } static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) { if (!qp_info->qp) return; ib_destroy_qp(qp_info->qp); } /* * Open the port * Create the QP, PD, MR, and CQ if needed */ static int ib_mad_port_open(struct ib_device *device, u32 port_num) { int ret, cq_size; struct 
ib_mad_port_private *port_priv; unsigned long flags; int has_smi; if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE)) return -EFAULT; if (WARN_ON(rdma_cap_opa_mad(device, port_num) && rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE)) return -EFAULT; /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); if (!port_priv) return -ENOMEM; port_priv->device = device; port_priv->port_num = port_num; spin_lock_init(&port_priv->reg_lock); init_mad_qp(port_priv, &port_priv->qp_info[0]); init_mad_qp(port_priv, &port_priv->qp_info[1]); cq_size = mad_sendq_size + mad_recvq_size; has_smi = rdma_cap_ib_smi(device, port_num); if (has_smi) cq_size *= 2; port_priv->pd = ib_alloc_pd(device, 0); if (IS_ERR(port_priv->pd)) { dev_err(&device->dev, "Couldn't create ib_mad PD\n"); ret = PTR_ERR(port_priv->pd); goto error3; } port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, IB_POLL_UNBOUND_WORKQUEUE); if (IS_ERR(port_priv->cq)) { dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); ret = PTR_ERR(port_priv->cq); goto error4; } if (has_smi) { ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); if (ret) goto error6; } if (rdma_cap_ib_cm(device, port_num)) { ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); if (ret) goto error7; } port_priv->wq = alloc_ordered_workqueue("ib_mad%u", WQ_MEM_RECLAIM, port_num); if (!port_priv->wq) { ret = -ENOMEM; goto error8; } spin_lock_irqsave(&ib_mad_port_list_lock, flags); list_add_tail(&port_priv->port_list, &ib_mad_port_list); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); ret = ib_mad_port_start(port_priv); if (ret) { dev_err(&device->dev, "Couldn't start port\n"); goto error9; } return 0; error9: spin_lock_irqsave(&ib_mad_port_list_lock, flags); list_del_init(&port_priv->port_list); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); destroy_workqueue(port_priv->wq); error8: destroy_mad_qp(&port_priv->qp_info[1]); error7: 
destroy_mad_qp(&port_priv->qp_info[0]); error6: ib_free_cq(port_priv->cq); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); error4: ib_dealloc_pd(port_priv->pd); error3: kfree(port_priv); return ret; } /* * Close the port * If there are no classes using the port, free the port * resources (CQ, MR, PD, QP) and remove the port's info structure */ static int ib_mad_port_close(struct ib_device *device, u32 port_num) { struct ib_mad_port_private *port_priv; unsigned long flags; spin_lock_irqsave(&ib_mad_port_list_lock, flags); port_priv = __ib_get_mad_port(device, port_num); if (port_priv == NULL) { spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); dev_err(&device->dev, "Port %u not found\n", port_num); return -ENODEV; } list_del_init(&port_priv->port_list); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); destroy_workqueue(port_priv->wq); destroy_mad_qp(&port_priv->qp_info[1]); destroy_mad_qp(&port_priv->qp_info[0]); ib_free_cq(port_priv->cq); ib_dealloc_pd(port_priv->pd); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); /* XXX: Handle deallocation of MAD registration tables */ kfree(port_priv); return 0; } static int ib_mad_init_device(struct ib_device *device) { int start, i; unsigned int count = 0; int ret; start = rdma_start_port(device); for (i = start; i <= rdma_end_port(device); i++) { if (!rdma_cap_ib_mad(device, i)) continue; ret = ib_mad_port_open(device, i); if (ret) { dev_err(&device->dev, "Couldn't open port %d\n", i); goto error; } ret = ib_agent_port_open(device, i); if (ret) { dev_err(&device->dev, "Couldn't open port %d for agents\n", i); goto error_agent; } count++; } if (!count) return -EOPNOTSUPP; return 0; error_agent: if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d\n", i); error: while (--i >= start) { if (!rdma_cap_ib_mad(device, i)) continue; if (ib_agent_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d for 
agents\n", i); if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d\n", i); } return ret; } static void ib_mad_remove_device(struct ib_device *device, void *client_data) { unsigned int i; rdma_for_each_port (device, i) { if (!rdma_cap_ib_mad(device, i)) continue; if (ib_agent_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %u for agents\n", i); if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %u\n", i); } } static struct ib_client mad_client = { .name = "mad", .add = ib_mad_init_device, .remove = ib_mad_remove_device }; int ib_mad_init(void) { mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE); mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE); INIT_LIST_HEAD(&ib_mad_port_list); if (ib_register_client(&mad_client)) { pr_err("Couldn't register ib_mad client\n"); return -EINVAL; } return 0; } void ib_mad_cleanup(void) { ib_unregister_client(&mad_client); }
138 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_IRQDESC_H
#define _LINUX_IRQDESC_H

#include <linux/rcupdate.h>
#include <linux/kobject.h>
#include <linux/mutex.h>

/*
 * Core internal functions to deal with irq descriptors
 */

struct irq_affinity_notify;
struct proc_dir_entry;
struct module;
struct irq_desc;
struct irq_domain;
struct pt_regs;

/**
 * struct irqstat - interrupt statistics
 * @cnt:	real-time interrupt count
 * @ref:	snapshot of interrupt count
 *		(only present with CONFIG_GENERIC_IRQ_STAT_SNAPSHOT)
 */
struct irqstat {
	unsigned int	cnt;
#ifdef CONFIG_GENERIC_IRQ_STAT_SNAPSHOT
	unsigned int	ref;
#endif
};

/**
 * struct irq_desc - interrupt descriptor
 * @irq_common_data:	per irq and chip data passed down to chip functions
 * @irq_data:		irq data embedded in the descriptor; the accessors
 *			below read the irq number, chip and chip_data from it
 * @kstat_irqs:		irq stats per cpu
 * @handle_irq:		highlevel irq-events handler
 * @action:		the irq action chain
 * @status_use_accessors: status information
 * @core_internal_state__do_not_mess_with_it: core internal status information
 * @depth:		disable-depth, for nested irq_disable() calls
 * @wake_depth:		enable depth, for multiple irq_set_irq_wake() callers
 * @tot_count:		stats field for non-percpu irqs
 * @irq_count:		stats field to detect stalled irqs
 * @last_unhandled:	aging timer for unhandled count
 * @irqs_unhandled:	stats field for spurious unhandled interrupts
 * @threads_handled:	stats field for deferred spurious detection of threaded handlers
 * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
 * @lock:		locking for SMP
 * @percpu_enabled:	cpumask pointer
 *			(NOTE(review): presumably the CPUs on which a per-CPU
 *			interrupt is currently enabled - confirm)
 * @percpu_affinity:	const cpumask pointer
 *			(NOTE(review): presumably the CPUs a per-CPU interrupt
 *			may target - confirm)
 * @affinity_hint:	hint to user space for preferred irq affinity
 * @affinity_notify:	context for notification of affinity changes
 * @pending_mask:	pending rebalanced interrupts
 * @threads_oneshot:	bitfield to handle shared oneshot threads
 * @threads_active:	number of irqaction threads currently running
 * @wait_for_threads:	wait queue for sync_irq to wait for threaded handlers
 * @nr_actions:		number of installed actions on this descriptor
 * @no_suspend_depth:	number of irqactions on a irq descriptor with
 *			IRQF_NO_SUSPEND set
 * @cond_suspend_depth:	(NOTE(review): presumably the number of irqactions on
 *			a irq descriptor with IRQF_COND_SUSPEND set - confirm)
 * @force_resume_depth:	number of irqactions on a irq descriptor with
 *			IRQF_FORCE_RESUME set
 * @rcu:		rcu head for delayed free
 * @kobj:		kobject used to represent this struct in sysfs
 * @request_mutex:	mutex to protect request/free before locking desc->lock
 * @dir:		/proc/irq/ procfs entry
 * @debugfs_file:	dentry for the debugfs file
 * @dev_name:		string only present with CONFIG_GENERIC_IRQ_DEBUGFS
 *			(NOTE(review): presumably the device name shown in the
 *			debugfs file - confirm)
 * @parent_irq:		(NOTE(review): presumably the parent interrupt number
 *			for nested interrupts - confirm)
 * @owner:		module pointer
 *			(NOTE(review): presumably the module providing the
 *			irq chip - confirm)
 * @name:		flow handler name for /proc/interrupts output
 * @resend_node:	hlist node only present with CONFIG_HARDIRQS_SW_RESEND
 *			(NOTE(review): presumably links the descriptor into
 *			the software resend list - confirm)
 */
struct irq_desc {
	struct irq_common_data	irq_common_data;
	struct irq_data		irq_data;
	struct irqstat __percpu	*kstat_irqs;
	irq_flow_handler_t	handle_irq;
	struct irqaction	*action;	/* IRQ action list */
	unsigned int		status_use_accessors;
	unsigned int		core_internal_state__do_not_mess_with_it;
	unsigned int		depth;		/* nested irq disables */
	unsigned int		wake_depth;	/* nested wake enables */
	unsigned int		tot_count;
	unsigned int		irq_count;	/* For detecting broken IRQs */
	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
	unsigned int		irqs_unhandled;
	atomic_t		threads_handled;
	int			threads_handled_last;
	raw_spinlock_t		lock;
	struct cpumask		*percpu_enabled;
	const struct cpumask	*percpu_affinity;
#ifdef CONFIG_SMP
	const struct cpumask	*affinity_hint;
	struct irq_affinity_notify *affinity_notify;
#ifdef CONFIG_GENERIC_PENDING_IRQ
	cpumask_var_t		pending_mask;
#endif
#endif
	unsigned long		threads_oneshot;
	atomic_t		threads_active;
	wait_queue_head_t       wait_for_threads;
#ifdef CONFIG_PM_SLEEP
	unsigned int		nr_actions;
	unsigned int		no_suspend_depth;
	unsigned int		cond_suspend_depth;
	unsigned int		force_resume_depth;
#endif
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry	*dir;
#endif
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
	struct dentry		*debugfs_file;
	const char		*dev_name;
#endif
#ifdef CONFIG_SPARSE_IRQ
	struct rcu_head		rcu;
	struct kobject		kobj;
#endif
	struct mutex		request_mutex;
	int			parent_irq;
	struct module		*owner;
	const char		*name;
#ifdef CONFIG_HARDIRQS_SW_RESEND
	struct hlist_node	resend_node;
#endif
} ____cacheline_internodealigned_in_smp;

/*
 * With CONFIG_SPARSE_IRQ the sparse lock/unlock helpers are real functions
 * defined elsewhere; without it they are empty stubs and the descriptors
 * are exposed as the irq_desc[NR_IRQS] array.
 */
#ifdef CONFIG_SPARSE_IRQ
extern void irq_lock_sparse(void);
extern void irq_unlock_sparse(void);
#else
static inline void irq_lock_sparse(void) { }
static inline void irq_unlock_sparse(void) { }
extern struct irq_desc irq_desc[NR_IRQS];
#endif

/*
 * Return the interrupt count recorded for @cpu in @desc, or 0 when
 * @desc->kstat_irqs is NULL.
 */
static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc,
					      unsigned int cpu)
{
	return desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, cpu) : 0;
}

/*
 * Map an irq_data back to the descriptor that embeds the irq_common_data
 * which @data->common points to.
 */
static inline struct irq_desc *irq_data_to_desc(struct irq_data *data)
{
	return container_of(data->common, struct irq_desc, irq_common_data);
}

/* Return the irq number stored in the descriptor's irq_data. */
static inline unsigned int irq_desc_get_irq(struct irq_desc *desc)
{
	return desc->irq_data.irq;
}

/* Return a pointer to the irq_data embedded in @desc. */
static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc)
{
	return &desc->irq_data;
}

/* Return the irq chip stored in the descriptor's irq_data. */
static inline struct irq_chip *irq_desc_get_chip(struct irq_desc *desc)
{
	return desc->irq_data.chip;
}

/* Return the chip_data pointer stored in the descriptor's irq_data. */
static inline void *irq_desc_get_chip_data(struct irq_desc *desc)
{
	return desc->irq_data.chip_data;
}

/* Return the handler_data pointer stored in the descriptor's common data. */
static inline void *irq_desc_get_handler_data(struct irq_desc *desc)
{
	return desc->irq_common_data.handler_data;
}

/*
 * Architectures call this to let the generic IRQ layer
 * handle an interrupt.
 */
static inline void generic_handle_irq_desc(struct irq_desc *desc)
{
	desc->handle_irq(desc);
}

int handle_irq_desc(struct irq_desc *desc);
int generic_handle_irq(unsigned int irq);
int generic_handle_irq_safe(unsigned int irq);

#ifdef CONFIG_IRQ_DOMAIN
/*
 * Convert a HW interrupt number to a logical one using a IRQ domain,
 * and handle the result interrupt number. Return -EINVAL if
 * conversion failed.
 */
int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq);
int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq);
int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq);
#endif

/*
 * Test to see if a driver has successfully requested an irq.
 * A NULL @desc is tolerated and reported as "no action".
 */
static inline int irq_desc_has_action(struct irq_desc *desc)
{
	return desc && desc->action != NULL;
}

/**
 * irq_set_handler_locked - Set irq handler from a locked region
 * @data:	Pointer to the irq_data structure which identifies the irq
 * @handler:	Flow control handler function for this interrupt
 *
 * Sets the handler in the irq descriptor associated to @data.
 *
 * Must be called with irq_desc locked and valid parameters. Typical
 * call site is the irq_set_type() callback.
 */
static inline void irq_set_handler_locked(struct irq_data *data,
					  irq_flow_handler_t handler)
{
	struct irq_desc *desc = irq_data_to_desc(data);

	desc->handle_irq = handler;
}

/**
 * irq_set_chip_handler_name_locked - Set chip, handler and name from a locked region
 * @data:	Pointer to the irq_data structure for which the chip is set
 * @chip:	Pointer to the new irq chip
 * @handler:	Flow control handler function for this interrupt
 * @name:	Name of the interrupt
 *
 * Replace the irq chip at the proper hierarchy level in @data and
 * sets the handler and name in the associated irq descriptor.
 *
 * Must be called with irq_desc locked and valid parameters.
 */
static inline void
irq_set_chip_handler_name_locked(struct irq_data *data,
				 const struct irq_chip *chip,
				 irq_flow_handler_t handler, const char *name)
{
	struct irq_desc *desc = irq_data_to_desc(data);

	desc->handle_irq = handler;
	desc->name = name;
	/* irq_data stores a non-const chip pointer, hence the cast. */
	data->chip = (struct irq_chip *)chip;
}

bool irq_check_status_bit(unsigned int irq, unsigned int bitmask);

/* True when irq_check_status_bit() reports IRQ_NO_BALANCING_MASK for @irq. */
static inline bool irq_balancing_disabled(unsigned int irq)
{
	return irq_check_status_bit(irq, IRQ_NO_BALANCING_MASK);
}

/* True when irq_check_status_bit() reports IRQ_PER_CPU for @irq. */
static inline bool irq_is_percpu(unsigned int irq)
{
	return irq_check_status_bit(irq, IRQ_PER_CPU);
}

/* True when irq_check_status_bit() reports IRQ_PER_CPU_DEVID for @irq. */
static inline bool irq_is_percpu_devid(unsigned int irq)
{
	return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID);
}

void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
			     struct lock_class_key *request_class);

/*
 * Forward to __irq_set_lockdep_class() only when CONFIG_LOCKDEP is enabled;
 * otherwise this is a no-op.
 */
static inline void
irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
		      struct lock_class_key *request_class)
{
	if (IS_ENABLED(CONFIG_LOCKDEP))
		__irq_set_lockdep_class(irq, lock_class, request_class);
}

#endif
15 8 3 2 2 9 2 5 3 2 5 5 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_redirect.h> #include <net/netfilter/nf_tables.h> struct nft_redir { u8 sreg_proto_min; u8 sreg_proto_max; u16 flags; }; static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = { [NFTA_REDIR_REG_PROTO_MIN] = { .type = NLA_U32 }, [NFTA_REDIR_REG_PROTO_MAX] = { .type = NLA_U32 }, [NFTA_REDIR_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK), }; static int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { int err; err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); if (err < 0) return err; return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)); } 
static int nft_redir_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_redir *priv = nft_expr_priv(expr); unsigned int plen; int err; plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MIN], &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_REDIR_REG_PROTO_MAX]) { err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MAX], &priv->sreg_proto_max, plen); if (err < 0) return err; } else { priv->sreg_proto_max = priv->sreg_proto_min; } priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } if (tb[NFTA_REDIR_FLAGS]) priv->flags = ntohl(nla_get_be32(tb[NFTA_REDIR_FLAGS])); return nf_ct_netns_get(ctx->net, ctx->family); } static int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_redir *priv = nft_expr_priv(expr); if (priv->sreg_proto_min) { if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MIN, priv->sreg_proto_min)) goto nla_put_failure; if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MAX, priv->sreg_proto_max)) goto nla_put_failure; } if (priv->flags != 0 && nla_put_be32(skb, NFTA_REDIR_FLAGS, htonl(priv->flags))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static void nft_redir_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_redir *priv = nft_expr_priv(expr); struct nf_nat_range2 range; memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { range.min_proto.all = (__force __be16) nft_reg_load16(&regs->data[priv->sreg_proto_min]); range.max_proto.all = (__force __be16) nft_reg_load16(&regs->data[priv->sreg_proto_max]); } switch (nft_pf(pkt)) { case NFPROTO_IPV4: regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &range, nft_hook(pkt)); break; #ifdef CONFIG_NF_TABLES_IPV6 case NFPROTO_IPV6: regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, 
&range, nft_hook(pkt)); break; #endif default: WARN_ON_ONCE(1); break; } } static void nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { nf_ct_netns_put(ctx->net, NFPROTO_IPV4); } static struct nft_expr_type nft_redir_ipv4_type; static const struct nft_expr_ops nft_redir_ipv4_ops = { .type = &nft_redir_ipv4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_ipv4_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { .family = NFPROTO_IPV4, .name = "redir", .ops = &nft_redir_ipv4_ops, .policy = nft_redir_policy, .maxattr = NFTA_REDIR_MAX, .owner = THIS_MODULE, }; #ifdef CONFIG_NF_TABLES_IPV6 static void nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { nf_ct_netns_put(ctx->net, NFPROTO_IPV6); } static struct nft_expr_type nft_redir_ipv6_type; static const struct nft_expr_ops nft_redir_ipv6_ops = { .type = &nft_redir_ipv6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_ipv6_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { .family = NFPROTO_IPV6, .name = "redir", .ops = &nft_redir_ipv6_ops, .policy = nft_redir_policy, .maxattr = NFTA_REDIR_MAX, .owner = THIS_MODULE, }; #endif #ifdef CONFIG_NF_TABLES_INET static void nft_redir_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { nf_ct_netns_put(ctx->net, NFPROTO_INET); } static struct nft_expr_type nft_redir_inet_type; static const struct nft_expr_ops nft_redir_inet_ops = { .type = &nft_redir_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_inet_destroy, .dump = nft_redir_dump, .validate = 
nft_redir_validate, .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_inet_type __read_mostly = { .family = NFPROTO_INET, .name = "redir", .ops = &nft_redir_inet_ops, .policy = nft_redir_policy, .maxattr = NFTA_REDIR_MAX, .owner = THIS_MODULE, }; static int __init nft_redir_module_init_inet(void) { return nft_register_expr(&nft_redir_inet_type); } #else static inline int nft_redir_module_init_inet(void) { return 0; } #endif static int __init nft_redir_module_init(void) { int ret = nft_register_expr(&nft_redir_ipv4_type); if (ret) return ret; #ifdef CONFIG_NF_TABLES_IPV6 ret = nft_register_expr(&nft_redir_ipv6_type); if (ret) { nft_unregister_expr(&nft_redir_ipv4_type); return ret; } #endif ret = nft_redir_module_init_inet(); if (ret < 0) { nft_unregister_expr(&nft_redir_ipv4_type); #ifdef CONFIG_NF_TABLES_IPV6 nft_unregister_expr(&nft_redir_ipv6_type); #endif return ret; } return ret; } static void __exit nft_redir_module_exit(void) { nft_unregister_expr(&nft_redir_ipv4_type); #ifdef CONFIG_NF_TABLES_IPV6 nft_unregister_expr(&nft_redir_ipv6_type); #endif #ifdef CONFIG_NF_TABLES_INET nft_unregister_expr(&nft_redir_inet_type); #endif } module_init(nft_redir_module_init); module_exit(nft_redir_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); MODULE_ALIAS_NFT_EXPR("redir"); MODULE_DESCRIPTION("Netfilter nftables redirect support");
2257 3 818 648 212 3 665 664 39 648 664 879 1217 19 882 644 161 161 161 31 50 20 50 538 62 2352 34 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 
503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> */ #ifndef _IP6_FIB_H #define _IP6_FIB_H #include <linux/ipv6_route.h> #include <linux/rtnetlink.h> #include <linux/spinlock.h> #include <linux/notifier.h> #include <net/dst.h> #include <net/flow.h> #include <net/ip_fib.h> #include <net/netlink.h> #include <net/inetpeer.h> #include <net/fib_notifier.h> #include <linux/indirect_call_wrapper.h> #include <uapi/linux/bpf.h> #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_HASHSZ 256 #else #define FIB6_TABLE_HASHSZ 1 #endif #define RT6_DEBUG 2 struct rt6_info; struct fib6_info; struct fib6_config { u32 fc_table; u32 fc_metric; int fc_dst_len; int fc_src_len; int fc_ifindex; u32 fc_flags; u32 fc_protocol; u16 fc_type; /* only 8 bits are used */ u16 fc_delete_all_nh : 1, fc_ignore_dev_down:1, __unused : 14; u32 fc_nh_id; struct in6_addr fc_dst; struct in6_addr fc_src; struct in6_addr fc_prefsrc; struct in6_addr fc_gateway; unsigned long fc_expires; struct nlattr *fc_mx; int fc_mx_len; int fc_mp_len; struct nlattr *fc_mp; struct nl_info fc_nlinfo; struct nlattr *fc_encap; u16 fc_encap_type; bool fc_is_fdb; }; struct fib6_node { struct fib6_node __rcu 
*parent; struct fib6_node __rcu *left; struct fib6_node __rcu *right; #ifdef CONFIG_IPV6_SUBTREES struct fib6_node __rcu *subtree; #endif struct fib6_info __rcu *leaf; __u16 fn_bit; /* bit key */ __u16 fn_flags; int fn_sernum; struct fib6_info __rcu *rr_ptr; struct rcu_head rcu; }; struct fib6_gc_args { int timeout; int more; }; #ifndef CONFIG_IPV6_SUBTREES #define FIB6_SUBTREE(fn) NULL static inline bool fib6_routes_require_src(const struct net *net) { return false; } static inline void fib6_routes_require_src_inc(struct net *net) {} static inline void fib6_routes_require_src_dec(struct net *net) {} #else static inline bool fib6_routes_require_src(const struct net *net) { return net->ipv6.fib6_routes_require_src > 0; } static inline void fib6_routes_require_src_inc(struct net *net) { net->ipv6.fib6_routes_require_src++; } static inline void fib6_routes_require_src_dec(struct net *net) { net->ipv6.fib6_routes_require_src--; } #define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1)) #endif /* * routing information * */ struct rt6key { struct in6_addr addr; int plen; }; struct fib6_table; struct rt6_exception_bucket { struct hlist_head chain; int depth; }; struct rt6_exception { struct hlist_node hlist; struct rt6_info *rt6i; unsigned long stamp; struct rcu_head rcu; }; #define FIB6_EXCEPTION_BUCKET_SIZE_SHIFT 10 #define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT) #define FIB6_MAX_DEPTH 5 struct fib6_nh { struct fib_nh_common nh_common; #ifdef CONFIG_IPV6_ROUTER_PREF unsigned long last_probe; #endif struct rt6_info * __percpu *rt6i_pcpu; struct rt6_exception_bucket __rcu *rt6i_exception_bucket; }; struct fib6_info { struct fib6_table *fib6_table; struct fib6_info __rcu *fib6_next; struct fib6_node __rcu *fib6_node; /* Multipath routes: * siblings is a list of fib6_info that have the same metric/weight, * destination, but not the same gateway. nsiblings is just a cache * to speed up lookup. 
*/ union { struct list_head fib6_siblings; struct list_head nh_list; }; unsigned int fib6_nsiblings; refcount_t fib6_ref; unsigned long expires; struct hlist_node gc_link; struct dst_metrics *fib6_metrics; #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] struct rt6key fib6_dst; u32 fib6_flags; struct rt6key fib6_src; struct rt6key fib6_prefsrc; u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; u8 offload; u8 trap; u8 offload_failed; u8 should_flush:1, dst_nocount:1, dst_nopolicy:1, fib6_destroying:1, unused:4; struct rcu_head rcu; struct nexthop *nh; struct fib6_nh fib6_nh[]; }; struct rt6_info { struct dst_entry dst; struct fib6_info __rcu *from; int sernum; struct rt6key rt6i_dst; struct rt6key rt6i_src; struct in6_addr rt6i_gateway; struct inet6_dev *rt6i_idev; u32 rt6i_flags; /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; }; struct fib6_result { struct fib6_nh *nh; struct fib6_info *f6i; u32 fib6_flags; u8 fib6_type; struct rt6_info *rt6; }; #define for_each_fib6_node_rt_rcu(fn) \ for (rt = rcu_dereference((fn)->leaf); rt; \ rt = rcu_dereference(rt->fib6_next)) #define for_each_fib6_walker_rt(w) \ for (rt = (w)->leaf; rt; \ rt = rcu_dereference_protected(rt->fib6_next, 1)) #define dst_rt6_info(_ptr) container_of_const(_ptr, struct rt6_info, dst) static inline struct inet6_dev *ip6_dst_idev(const struct dst_entry *dst) { return dst_rt6_info(dst)->rt6i_idev; } static inline bool fib6_requires_src(const struct fib6_info *rt) { return rt->fib6_src.plen > 0; } /* The callers should hold f6i->fib6_table->tb6_lock if a route has ever * been added to a table before. */ static inline void fib6_clean_expires(struct fib6_info *f6i) { f6i->fib6_flags &= ~RTF_EXPIRES; f6i->expires = 0; } /* The callers should hold f6i->fib6_table->tb6_lock if a route has ever * been added to a table before. 
*/ static inline void fib6_set_expires(struct fib6_info *f6i, unsigned long expires) { f6i->expires = expires; f6i->fib6_flags |= RTF_EXPIRES; } static inline bool fib6_check_expired(const struct fib6_info *f6i) { if (f6i->fib6_flags & RTF_EXPIRES) return time_after(jiffies, f6i->expires); return false; } /* Function to safely get fn->fn_sernum for passed in rt * and store result in passed in cookie. * Return true if we can get cookie safely * Return false if not */ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, u32 *cookie) { struct fib6_node *fn; bool status = false; fn = rcu_dereference(f6i->fib6_node); if (fn) { *cookie = READ_ONCE(fn->fn_sernum); /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */ smp_rmb(); status = true; } return status; } static inline u32 rt6_get_cookie(const struct rt6_info *rt) { struct fib6_info *from; u32 cookie = 0; if (rt->sernum) return rt->sernum; rcu_read_lock(); from = rcu_dereference(rt->from); if (from) fib6_get_cookie_safe(from, &cookie); rcu_read_unlock(); return cookie; } static inline void ip6_rt_put(struct rt6_info *rt) { /* dst_release() accepts a NULL parameter. 
* We rely on dst being first structure in struct rt6_info */ BUILD_BUG_ON(offsetof(struct rt6_info, dst) != 0); dst_release(&rt->dst); } struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh); void fib6_info_destroy_rcu(struct rcu_head *head); static inline void fib6_info_hold(struct fib6_info *f6i) { refcount_inc(&f6i->fib6_ref); } static inline bool fib6_info_hold_safe(struct fib6_info *f6i) { return refcount_inc_not_zero(&f6i->fib6_ref); } static inline void fib6_info_release(struct fib6_info *f6i) { if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) { DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link)); call_rcu_hurry(&f6i->rcu, fib6_info_destroy_rcu); } } enum fib6_walk_state { #ifdef CONFIG_IPV6_SUBTREES FWS_S, #endif FWS_L, FWS_R, FWS_C, FWS_U }; struct fib6_walker { struct list_head lh; struct fib6_node *root, *node; struct fib6_info *leaf; enum fib6_walk_state state; unsigned int skip; unsigned int count; unsigned int skip_in_node; int (*func)(struct fib6_walker *); void *args; }; struct rt6_statistics { __u32 fib_nodes; /* all fib6 nodes */ __u32 fib_route_nodes; /* intermediate nodes */ __u32 fib_rt_entries; /* rt entries in fib table */ __u32 fib_rt_cache; /* cached rt entries in exception table */ __u32 fib_discarded_routes; /* total number of routes delete */ /* The following stat is not protected by any lock */ atomic_t fib_rt_alloc; /* total number of routes alloced */ }; #define RTN_TL_ROOT 0x0001 #define RTN_ROOT 0x0002 /* tree root node */ #define RTN_RTINFO 0x0004 /* node with valid routing info */ /* * priority levels (or metrics) * */ struct fib6_table { struct hlist_node tb6_hlist; u32 tb6_id; spinlock_t tb6_lock; struct fib6_node tb6_root; struct inet_peer_base tb6_peers; unsigned int flags; unsigned int fib_seq; /* writes protected by rtnl_mutex */ struct hlist_head tb6_gc_hlist; /* GC candidates */ #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; #define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC #define RT6_TABLE_MAIN RT_TABLE_MAIN 
#define RT6_TABLE_DFLT RT6_TABLE_MAIN #define RT6_TABLE_INFO RT6_TABLE_MAIN #define RT6_TABLE_PREFIX RT6_TABLE_MAIN #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_MIN 1 #define FIB6_TABLE_MAX RT_TABLE_MAX #define RT6_TABLE_LOCAL RT_TABLE_LOCAL #else #define FIB6_TABLE_MIN RT_TABLE_MAIN #define FIB6_TABLE_MAX FIB6_TABLE_MIN #define RT6_TABLE_LOCAL RT6_TABLE_MAIN #endif typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_table *, struct flowi6 *, const struct sk_buff *, int); struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib6_info *rt; unsigned int nsiblings; }; /* * exported functions */ struct fib6_table *fib6_get_table(struct net *net, u32 id); struct fib6_table *fib6_new_table(struct net *net, u32 id); struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags, pol_lookup_t lookup); /* called with rcu lock held; can return error pointer * caller needs to select path */ int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, struct fib6_result *res, int flags); /* called with rcu lock held; caller needs to select path */ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, struct fib6_result *res, int strict); void fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict); struct fib6_node *fib6_node_lookup(struct fib6_node *root, const struct in6_addr *daddr, const struct in6_addr *saddr); struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *daddr, int dst_len, const struct in6_addr *saddr, int src_len, bool exact_match); void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); void fib6_clean_all_skip_notify(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct 
nl_info *info, struct netlink_ext_ack *extack); int fib6_del(struct fib6_info *rt, struct nl_info *info); static inline void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) { const struct fib6_info *from; rcu_read_lock(); from = rcu_dereference(rt->from); if (from) *addr = from->fib6_prefsrc.addr; else *addr = in6addr_any; rcu_read_unlock(); } int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib6_nh_release(struct fib6_nh *fib6_nh); void fib6_nh_release_dsts(struct fib6_nh *fib6_nh); int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, struct fib6_info *rt, struct netlink_ext_ack *extack); int call_fib6_multipath_entry_notifiers(struct net *net, enum fib_event_type event_type, struct fib6_info *rt, unsigned int nsiblings, struct netlink_ext_ack *extack); int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt); void fib6_rt_update(struct net *net, struct fib6_info *rt, struct nl_info *info); void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); void fib6_run_gc(unsigned long expires, struct net *net, bool force); void fib6_gc_cleanup(void); int fib6_init(void); /* Add the route to the gc list if it is not already there * * The callers should hold f6i->fib6_table->tb6_lock. */ static inline void fib6_add_gc_list(struct fib6_info *f6i) { /* If fib6_node is null, the f6i is not in (or removed from) the * table. * * There is a gap between finding the f6i from the table and * calling this function without the protection of the tb6_lock. * This check makes sure the f6i is not added to the gc list when * it is not on the table. 
*/ if (!rcu_dereference_protected(f6i->fib6_node, lockdep_is_held(&f6i->fib6_table->tb6_lock))) return; if (hlist_unhashed(&f6i->gc_link)) hlist_add_head(&f6i->gc_link, &f6i->fib6_table->tb6_gc_hlist); } /* Remove the route from the gc list if it is on the list. * * The callers should hold f6i->fib6_table->tb6_lock. */ static inline void fib6_remove_gc_list(struct fib6_info *f6i) { if (!hlist_unhashed(&f6i->gc_link)) hlist_del_init(&f6i->gc_link); } struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; loff_t skip; struct fib6_table *tbl; int sernum; }; extern const struct seq_operations ipv6_route_seq_ops; int call_fib6_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); int __net_init fib6_notifier_init(struct net *net); void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(const struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); void fib6_update_sernum(struct net *net, struct fib6_info *rt); void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i); void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val); static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) { return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric)); } void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, bool offload, bool trap, bool offload_failed); #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) struct bpf_iter__ipv6_route { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct fib6_info *, rt); }; #endif INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); 
INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); INDIRECT_CALLABLE_DECLARE(struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); static inline struct rt6_info *pol_lookup_func(pol_lookup_t lookup, struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { return INDIRECT_CALL_4(lookup, ip6_pol_route_output, ip6_pol_route_input, ip6_pol_route_lookup, __ip6_route_redirect, net, table, fl6, skb, flags); } #ifdef CONFIG_IPV6_MULTIPLE_TABLES static inline bool fib6_has_custom_rules(const struct net *net) { return net->ipv6.fib6_has_custom_rules; } int fib6_rules_init(void); void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); int fib6_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); unsigned int fib6_rules_seq_read(const struct net *net); static inline bool fib6_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, struct flowi6 *fl6, struct flow_keys *flkeys) { unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; if (!net->ipv6.fib6_rules_require_fldissect) return false; memset(flkeys, 0, sizeof(*flkeys)); __skb_flow_dissect(net, skb, &flow_keys_dissector, flkeys, NULL, 0, 0, 0, flag); fl6->fl6_sport = flkeys->ports.src; fl6->fl6_dport = flkeys->ports.dst; fl6->flowi6_proto = flkeys->basic.ip_proto; return true; } #else static inline bool fib6_has_custom_rules(const struct net *net) { return false; } static inline int fib6_rules_init(void) { return 0; } static inline void fib6_rules_cleanup(void) { return ; } static inline bool fib6_rule_default(const struct fib_rule *rule) { return 
true; } static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return 0; } static inline unsigned int fib6_rules_seq_read(const struct net *net) { return 0; } static inline bool fib6_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, struct flowi6 *fl6, struct flow_keys *flkeys) { return false; } #endif #endif
12 12 12 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 // 
SPDX-License-Identifier: GPL-2.0-or-later /* * Module kallsyms support * * Copyright (C) 2010 Rusty Russell */ #include <linux/module.h> #include <linux/module_symbol.h> #include <linux/kallsyms.h> #include <linux/buildid.h> #include <linux/bsearch.h> #include "internal.h" /* Lookup exported symbol in given range of kernel_symbols */ static const struct kernel_symbol *lookup_exported_symbol(const char *name, const struct kernel_symbol *start, const struct kernel_symbol *stop) { return bsearch(name, start, stop - start, sizeof(struct kernel_symbol), cmp_name); } static int is_exported(const char *name, unsigned long value, const struct module *mod) { const struct kernel_symbol *ks; if (!mod) ks = lookup_exported_symbol(name, __start___ksymtab, __stop___ksymtab); else ks = lookup_exported_symbol(name, mod->syms, mod->syms + mod->num_syms); return ks && kernel_symbol_value(ks) == value; } /* As per nm */ static char elf_type(const Elf_Sym *sym, const struct load_info *info) { const Elf_Shdr *sechdrs = info->sechdrs; if (ELF_ST_BIND(sym->st_info) == STB_WEAK) { if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT) return 'v'; else return 'w'; } if (sym->st_shndx == SHN_UNDEF) return 'U'; if (sym->st_shndx == SHN_ABS || sym->st_shndx == info->index.pcpu) return 'a'; if (sym->st_shndx >= SHN_LORESERVE) return '?'; if (sechdrs[sym->st_shndx].sh_flags & SHF_EXECINSTR) return 't'; if (sechdrs[sym->st_shndx].sh_flags & SHF_ALLOC && sechdrs[sym->st_shndx].sh_type != SHT_NOBITS) { if (!(sechdrs[sym->st_shndx].sh_flags & SHF_WRITE)) return 'r'; else if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) return 'g'; else return 'd'; } if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) { if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) return 's'; else return 'b'; } if (strstarts(info->secstrings + sechdrs[sym->st_shndx].sh_name, ".debug")) { return 'n'; } return '?'; } static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, unsigned int shnum, unsigned int pcpundx) { 
const Elf_Shdr *sec; enum mod_mem_type type; if (src->st_shndx == SHN_UNDEF || src->st_shndx >= shnum || !src->st_name) return false; #ifdef CONFIG_KALLSYMS_ALL if (src->st_shndx == pcpundx) return true; #endif sec = sechdrs + src->st_shndx; type = sec->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; if (!(sec->sh_flags & SHF_ALLOC) #ifndef CONFIG_KALLSYMS_ALL || !(sec->sh_flags & SHF_EXECINSTR) #endif || mod_mem_type_is_init(type)) return false; return true; } /* * We only allocate and copy the strings needed by the parts of symtab * we keep. This is simple, but has the effect of making multiple * copies of duplicates. We could be more sophisticated, see * linux-kernel thread starting with * <73defb5e4bca04a6431392cc341112b1@localhost>. */ void layout_symtab(struct module *mod, struct load_info *info) { Elf_Shdr *symsect = info->sechdrs + info->index.sym; Elf_Shdr *strsect = info->sechdrs + info->index.str; const Elf_Sym *src; unsigned int i, nsrc, ndst, strtab_size = 0; struct module_memory *mod_mem_data = &mod->mem[MOD_DATA]; struct module_memory *mod_mem_init_data = &mod->mem[MOD_INIT_DATA]; /* Put symbol section at end of init part of module. */ symsect->sh_flags |= SHF_ALLOC; symsect->sh_entsize = module_get_offset_and_type(mod, MOD_INIT_DATA, symsect, info->index.sym); pr_debug("\t%s\n", info->secstrings + symsect->sh_name); src = (void *)info->hdr + symsect->sh_offset; nsrc = symsect->sh_size / sizeof(*src); /* Compute total space required for the core symbols' strtab. */ for (ndst = i = 0; i < nsrc; i++) { if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { strtab_size += strlen(&info->strtab[src[i].st_name]) + 1; ndst++; } } /* Append room for core symbols at end of core part. 
*/ info->symoffs = ALIGN(mod_mem_data->size, symsect->sh_addralign ?: 1); info->stroffs = mod_mem_data->size = info->symoffs + ndst * sizeof(Elf_Sym); mod_mem_data->size += strtab_size; /* Note add_kallsyms() computes strtab_size as core_typeoffs - stroffs */ info->core_typeoffs = mod_mem_data->size; mod_mem_data->size += ndst * sizeof(char); /* Put string table section at end of init part of module. */ strsect->sh_flags |= SHF_ALLOC; strsect->sh_entsize = module_get_offset_and_type(mod, MOD_INIT_DATA, strsect, info->index.str); pr_debug("\t%s\n", info->secstrings + strsect->sh_name); /* We'll tack temporary mod_kallsyms on the end. */ mod_mem_init_data->size = ALIGN(mod_mem_init_data->size, __alignof__(struct mod_kallsyms)); info->mod_kallsyms_init_off = mod_mem_init_data->size; mod_mem_init_data->size += sizeof(struct mod_kallsyms); info->init_typeoffs = mod_mem_init_data->size; mod_mem_init_data->size += nsrc * sizeof(char); } /* * We use the full symtab and strtab which layout_symtab arranged to * be appended to the init section. Later we switch to the cut-down * core-only ones. */ void add_kallsyms(struct module *mod, const struct load_info *info) { unsigned int i, ndst; const Elf_Sym *src; Elf_Sym *dst; char *s; Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; unsigned long strtab_size; void *data_base = mod->mem[MOD_DATA].base; void *init_data_base = mod->mem[MOD_INIT_DATA].base; /* Set up to point into init section. */ mod->kallsyms = (void __rcu *)init_data_base + info->mod_kallsyms_init_off; rcu_read_lock(); /* The following is safe since this pointer cannot change */ rcu_dereference(mod->kallsyms)->symtab = (void *)symsec->sh_addr; rcu_dereference(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym); /* Make sure we get permanent strtab: don't use info->strtab. 
*/ rcu_dereference(mod->kallsyms)->strtab = (void *)info->sechdrs[info->index.str].sh_addr; rcu_dereference(mod->kallsyms)->typetab = init_data_base + info->init_typeoffs; /* * Now populate the cut down core kallsyms for after init * and set types up while we still have access to sections. */ mod->core_kallsyms.symtab = dst = data_base + info->symoffs; mod->core_kallsyms.strtab = s = data_base + info->stroffs; mod->core_kallsyms.typetab = data_base + info->core_typeoffs; strtab_size = info->core_typeoffs - info->stroffs; src = rcu_dereference(mod->kallsyms)->symtab; for (ndst = i = 0; i < rcu_dereference(mod->kallsyms)->num_symtab; i++) { rcu_dereference(mod->kallsyms)->typetab[i] = elf_type(src + i, info); if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { ssize_t ret; mod->core_kallsyms.typetab[ndst] = rcu_dereference(mod->kallsyms)->typetab[i]; dst[ndst] = src[i]; dst[ndst++].st_name = s - mod->core_kallsyms.strtab; ret = strscpy(s, &rcu_dereference(mod->kallsyms)->strtab[src[i].st_name], strtab_size); if (ret < 0) break; s += ret + 1; strtab_size -= ret + 1; } } rcu_read_unlock(); mod->core_kallsyms.num_symtab = ndst; } #if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) void init_build_id(struct module *mod, const struct load_info *info) { const Elf_Shdr *sechdr; unsigned int i; for (i = 0; i < info->hdr->e_shnum; i++) { sechdr = &info->sechdrs[i]; if (!sect_empty(sechdr) && sechdr->sh_type == SHT_NOTE && !build_id_parse_buf((void *)sechdr->sh_addr, mod->build_id, sechdr->sh_size)) break; } } #else void init_build_id(struct module *mod, const struct load_info *info) { } #endif static const char *kallsyms_symbol_name(struct mod_kallsyms *kallsyms, unsigned int symnum) { return kallsyms->strtab + kallsyms->symtab[symnum].st_name; } /* * Given a module and address, find the corresponding symbol and return its name * while providing its size and offset if needed. 
*/ static const char *find_kallsyms_symbol(struct module *mod, unsigned long addr, unsigned long *size, unsigned long *offset) { unsigned int i, best = 0; unsigned long nextval, bestval; struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); struct module_memory *mod_mem; /* At worse, next value is at end of module */ if (within_module_init(addr, mod)) mod_mem = &mod->mem[MOD_INIT_TEXT]; else mod_mem = &mod->mem[MOD_TEXT]; nextval = (unsigned long)mod_mem->base + mod_mem->size; bestval = kallsyms_symbol_value(&kallsyms->symtab[best]); /* * Scan for closest preceding symbol, and next symbol. (ELF * starts real symbols at 1). */ for (i = 1; i < kallsyms->num_symtab; i++) { const Elf_Sym *sym = &kallsyms->symtab[i]; unsigned long thisval = kallsyms_symbol_value(sym); if (sym->st_shndx == SHN_UNDEF) continue; /* * We ignore unnamed symbols: they're uninformative * and inserted at a whim. */ if (*kallsyms_symbol_name(kallsyms, i) == '\0' || is_mapping_symbol(kallsyms_symbol_name(kallsyms, i))) continue; if (thisval <= addr && thisval > bestval) { best = i; bestval = thisval; } if (thisval > addr && thisval < nextval) nextval = thisval; } if (!best) return NULL; if (size) *size = nextval - bestval; if (offset) *offset = addr - bestval; return kallsyms_symbol_name(kallsyms, best); } void * __weak dereference_module_function_descriptor(struct module *mod, void *ptr) { return ptr; } /* * For kallsyms to ask for address resolution. NULL means not found. Careful * not to lock to avoid deadlock on oopses, simply disable preemption. 
*/ int module_address_lookup(unsigned long addr, unsigned long *size, unsigned long *offset, char **modname, const unsigned char **modbuildid, char *namebuf) { const char *sym; int ret = 0; struct module *mod; preempt_disable(); mod = __module_address(addr); if (mod) { if (modname) *modname = mod->name; if (modbuildid) { #if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) *modbuildid = mod->build_id; #else *modbuildid = NULL; #endif } sym = find_kallsyms_symbol(mod, addr, size, offset); if (sym) ret = strscpy(namebuf, sym, KSYM_NAME_LEN); } preempt_enable(); return ret; } int lookup_module_symbol_name(unsigned long addr, char *symname) { struct module *mod; preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; if (within_module(addr, mod)) { const char *sym; sym = find_kallsyms_symbol(mod, addr, NULL, NULL); if (!sym) goto out; strscpy(symname, sym, KSYM_NAME_LEN); preempt_enable(); return 0; } } out: preempt_enable(); return -ERANGE; } int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *name, char *module_name, int *exported) { struct module *mod; preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { struct mod_kallsyms *kallsyms; if (mod->state == MODULE_STATE_UNFORMED) continue; kallsyms = rcu_dereference_sched(mod->kallsyms); if (symnum < kallsyms->num_symtab) { const Elf_Sym *sym = &kallsyms->symtab[symnum]; *value = kallsyms_symbol_value(sym); *type = kallsyms->typetab[symnum]; strscpy(name, kallsyms_symbol_name(kallsyms, symnum), KSYM_NAME_LEN); strscpy(module_name, mod->name, MODULE_NAME_LEN); *exported = is_exported(name, *value, mod); preempt_enable(); return 0; } symnum -= kallsyms->num_symtab; } preempt_enable(); return -ERANGE; } /* Given a module and name of symbol, find and return the symbol's value */ static unsigned long __find_kallsyms_symbol_value(struct module *mod, const char *name) { unsigned int i; struct mod_kallsyms *kallsyms = 
rcu_dereference_sched(mod->kallsyms); for (i = 0; i < kallsyms->num_symtab; i++) { const Elf_Sym *sym = &kallsyms->symtab[i]; if (strcmp(name, kallsyms_symbol_name(kallsyms, i)) == 0 && sym->st_shndx != SHN_UNDEF) return kallsyms_symbol_value(sym); } return 0; } static unsigned long __module_kallsyms_lookup_name(const char *name) { struct module *mod; char *colon; colon = strnchr(name, MODULE_NAME_LEN, ':'); if (colon) { mod = find_module_all(name, colon - name, false); if (mod) return __find_kallsyms_symbol_value(mod, colon + 1); return 0; } list_for_each_entry_rcu(mod, &modules, list) { unsigned long ret; if (mod->state == MODULE_STATE_UNFORMED) continue; ret = __find_kallsyms_symbol_value(mod, name); if (ret) return ret; } return 0; } /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name) { unsigned long ret; /* Don't lock: we're in enough trouble already. */ preempt_disable(); ret = __module_kallsyms_lookup_name(name); preempt_enable(); return ret; } unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name) { unsigned long ret; preempt_disable(); ret = __find_kallsyms_symbol_value(mod, name); preempt_enable(); return ret; } int module_kallsyms_on_each_symbol(const char *modname, int (*fn)(void *, const char *, unsigned long), void *data) { struct module *mod; unsigned int i; int ret = 0; mutex_lock(&module_mutex); list_for_each_entry(mod, &modules, list) { struct mod_kallsyms *kallsyms; if (mod->state == MODULE_STATE_UNFORMED) continue; if (modname && strcmp(modname, mod->name)) continue; /* Use rcu_dereference_sched() to remain compliant with the sparse tool */ preempt_disable(); kallsyms = rcu_dereference_sched(mod->kallsyms); preempt_enable(); for (i = 0; i < kallsyms->num_symtab; i++) { const Elf_Sym *sym = &kallsyms->symtab[i]; if (sym->st_shndx == SHN_UNDEF) continue; ret = fn(data, kallsyms_symbol_name(kallsyms, i), kallsyms_symbol_value(sym)); if (ret != 0) goto out; } /* * 
The given module is found, the subsequent modules do not * need to be compared. */ if (modname) break; } out: mutex_unlock(&module_mutex); return ret; }
7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 /* * * Author Karsten Keil <kkeil@novell.com> * * Copyright 2008 by Karsten Keil <kkeil@novell.com> * * This code is free software; you can redistribute it and/or modify * it under the terms of the GNU LESSER GENERAL PUBLIC LICENSE * version 2.1 as published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU LESSER GENERAL PUBLIC LICENSE for more details. * */ #ifndef mISDNIF_H #define mISDNIF_H #include <linux/types.h> #include <linux/errno.h> #include <linux/socket.h> /* * ABI Version 32 bit * * <8 bit> Major version * - changed if any interface become backwards incompatible * * <8 bit> Minor version * - changed if any interface is extended but backwards compatible * * <16 bit> Release number * - should be incremented on every checkin */ #define MISDN_MAJOR_VERSION 1 #define MISDN_MINOR_VERSION 1 #define MISDN_RELEASE 29 /* primitives for information exchange * generell format * <16 bit 0 > * <8 bit command> * BIT 8 = 1 LAYER private * BIT 7 = 1 answer * BIT 6 = 1 DATA * <8 bit target layer mask> * * Layer = 00 is reserved for general commands Layer = 01 L2 -> HW Layer = 02 HW -> L2 Layer = 04 L3 -> L2 Layer = 08 L2 -> L3 * Layer = FF is reserved for broadcast commands */ #define MISDN_CMDMASK 0xff00 #define MISDN_LAYERMASK 0x00ff /* generell commands */ #define OPEN_CHANNEL 0x0100 #define CLOSE_CHANNEL 0x0200 #define CONTROL_CHANNEL 0x0300 #define CHECK_DATA 0x0400 /* layer 2 -> layer 1 */ #define PH_ACTIVATE_REQ 0x0101 #define PH_DEACTIVATE_REQ 0x0201 #define PH_DATA_REQ 0x2001 
#define MPH_ACTIVATE_REQ 0x0501 #define MPH_DEACTIVATE_REQ 0x0601 #define MPH_INFORMATION_REQ 0x0701 #define PH_CONTROL_REQ 0x0801 /* layer 1 -> layer 2 */ #define PH_ACTIVATE_IND 0x0102 #define PH_ACTIVATE_CNF 0x4102 #define PH_DEACTIVATE_IND 0x0202 #define PH_DEACTIVATE_CNF 0x4202 #define PH_DATA_IND 0x2002 #define PH_DATA_E_IND 0x3002 #define MPH_ACTIVATE_IND 0x0502 #define MPH_DEACTIVATE_IND 0x0602 #define MPH_INFORMATION_IND 0x0702 #define PH_DATA_CNF 0x6002 #define PH_CONTROL_IND 0x0802 #define PH_CONTROL_CNF 0x4802 /* layer 3 -> layer 2 */ #define DL_ESTABLISH_REQ 0x1004 #define DL_RELEASE_REQ 0x1104 #define DL_DATA_REQ 0x3004 #define DL_UNITDATA_REQ 0x3104 #define DL_INFORMATION_REQ 0x0004 /* layer 2 -> layer 3 */ #define DL_ESTABLISH_IND 0x1008 #define DL_ESTABLISH_CNF 0x5008 #define DL_RELEASE_IND 0x1108 #define DL_RELEASE_CNF 0x5108 #define DL_DATA_IND 0x3008 #define DL_UNITDATA_IND 0x3108 #define DL_INFORMATION_IND 0x0008 /* intern layer 2 management */ #define MDL_ASSIGN_REQ 0x1804 #define MDL_ASSIGN_IND 0x1904 #define MDL_REMOVE_REQ 0x1A04 #define MDL_REMOVE_IND 0x1B04 #define MDL_STATUS_UP_IND 0x1C04 #define MDL_STATUS_DOWN_IND 0x1D04 #define MDL_STATUS_UI_IND 0x1E04 #define MDL_ERROR_IND 0x1F04 #define MDL_ERROR_RSP 0x5F04 /* intern layer 2 */ #define DL_TIMER200_IND 0x7004 #define DL_TIMER203_IND 0x7304 #define DL_INTERN_MSG 0x7804 /* DL_INFORMATION_IND types */ #define DL_INFO_L2_CONNECT 0x0001 #define DL_INFO_L2_REMOVED 0x0002 /* PH_CONTROL types */ /* TOUCH TONE IS 0x20XX XX "0"..."9", "A","B","C","D","*","#" */ #define DTMF_TONE_VAL 0x2000 #define DTMF_TONE_MASK 0x007F #define DTMF_TONE_START 0x2100 #define DTMF_TONE_STOP 0x2200 #define DTMF_HFC_COEF 0x4000 #define DSP_CONF_JOIN 0x2403 #define DSP_CONF_SPLIT 0x2404 #define DSP_RECEIVE_OFF 0x2405 #define DSP_RECEIVE_ON 0x2406 #define DSP_ECHO_ON 0x2407 #define DSP_ECHO_OFF 0x2408 #define DSP_MIX_ON 0x2409 #define DSP_MIX_OFF 0x240a #define DSP_DELAY 0x240b #define DSP_JITTER 0x240c #define 
DSP_TXDATA_ON 0x240d #define DSP_TXDATA_OFF 0x240e #define DSP_TX_DEJITTER 0x240f #define DSP_TX_DEJ_OFF 0x2410 #define DSP_TONE_PATT_ON 0x2411 #define DSP_TONE_PATT_OFF 0x2412 #define DSP_VOL_CHANGE_TX 0x2413 #define DSP_VOL_CHANGE_RX 0x2414 #define DSP_BF_ENABLE_KEY 0x2415 #define DSP_BF_DISABLE 0x2416 #define DSP_BF_ACCEPT 0x2416 #define DSP_BF_REJECT 0x2417 #define DSP_PIPELINE_CFG 0x2418 #define HFC_VOL_CHANGE_TX 0x2601 #define HFC_VOL_CHANGE_RX 0x2602 #define HFC_SPL_LOOP_ON 0x2603 #define HFC_SPL_LOOP_OFF 0x2604 /* for T30 FAX and analog modem */ #define HW_MOD_FRM 0x4000 #define HW_MOD_FRH 0x4001 #define HW_MOD_FTM 0x4002 #define HW_MOD_FTH 0x4003 #define HW_MOD_FTS 0x4004 #define HW_MOD_CONNECT 0x4010 #define HW_MOD_OK 0x4011 #define HW_MOD_NOCARR 0x4012 #define HW_MOD_FCERROR 0x4013 #define HW_MOD_READY 0x4014 #define HW_MOD_LASTDATA 0x4015 /* DSP_TONE_PATT_ON parameter */ #define TONE_OFF 0x0000 #define TONE_GERMAN_DIALTONE 0x0001 #define TONE_GERMAN_OLDDIALTONE 0x0002 #define TONE_AMERICAN_DIALTONE 0x0003 #define TONE_GERMAN_DIALPBX 0x0004 #define TONE_GERMAN_OLDDIALPBX 0x0005 #define TONE_AMERICAN_DIALPBX 0x0006 #define TONE_GERMAN_RINGING 0x0007 #define TONE_GERMAN_OLDRINGING 0x0008 #define TONE_AMERICAN_RINGPBX 0x000b #define TONE_GERMAN_RINGPBX 0x000c #define TONE_GERMAN_OLDRINGPBX 0x000d #define TONE_AMERICAN_RINGING 0x000e #define TONE_GERMAN_BUSY 0x000f #define TONE_GERMAN_OLDBUSY 0x0010 #define TONE_AMERICAN_BUSY 0x0011 #define TONE_GERMAN_HANGUP 0x0012 #define TONE_GERMAN_OLDHANGUP 0x0013 #define TONE_AMERICAN_HANGUP 0x0014 #define TONE_SPECIAL_INFO 0x0015 #define TONE_GERMAN_GASSENBESETZT 0x0016 #define TONE_GERMAN_AUFSCHALTTON 0x0016 /* MPH_INFORMATION_IND */ #define L1_SIGNAL_LOS_OFF 0x0010 #define L1_SIGNAL_LOS_ON 0x0011 #define L1_SIGNAL_AIS_OFF 0x0012 #define L1_SIGNAL_AIS_ON 0x0013 #define L1_SIGNAL_RDI_OFF 0x0014 #define L1_SIGNAL_RDI_ON 0x0015 #define L1_SIGNAL_SLIP_RX 0x0020 #define L1_SIGNAL_SLIP_TX 0x0021 /* * protocol ids * D 
/*
 * Protocol-id classification helpers.
 *
 * Each macro yields non-zero when @p equals one of the listed protocol ids:
 *   IS_ISDN_P_TE  - ISDN_P_TE_S0, ISDN_P_TE_E1, ISDN_P_TE_UP0, ISDN_P_LAPD_TE
 *   IS_ISDN_P_NT  - ISDN_P_NT_S0, ISDN_P_NT_E1, ISDN_P_NT_UP0, ISDN_P_LAPD_NT
 *   IS_ISDN_P_S0  - ISDN_P_TE_S0, ISDN_P_NT_S0
 *   IS_ISDN_P_E1  - ISDN_P_TE_E1, ISDN_P_NT_E1
 *   IS_ISDN_P_UP0 - ISDN_P_TE_UP0, ISDN_P_NT_UP0
 *
 * The argument is parenthesized on every use so that expressions with
 * operators of lower precedence than '==' (e.g. "proto & 0xff") are
 * compared as a whole.  Note that @p is still evaluated several times,
 * so it must not have side effects.
 */
#define IS_ISDN_P_TE(p) (((p) == ISDN_P_TE_S0) || ((p) == ISDN_P_TE_E1) || \
			 ((p) == ISDN_P_TE_UP0) || ((p) == ISDN_P_LAPD_TE))
#define IS_ISDN_P_NT(p) (((p) == ISDN_P_NT_S0) || ((p) == ISDN_P_NT_E1) || \
			 ((p) == ISDN_P_NT_UP0) || ((p) == ISDN_P_LAPD_NT))
#define IS_ISDN_P_S0(p) (((p) == ISDN_P_TE_S0) || ((p) == ISDN_P_NT_S0))
#define IS_ISDN_P_E1(p) (((p) == ISDN_P_TE_E1) || ((p) == ISDN_P_NT_E1))
#define IS_ISDN_P_UP0(p) (((p) == ISDN_P_TE_UP0) || ((p) == ISDN_P_NT_UP0))
major; unsigned char minor; unsigned short release; }; struct mISDN_devinfo { u_int id; u_int Dprotocols; u_int Bprotocols; u_int protocol; u_char channelmap[MISDN_CHMAP_SIZE]; u_int nrbchan; char name[MISDN_MAX_IDLEN]; }; struct mISDN_devrename { u_int id; char name[MISDN_MAX_IDLEN]; /* new name */ }; /* MPH_INFORMATION_REQ payload */ struct ph_info_ch { __u32 protocol; __u64 Flags; }; struct ph_info_dch { struct ph_info_ch ch; __u16 state; __u16 num_bch; }; struct ph_info { struct ph_info_dch dch; struct ph_info_ch bch[]; }; /* timer device ioctl */ #define IMADDTIMER _IOR('I', 64, int) #define IMDELTIMER _IOR('I', 65, int) /* socket ioctls */ #define IMGETVERSION _IOR('I', 66, int) #define IMGETCOUNT _IOR('I', 67, int) #define IMGETDEVINFO _IOR('I', 68, int) #define IMCTRLREQ _IOR('I', 69, int) #define IMCLEAR_L2 _IOR('I', 70, int) #define IMSETDEVNAME _IOR('I', 71, struct mISDN_devrename) #define IMHOLD_L1 _IOR('I', 72, int) static inline int test_channelmap(u_int nr, u_char *map) { if (nr <= MISDN_MAX_CHANNEL) return map[nr >> 3] & (1 << (nr & 7)); else return 0; } static inline void set_channelmap(u_int nr, u_char *map) { map[nr >> 3] |= (1 << (nr & 7)); } static inline void clear_channelmap(u_int nr, u_char *map) { map[nr >> 3] &= ~(1 << (nr & 7)); } /* CONTROL_CHANNEL parameters */ #define MISDN_CTRL_GETOP 0x0000 #define MISDN_CTRL_LOOP 0x0001 #define MISDN_CTRL_CONNECT 0x0002 #define MISDN_CTRL_DISCONNECT 0x0004 #define MISDN_CTRL_RX_BUFFER 0x0008 #define MISDN_CTRL_PCMCONNECT 0x0010 #define MISDN_CTRL_PCMDISCONNECT 0x0020 #define MISDN_CTRL_SETPEER 0x0040 #define MISDN_CTRL_UNSETPEER 0x0080 #define MISDN_CTRL_RX_OFF 0x0100 #define MISDN_CTRL_FILL_EMPTY 0x0200 #define MISDN_CTRL_GETPEER 0x0400 #define MISDN_CTRL_L1_TIMER3 0x0800 #define MISDN_CTRL_HW_FEATURES_OP 0x2000 #define MISDN_CTRL_HW_FEATURES 0x2001 #define MISDN_CTRL_HFC_OP 0x4000 #define MISDN_CTRL_HFC_PCM_CONN 0x4001 #define MISDN_CTRL_HFC_PCM_DISC 0x4002 #define MISDN_CTRL_HFC_CONF_JOIN 0x4003 
/*
 * Accessors for the struct mISDNhead that is overlaid on the start of a
 * buffer's cb[] array.
 *
 *   mISDN_HEAD_P(s)    - pointer to the header
 *   mISDN_HEAD_PRIM(s) - the header's prim field (usable as an lvalue)
 *   mISDN_HEAD_ID(s)   - the header's id field (usable as an lvalue)
 *
 * @s is a pointer expression; it is parenthesized so that arguments such
 * as "skb + 1" or "cond ? a : b" bind correctly to the "->".
 */
#define mISDN_HEAD_P(s)		((struct mISDNhead *)&(s)->cb[0])
#define mISDN_HEAD_PRIM(s)	(((struct mISDNhead *)&(s)->cb[0])->prim)
#define mISDN_HEAD_ID(s)	(((struct mISDNhead *)&(s)->cb[0])->id)
typedef int (create_func_t)(struct channel_req *); struct Bprotocol { struct list_head list; char *name; u_int Bprotocols; create_func_t *create; }; struct mISDNchannel { struct list_head list; u_int protocol; u_int nr; u_long opt; u_int addr; struct mISDNstack *st; struct mISDNchannel *peer; send_func_t *send; send_func_t *recv; ctrl_func_t *ctrl; }; struct mISDN_sock_list { struct hlist_head head; rwlock_t lock; }; struct mISDN_sock { struct sock sk; struct mISDNchannel ch; u_int cmask; struct mISDNdevice *dev; }; struct mISDNdevice { struct mISDNchannel D; u_int id; u_int Dprotocols; u_int Bprotocols; u_int nrbchan; u_char channelmap[MISDN_CHMAP_SIZE]; struct list_head bchannels; struct mISDNchannel *teimgr; struct device dev; }; struct mISDNstack { u_long status; struct mISDNdevice *dev; struct task_struct *thread; struct completion *notify; wait_queue_head_t workq; struct sk_buff_head msgq; struct list_head layer2; struct mISDNchannel *layer1; struct mISDNchannel own; struct mutex lmutex; /* protect lists */ struct mISDN_sock_list l1sock; #ifdef MISDN_MSG_STATS u_int msg_cnt; u_int sleep_cnt; u_int stopped_cnt; #endif }; typedef int (clockctl_func_t)(void *, int); struct mISDNclock { struct list_head list; char name[64]; int pri; clockctl_func_t *ctl; void *priv; }; /* global alloc/queue functions */ static inline struct sk_buff * mI_alloc_skb(unsigned int len, gfp_t gfp_mask) { struct sk_buff *skb; skb = alloc_skb(len + MISDN_HEADER_LEN, gfp_mask); if (likely(skb)) skb_reserve(skb, MISDN_HEADER_LEN); return skb; } static inline struct sk_buff * _alloc_mISDN_skb(u_int prim, u_int id, u_int len, void *dp, gfp_t gfp_mask) { struct sk_buff *skb = mI_alloc_skb(len, gfp_mask); struct mISDNhead *hh; if (!skb) return NULL; if (len) skb_put_data(skb, dp, len); hh = mISDN_HEAD_P(skb); hh->prim = prim; hh->id = id; return skb; } static inline void _queue_data(struct mISDNchannel *ch, u_int prim, u_int id, u_int len, void *dp, gfp_t gfp_mask) { struct sk_buff *skb; if 
(!ch->peer) return; skb = _alloc_mISDN_skb(prim, id, len, dp, gfp_mask); if (!skb) return; if (ch->recv(ch->peer, skb)) dev_kfree_skb(skb); } /* global register/unregister functions */ extern int mISDN_register_device(struct mISDNdevice *, struct device *parent, char *name); extern void mISDN_unregister_device(struct mISDNdevice *); extern int mISDN_register_Bprotocol(struct Bprotocol *); extern void mISDN_unregister_Bprotocol(struct Bprotocol *); extern struct mISDNclock *mISDN_register_clock(char *, int, clockctl_func_t *, void *); extern void mISDN_unregister_clock(struct mISDNclock *); static inline struct mISDNdevice *dev_to_mISDN(const struct device *dev) { if (dev) return dev_get_drvdata(dev); else return NULL; } extern void set_channel_address(struct mISDNchannel *, u_int, u_int); extern void mISDN_clock_update(struct mISDNclock *, int, ktime_t *); extern unsigned short mISDN_clock_get(void); extern const char *mISDNDevName4ch(struct mISDNchannel *); #endif /* __KERNEL__ */ #endif /* mISDNIF_H */
2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 // SPDX-License-Identifier: GPL-2.0-only /* * Driver for the ov9650 sensor * * Copyright (C) 2008 Erik Andrén * Copyright (C) 2007 Ilyes Gouta. Based on the m5603x Linux Driver Project. 
* Copyright (C) 2005 m5603x Linux Driver Project <m5602@x3ng.com.br> * * Portions of code to USB interface and ALi driver software, * Copyright (c) 2006 Willem Duinker * v4l2 interface modeled after the V4L2 driver * for SN9C10x PC Camera Controllers */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "m5602_ov9650.h" static int ov9650_s_ctrl(struct v4l2_ctrl *ctrl); static void ov9650_dump_registers(struct sd *sd); static const unsigned char preinit_ov9650[][3] = { /* [INITCAM] */ {BRIDGE, M5602_XB_MCU_CLK_DIV, 0x02}, {BRIDGE, M5602_XB_MCU_CLK_CTRL, 0xb0}, {BRIDGE, M5602_XB_SEN_CLK_DIV, 0x00}, {BRIDGE, M5602_XB_SEN_CLK_CTRL, 0xb0}, {BRIDGE, M5602_XB_ADC_CTRL, 0xc0}, {BRIDGE, M5602_XB_SENSOR_CTRL, 0x00}, {BRIDGE, M5602_XB_SENSOR_TYPE, 0x08}, {BRIDGE, M5602_XB_GPIO_DIR, 0x05}, {BRIDGE, M5602_XB_GPIO_DAT, 0x04}, {BRIDGE, M5602_XB_GPIO_EN_H, 0x06}, {BRIDGE, M5602_XB_GPIO_DIR_H, 0x06}, {BRIDGE, M5602_XB_GPIO_DAT_H, 0x00}, {BRIDGE, M5602_XB_GPIO_DAT, 0x00}, {BRIDGE, M5602_XB_I2C_CLK_DIV, 0x0a}, /* Reset chip */ {SENSOR, OV9650_COM7, OV9650_REGISTER_RESET}, /* Enable double clock */ {SENSOR, OV9650_CLKRC, 0x80}, /* Do something out of spec with the power */ {SENSOR, OV9650_OFON, 0x40} }; static const unsigned char init_ov9650[][3] = { /* [INITCAM] */ {BRIDGE, M5602_XB_MCU_CLK_DIV, 0x02}, {BRIDGE, M5602_XB_MCU_CLK_CTRL, 0xb0}, {BRIDGE, M5602_XB_SEN_CLK_DIV, 0x00}, {BRIDGE, M5602_XB_SEN_CLK_CTRL, 0xb0}, {BRIDGE, M5602_XB_ADC_CTRL, 0xc0}, {BRIDGE, M5602_XB_SENSOR_CTRL, 0x00}, {BRIDGE, M5602_XB_SENSOR_TYPE, 0x08}, {BRIDGE, M5602_XB_GPIO_DIR, 0x05}, {BRIDGE, M5602_XB_GPIO_DAT, 0x04}, {BRIDGE, M5602_XB_GPIO_EN_H, 0x06}, {BRIDGE, M5602_XB_GPIO_DIR_H, 0x06}, {BRIDGE, M5602_XB_GPIO_DAT_H, 0x00}, {BRIDGE, M5602_XB_GPIO_DAT, 0x00}, {BRIDGE, M5602_XB_I2C_CLK_DIV, 0x0a}, /* Reset chip */ {SENSOR, OV9650_COM7, OV9650_REGISTER_RESET}, /* One extra reset is needed in order to make the sensor behave properly when resuming from ram, could be a timing issue */ {SENSOR, OV9650_COM7, 
OV9650_REGISTER_RESET}, /* Enable double clock */ {SENSOR, OV9650_CLKRC, 0x80}, /* Do something out of spec with the power */ {SENSOR, OV9650_OFON, 0x40}, /* Set fast AGC/AEC algorithm with unlimited step size */ {SENSOR, OV9650_COM8, OV9650_FAST_AGC_AEC | OV9650_AEC_UNLIM_STEP_SIZE}, {SENSOR, OV9650_CHLF, 0x10}, {SENSOR, OV9650_ARBLM, 0xbf}, {SENSOR, OV9650_ACOM38, 0x81}, /* Turn off color matrix coefficient double option */ {SENSOR, OV9650_COM16, 0x00}, /* Enable color matrix for RGB/YUV, Delay Y channel, set output Y/UV delay to 1 */ {SENSOR, OV9650_COM13, 0x19}, /* Enable digital BLC, Set output mode to U Y V Y */ {SENSOR, OV9650_TSLB, 0x0c}, /* Limit the AGC/AEC stable upper region */ {SENSOR, OV9650_COM24, 0x00}, /* Enable HREF and some out of spec things */ {SENSOR, OV9650_COM12, 0x73}, /* Set all DBLC offset signs to positive and do some out of spec stuff */ {SENSOR, OV9650_DBLC1, 0xdf}, {SENSOR, OV9650_COM21, 0x06}, {SENSOR, OV9650_RSVD35, 0x91}, /* Necessary, no camera stream without it */ {SENSOR, OV9650_RSVD16, 0x06}, {SENSOR, OV9650_RSVD94, 0x99}, {SENSOR, OV9650_RSVD95, 0x99}, {SENSOR, OV9650_RSVD96, 0x04}, /* Enable full range output */ {SENSOR, OV9650_COM15, 0x0}, /* Enable HREF at optical black, enable ADBLC bias, enable ADBLC, reset timings at format change */ {SENSOR, OV9650_COM6, 0x4b}, /* Subtract 32 from the B channel bias */ {SENSOR, OV9650_BBIAS, 0xa0}, /* Subtract 32 from the Gb channel bias */ {SENSOR, OV9650_GbBIAS, 0xa0}, /* Do not bypass the analog BLC and to some out of spec stuff */ {SENSOR, OV9650_Gr_COM, 0x00}, /* Subtract 32 from the R channel bias */ {SENSOR, OV9650_RBIAS, 0xa0}, /* Subtract 32 from the R channel bias */ {SENSOR, OV9650_RBIAS, 0x0}, {SENSOR, OV9650_COM26, 0x80}, {SENSOR, OV9650_ACOMA9, 0x98}, /* Set the AGC/AEC stable region upper limit */ {SENSOR, OV9650_AEW, 0x68}, /* Set the AGC/AEC stable region lower limit */ {SENSOR, OV9650_AEB, 0x5c}, /* Set the high and low limit nibbles to 3 */ {SENSOR, OV9650_VPT, 0xc3}, 
/* Set the Automatic Gain Ceiling (AGC) to 128x, drop VSYNC at frame drop, limit exposure timing, drop frame when the AEC step is larger than the exposure gap */ {SENSOR, OV9650_COM9, 0x6e}, /* Set VSYNC negative, Set RESET to SLHS (slave mode horizontal sync) and set PWDN to SLVS (slave mode vertical sync) */ {SENSOR, OV9650_COM10, 0x42}, /* Set horizontal column start high to default value */ {SENSOR, OV9650_HSTART, 0x1a}, /* 210 */ /* Set horizontal column end */ {SENSOR, OV9650_HSTOP, 0xbf}, /* 1534 */ /* Complementing register to the two writes above */ {SENSOR, OV9650_HREF, 0xb2}, /* Set vertical row start high bits */ {SENSOR, OV9650_VSTRT, 0x02}, /* Set vertical row end low bits */ {SENSOR, OV9650_VSTOP, 0x7e}, /* Set complementing vertical frame control */ {SENSOR, OV9650_VREF, 0x10}, {SENSOR, OV9650_ADC, 0x04}, {SENSOR, OV9650_HV, 0x40}, /* Enable denoise, and white-pixel erase */ {SENSOR, OV9650_COM22, OV9650_DENOISE_ENABLE | OV9650_WHITE_PIXEL_ENABLE | OV9650_WHITE_PIXEL_OPTION}, /* Enable VARIOPIXEL */ {SENSOR, OV9650_COM3, OV9650_VARIOPIXEL}, {SENSOR, OV9650_COM4, OV9650_QVGA_VARIOPIXEL}, /* Put the sensor in soft sleep mode */ {SENSOR, OV9650_COM2, OV9650_SOFT_SLEEP | OV9650_OUTPUT_DRIVE_2X}, }; static const unsigned char res_init_ov9650[][3] = { {SENSOR, OV9650_COM2, OV9650_OUTPUT_DRIVE_2X}, {BRIDGE, M5602_XB_LINE_OF_FRAME_H, 0x82}, {BRIDGE, M5602_XB_LINE_OF_FRAME_L, 0x00}, {BRIDGE, M5602_XB_PIX_OF_LINE_H, 0x82}, {BRIDGE, M5602_XB_PIX_OF_LINE_L, 0x00}, {BRIDGE, M5602_XB_SIG_INI, 0x01} }; /* Vertically and horizontally flips the image if matched, needed for machines where the sensor is mounted upside down */ static const struct dmi_system_id ov9650_flip_dmi_table[] = { { .ident = "ASUS A6Ja", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "A6J") } }, { .ident = "ASUS A6JC", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "A6JC") } }, { .ident = "ASUS A6K", 
.matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "A6K") } }, { .ident = "ASUS A6Kt", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "A6Kt") } }, { .ident = "ASUS A6VA", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "A6VA") } }, { .ident = "ASUS A6VC", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "A6VC") } }, { .ident = "ASUS A6VM", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "A6VM") } }, { .ident = "ASUS A7V", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "A7V") } }, { .ident = "Alienware Aurora m9700", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "alienware"), DMI_MATCH(DMI_PRODUCT_NAME, "Aurora m9700") } }, {} }; static struct v4l2_pix_format ov9650_modes[] = { { 176, 144, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .sizeimage = 176 * 144, .bytesperline = 176, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 9 }, { 320, 240, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .sizeimage = 320 * 240, .bytesperline = 320, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 8 }, { 352, 288, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .sizeimage = 352 * 288, .bytesperline = 352, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 9 }, { 640, 480, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .sizeimage = 640 * 480, .bytesperline = 640, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 9 } }; static const struct v4l2_ctrl_ops ov9650_ctrl_ops = { .s_ctrl = ov9650_s_ctrl, }; int ov9650_probe(struct sd *sd) { int err = 0; u8 prod_id = 0, ver_id = 0, i; struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; if (force_sensor) { if (force_sensor == OV9650_SENSOR) { pr_info("Forcing an %s sensor\n", ov9650.name); goto sensor_found; } /* If we want to force another sensor, don't try to probe this one */ return -ENODEV; } gspca_dbg(gspca_dev, D_PROBE, "Probing for an 
ov9650 sensor\n"); /* Run the pre-init before probing the sensor */ for (i = 0; i < ARRAY_SIZE(preinit_ov9650) && !err; i++) { u8 data = preinit_ov9650[i][2]; if (preinit_ov9650[i][0] == SENSOR) err = m5602_write_sensor(sd, preinit_ov9650[i][1], &data, 1); else err = m5602_write_bridge(sd, preinit_ov9650[i][1], data); } if (err < 0) return err; if (m5602_read_sensor(sd, OV9650_PID, &prod_id, 1)) return -ENODEV; if (m5602_read_sensor(sd, OV9650_VER, &ver_id, 1)) return -ENODEV; if ((prod_id == 0x96) && (ver_id == 0x52)) { pr_info("Detected an ov9650 sensor\n"); goto sensor_found; } return -ENODEV; sensor_found: sd->gspca_dev.cam.cam_mode = ov9650_modes; sd->gspca_dev.cam.nmodes = ARRAY_SIZE(ov9650_modes); return 0; } int ov9650_init(struct sd *sd) { int i, err = 0; u8 data; if (dump_sensor) ov9650_dump_registers(sd); for (i = 0; i < ARRAY_SIZE(init_ov9650) && !err; i++) { data = init_ov9650[i][2]; if (init_ov9650[i][0] == SENSOR) err = m5602_write_sensor(sd, init_ov9650[i][1], &data, 1); else err = m5602_write_bridge(sd, init_ov9650[i][1], data); } return 0; } int ov9650_init_controls(struct sd *sd) { struct v4l2_ctrl_handler *hdl = &sd->gspca_dev.ctrl_handler; sd->gspca_dev.vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 9); sd->auto_white_bal = v4l2_ctrl_new_std(hdl, &ov9650_ctrl_ops, V4L2_CID_AUTO_WHITE_BALANCE, 0, 1, 1, 1); sd->red_bal = v4l2_ctrl_new_std(hdl, &ov9650_ctrl_ops, V4L2_CID_RED_BALANCE, 0, 255, 1, RED_GAIN_DEFAULT); sd->blue_bal = v4l2_ctrl_new_std(hdl, &ov9650_ctrl_ops, V4L2_CID_BLUE_BALANCE, 0, 255, 1, BLUE_GAIN_DEFAULT); sd->autoexpo = v4l2_ctrl_new_std_menu(hdl, &ov9650_ctrl_ops, V4L2_CID_EXPOSURE_AUTO, 1, 0, V4L2_EXPOSURE_AUTO); sd->expo = v4l2_ctrl_new_std(hdl, &ov9650_ctrl_ops, V4L2_CID_EXPOSURE, 0, 0x1ff, 4, EXPOSURE_DEFAULT); sd->autogain = v4l2_ctrl_new_std(hdl, &ov9650_ctrl_ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); sd->gain = v4l2_ctrl_new_std(hdl, &ov9650_ctrl_ops, V4L2_CID_GAIN, 0, 0x3ff, 1, GAIN_DEFAULT); sd->hflip = 
/*
 * Configure the bridge and the sensor for the currently selected mode.
 *
 * Width, height and the vertical offset (the mode's .priv field) are taken
 * from cam_mode[curr_mode].  The function writes the res_init_ov9650 table,
 * then the vertical and horizontal sync parameters to the bridge, and
 * finally selects the sensor resolution through OV9650_COM7.
 *
 * Returns 0 on success or the negative error code of the first bridge or
 * sensor write that failed.
 */
int ov9650_start(struct sd *sd)
{
	u8 data;
	int i, err = 0;
	struct cam *cam = &sd->gspca_dev.cam;

	int width = cam->cam_mode[sd->gspca_dev.curr_mode].width;
	int height = cam->cam_mode[sd->gspca_dev.curr_mode].height;
	int ver_offs = cam->cam_mode[sd->gspca_dev.curr_mode].priv;
	int hor_offs = OV9650_LEFT_OFFSET;
	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;

	/*
	 * Shift the vertical offset by one line when exactly one of
	 * "machine is listed in the flip DMI table" and "vflip control is
	 * set" holds, i.e. when the image ends up vertically flipped.
	 */
	if ((!dmi_check_system(ov9650_flip_dmi_table) &&
		sd->vflip->val) ||
		(dmi_check_system(ov9650_flip_dmi_table) &&
		!sd->vflip->val))
		ver_offs--;

	/* Modes up to 320 pixels wide use half the horizontal offset */
	if (width <= 320)
		hor_offs /= 2;

	/* Write the res_init_ov9650 table; stop at the first failing write */
	for (i = 0; i < ARRAY_SIZE(res_init_ov9650) && !err; i++) {
		if (res_init_ov9650[i][0] == BRIDGE)
			err = m5602_write_bridge(sd, res_init_ov9650[i][1],
				res_init_ov9650[i][2]);
		else if (res_init_ov9650[i][0] == SENSOR) {
			data = res_init_ov9650[i][2];
			err = m5602_write_sensor(sd,
				res_init_ov9650[i][1], &data, 1);
		}
	}
	if (err < 0)
		return err;

	/*
	 * Vertical sync parameters, written byte by byte to the same bridge
	 * register: ver_offs (high, low), a zero byte, height (high, low),
	 * then two more zero bytes.
	 */
	err = m5602_write_bridge(sd, M5602_XB_VSYNC_PARA,
				 ((ver_offs >> 8) & 0xff));
	if (err < 0)
		return err;

	err = m5602_write_bridge(sd, M5602_XB_VSYNC_PARA, (ver_offs & 0xff));
	if (err < 0)
		return err;

	err = m5602_write_bridge(sd, M5602_XB_VSYNC_PARA, 0);
	if (err < 0)
		return err;

	err = m5602_write_bridge(sd, M5602_XB_VSYNC_PARA, (height >> 8) & 0xff);
	if (err < 0)
		return err;

	err = m5602_write_bridge(sd, M5602_XB_VSYNC_PARA, (height & 0xff));
	if (err < 0)
		return err;

	for (i = 0; i < 2 && !err; i++)
		err = m5602_write_bridge(sd, M5602_XB_VSYNC_PARA, 0);
	if (err < 0)
		return err;

	/* SIG_INI is written 0, then 2, before the horizontal parameters */
	err = m5602_write_bridge(sd, M5602_XB_SIG_INI, 0);
	if (err < 0)
		return err;

	err = m5602_write_bridge(sd, M5602_XB_SIG_INI, 2);
	if (err < 0)
		return err;

	/*
	 * Horizontal sync parameters: hor_offs (high, low) followed by
	 * width + hor_offs (high, low).
	 */
	err = m5602_write_bridge(sd, M5602_XB_HSYNC_PARA,
				 (hor_offs >> 8) & 0xff);
	if (err < 0)
		return err;

	err = m5602_write_bridge(sd, M5602_XB_HSYNC_PARA, hor_offs & 0xff);
	if (err < 0)
		return err;

	err = m5602_write_bridge(sd, M5602_XB_HSYNC_PARA,
				 ((width + hor_offs) >> 8) & 0xff);
	if (err < 0)
		return err;

	err = m5602_write_bridge(sd, M5602_XB_HSYNC_PARA,
				 ((width + hor_offs) & 0xff));
	if (err < 0)
		return err;

	/* ... and SIG_INI goes back to 0 once the parameters are written */
	err = m5602_write_bridge(sd, M5602_XB_SIG_INI, 0);
	if (err < 0)
		return err;

	/*
	 * Select the sensor output size via COM7.  Widths other than the
	 * four handled here leave COM7 untouched and return the status of
	 * the last bridge write.
	 */
	switch (width) {
	case 640:
		gspca_dbg(gspca_dev, D_CONF, "Configuring camera for VGA mode\n");

		data = OV9650_VGA_SELECT | OV9650_RGB_SELECT |
		       OV9650_RAW_RGB_SELECT;
		err = m5602_write_sensor(sd, OV9650_COM7, &data, 1);
		break;

	case 352:
		gspca_dbg(gspca_dev, D_CONF, "Configuring camera for CIF mode\n");

		data = OV9650_CIF_SELECT | OV9650_RGB_SELECT |
				OV9650_RAW_RGB_SELECT;
		err = m5602_write_sensor(sd, OV9650_COM7, &data, 1);
		break;

	case 320:
		gspca_dbg(gspca_dev, D_CONF, "Configuring camera for QVGA mode\n");

		data = OV9650_QVGA_SELECT | OV9650_RGB_SELECT |
				OV9650_RAW_RGB_SELECT;
		err = m5602_write_sensor(sd, OV9650_COM7, &data, 1);
		break;

	case 176:
		gspca_dbg(gspca_dev, D_CONF, "Configuring camera for QCIF mode\n");

		data = OV9650_QCIF_SELECT | OV9650_RGB_SELECT |
			OV9650_RAW_RGB_SELECT;
		err = m5602_write_sensor(sd, OV9650_COM7, &data, 1);
		break;
	}
	return err;
}
middle bits */ i2c_data = (val >> 2) & 0xff; err = m5602_write_sensor(sd, OV9650_AECH, &i2c_data, 1); if (err < 0) return err; /* The 2 LSBs */ i2c_data = val & 0x03; err = m5602_write_sensor(sd, OV9650_COM1, &i2c_data, 1); return err; } static int ov9650_set_gain(struct gspca_dev *gspca_dev, __s32 val) { int err; u8 i2c_data; struct sd *sd = (struct sd *) gspca_dev; gspca_dbg(gspca_dev, D_CONF, "Setting gain to %d\n", val); /* The 2 MSB */ /* Read the OV9650_VREF register first to avoid corrupting the VREF high and low bits */ err = m5602_read_sensor(sd, OV9650_VREF, &i2c_data, 1); if (err < 0) return err; /* Mask away all uninteresting bits */ i2c_data = ((val & 0x0300) >> 2) | (i2c_data & 0x3f); err = m5602_write_sensor(sd, OV9650_VREF, &i2c_data, 1); if (err < 0) return err; /* The 8 LSBs */ i2c_data = val & 0xff; err = m5602_write_sensor(sd, OV9650_GAIN, &i2c_data, 1); return err; } static int ov9650_set_red_balance(struct gspca_dev *gspca_dev, __s32 val) { int err; u8 i2c_data; struct sd *sd = (struct sd *) gspca_dev; gspca_dbg(gspca_dev, D_CONF, "Set red gain to %d\n", val); i2c_data = val & 0xff; err = m5602_write_sensor(sd, OV9650_RED, &i2c_data, 1); return err; } static int ov9650_set_blue_balance(struct gspca_dev *gspca_dev, __s32 val) { int err; u8 i2c_data; struct sd *sd = (struct sd *) gspca_dev; gspca_dbg(gspca_dev, D_CONF, "Set blue gain to %d\n", val); i2c_data = val & 0xff; err = m5602_write_sensor(sd, OV9650_BLUE, &i2c_data, 1); return err; } static int ov9650_set_hvflip(struct gspca_dev *gspca_dev) { int err; u8 i2c_data; struct sd *sd = (struct sd *) gspca_dev; int hflip = sd->hflip->val; int vflip = sd->vflip->val; gspca_dbg(gspca_dev, D_CONF, "Set hvflip to %d %d\n", hflip, vflip); if (dmi_check_system(ov9650_flip_dmi_table)) vflip = !vflip; i2c_data = (hflip << 5) | (vflip << 4); err = m5602_write_sensor(sd, OV9650_MVFP, &i2c_data, 1); if (err < 0) return err; /* When vflip is toggled we need to readjust the bridge hsync/vsync */ if 
(gspca_dev->streaming) err = ov9650_start(sd); return err; } static int ov9650_set_auto_exposure(struct gspca_dev *gspca_dev, __s32 val) { int err; u8 i2c_data; struct sd *sd = (struct sd *) gspca_dev; gspca_dbg(gspca_dev, D_CONF, "Set auto exposure control to %d\n", val); err = m5602_read_sensor(sd, OV9650_COM8, &i2c_data, 1); if (err < 0) return err; val = (val == V4L2_EXPOSURE_AUTO); i2c_data = ((i2c_data & 0xfe) | ((val & 0x01) << 0)); return m5602_write_sensor(sd, OV9650_COM8, &i2c_data, 1); } static int ov9650_set_auto_white_balance(struct gspca_dev *gspca_dev, __s32 val) { int err; u8 i2c_data; struct sd *sd = (struct sd *) gspca_dev; gspca_dbg(gspca_dev, D_CONF, "Set auto white balance to %d\n", val); err = m5602_read_sensor(sd, OV9650_COM8, &i2c_data, 1); if (err < 0) return err; i2c_data = ((i2c_data & 0xfd) | ((val & 0x01) << 1)); err = m5602_write_sensor(sd, OV9650_COM8, &i2c_data, 1); return err; } static int ov9650_set_auto_gain(struct gspca_dev *gspca_dev, __s32 val) { int err; u8 i2c_data; struct sd *sd = (struct sd *) gspca_dev; gspca_dbg(gspca_dev, D_CONF, "Set auto gain control to %d\n", val); err = m5602_read_sensor(sd, OV9650_COM8, &i2c_data, 1); if (err < 0) return err; i2c_data = ((i2c_data & 0xfb) | ((val & 0x01) << 2)); return m5602_write_sensor(sd, OV9650_COM8, &i2c_data, 1); } static int ov9650_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *) gspca_dev; int err; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_AUTO_WHITE_BALANCE: err = ov9650_set_auto_white_balance(gspca_dev, ctrl->val); if (err || ctrl->val) return err; err = ov9650_set_red_balance(gspca_dev, sd->red_bal->val); if (err) return err; err = ov9650_set_blue_balance(gspca_dev, sd->blue_bal->val); break; case V4L2_CID_EXPOSURE_AUTO: err = ov9650_set_auto_exposure(gspca_dev, ctrl->val); if (err || ctrl->val == V4L2_EXPOSURE_AUTO) return err; err = 
ov9650_set_exposure(gspca_dev, sd->expo->val); break; case V4L2_CID_AUTOGAIN: err = ov9650_set_auto_gain(gspca_dev, ctrl->val); if (err || ctrl->val) return err; err = ov9650_set_gain(gspca_dev, sd->gain->val); break; case V4L2_CID_HFLIP: err = ov9650_set_hvflip(gspca_dev); break; default: return -EINVAL; } return err; } static void ov9650_dump_registers(struct sd *sd) { int address; pr_info("Dumping the ov9650 register state\n"); for (address = 0; address < 0xa9; address++) { u8 value; m5602_read_sensor(sd, address, &value, 1); pr_info("register 0x%x contains 0x%x\n", address, value); } pr_info("ov9650 register state dump complete\n"); pr_info("Probing for which registers that are read/write\n"); for (address = 0; address < 0xff; address++) { u8 old_value, ctrl_value; u8 test_value[2] = {0xff, 0xff}; m5602_read_sensor(sd, address, &old_value, 1); m5602_write_sensor(sd, address, test_value, 1); m5602_read_sensor(sd, address, &ctrl_value, 1); if (ctrl_value == test_value[0]) pr_info("register 0x%x is writeable\n", address); else pr_info("register 0x%x is read only\n", address); /* Restore original value */ m5602_write_sensor(sd, address, &old_value, 1); } }
5 5 17 17 17 5 17 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 // SPDX-License-Identifier: GPL-2.0 #include <linux/highmem.h> #include <linux/module.h> #include <linux/security.h> #include <linux/slab.h> #include <linux/types.h> #include "sysfs.h" /* * sysfs support for firmware loader */ void __fw_load_abort(struct fw_priv *fw_priv) { /* * There is a small window in which user can write to 'loading' * between loading done/aborted and disappearance of 'loading' */ if 
(fw_state_is_aborted(fw_priv) || fw_state_is_done(fw_priv)) return; fw_state_aborted(fw_priv); } #ifdef CONFIG_FW_LOADER_USER_HELPER static ssize_t timeout_show(const struct class *class, const struct class_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", __firmware_loading_timeout()); } /** * timeout_store() - set number of seconds to wait for firmware * @class: device class pointer * @attr: device attribute pointer * @buf: buffer to scan for timeout value * @count: number of bytes in @buf * * Sets the number of seconds to wait for the firmware. Once * this expires an error will be returned to the driver and no * firmware will be provided. * * Note: zero means 'wait forever'. **/ static ssize_t timeout_store(const struct class *class, const struct class_attribute *attr, const char *buf, size_t count) { int tmp_loading_timeout = simple_strtol(buf, NULL, 10); if (tmp_loading_timeout < 0) tmp_loading_timeout = 0; __fw_fallback_set_timeout(tmp_loading_timeout); return count; } static CLASS_ATTR_RW(timeout); static struct attribute *firmware_class_attrs[] = { &class_attr_timeout.attr, NULL, }; ATTRIBUTE_GROUPS(firmware_class); static int do_firmware_uevent(const struct fw_sysfs *fw_sysfs, struct kobj_uevent_env *env) { if (add_uevent_var(env, "FIRMWARE=%s", fw_sysfs->fw_priv->fw_name)) return -ENOMEM; if (add_uevent_var(env, "TIMEOUT=%i", __firmware_loading_timeout())) return -ENOMEM; if (add_uevent_var(env, "ASYNC=%d", fw_sysfs->nowait)) return -ENOMEM; return 0; } static int firmware_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); int err = 0; mutex_lock(&fw_lock); if (fw_sysfs->fw_priv) err = do_firmware_uevent(fw_sysfs, env); mutex_unlock(&fw_lock); return err; } #endif /* CONFIG_FW_LOADER_USER_HELPER */ static void fw_dev_release(struct device *dev) { struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); if (fw_sysfs->fw_upload_priv) fw_upload_free(fw_sysfs); kfree(fw_sysfs); } static struct 
class firmware_class = { .name = "firmware", #ifdef CONFIG_FW_LOADER_USER_HELPER .class_groups = firmware_class_groups, .dev_uevent = firmware_uevent, #endif .dev_release = fw_dev_release, }; int register_sysfs_loader(void) { int ret = class_register(&firmware_class); if (ret != 0) return ret; return register_firmware_config_sysctl(); } void unregister_sysfs_loader(void) { unregister_firmware_config_sysctl(); class_unregister(&firmware_class); } static ssize_t firmware_loading_show(struct device *dev, struct device_attribute *attr, char *buf) { struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); int loading = 0; mutex_lock(&fw_lock); if (fw_sysfs->fw_priv) loading = fw_state_is_loading(fw_sysfs->fw_priv); mutex_unlock(&fw_lock); return sysfs_emit(buf, "%d\n", loading); } /** * firmware_loading_store() - set value in the 'loading' control file * @dev: device pointer * @attr: device attribute pointer * @buf: buffer to scan for loading control value * @count: number of bytes in @buf * * The relevant values are: * * 1: Start a load, discarding any previous partial load. * 0: Conclude the load and hand the data to the driver code. * -1: Conclude the load with an error and discard any written data. **/ static ssize_t firmware_loading_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); struct fw_priv *fw_priv; ssize_t written = count; int loading = simple_strtol(buf, NULL, 10); mutex_lock(&fw_lock); fw_priv = fw_sysfs->fw_priv; if (fw_state_is_aborted(fw_priv) || fw_state_is_done(fw_priv)) goto out; switch (loading) { case 1: /* discarding any previous partial load */ fw_free_paged_buf(fw_priv); fw_state_start(fw_priv); break; case 0: if (fw_state_is_loading(fw_priv)) { int rc; /* * Several loading requests may be pending on * one same firmware buf, so let all requests * see the mapped 'buf->data' once the loading * is completed. 
*/ rc = fw_map_paged_buf(fw_priv); if (rc) dev_err(dev, "%s: map pages failed\n", __func__); else rc = security_kernel_post_load_data(fw_priv->data, fw_priv->size, LOADING_FIRMWARE, "blob"); /* * Same logic as fw_load_abort, only the DONE bit * is ignored and we set ABORT only on failure. */ if (rc) { fw_state_aborted(fw_priv); written = rc; } else { fw_state_done(fw_priv); /* * If this is a user-initiated firmware upload * then start the upload in a worker thread now. */ rc = fw_upload_start(fw_sysfs); if (rc) written = rc; } break; } fallthrough; default: dev_err(dev, "%s: unexpected value (%d)\n", __func__, loading); fallthrough; case -1: fw_load_abort(fw_sysfs); if (fw_sysfs->fw_upload_priv) fw_state_init(fw_sysfs->fw_priv); break; } out: mutex_unlock(&fw_lock); return written; } DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store); static void firmware_rw_data(struct fw_priv *fw_priv, char *buffer, loff_t offset, size_t count, bool read) { if (read) memcpy(buffer, fw_priv->data + offset, count); else memcpy(fw_priv->data + offset, buffer, count); } static void firmware_rw(struct fw_priv *fw_priv, char *buffer, loff_t offset, size_t count, bool read) { while (count) { int page_nr = offset >> PAGE_SHIFT; int page_ofs = offset & (PAGE_SIZE - 1); int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count); if (read) memcpy_from_page(buffer, fw_priv->pages[page_nr], page_ofs, page_cnt); else memcpy_to_page(fw_priv->pages[page_nr], page_ofs, buffer, page_cnt); buffer += page_cnt; offset += page_cnt; count -= page_cnt; } } static ssize_t firmware_data_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); struct fw_priv *fw_priv; ssize_t ret_count; mutex_lock(&fw_lock); fw_priv = fw_sysfs->fw_priv; if (!fw_priv || fw_state_is_done(fw_priv)) { ret_count = -ENODEV; goto out; } if (offset > 
fw_priv->size) { ret_count = 0; goto out; } if (count > fw_priv->size - offset) count = fw_priv->size - offset; ret_count = count; if (fw_priv->data) firmware_rw_data(fw_priv, buffer, offset, count, true); else firmware_rw(fw_priv, buffer, offset, count, true); out: mutex_unlock(&fw_lock); return ret_count; } static int fw_realloc_pages(struct fw_sysfs *fw_sysfs, int min_size) { int err; err = fw_grow_paged_buf(fw_sysfs->fw_priv, PAGE_ALIGN(min_size) >> PAGE_SHIFT); if (err) fw_load_abort(fw_sysfs); return err; } /** * firmware_data_write() - write method for firmware * @filp: open sysfs file * @kobj: kobject for the device * @bin_attr: bin_attr structure * @buffer: buffer being written * @offset: buffer offset for write in total data store area * @count: buffer size * * Data written to the 'data' attribute will be later handed to * the driver as a firmware image. **/ static ssize_t firmware_data_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); struct fw_priv *fw_priv; ssize_t retval; if (!capable(CAP_SYS_RAWIO)) return -EPERM; mutex_lock(&fw_lock); fw_priv = fw_sysfs->fw_priv; if (!fw_priv || fw_state_is_done(fw_priv)) { retval = -ENODEV; goto out; } if (fw_priv->data) { if (offset + count > fw_priv->allocated_size) { retval = -ENOMEM; goto out; } firmware_rw_data(fw_priv, buffer, offset, count, false); retval = count; } else { retval = fw_realloc_pages(fw_sysfs, offset + count); if (retval) goto out; retval = count; firmware_rw(fw_priv, buffer, offset, count, false); } fw_priv->size = max_t(size_t, offset + count, fw_priv->size); out: mutex_unlock(&fw_lock); return retval; } static struct bin_attribute firmware_attr_data = { .attr = { .name = "data", .mode = 0644 }, .size = 0, .read = firmware_data_read, .write = firmware_data_write, }; static struct attribute *fw_dev_attrs[] = { &dev_attr_loading.attr, 
#ifdef CONFIG_FW_UPLOAD &dev_attr_cancel.attr, &dev_attr_status.attr, &dev_attr_error.attr, &dev_attr_remaining_size.attr, #endif NULL }; static struct bin_attribute *fw_dev_bin_attrs[] = { &firmware_attr_data, NULL }; static const struct attribute_group fw_dev_attr_group = { .attrs = fw_dev_attrs, .bin_attrs = fw_dev_bin_attrs, #ifdef CONFIG_FW_UPLOAD .is_visible = fw_upload_is_visible, #endif }; static const struct attribute_group *fw_dev_attr_groups[] = { &fw_dev_attr_group, NULL }; struct fw_sysfs * fw_create_instance(struct firmware *firmware, const char *fw_name, struct device *device, u32 opt_flags) { struct fw_sysfs *fw_sysfs; struct device *f_dev; fw_sysfs = kzalloc(sizeof(*fw_sysfs), GFP_KERNEL); if (!fw_sysfs) { fw_sysfs = ERR_PTR(-ENOMEM); goto exit; } fw_sysfs->nowait = !!(opt_flags & FW_OPT_NOWAIT); fw_sysfs->fw = firmware; f_dev = &fw_sysfs->dev; device_initialize(f_dev); dev_set_name(f_dev, "%s", fw_name); f_dev->parent = device; f_dev->class = &firmware_class; f_dev->groups = fw_dev_attr_groups; exit: return fw_sysfs; }
2 2 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 
1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 
1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 
1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 /* * Atheros CARL9170 driver * * mac80211 interaction code * * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> * Copyright 2009, 2010, Christian Lamparter <chunkeey@googlemail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; see the file COPYING. If not, see * http://www.gnu.org/licenses/. * * This file incorporates work covered by the following copyright and * permission notice: * Copyright (c) 2007-2008 Atheros Communications, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/

#include <linux/slab.h>
#include <linux/module.h>
#include <linux/etherdevice.h>
#include <linux/random.h>
#include <net/mac80211.h>
#include <net/cfg80211.h>
#include "hw.h"
#include "carl9170.h"
#include "cmd.h"

/* Read-only (0444) module parameters. */
static bool modparam_nohwcrypt;
module_param_named(nohwcrypt, modparam_nohwcrypt, bool, 0444);
MODULE_PARM_DESC(nohwcrypt, "Disable hardware crypto offload.");

int modparam_noht;
module_param_named(noht, modparam_noht, int, 0444);
MODULE_PARM_DESC(noht, "Disable MPDU aggregation.");

/*
 * Build one rate table entry. hw_value packs the hardware rate code in
 * the low four bits and the tx power index in the bits above them.
 */
#define RATE(_bitrate, _hw_rate, _txpidx, _flags) {	\
	.bitrate	= (_bitrate),			\
	.flags		= (_flags),			\
	.hw_value	= (_hw_rate) | (_txpidx) << 4,	\
}

/*
 * One shared rate table: the "g" view below uses all 12 entries, the
 * "a" view skips the first four and uses the remaining 8.
 */
struct ieee80211_rate __carl9170_ratetable[] = {
	RATE(10, 0, 0, 0),
	RATE(20, 1, 1, IEEE80211_RATE_SHORT_PREAMBLE),
	RATE(55, 2, 2, IEEE80211_RATE_SHORT_PREAMBLE),
	RATE(110, 3, 3, IEEE80211_RATE_SHORT_PREAMBLE),
	RATE(60, 0xb, 0, 0),
	RATE(90, 0xf, 0, 0),
	RATE(120, 0xa, 0, 0),
	RATE(180, 0xe, 0, 0),
	RATE(240, 0x9, 0, 0),
	RATE(360, 0xd, 1, 0),
	RATE(480, 0x8, 2, 0),
	RATE(540, 0xc, 3, 0),
};
#undef RATE

#define carl9170_g_ratetable		(__carl9170_ratetable + 0)
#define carl9170_g_ratetable_size	12
#define carl9170_a_ratetable		(__carl9170_ratetable + 4)
#define carl9170_a_ratetable_size	8

/*
 * NB: The hw_value is used as an index into the carl9170_phy_freq_params
 *     array in phy.c so that we don't have to do frequency lookups!
 */
#define CHAN(_freq, _idx) {		\
	.center_freq	= (_freq),	\
	.hw_value	= (_idx),	\
	.max_power	= 18, /* XXX */	\
}

/* 2.4 GHz channels, hw_value 0..13. */
static struct ieee80211_channel carl9170_2ghz_chantable[] = {
	CHAN(2412,  0),
	CHAN(2417,  1),
	CHAN(2422,  2),
	CHAN(2427,  3),
	CHAN(2432,  4),
	CHAN(2437,  5),
	CHAN(2442,  6),
	CHAN(2447,  7),
	CHAN(2452,  8),
	CHAN(2457,  9),
	CHAN(2462, 10),
	CHAN(2467, 11),
	CHAN(2472, 12),
	CHAN(2484, 13),
};

/*
 * 5 GHz channels, hw_value 14..48. The table is ordered by hw_value,
 * not strictly by frequency (the last four entries are lower in
 * frequency than the ones before them).
 */
static struct ieee80211_channel carl9170_5ghz_chantable[] = {
	CHAN(4920, 14),
	CHAN(4940, 15),
	CHAN(4960, 16),
	CHAN(4980, 17),
	CHAN(5040, 18),
	CHAN(5060, 19),
	CHAN(5080, 20),
	CHAN(5180, 21),
	CHAN(5200, 22),
	CHAN(5220, 23),
	CHAN(5240, 24),
	CHAN(5260, 25),
	CHAN(5280, 26),
	CHAN(5300, 27),
	CHAN(5320, 28),
	CHAN(5500, 29),
	CHAN(5520, 30),
	CHAN(5540, 31),
	CHAN(5560, 32),
	CHAN(5580, 33),
	CHAN(5600, 34),
	CHAN(5620, 35),
	CHAN(5640, 36),
	CHAN(5660, 37),
	CHAN(5680, 38),
	CHAN(5700, 39),
	CHAN(5745, 40),
	CHAN(5765, 41),
	CHAN(5785, 42),
	CHAN(5805, 43),
	CHAN(5825, 44),
	CHAN(5170, 45),
	CHAN(5190, 46),
	CHAN(5210, 47),
	CHAN(5230, 48),
};
#undef CHAN

/* HT capabilities initializer shared by both band definitions below. */
#define CARL9170_HT_CAP							\
{									\
	.ht_supported	= true,						\
	.cap		= IEEE80211_HT_CAP_MAX_AMSDU |			\
			  IEEE80211_HT_CAP_SUP_WIDTH_20_40 |		\
			  IEEE80211_HT_CAP_SGI_40 |			\
			  IEEE80211_HT_CAP_DSSSCCK40 |			\
			  IEEE80211_HT_CAP_SM_PS,			\
	.ampdu_factor	= IEEE80211_HT_MAX_AMPDU_64K,			\
	.ampdu_density	= IEEE80211_HT_MPDU_DENSITY_8,			\
	.mcs		= {						\
		.rx_mask = { 0xff, 0xff, 0, 0, 0x1, 0, 0, 0, 0, 0, },	\
		.rx_highest = cpu_to_le16(300),				\
		.tx_params = IEEE80211_HT_MCS_TX_DEFINED,		\
	},								\
}

static struct ieee80211_supported_band carl9170_band_2GHz = {
	.channels	= carl9170_2ghz_chantable,
	.n_channels	= ARRAY_SIZE(carl9170_2ghz_chantable),
	.bitrates	= carl9170_g_ratetable,
	.n_bitrates	= carl9170_g_ratetable_size,
	.ht_cap		= CARL9170_HT_CAP,
};

static struct ieee80211_supported_band carl9170_band_5GHz = {
	.channels	= carl9170_5ghz_chantable,
	.n_channels	= ARRAY_SIZE(carl9170_5ghz_chantable),
	.bitrates	= carl9170_a_ratetable,
	.n_bitrates	= carl9170_a_ratetable_size,
	.ht_cap		= CARL9170_HT_CAP,
};
/*
 * Garbage-collect aggregation sessions.
 *
 * Every entry on ar->tx_ampdu_list whose state is SHUTDOWN is switched to
 * KILLED, unlinked from the RCU list (under tx_ampdu_list_lock) and parked
 * on a local list. After synchronize_rcu() the parked entries are drained:
 * each queued skb is handed to carl9170_tx_status(ar, skb, false) and the
 * entry is freed with kfree().
 */
static void carl9170_ampdu_gc(struct ar9170 *ar)
{
	struct carl9170_sta_tid *tid_info;
	LIST_HEAD(tid_gc);

	rcu_read_lock();
	list_for_each_entry_rcu(tid_info, &ar->tx_ampdu_list, list) {
		spin_lock_bh(&ar->tx_ampdu_list_lock);
		if (tid_info->state == CARL9170_TID_STATE_SHUTDOWN) {
			tid_info->state = CARL9170_TID_STATE_KILLED;
			list_del_rcu(&tid_info->list);
			ar->tx_ampdu_list_len--;
			list_add_tail(&tid_info->tmp_list, &tid_gc);
		}
		spin_unlock_bh(&ar->tx_ampdu_list_lock);
	}
	/* tid_info is the loop cursor as left by the finished traversal. */
	rcu_assign_pointer(ar->tx_ampdu_iter, tid_info);
	rcu_read_unlock();

	/* Wait for readers before the unlinked entries are freed below. */
	synchronize_rcu();

	while (!list_empty(&tid_gc)) {
		struct sk_buff *skb;
		tid_info = list_first_entry(&tid_gc, struct carl9170_sta_tid,
					    tmp_list);

		while ((skb = __skb_dequeue(&tid_info->queue)))
			carl9170_tx_status(ar, skb, false);

		list_del_init(&tid_info->tmp_list);
		kfree(tid_info);
	}
}

/*
 * Flush the transmit path.
 *
 * With @drop_queued set, every skb still sitting in ar->tx_pending[] is
 * dequeued and completed via carl9170_tx_status(ar, skb, false); frames
 * flagged IEEE80211_TX_CTL_AMPDU also decrement ar->tx_ampdu_upload.
 * Afterwards, if ar->tx_total_queued is non-zero, the function waits up to
 * HZ jiffies on ar->tx_flush and WARNs if that wait times out.
 */
static void carl9170_flush(struct ar9170 *ar, bool drop_queued)
{
	if (drop_queued) {
		int i;

		/*
		 * We can only drop frames which have not been uploaded
		 * to the device yet.
		 */

		for (i = 0; i < ar->hw->queues; i++) {
			struct sk_buff *skb;

			while ((skb = skb_dequeue(&ar->tx_pending[i]))) {
				struct ieee80211_tx_info *info;

				info = IEEE80211_SKB_CB(skb);
				if (info->flags & IEEE80211_TX_CTL_AMPDU)
					atomic_dec(&ar->tx_ampdu_upload);

				carl9170_tx_status(ar, skb, false);
			}
		}
	}

	/* Wait for all other outstanding frames to timeout.
*/
	if (atomic_read(&ar->tx_total_queued))
		WARN_ON(wait_for_completion_timeout(&ar->tx_flush, HZ) == 0);
}

/*
 * Suspend all aggregation sessions and drop their queued frames.
 *
 * Every entry on ar->tx_ampdu_list with a state above SUSPEND is set to
 * SUSPEND and its queue is moved (under the per-TID lock) onto a local
 * list. The collected skbs are completed with
 * carl9170_tx_status(ar, skb, false) only after all locks were released.
 */
static void carl9170_flush_ba(struct ar9170 *ar)
{
	struct sk_buff_head free;
	struct carl9170_sta_tid *tid_info;
	struct sk_buff *skb;

	__skb_queue_head_init(&free);

	rcu_read_lock();
	spin_lock_bh(&ar->tx_ampdu_list_lock);
	list_for_each_entry_rcu(tid_info, &ar->tx_ampdu_list, list) {
		if (tid_info->state > CARL9170_TID_STATE_SUSPEND) {
			tid_info->state = CARL9170_TID_STATE_SUSPEND;

			spin_lock(&tid_info->lock);
			while ((skb = __skb_dequeue(&tid_info->queue)))
				__skb_queue_tail(&free, skb);
			spin_unlock(&tid_info->lock);
		}
	}
	spin_unlock_bh(&ar->tx_ampdu_list_lock);
	rcu_read_unlock();

	while ((skb = __skb_dequeue(&free)))
		carl9170_tx_status(ar, skb, false);
}

/*
 * Throw away every frame the driver still tracks and reset the TX
 * bookkeeping: runs the ampdu gc, flush_ba and flush(drop) helpers, drops
 * everything on ar->tx_status[], zeroes tx_stats (re-arming the per-queue
 * limit), clears the memory bitmap, frees all per-vif beacons and resets
 * the TX/memory atomics.
 */
static void carl9170_zap_queues(struct ar9170 *ar)
{
	struct carl9170_vif_info *cvif;
	unsigned int i;

	carl9170_ampdu_gc(ar);

	carl9170_flush_ba(ar);
	carl9170_flush(ar, true);

	for (i = 0; i < ar->hw->queues; i++) {
		spin_lock_bh(&ar->tx_status[i].lock);
		while (!skb_queue_empty(&ar->tx_status[i])) {
			struct sk_buff *skb;

			skb = skb_peek(&ar->tx_status[i]);
			/*
			 * Hold a reference across the unlocked
			 * carl9170_tx_drop() call; it is put again once the
			 * queue lock has been re-taken.
			 */
			carl9170_tx_get_skb(skb);
			spin_unlock_bh(&ar->tx_status[i].lock);
			carl9170_tx_drop(ar, skb);
			spin_lock_bh(&ar->tx_status[i].lock);
			carl9170_tx_put_skb(skb);
		}
		spin_unlock_bh(&ar->tx_status[i].lock);
	}

	/* Compile-time sanity checks on the TX limit constants. */
	BUILD_BUG_ON(CARL9170_NUM_TX_LIMIT_SOFT < 1);
	BUILD_BUG_ON(CARL9170_NUM_TX_LIMIT_HARD < CARL9170_NUM_TX_LIMIT_SOFT);
	BUILD_BUG_ON(CARL9170_NUM_TX_LIMIT_HARD >= CARL9170_BAW_BITS);

	/* reinitialize queues statistics */
	memset(&ar->tx_stats, 0, sizeof(ar->tx_stats));
	for (i = 0; i < ar->hw->queues; i++)
		ar->tx_stats[i].limit = CARL9170_NUM_TX_LIMIT_HARD;

	bitmap_zero(ar->mem_bitmap, ar->fw.mem_blocks);

	/* Free every interface's cached beacon under beacon_lock. */
	rcu_read_lock();
	list_for_each_entry_rcu(cvif, &ar->vif_list, list) {
		spin_lock_bh(&ar->beacon_lock);
		dev_kfree_skb_any(cvif->beacon);
		cvif->beacon = NULL;
		spin_unlock_bh(&ar->beacon_lock);
	}
	rcu_read_unlock();

	atomic_set(&ar->tx_ampdu_upload, 0);
	atomic_set(&ar->tx_ampdu_scheduler,
0);
	atomic_set(&ar->tx_total_pending, 0);
	atomic_set(&ar->tx_total_queued, 0);
	/* All firmware memory blocks are considered free again. */
	atomic_set(&ar->mem_free_blocks, ar->fw.mem_blocks);
}

/*
 * Fill the four fields of one queue-parameter struct.
 * NOTE: "queue" is expanded four times and is not parenthesized; pass a
 * plain lvalue without side effects.
 */
#define CARL9170_FILL_QUEUE(queue, ai_fs, cwmin, cwmax, _txop)		\
do {									\
	queue.aifs = ai_fs;						\
	queue.cw_min = cwmin;						\
	queue.cw_max = cwmax;						\
	queue.txop = _txop;						\
} while (0)

/*
 * mac80211 .start callback (see carl9170_ops).
 *
 * Under ar->mutex: zaps all queues, resets the software state to its
 * defaults (QoS parameters, key bitmap, filter state, power-save
 * timestamps, ERP mode, crypto offload flags) and then brings the device
 * up. Returns 0 on success or the first error reported by a helper.
 */
static int carl9170_op_start(struct ieee80211_hw *hw)
{
	struct ar9170 *ar = hw->priv;
	int err, i;

	mutex_lock(&ar->mutex);

	carl9170_zap_queues(ar);

	/* reset QoS defaults */
	CARL9170_FILL_QUEUE(ar->edcf[AR9170_TXQ_VO], 2, 3,     7, 47);
	CARL9170_FILL_QUEUE(ar->edcf[AR9170_TXQ_VI], 2, 7,    15, 94);
	CARL9170_FILL_QUEUE(ar->edcf[AR9170_TXQ_BE], 3, 15, 1023,  0);
	CARL9170_FILL_QUEUE(ar->edcf[AR9170_TXQ_BK], 7, 15, 1023,  0);
	CARL9170_FILL_QUEUE(ar->edcf[AR9170_TXQ_SPECIAL], 2, 3, 7, 0);

	ar->current_factor = ar->current_density = -1;
	/* "The first key is unique." */
	ar->usedkeys = 1;
	ar->filter_state = 0;
	ar->ps.last_action = jiffies;
	ar->ps.last_slept = jiffies;
	ar->erp_mode = CARL9170_ERP_AUTO;

	/* Set "disable hw crypto offload" whenever the module parameter
	 * nohwcrypt is true or if the firmware does not support it.
*/
	ar->disable_offload = modparam_nohwcrypt |
		ar->fw.disable_offload_fw;
	ar->rx_software_decryption = ar->disable_offload;

	for (i = 0; i < ar->hw->queues; i++) {
		ar->queue_stop_timeout[i] = jiffies;
		ar->max_queue_stop_timeout[i] = 0;
	}

	atomic_set(&ar->mem_allocs, 0);

	/* Device bring-up; every step aborts via "out" on failure. */
	err = carl9170_usb_open(ar);
	if (err)
		goto out;

	err = carl9170_init_mac(ar);
	if (err)
		goto out;

	err = carl9170_set_qos(ar);
	if (err)
		goto out;

	/* Only program the RX filter if the firmware advertises one. */
	if (ar->fw.rx_filter) {
		err = carl9170_rx_filter(ar, CARL9170_RX_FILTER_OTHER_RA |
			CARL9170_RX_FILTER_CTL_OTHER | CARL9170_RX_FILTER_BAD);
		if (err)
			goto out;
	}

	err = carl9170_write_reg(ar, AR9170_MAC_REG_DMA_TRIGGER,
				 AR9170_DMA_TRIGGER_RXQ);
	if (err)
		goto out;

	/* Clear key-cache */
	for (i = 0; i < AR9170_CAM_MAX_USER + 4; i++) {
		err = carl9170_upload_key(ar, i, NULL, AR9170_ENC_ALG_NONE,
					  0, NULL, 0);
		if (err)
			goto out;

		err = carl9170_upload_key(ar, i, NULL, AR9170_ENC_ALG_NONE,
					  1, NULL, 0);
		if (err)
			goto out;

		/* The four extra slots are cleared but not "disabled". */
		if (i < AR9170_CAM_MAX_USER) {
			err = carl9170_disable_key(ar, i);
			if (err)
				goto out;
		}
	}

	carl9170_set_state_when(ar, CARL9170_IDLE, CARL9170_STARTED);

	/* Kick off the periodic statistics worker. */
	ieee80211_queue_delayed_work(ar->hw, &ar->stat_work,
		round_jiffies(msecs_to_jiffies(CARL9170_STAT_WORK)));

	ieee80211_wake_queues(ar->hw);
	err = 0;

out:
	mutex_unlock(&ar->mutex);
	return err;
}

/*
 * Synchronously cancel the driver's work items: stat_work, tx_janitor,
 * led_work (only with CONFIG_CARL9170_LEDS), ps_work, ping_work and
 * ampdu_work. restart_work is not cancelled here.
 */
static void carl9170_cancel_worker(struct ar9170 *ar)
{
	cancel_delayed_work_sync(&ar->stat_work);
	cancel_delayed_work_sync(&ar->tx_janitor);
#ifdef CONFIG_CARL9170_LEDS
	cancel_delayed_work_sync(&ar->led_work);
#endif /* CONFIG_CARL9170_LEDS */
	cancel_work_sync(&ar->ps_work);
	cancel_work_sync(&ar->ping_work);
	cancel_work_sync(&ar->ampdu_work);
}

/*
 * mac80211 .stop callback (see carl9170_ops).
 *
 * Moves the state from STARTED to IDLE, stops the mac80211 queues and,
 * while the device still accepts commands, stops beaconing, LEDs, DMA and
 * USB. Queues are zapped under ar->mutex; the workers are cancelled after
 * the mutex was dropped. The @suspend argument is not used.
 */
static void carl9170_op_stop(struct ieee80211_hw *hw, bool suspend)
{
	struct ar9170 *ar = hw->priv;

	carl9170_set_state_when(ar, CARL9170_STARTED, CARL9170_IDLE);

	ieee80211_stop_queues(ar->hw);

	mutex_lock(&ar->mutex);
	if (IS_ACCEPTING_CMD(ar)) {
		RCU_INIT_POINTER(ar->beacon_iter, NULL);

		carl9170_led_set_state(ar, 0);

		/* stop DMA */
		carl9170_write_reg(ar, AR9170_MAC_REG_DMA_TRIGGER,
0); carl9170_usb_stop(ar); } carl9170_zap_queues(ar); mutex_unlock(&ar->mutex); carl9170_cancel_worker(ar); } static void carl9170_restart_work(struct work_struct *work) { struct ar9170 *ar = container_of(work, struct ar9170, restart_work); int err = -EIO; ar->usedkeys = 0; ar->filter_state = 0; carl9170_cancel_worker(ar); mutex_lock(&ar->mutex); if (!ar->force_usb_reset) { err = carl9170_usb_restart(ar); if (net_ratelimit()) { if (err) dev_err(&ar->udev->dev, "Failed to restart device (%d).\n", err); else dev_info(&ar->udev->dev, "device restarted successfully.\n"); } } carl9170_zap_queues(ar); mutex_unlock(&ar->mutex); if (!err && !ar->force_usb_reset) { ar->restart_counter++; atomic_set(&ar->pending_restarts, 0); ieee80211_restart_hw(ar->hw); } else { /* * The reset was unsuccessful and the device seems to * be dead. But there's still one option: a low-level * usb subsystem reset... */ carl9170_usb_reset(ar); } } void carl9170_restart(struct ar9170 *ar, const enum carl9170_restart_reasons r) { carl9170_set_state_when(ar, CARL9170_STARTED, CARL9170_IDLE); /* * Sometimes, an error can trigger several different reset events. * By ignoring these *surplus* reset events, the device won't be * killed again, right after it has recovered. */ if (atomic_inc_return(&ar->pending_restarts) > 1) { dev_dbg(&ar->udev->dev, "ignoring restart (%d)\n", r); return; } ieee80211_stop_queues(ar->hw); dev_err(&ar->udev->dev, "restart device (%d)\n", r); if (!WARN_ON(r == CARL9170_RR_NO_REASON) || !WARN_ON(r >= __CARL9170_RR_LAST)) ar->last_reason = r; if (!ar->registered) return; if (!IS_ACCEPTING_CMD(ar) || ar->needs_full_reset) ar->force_usb_reset = true; ieee80211_queue_work(ar->hw, &ar->restart_work); /* * At this point, the device instance might have vanished/disabled. * So, don't put any code which access the ar9170 struct * without proper protection. 
*/
}

/*
 * Work item behind ar->ping_work: while the device is started, send an
 * echo test (0xdeadbeef) under ar->mutex and request a restart with
 * CARL9170_RR_UNRESPONSIVE_DEVICE if it fails.
 */
static void carl9170_ping_work(struct work_struct *work)
{
	struct ar9170 *ar = container_of(work, struct ar9170, ping_work);
	int err;

	if (!IS_STARTED(ar))
		return;

	mutex_lock(&ar->mutex);
	err = carl9170_echo_test(ar, 0xdeadbeef);
	if (err)
		carl9170_restart(ar, CARL9170_RR_UNRESPONSIVE_DEVICE);
	mutex_unlock(&ar->mutex);
}

/*
 * Configure the device for main interface @vif.
 *
 * Copies the vif's MAC address into the common ath state, updates the
 * crypto offload flags and applies the operating mode. A NULL @vif is
 * accepted: the function returns 0 (and WARNs once if the device is
 * started). Otherwise returns carl9170_set_operating_mode()'s result.
 */
static int carl9170_init_interface(struct ar9170 *ar,
				   struct ieee80211_vif *vif)
{
	struct ath_common *common = &ar->common;
	int err;

	if (!vif) {
		WARN_ON_ONCE(IS_STARTED(ar));
		return 0;
	}

	memcpy(common->macaddr, vif->addr, ETH_ALEN);

	/* We have to fall back to software crypto, whenever
	 * the user chooses to participate in an IBSS. HW
	 * offload for IBSS RSN is not supported by this driver.
	 *
	 * NOTE: If the previous main interface has already
	 * disabled hw crypto offload, we have to keep this
	 * previous disable_offload setting as it was.
	 * Although ideally, we should notify mac80211 and tell
	 * it to forget about any HW crypto offload for now.
	 */
	ar->disable_offload |= ((vif->type != NL80211_IFTYPE_STATION) &&
	    (vif->type != NL80211_IFTYPE_AP));

	/* The driver used to have P2P GO+CLIENT support,
	 * but since this was dropped and we don't know if
	 * there are any gremlins lurking in the shadows,
	 * so best we keep HW offload disabled for P2P.
	 */
	ar->disable_offload |= vif->p2p;

	ar->rx_software_decryption = ar->disable_offload;

	err = carl9170_set_operating_mode(ar);
	return err;
}

/*
 * mac80211 .add_interface callback (see carl9170_ops).
 *
 * Runs under ar->mutex. Allocates a slot in ar->vif_bitmap for the new
 * interface, links it into ar->vif_list and programs the device either as
 * main interface or as an additional virtual MAC. Returns 0 on success,
 * -EBUSY for an unsupported combination with the current main interface,
 * -ENOSPC when no slot is free, or an error from the device helpers.
 */
static int carl9170_op_add_interface(struct ieee80211_hw *hw,
				     struct ieee80211_vif *vif)
{
	struct carl9170_vif_info *vif_priv = (void *) vif->drv_priv;
	struct ieee80211_vif *main_vif, *old_main = NULL;
	struct ar9170 *ar = hw->priv;
	int vif_id = -1, err = 0;

	mutex_lock(&ar->mutex);
	rcu_read_lock();
	if (vif_priv->active) {
		/*
		 * Skip the interface structure initialization,
		 * if the vif survived the _restart call.
*/
		vif_id = vif_priv->id;
		vif_priv->enable_beacon = false;

		/* Drop the stale beacon of the surviving interface. */
		spin_lock_bh(&ar->beacon_lock);
		dev_kfree_skb_any(vif_priv->beacon);
		vif_priv->beacon = NULL;
		spin_unlock_bh(&ar->beacon_lock);

		goto init;
	}

	/* Because the AR9170 HW's MAC doesn't provide full support for
	 * multiple, independent interfaces [of different operation modes].
	 * We have to select ONE main interface [main mode of HW], but we
	 * can have multiple slaves [AKA: entry in the ACK-table].
	 *
	 * The first (from HEAD/TOP) interface in the ar->vif_list is
	 * always the main intf. All following intfs in this list
	 * are considered to be slave intfs.
	 */
	main_vif = carl9170_get_main_vif(ar);

	if (main_vif) {
		switch (main_vif->type) {
		case NL80211_IFTYPE_STATION:
			/* A station main intf only allows further stations. */
			if (vif->type == NL80211_IFTYPE_STATION)
				break;

			err = -EBUSY;
			rcu_read_unlock();

			goto unlock;

		case NL80211_IFTYPE_MESH_POINT:
		case NL80211_IFTYPE_AP:
			if ((vif->type == NL80211_IFTYPE_STATION) ||
			    (vif->type == NL80211_IFTYPE_AP) ||
			    (vif->type == NL80211_IFTYPE_MESH_POINT))
				break;

			err = -EBUSY;
			rcu_read_unlock();
			goto unlock;

		default:
			/*
			 * NOTE(review): err is still 0 and vif_id is still -1
			 * here, so the function returns 0 without registering
			 * the vif - confirm that this is intended.
			 */
			rcu_read_unlock();
			goto unlock;
		}
	}

	vif_id = bitmap_find_free_region(&ar->vif_bitmap, ar->fw.vif_num, 0);

	if (vif_id < 0) {
		rcu_read_unlock();

		err = -ENOSPC;
		goto unlock;
	}

	BUG_ON(ar->vif_priv[vif_id].id != vif_id);

	vif_priv->active = true;
	vif_priv->id = vif_id;
	vif_priv->enable_beacon = false;
	ar->vifs++;
	/*
	 * NOTE(review): old_main is initialised to NULL and never assigned
	 * in this function, so the first branch looks unreachable.
	 */
	if (old_main) {
		/* We end up in here, if the main interface is being replaced.
		 * Put the new main interface at the HEAD of the list and the
		 * previous interface will automatically become second in line.
		 */
		list_add_rcu(&vif_priv->list, &ar->vif_list);
	} else {
		/* Add new interface. If the list is empty, it will become the
		 * main interface, otherwise it will be slave.
*/
		list_add_tail_rcu(&vif_priv->list, &ar->vif_list);
	}
	rcu_assign_pointer(ar->vif_priv[vif_id].vif, vif);

init:
	main_vif = carl9170_get_main_vif(ar);

	if (main_vif == vif) {
		/* The new vif is the main interface. */
		rcu_assign_pointer(ar->beacon_iter, vif_priv);
		rcu_read_unlock();

		if (old_main) {
			struct carl9170_vif_info *old_main_priv =
				(void *) old_main->drv_priv;
			/* downgrade old main intf to slave intf.
			 * NOTE: We are no longer under rcu_read_lock.
			 * But we are still holding ar->mutex, so the
			 * vif data [id, addr] is safe.
			 */
			err = carl9170_mod_virtual_mac(ar, old_main_priv->id,
						       old_main->addr);
			if (err)
				goto unlock;
		}

		err = carl9170_init_interface(ar, vif);
		if (err)
			goto unlock;
	} else {
		/* Additional interface: only register its MAC address. */
		rcu_read_unlock();
		err = carl9170_mod_virtual_mac(ar, vif_id, vif->addr);

		if (err)
			goto unlock;
	}

	/* Zero this vif's 4-byte slot in the firmware TX sequence table. */
	if (ar->fw.tx_seq_table) {
		err = carl9170_write_reg(ar, ar->fw.tx_seq_table + vif_id * 4,
					 0);
		if (err)
			goto unlock;
	}

unlock:
	if (err && (vif_id >= 0)) {
		/* Roll back the registration done above. */
		vif_priv->active = false;
		bitmap_release_region(&ar->vif_bitmap, vif_id, 0);
		ar->vifs--;
		RCU_INIT_POINTER(ar->vif_priv[vif_id].vif, NULL);
		list_del_rcu(&vif_priv->list);
		mutex_unlock(&ar->mutex);
		synchronize_rcu();
	} else {
		/* More than one interface: set the PS_OFF_VIF override. */
		if (ar->vifs > 1)
			ar->ps.off_override |= PS_OFF_VIF;

		mutex_unlock(&ar->mutex);
	}

	return err;
}

/*
 * mac80211 .remove_interface callback (see carl9170_ops).
 *
 * Under ar->mutex: unlinks the vif from ar->vif_list and ar->vif_priv[],
 * re-initialises the device for the next main interface (or just reapplies
 * the operating mode when none is left), drops the vif's beacon and
 * releases its bitmap slot. Ends with synchronize_rcu() on every path.
 */
static void carl9170_op_remove_interface(struct ieee80211_hw *hw,
					 struct ieee80211_vif *vif)
{
	struct carl9170_vif_info *vif_priv = (void *) vif->drv_priv;
	struct ieee80211_vif *main_vif;
	struct ar9170 *ar = hw->priv;
	unsigned int id;

	mutex_lock(&ar->mutex);

	if (WARN_ON_ONCE(!vif_priv->active))
		goto unlock;

	ar->vifs--;

	rcu_read_lock();
	/* Sample the main vif before this one is unlinked below. */
	main_vif = carl9170_get_main_vif(ar);

	id = vif_priv->id;

	vif_priv->active = false;
	WARN_ON(vif_priv->enable_beacon);
	vif_priv->enable_beacon = false;
	list_del_rcu(&vif_priv->list);
	RCU_INIT_POINTER(ar->vif_priv[id].vif, NULL);

	if (vif == main_vif) {
		rcu_read_unlock();

		if (ar->vifs) {
			/* Promote whatever is now first in the list. */
			WARN_ON(carl9170_init_interface(ar,
					carl9170_get_main_vif(ar)));
		} else {
			carl9170_set_operating_mode(ar);
		}
	} else {
		rcu_read_unlock();

WARN_ON(carl9170_mod_virtual_mac(ar, id, NULL));
	}

	carl9170_update_beacon(ar, false);
	carl9170_flush_cab(ar, id);

	spin_lock_bh(&ar->beacon_lock);
	dev_kfree_skb_any(vif_priv->beacon);
	vif_priv->beacon = NULL;
	spin_unlock_bh(&ar->beacon_lock);

	bitmap_release_region(&ar->vif_bitmap, id, 0);

	carl9170_set_beacon_timers(ar);

	/* Back to a single interface: lift the PS_OFF_VIF override. */
	if (ar->vifs == 1)
		ar->ps.off_override &= ~PS_OFF_VIF;

unlock:
	mutex_unlock(&ar->mutex);

	synchronize_rcu();
}

/* Schedule ar->ps_work; the power-save state is re-evaluated there. */
void carl9170_ps_check(struct ar9170 *ar)
{
	ieee80211_queue_work(ar->hw, &ar->ps_work);
}

/*
 * Bring the device power-save state in line with the configuration.
 *
 * The wanted state is IEEE80211_CONF_PS from hw->conf.flags, forced off
 * while any bit in ar->ps.off_override is set. When it differs from
 * ar->ps.state the device is switched and the ps timestamps are updated.
 * Returns 0, or the error from carl9170_powersave().
 *
 * caller must hold ar->mutex
 */
static int carl9170_ps_update(struct ar9170 *ar)
{
	bool ps = false;
	int err = 0;

	if (!ar->ps.off_override)
		ps = (ar->hw->conf.flags & IEEE80211_CONF_PS);

	if (ps != ar->ps.state) {
		err = carl9170_powersave(ar, ps);
		if (err)
			return err;

		/* Leaving power save: record the time since the last action. */
		if (ar->ps.state && !ps) {
			ar->ps.sleep_ms = jiffies_to_msecs(jiffies -
				ar->ps.last_action);
		}

		if (ps)
			ar->ps.last_slept = jiffies;

		ar->ps.last_action = jiffies;
		ar->ps.state = ps;
	}

	return 0;
}

/*
 * Work item behind ar->ps_work: run carl9170_ps_update() under ar->mutex
 * if the device is started; a failure only triggers a one-time warning.
 */
static void carl9170_ps_work(struct work_struct *work)
{
	struct ar9170 *ar = container_of(work, struct ar9170,
					 ps_work);
	mutex_lock(&ar->mutex);
	if (IS_STARTED(ar))
		WARN_ON_ONCE(carl9170_ps_update(ar) != 0);
	mutex_unlock(&ar->mutex);
}

/*
 * Refresh survey data.
 * @flush: zero ar->tally after collecting.
 * @noise: also read the noise floor first.
 * The tally is only collected when the firmware has hw_counters.
 * Returns 0 or the first helper error.
 */
static int carl9170_update_survey(struct ar9170 *ar, bool flush, bool noise)
{
	int err;

	if (noise) {
		err = carl9170_get_noisefloor(ar);
		if (err)
			return err;
	}

	if (ar->fw.hw_counters) {
		err = carl9170_collect_tally(ar);
		if (err)
			return err;
	}

	if (flush)
		memset(&ar->tally, 0, sizeof(ar->tally));

	return 0;
}

/*
 * Delayed work behind ar->stat_work: update the survey (with noise floor)
 * under ar->mutex and re-arm itself. On error the work is not re-queued.
 */
static void carl9170_stat_work(struct work_struct *work)
{
	struct ar9170 *ar = container_of(work, struct ar9170, stat_work.work);
	int err;

	mutex_lock(&ar->mutex);
	err = carl9170_update_survey(ar, false, true);
	mutex_unlock(&ar->mutex);

	if (err)
		return;

	ieee80211_queue_delayed_work(ar->hw, &ar->stat_work,
		round_jiffies(msecs_to_jiffies(CARL9170_STAT_WORK)));
}

/*
 * mac80211 .config callback (see carl9170_ops): applies the settings
 * selected by the @changed bit mask under ar->mutex and returns 0 or the
 * first helper error.
 */
static int carl9170_op_config(struct ieee80211_hw *hw, u32 changed)
{
	struct ar9170 *ar = hw->priv;
	int err =
0;

	mutex_lock(&ar->mutex);
	if (changed & IEEE80211_CONF_CHANGE_LISTEN_INTERVAL) {
		/* TODO */
		err = 0;
	}

	if (changed & IEEE80211_CONF_CHANGE_PS) {
		err = carl9170_ps_update(ar);
		if (err)
			goto out;
	}

	if (changed & IEEE80211_CONF_CHANGE_SMPS) {
		/* TODO */
		err = 0;
	}

	if (changed & IEEE80211_CONF_CHANGE_CHANNEL) {
		enum nl80211_channel_type channel_type =
			cfg80211_get_chandef_type(&hw->conf.chandef);

		/* adjust slot time for 5 GHz */
		err = carl9170_set_slot_time(ar);
		if (err)
			goto out;

		/* Collect and then clear the tally before switching. */
		err = carl9170_update_survey(ar, true, false);
		if (err)
			goto out;

		err = carl9170_set_channel(ar, hw->conf.chandef.chan,
					   channel_type);
		if (err)
			goto out;

		/* Survey again (incl. noise floor) after the switch. */
		err = carl9170_update_survey(ar, false, true);
		if (err)
			goto out;

		err = carl9170_set_dyn_sifs_ack(ar);
		if (err)
			goto out;

		err = carl9170_set_rts_cts_rate(ar);
		if (err)
			goto out;
	}

	if (changed & IEEE80211_CONF_CHANGE_POWER) {
		err = carl9170_set_mac_tpc(ar, ar->hw->conf.chandef.chan);
		if (err)
			goto out;
	}

out:
	mutex_unlock(&ar->mutex);
	return err;
}

/*
 * mac80211 .prepare_multicast callback (see carl9170_ops).
 *
 * Builds a 64-bit hash mask: for every address in @mc_list the bit
 * numbered (last address byte >> 2) is set. Bit (0xff >> 2) = 63 is always
 * set. The result is returned and later passed to .configure_filter by
 * mac80211 (presumably as its "multicast" argument - confirm).
 */
static u64 carl9170_op_prepare_multicast(struct ieee80211_hw *hw,
					 struct netdev_hw_addr_list *mc_list)
{
	struct netdev_hw_addr *ha;
	u64 mchash;

	/* always get broadcast frames */
	mchash = 1ULL << (0xff >> 2);

	netdev_hw_addr_list_for_each(ha, mc_list)
		mchash |= 1ULL << (ha->addr[5] >> 2);

	return mchash;
}

/*
 * mac80211 .configure_filter callback (see carl9170_ops).
 *
 * Masks *@new_flags down to FIF_ALLMULTI plus ar->rx_filter_caps, then -
 * if the device accepts commands - updates the multicast hash, the sniffer
 * mode and the firmware RX filter under ar->mutex. Helper failures are
 * only reported through WARN_ON().
 */
static void carl9170_op_configure_filter(struct ieee80211_hw *hw,
					 unsigned int changed_flags,
					 unsigned int *new_flags,
					 u64 multicast)
{
	struct ar9170 *ar = hw->priv;

	/* mask supported flags */
	*new_flags &= FIF_ALLMULTI | ar->rx_filter_caps;

	if (!IS_ACCEPTING_CMD(ar))
		return;

	mutex_lock(&ar->mutex);

	ar->filter_state = *new_flags;
	/*
	 * We can support more by setting the sniffer bit and
	 * then checking the error flags, later.
*/

	/* ALLMULTI: accept every hash bucket. */
	if (*new_flags & FIF_ALLMULTI)
		multicast = ~0ULL;

	if (multicast != ar->cur_mc_hash)
		WARN_ON(carl9170_update_multicast(ar, multicast));

	if (changed_flags & FIF_OTHER_BSS) {
		ar->sniffer_enabled = !!(*new_flags & FIF_OTHER_BSS);

		WARN_ON(carl9170_set_operating_mode(ar));
	}

	/*
	 * The firmware filter is a drop list: a CARL9170_RX_FILTER_* bit is
	 * set for each frame class that mac80211 did NOT ask for.
	 */
	if (ar->fw.rx_filter && changed_flags & ar->rx_filter_caps) {
		u32 rx_filter = 0;

		if (!ar->fw.ba_filter)
			rx_filter |= CARL9170_RX_FILTER_CTL_OTHER;

		if (!(*new_flags & (FIF_FCSFAIL | FIF_PLCPFAIL)))
			rx_filter |= CARL9170_RX_FILTER_BAD;

		if (!(*new_flags & FIF_CONTROL))
			rx_filter |= CARL9170_RX_FILTER_CTL_OTHER;

		if (!(*new_flags & FIF_PSPOLL))
			rx_filter |= CARL9170_RX_FILTER_CTL_PSPOLL;

		if (!(*new_flags & FIF_OTHER_BSS)) {
			rx_filter |= CARL9170_RX_FILTER_OTHER_RA;
			rx_filter |= CARL9170_RX_FILTER_DECRY_FAIL;
		}

		WARN_ON(carl9170_rx_filter(ar, rx_filter));
	}

	mutex_unlock(&ar->mutex);
}


/*
 * mac80211 .bss_info_changed callback (see carl9170_ops).
 *
 * Applies the BSS_CHANGED_* bits in @changed under ar->mutex. Beacon
 * related bits are handled for every interface; BSSID, association, ERP
 * slot and basic-rate changes are applied for the main interface only.
 * Errors are not returned; they trigger a one-time warning while the
 * device is started.
 */
static void carl9170_op_bss_info_changed(struct ieee80211_hw *hw,
					 struct ieee80211_vif *vif,
					 struct ieee80211_bss_conf *bss_conf,
					 u64 changed)
{
	struct ar9170 *ar = hw->priv;
	struct ath_common *common = &ar->common;
	int err = 0;
	struct carl9170_vif_info *vif_priv;
	struct ieee80211_vif *main_vif;

	mutex_lock(&ar->mutex);
	vif_priv = (void *) vif->drv_priv;
	main_vif = carl9170_get_main_vif(ar);
	if (WARN_ON(!main_vif))
		goto out;

	if (changed & BSS_CHANGED_BEACON_ENABLED) {
		struct carl9170_vif_info *iter;
		int i = 0;

		vif_priv->enable_beacon = bss_conf->enable_beacon;
		/* Recount the active interfaces that have beaconing on. */
		rcu_read_lock();
		list_for_each_entry_rcu(iter, &ar->vif_list, list) {
			if (iter->active && iter->enable_beacon)
				i++;

		}
		rcu_read_unlock();

		ar->beacon_enabled = i;
	}

	if (changed & BSS_CHANGED_BEACON) {
		err = carl9170_update_beacon(ar, false);
		if (err)
			goto out;
	}

	if (changed & (BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_BEACON |
		       BSS_CHANGED_BEACON_INT)) {

		/* Non-main interfaces inherit the main intf's timing. */
		if (main_vif != vif) {
			bss_conf->beacon_int = main_vif->bss_conf.beacon_int;
			bss_conf->dtim_period = main_vif->bss_conf.dtim_period;
		}

		/*
		 * Therefore a hard limit for the broadcast traffic should
		 * prevent false alarms.
*/
		/*
		 * Reject beacon_int * dtim_period values that reach half of
		 * CARL9170_QUEUE_STUCK_TIMEOUT (stations are exempt).
		 */
		if (vif->type != NL80211_IFTYPE_STATION &&
		    (bss_conf->beacon_int * bss_conf->dtim_period >=
		     (CARL9170_QUEUE_STUCK_TIMEOUT / 2))) {
			err = -EINVAL;
			goto out;
		}

		err = carl9170_set_beacon_timers(ar);
		if (err)
			goto out;
	}

	if (changed & BSS_CHANGED_HT) {
		/* TODO */
		err = 0;
		if (err)
			goto out;
	}

	if (main_vif != vif)
		goto out;

	/*
	 * The following settings can only be changed by the
	 * master interface.
	 */

	if (changed & BSS_CHANGED_BSSID) {
		memcpy(common->curbssid, bss_conf->bssid, ETH_ALEN);
		err = carl9170_set_operating_mode(ar);
		if (err)
			goto out;
	}

	if (changed & BSS_CHANGED_ASSOC) {
		ar->common.curaid = vif->cfg.aid;
		err = carl9170_set_beacon_timers(ar);
		if (err)
			goto out;
	}

	if (changed & BSS_CHANGED_ERP_SLOT) {
		err = carl9170_set_slot_time(ar);
		if (err)
			goto out;
	}

	if (changed & BSS_CHANGED_BASIC_RATES) {
		err = carl9170_set_mac_rates(ar);
		if (err)
			goto out;
	}

out:
	WARN_ON_ONCE(err && IS_STARTED(ar));
	mutex_unlock(&ar->mutex);
}

/*
 * mac80211 .get_tsf callback (see carl9170_ops).
 *
 * Issues CARL9170_CMD_READ_TSF under ar->mutex and returns the 64-bit
 * value from the response converted from little endian. Returns 0 (after
 * a warning) if the command fails. @vif is not used.
 */
static u64 carl9170_op_get_tsf(struct ieee80211_hw *hw,
			       struct ieee80211_vif *vif)
{
	struct ar9170 *ar = hw->priv;
	struct carl9170_tsf_rsp tsf;
	int err;

	mutex_lock(&ar->mutex);
	err = carl9170_exec_cmd(ar, CARL9170_CMD_READ_TSF,
				0, NULL, sizeof(tsf), &tsf);
	mutex_unlock(&ar->mutex);
	if (WARN_ON(err))
		return 0;

	return le64_to_cpu(tsf.tsf_64);
}

/*
 * mac80211 .set_key callback (see carl9170_ops).
 *
 * Installs (SET_KEY) or removes a hardware key. Returns -EOPNOTSUPP when
 * offload is disabled, @vif is NULL, the cipher is unsupported or a group
 * key is requested for an interface type other than station/ad-hoc.
 * Returns -ENOSPC - after switching RX to software decryption - when @vif
 * is not the main interface or all 64 pairwise slots are taken.
 */
static int carl9170_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
			       struct ieee80211_vif *vif,
			       struct ieee80211_sta *sta,
			       struct ieee80211_key_conf *key)
{
	struct ar9170 *ar = hw->priv;
	int err = 0, i;
	u8 ktype;

	if (ar->disable_offload || !vif)
		return -EOPNOTSUPP;

	/* Fall back to software encryption whenever the driver is connected
	 * to more than one network.
	 *
	 * This is very unfortunate, because some machines cannot handle
	 * the high throughput speed in 802.11n networks.
	 */

	if (!is_main_vif(ar, vif)) {
		/* err_softw expects ar->mutex to be held. */
		mutex_lock(&ar->mutex);
		goto err_softw;
	}

	/*
	 * While the hardware supports *catch-all* key, for offloading
	 * group-key en-/de-cryption.
The way of how the hardware * decides which keyId maps to which key, remains a mystery... */ if ((vif->type != NL80211_IFTYPE_STATION && vif->type != NL80211_IFTYPE_ADHOC) && !(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) return -EOPNOTSUPP; switch (key->cipher) { case WLAN_CIPHER_SUITE_WEP40: ktype = AR9170_ENC_ALG_WEP64; break; case WLAN_CIPHER_SUITE_WEP104: ktype = AR9170_ENC_ALG_WEP128; break; case WLAN_CIPHER_SUITE_TKIP: ktype = AR9170_ENC_ALG_TKIP; break; case WLAN_CIPHER_SUITE_CCMP: ktype = AR9170_ENC_ALG_AESCCMP; key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX; break; default: return -EOPNOTSUPP; } mutex_lock(&ar->mutex); if (cmd == SET_KEY) { if (!IS_STARTED(ar)) { err = -EOPNOTSUPP; goto out; } if (!(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) { sta = NULL; i = 64 + key->keyidx; } else { for (i = 0; i < 64; i++) if (!(ar->usedkeys & BIT(i))) break; if (i == 64) goto err_softw; } key->hw_key_idx = i; err = carl9170_upload_key(ar, i, sta ? sta->addr : NULL, ktype, 0, key->key, min_t(u8, 16, key->keylen)); if (err) goto out; if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { err = carl9170_upload_key(ar, i, sta ? sta->addr : NULL, ktype, 1, key->key + 16, 16); if (err) goto out; /* * hardware is not capable generating MMIC * of fragmented frames! */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; } if (i < 64) ar->usedkeys |= BIT(i); key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; } else { if (!IS_STARTED(ar)) { /* The device is gone... 
together with the key ;-) */ err = 0; goto out; } if (key->hw_key_idx < 64) { ar->usedkeys &= ~BIT(key->hw_key_idx); } else { err = carl9170_upload_key(ar, key->hw_key_idx, NULL, AR9170_ENC_ALG_NONE, 0, NULL, 0); if (err) goto out; if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { err = carl9170_upload_key(ar, key->hw_key_idx, NULL, AR9170_ENC_ALG_NONE, 1, NULL, 0); if (err) goto out; } } err = carl9170_disable_key(ar, key->hw_key_idx); if (err) goto out; } out: mutex_unlock(&ar->mutex); return err; err_softw: if (!ar->rx_software_decryption) { ar->rx_software_decryption = true; carl9170_set_operating_mode(ar); } mutex_unlock(&ar->mutex); return -ENOSPC; } static int carl9170_op_sta_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { struct carl9170_sta_info *sta_info = (void *) sta->drv_priv; unsigned int i; atomic_set(&sta_info->pending_frames, 0); if (sta->deflink.ht_cap.ht_supported) { if (sta->deflink.ht_cap.ampdu_density > 6) { /* * HW does support 16us AMPDU density. * No HT-Xmit for station. 
*/

			return 0;
		}

		for (i = 0; i < ARRAY_SIZE(sta_info->agg); i++)
			RCU_INIT_POINTER(sta_info->agg[i], NULL);

		sta_info->ampdu_max_len = 1 << (3 + sta->deflink.ht_cap.ampdu_factor);
		sta_info->ht_sta = true;
	}

	return 0;
}

/*
 * mac80211 .sta_remove callback (see carl9170_ops).
 *
 * For HT stations: clears ht_sta, detaches every aggregation session from
 * the station and marks it SHUTDOWN (if it was in a higher state). When at
 * least one session was found, carl9170_ampdu_gc() is run to reap them.
 * Always returns 0. @vif is not used.
 */
static int carl9170_op_sta_remove(struct ieee80211_hw *hw,
				struct ieee80211_vif *vif,
				struct ieee80211_sta *sta)
{
	struct ar9170 *ar = hw->priv;
	struct carl9170_sta_info *sta_info = (void *) sta->drv_priv;
	unsigned int i;
	bool cleanup = false;

	if (sta->deflink.ht_cap.ht_supported) {

		sta_info->ht_sta = false;

		rcu_read_lock();
		for (i = 0; i < ARRAY_SIZE(sta_info->agg); i++) {
			struct carl9170_sta_tid *tid_info;

			tid_info = rcu_dereference(sta_info->agg[i]);
			RCU_INIT_POINTER(sta_info->agg[i], NULL);

			if (!tid_info)
				continue;

			spin_lock_bh(&ar->tx_ampdu_list_lock);
			if (tid_info->state > CARL9170_TID_STATE_SHUTDOWN)
				tid_info->state = CARL9170_TID_STATE_SHUTDOWN;
			spin_unlock_bh(&ar->tx_ampdu_list_lock);
			cleanup = true;
		}
		rcu_read_unlock();

		if (cleanup)
			carl9170_ampdu_gc(ar);
	}

	return 0;
}

/*
 * mac80211 .conf_tx callback (see carl9170_ops).
 *
 * Copies @param into ar->edcf[] at the slot ar9170_qmap(@queue) and pushes
 * the QoS settings to the device under ar->mutex. Returns the result of
 * carl9170_set_qos(). @vif and @link_id are not used.
 */
static int carl9170_op_conf_tx(struct ieee80211_hw *hw,
			       struct ieee80211_vif *vif,
			       unsigned int link_id, u16 queue,
			       const struct ieee80211_tx_queue_params *param)
{
	struct ar9170 *ar = hw->priv;
	int ret;

	mutex_lock(&ar->mutex);
	memcpy(&ar->edcf[ar9170_qmap(queue)], param, sizeof(*param));
	ret = carl9170_set_qos(ar);
	mutex_unlock(&ar->mutex);
	return ret;
}

/*
 * Work item behind ar->ampdu_work: run the aggregation garbage collector
 * under ar->mutex while the device is started.
 */
static void carl9170_ampdu_work(struct work_struct *work)
{
	struct ar9170 *ar = container_of(work, struct ar9170,
					 ampdu_work);

	if (!IS_STARTED(ar))
		return;

	mutex_lock(&ar->mutex);
	carl9170_ampdu_gc(ar);
	mutex_unlock(&ar->mutex);
}

/*
 * mac80211 .ampdu_action callback (see carl9170_ops).
 *
 * Handles TX aggregation session start/stop/operational requests for the
 * station and TID given in @params; RX start/stop are accepted without any
 * driver work. Returns -EOPNOTSUPP when the noht module parameter is set,
 * for non-HT stations on TX_START and for unknown actions.
 */
static int carl9170_op_ampdu_action(struct ieee80211_hw *hw,
				    struct ieee80211_vif *vif,
				    struct ieee80211_ampdu_params *params)
{
	struct ieee80211_sta *sta = params->sta;
	enum ieee80211_ampdu_mlme_action action = params->action;
	u16 tid = params->tid;
	u16 *ssn = &params->ssn;
	struct ar9170 *ar = hw->priv;
	struct carl9170_sta_info *sta_info = (void *) sta->drv_priv;
	struct carl9170_sta_tid *tid_info;

	if
(modparam_noht)
		return -EOPNOTSUPP;

	switch (action) {
	case IEEE80211_AMPDU_TX_START:
		if (!sta_info->ht_sta)
			return -EOPNOTSUPP;

		tid_info = kzalloc(sizeof(struct carl9170_sta_tid),
				   GFP_KERNEL);
		if (!tid_info)
			return -ENOMEM;

		/* All three sequence counters start at the session's ssn. */
		tid_info->hsn = tid_info->bsn = tid_info->snx = (*ssn);
		tid_info->state = CARL9170_TID_STATE_PROGRESS;
		tid_info->tid = tid;
		tid_info->max = sta_info->ampdu_max_len;
		tid_info->sta = sta;
		tid_info->vif = vif;

		INIT_LIST_HEAD(&tid_info->list);
		INIT_LIST_HEAD(&tid_info->tmp_list);
		skb_queue_head_init(&tid_info->queue);
		spin_lock_init(&tid_info->lock);

		/* Publish the new session on the list and on the station. */
		spin_lock_bh(&ar->tx_ampdu_list_lock);
		ar->tx_ampdu_list_len++;
		list_add_tail_rcu(&tid_info->list, &ar->tx_ampdu_list);
		rcu_assign_pointer(sta_info->agg[tid], tid_info);
		spin_unlock_bh(&ar->tx_ampdu_list_lock);

		return IEEE80211_AMPDU_TX_START_IMMEDIATE;

	case IEEE80211_AMPDU_TX_STOP_CONT:
	case IEEE80211_AMPDU_TX_STOP_FLUSH:
	case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
		rcu_read_lock();
		tid_info = rcu_dereference(sta_info->agg[tid]);
		if (tid_info) {
			spin_lock_bh(&ar->tx_ampdu_list_lock);
			if (tid_info->state > CARL9170_TID_STATE_SHUTDOWN)
				tid_info->state = CARL9170_TID_STATE_SHUTDOWN;
			spin_unlock_bh(&ar->tx_ampdu_list_lock);
		}

		RCU_INIT_POINTER(sta_info->agg[tid], NULL);
		rcu_read_unlock();

		ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
		/* The session itself is freed later by the ampdu worker. */
		ieee80211_queue_work(ar->hw, &ar->ampdu_work);
		break;

	case IEEE80211_AMPDU_TX_OPERATIONAL:
		rcu_read_lock();
		tid_info = rcu_dereference(sta_info->agg[tid]);

		sta_info->stats[tid].clear = true;
		sta_info->stats[tid].req = false;

		if (tid_info) {
			bitmap_zero(tid_info->bitmap, CARL9170_BAW_SIZE);
			tid_info->state = CARL9170_TID_STATE_IDLE;
		}
		rcu_read_unlock();

		/* Only the pointer value is tested after the unlock. */
		if (WARN_ON_ONCE(!tid_info))
			return -EFAULT;

		break;

	case IEEE80211_AMPDU_RX_START:
	case IEEE80211_AMPDU_RX_STOP:
		/* Handled by hardware */
		break;

	default:
		return -EOPNOTSUPP;
	}

	return 0;
}

#ifdef CONFIG_CARL9170_WPC
/*
 * Register an input device for the WPS push button.
 *
 * Does nothing (returns 0) when the CARL9170_WPS_BUTTON feature bit is not
 * set. Returns -ENOMEM if the device-managed allocation fails, otherwise
 * the result of input_register_device().
 */
static int carl9170_register_wps_button(struct ar9170 *ar)
{
	struct input_dev *input;
	int err;

	if
(!(ar->features & CARL9170_WPS_BUTTON))
		return 0;

	input = devm_input_allocate_device(&ar->udev->dev);
	if (!input)
		return -ENOMEM;

	snprintf(ar->wps.name, sizeof(ar->wps.name), "%s WPS Button",
		 wiphy_name(ar->hw->wiphy));

	snprintf(ar->wps.phys, sizeof(ar->wps.phys),
		 "ieee80211/%s/input0", wiphy_name(ar->hw->wiphy));

	input->name = ar->wps.name;
	input->phys = ar->wps.phys;
	input->id.bustype = BUS_USB;
	input->dev.parent = &ar->hw->wiphy->dev;

	input_set_capability(input, EV_KEY, KEY_WPS_BUTTON);

	err = input_register_device(input);
	if (err)
		return err;

	/* Only remember the device once registration succeeded. */
	ar->wps.pbc = input;
	return 0;
}
#endif /* CONFIG_CARL9170_WPC */

#ifdef CONFIG_CARL9170_HWRNG
/*
 * Refill ar->rng.cache from the device.
 *
 * Repeatedly issues a CARL9170_CMD_RREG command whose payload is RW copies
 * of AR9170_RAND_REG_NUM and copies up to RW returned words per round into
 * the cache until it is full, then resets cache_idx to 0.
 * Returns 0, -EAGAIN if the device does not accept commands, or the error
 * from carl9170_exec_cmd().
 */
static int carl9170_rng_get(struct ar9170 *ar)
{

/* RW: number of 32-bit words per command, RB: the same size in bytes. */
#define RW	(CARL9170_MAX_CMD_PAYLOAD_LEN / sizeof(u32))
#define RB	(CARL9170_MAX_CMD_PAYLOAD_LEN)

	static const __le32 rng_load[RW] = {
		[0 ... (RW - 1)] = cpu_to_le32(AR9170_RAND_REG_NUM)};

	u32 buf[RW];

	unsigned int i, off = 0, transfer, count;
	int err;

	BUILD_BUG_ON(RB > CARL9170_MAX_CMD_PAYLOAD_LEN);

	if (!IS_ACCEPTING_CMD(ar))
		return -EAGAIN;

	count = ARRAY_SIZE(ar->rng.cache);
	while (count) {
		err = carl9170_exec_cmd(ar, CARL9170_CMD_RREG,
					RB, (u8 *) rng_load,
					RB, (u8 *) buf);
		if (err)
			return err;

		transfer = min_t(unsigned int, count, RW);
		/*
		 * NOTE(review): the element type of rng.cache is not visible
		 * here; if it is narrower than u32 this assignment truncates.
		 */
		for (i = 0; i < transfer; i++)
			ar->rng.cache[off + i] = buf[i];

		off += transfer;
		count -= transfer;
	}

	ar->rng.cache_idx = 0;

#undef RW
#undef RB
	return 0;
}

/*
 * hwrng data_read hook (installed by carl9170_register_hwrng()).
 *
 * Under ar->mutex: refills the cache when it is exhausted, then stores the
 * next cache element in *@data. Returns sizeof(u16) on success or the
 * refill error.
 * NOTE(review): *data is a u32 but the reported length is sizeof(u16);
 * presumably the cache holds 16-bit values - confirm against the struct.
 */
static int carl9170_rng_read(struct hwrng *rng, u32 *data)
{
	struct ar9170 *ar = (struct ar9170 *)rng->priv;
	int ret = -EIO;

	mutex_lock(&ar->mutex);
	if (ar->rng.cache_idx >= ARRAY_SIZE(ar->rng.cache)) {
		ret = carl9170_rng_get(ar);
		if (ret) {
			mutex_unlock(&ar->mutex);
			return ret;
		}
	}

	*data = ar->rng.cache[ar->rng.cache_idx++];
	mutex_unlock(&ar->mutex);

	return sizeof(u16);
}

/*
 * Register the device as a hardware random number generator (device
 * managed) under the name "<KBUILD_MODNAME>_<wiphy name>" and prime the
 * cache. Returns the registration error or carl9170_rng_get()'s result.
 */
static int carl9170_register_hwrng(struct ar9170 *ar)
{
	int err;

	snprintf(ar->rng.name, ARRAY_SIZE(ar->rng.name),
		 "%s_%s", KBUILD_MODNAME, wiphy_name(ar->hw->wiphy));
	ar->rng.rng.name = ar->rng.name;
	ar->rng.rng.data_read =
carl9170_rng_read;
	/* priv carries the ar pointer back into carl9170_rng_read(). */
	ar->rng.rng.priv = (unsigned long)ar;

	err = devm_hwrng_register(&ar->udev->dev, &ar->rng.rng);
	if (err) {
		dev_err(&ar->udev->dev, "Failed to register the random "
			"number generator (%d)\n", err);
		return err;
	}

	return carl9170_rng_get(ar);
}
#endif /* CONFIG_CARL9170_HWRNG */

/*
 * mac80211 .get_survey callback (see carl9170_ops).
 *
 * @idx is matched against the hw_value of the channels in all registered
 * bands. If it names the current channel, the survey data is refreshed
 * first. On a match, ar->survey[idx] is copied to @survey and the
 * channel/filled fields are set.
 * Returns 0, -ENODEV without a current channel, -ENOENT when no channel
 * has hw_value == @idx, or the survey update error.
 */
static int carl9170_op_get_survey(struct ieee80211_hw *hw, int idx,
				struct survey_info *survey)
{
	struct ar9170 *ar = hw->priv;
	struct ieee80211_channel *chan;
	struct ieee80211_supported_band *band;
	int err, b, i;

	chan = ar->channel;
	if (!chan)
		return -ENODEV;

	if (idx == chan->hw_value) {
		mutex_lock(&ar->mutex);
		err = carl9170_update_survey(ar, false, true);
		mutex_unlock(&ar->mutex);
		if (err)
			return err;
	}

	for (b = 0; b < NUM_NL80211_BANDS; b++) {
		band = ar->hw->wiphy->bands[b];

		if (!band)
			continue;

		for (i = 0; i < band->n_channels; i++) {
			if (band->channels[i].hw_value == idx) {
				chan = &band->channels[i];
				goto found;
			}
		}
	}
	return -ENOENT;

found:
	memcpy(survey, &ar->survey[idx], sizeof(*survey));

	/* The memcpy above overwrote these fields; set them explicitly. */
	survey->channel = chan;
	survey->filled = SURVEY_INFO_NOISE_DBM;

	if (ar->channel == chan)
		survey->filled |= SURVEY_INFO_IN_USE;

	/* Time counters are only reported with firmware hw_counters. */
	if (ar->fw.hw_counters) {
		survey->filled |= SURVEY_INFO_TIME |
				  SURVEY_INFO_TIME_BUSY |
				  SURVEY_INFO_TIME_TX;
	}

	return 0;
}

/*
 * mac80211 .flush callback (see carl9170_ops).
 *
 * Under ar->mutex: flushes the CAB queue of every interface id set in
 * ar->vif_bitmap, then calls carl9170_flush(ar, @drop). @vif and @queues
 * are not used.
 */
static void carl9170_op_flush(struct ieee80211_hw *hw,
			      struct ieee80211_vif *vif,
			      u32 queues, bool drop)
{
	struct ar9170 *ar = hw->priv;
	unsigned int vid;

	mutex_lock(&ar->mutex);
	for_each_set_bit(vid, &ar->vif_bitmap, ar->fw.vif_num)
		carl9170_flush_cab(ar, vid);

	carl9170_flush(ar, drop);
	mutex_unlock(&ar->mutex);
}

/*
 * mac80211 .get_stats callback (see carl9170_ops): zeroes @stats and fills
 * in the ACK-failure and FCS-error counters from the driver state.
 * Always returns 0.
 */
static int carl9170_op_get_stats(struct ieee80211_hw *hw,
				 struct ieee80211_low_level_stats *stats)
{
	struct ar9170 *ar = hw->priv;

	memset(stats, 0, sizeof(*stats));
	stats->dot11ACKFailureCount = ar->tx_ack_failures;
	stats->dot11FCSErrorCount = ar->tx_fcs_errors;
	return 0;
}

/*
 * mac80211 .sta_notify callback (see carl9170_ops): tracks the station's
 * sleep state in its driver-private data.
 */
static void carl9170_op_sta_notify(struct ieee80211_hw *hw,
				   struct ieee80211_vif *vif,
				   enum sta_notify_cmd cmd,
				   struct ieee80211_sta *sta)
{
	struct
carl9170_sta_info *sta_info = (void *) sta->drv_priv; switch (cmd) { case STA_NOTIFY_SLEEP: sta_info->sleeping = true; if (atomic_read(&sta_info->pending_frames)) ieee80211_sta_block_awake(hw, sta, true); break; case STA_NOTIFY_AWAKE: sta_info->sleeping = false; break; } } static bool carl9170_tx_frames_pending(struct ieee80211_hw *hw) { struct ar9170 *ar = hw->priv; return !!atomic_read(&ar->tx_total_queued); } static const struct ieee80211_ops carl9170_ops = { .add_chanctx = ieee80211_emulate_add_chanctx, .remove_chanctx = ieee80211_emulate_remove_chanctx, .change_chanctx = ieee80211_emulate_change_chanctx, .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .start = carl9170_op_start, .stop = carl9170_op_stop, .tx = carl9170_op_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .flush = carl9170_op_flush, .add_interface = carl9170_op_add_interface, .remove_interface = carl9170_op_remove_interface, .config = carl9170_op_config, .prepare_multicast = carl9170_op_prepare_multicast, .configure_filter = carl9170_op_configure_filter, .conf_tx = carl9170_op_conf_tx, .bss_info_changed = carl9170_op_bss_info_changed, .get_tsf = carl9170_op_get_tsf, .set_key = carl9170_op_set_key, .sta_add = carl9170_op_sta_add, .sta_remove = carl9170_op_sta_remove, .sta_notify = carl9170_op_sta_notify, .get_survey = carl9170_op_get_survey, .get_stats = carl9170_op_get_stats, .ampdu_action = carl9170_op_ampdu_action, .tx_frames_pending = carl9170_tx_frames_pending, }; void *carl9170_alloc(size_t priv_size) { struct ieee80211_hw *hw; struct ar9170 *ar; struct sk_buff *skb; int i; /* * this buffer is used for rx stream reconstruction. * Under heavy load this device (or the transport layer?) * tends to split the streams into separate rx descriptors. 
*/ skb = __dev_alloc_skb(AR9170_RX_STREAM_MAX_SIZE, GFP_KERNEL); if (!skb) goto err_nomem; hw = ieee80211_alloc_hw(priv_size, &carl9170_ops); if (!hw) goto err_nomem; ar = hw->priv; ar->hw = hw; ar->rx_failover = skb; memset(&ar->rx_plcp, 0, sizeof(struct ar9170_rx_head)); ar->rx_has_plcp = false; /* * Here's a hidden pitfall! * * All 4 AC queues work perfectly well under _legacy_ operation. * However as soon as aggregation is enabled, the traffic flow * gets very bumpy. Therefore we have to _switch_ to a * software AC with a single HW queue. */ hw->queues = __AR9170_NUM_TXQ; mutex_init(&ar->mutex); spin_lock_init(&ar->beacon_lock); spin_lock_init(&ar->cmd_lock); spin_lock_init(&ar->tx_stats_lock); spin_lock_init(&ar->tx_ampdu_list_lock); spin_lock_init(&ar->mem_lock); spin_lock_init(&ar->state_lock); atomic_set(&ar->pending_restarts, 0); ar->vifs = 0; for (i = 0; i < ar->hw->queues; i++) { skb_queue_head_init(&ar->tx_status[i]); skb_queue_head_init(&ar->tx_pending[i]); INIT_LIST_HEAD(&ar->bar_list[i]); spin_lock_init(&ar->bar_list_lock[i]); } INIT_WORK(&ar->ps_work, carl9170_ps_work); INIT_WORK(&ar->ping_work, carl9170_ping_work); INIT_WORK(&ar->restart_work, carl9170_restart_work); INIT_WORK(&ar->ampdu_work, carl9170_ampdu_work); INIT_DELAYED_WORK(&ar->stat_work, carl9170_stat_work); INIT_DELAYED_WORK(&ar->tx_janitor, carl9170_tx_janitor); INIT_LIST_HEAD(&ar->tx_ampdu_list); rcu_assign_pointer(ar->tx_ampdu_iter, (struct carl9170_sta_tid *) &ar->tx_ampdu_list); bitmap_zero(&ar->vif_bitmap, ar->fw.vif_num); INIT_LIST_HEAD(&ar->vif_list); init_completion(&ar->tx_flush); /* firmware decides which modes we support */ hw->wiphy->interface_modes = 0; ieee80211_hw_set(hw, RX_INCLUDES_FCS); ieee80211_hw_set(hw, MFP_CAPABLE); ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); ieee80211_hw_set(hw, SUPPORTS_PS); ieee80211_hw_set(hw, PS_NULLFUNC_STACK); ieee80211_hw_set(hw, NEED_DTIM_BEFORE_ASSOC); ieee80211_hw_set(hw, SUPPORTS_RC_TABLE); ieee80211_hw_set(hw, SIGNAL_DBM); 
ieee80211_hw_set(hw, SUPPORTS_HT_CCK_RATES); if (!modparam_noht) { /* * see the comment above, why we allow the user * to disable HT by a module parameter. */ ieee80211_hw_set(hw, AMPDU_AGGREGATION); } hw->extra_tx_headroom = sizeof(struct _carl9170_tx_superframe); hw->sta_data_size = sizeof(struct carl9170_sta_info); hw->vif_data_size = sizeof(struct carl9170_vif_info); hw->max_rates = CARL9170_TX_MAX_RATES; hw->max_rate_tries = CARL9170_TX_USER_RATE_TRIES; for (i = 0; i < ARRAY_SIZE(ar->noise); i++) ar->noise[i] = -95; /* ATH_DEFAULT_NOISE_FLOOR */ wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); return ar; err_nomem: kfree_skb(skb); return ERR_PTR(-ENOMEM); } static int carl9170_read_eeprom(struct ar9170 *ar) { #define RW 8 /* number of words to read at once */ #define RB (sizeof(u32) * RW) u8 *eeprom = (void *)&ar->eeprom; __le32 offsets[RW]; int i, j, err; BUILD_BUG_ON(sizeof(ar->eeprom) & 3); BUILD_BUG_ON(RB > CARL9170_MAX_CMD_LEN - 4); #ifndef __CHECKER__ /* don't want to handle trailing remains */ BUILD_BUG_ON(sizeof(ar->eeprom) % RB); #endif for (i = 0; i < sizeof(ar->eeprom) / RB; i++) { for (j = 0; j < RW; j++) offsets[j] = cpu_to_le32(AR9170_EEPROM_START + RB * i + 4 * j); err = carl9170_exec_cmd(ar, CARL9170_CMD_RREG, RB, (u8 *) &offsets, RB, eeprom + RB * i); if (err) return err; } #undef RW #undef RB return 0; } static int carl9170_parse_eeprom(struct ar9170 *ar) { struct ath_regulatory *regulatory = &ar->common.regulatory; unsigned int rx_streams, tx_streams, tx_params = 0; int bands = 0; int chans = 0; if (ar->eeprom.length == cpu_to_le16(0xffff)) return -ENODATA; rx_streams = hweight8(ar->eeprom.rx_mask); tx_streams = hweight8(ar->eeprom.tx_mask); if (rx_streams != tx_streams) { tx_params = IEEE80211_HT_MCS_TX_RX_DIFF; WARN_ON(!(tx_streams >= 1 && tx_streams <= IEEE80211_HT_MCS_TX_MAX_STREAMS)); tx_params |= (tx_streams - 1) << IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT; carl9170_band_2GHz.ht_cap.mcs.tx_params |= tx_params; 
carl9170_band_5GHz.ht_cap.mcs.tx_params |= tx_params; } if (ar->eeprom.operating_flags & AR9170_OPFLAG_2GHZ) { ar->hw->wiphy->bands[NL80211_BAND_2GHZ] = &carl9170_band_2GHz; chans += carl9170_band_2GHz.n_channels; bands++; } if (ar->eeprom.operating_flags & AR9170_OPFLAG_5GHZ) { ar->hw->wiphy->bands[NL80211_BAND_5GHZ] = &carl9170_band_5GHz; chans += carl9170_band_5GHz.n_channels; bands++; } if (!bands) return -EINVAL; ar->survey = devm_kcalloc(&ar->udev->dev, chans, sizeof(struct survey_info), GFP_KERNEL); if (!ar->survey) return -ENOMEM; ar->num_channels = chans; regulatory->current_rd = le16_to_cpu(ar->eeprom.reg_domain[0]); /* second part of wiphy init */ SET_IEEE80211_PERM_ADDR(ar->hw, ar->eeprom.mac_address); return 0; } static void carl9170_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request) { struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy); struct ar9170 *ar = hw->priv; ath_reg_notifier_apply(wiphy, request, &ar->common.regulatory); } int carl9170_register(struct ar9170 *ar) { struct ath_regulatory *regulatory = &ar->common.regulatory; int err = 0, i; ar->mem_bitmap = devm_bitmap_zalloc(&ar->udev->dev, ar->fw.mem_blocks, GFP_KERNEL); if (!ar->mem_bitmap) return -ENOMEM; /* try to read EEPROM, init MAC addr */ err = carl9170_read_eeprom(ar); if (err) return err; err = carl9170_parse_eeprom(ar); if (err) return err; err = ath_regd_init(regulatory, ar->hw->wiphy, carl9170_reg_notifier); if (err) return err; if (modparam_noht) { carl9170_band_2GHz.ht_cap.ht_supported = false; carl9170_band_5GHz.ht_cap.ht_supported = false; } for (i = 0; i < ar->fw.vif_num; i++) { ar->vif_priv[i].id = i; ar->vif_priv[i].vif = NULL; } err = ieee80211_register_hw(ar->hw); if (err) return err; /* mac80211 interface is now registered */ ar->registered = true; if (!ath_is_world_regd(regulatory)) regulatory_hint(ar->hw->wiphy, regulatory->alpha2); #ifdef CONFIG_CA