Total coverage: 145583 (8%)of 1838217
84 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner */ #ifndef _NET_BATMAN_ADV_MESH_INTERFACE_H_ #define _NET_BATMAN_ADV_MESH_INTERFACE_H_ #include "main.h" #include <linux/kref.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/types.h> #include <net/rtnetlink.h> int batadv_skb_head_push(struct sk_buff *skb, unsigned int len); void batadv_interface_rx(struct net_device *mesh_iface, struct sk_buff *skb, int hdr_size, struct batadv_orig_node *orig_node); bool batadv_meshif_is_valid(const struct net_device *net_dev); extern struct rtnl_link_ops batadv_link_ops; int batadv_meshif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid); void batadv_meshif_vlan_release(struct kref *ref); struct batadv_meshif_vlan *batadv_meshif_vlan_get(struct batadv_priv *bat_priv, unsigned short vid); /** * batadv_meshif_vlan_put() - decrease the vlan object refcounter and * possibly release it * @vlan: the vlan object to release */ static inline void batadv_meshif_vlan_put(struct batadv_meshif_vlan *vlan) { if (!vlan) return; kref_put(&vlan->refcount, batadv_meshif_vlan_release); } #endif /* _NET_BATMAN_ADV_MESH_INTERFACE_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_DAX_H #define _LINUX_DAX_H #include <linux/fs.h> #include <linux/mm.h> #include <linux/radix-tree.h> typedef unsigned long dax_entry_t; struct dax_device; struct gendisk; struct iomap_ops; struct iomap_iter; struct iomap; enum dax_access_mode { DAX_ACCESS, DAX_RECOVERY_WRITE, }; struct dax_operations { /* * direct_access: translate a device-relative * logical-page-offset into an absolute physical pfn. Return the * number of pages available for DAX at that pfn. */ long (*direct_access)(struct dax_device *, pgoff_t, long, enum dax_access_mode, void **, pfn_t *); /* zero_page_range: required operation. Zero page range */ int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); /* * recovery_write: recover a poisoned range by DAX device driver * capable of clearing poison. */ size_t (*recovery_write)(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *iter); }; struct dax_holder_operations { /* * notify_failure - notify memory failure into inner holder device * @dax_dev: the dax device which contains the holder * @offset: offset on this dax device where memory failure occurs * @len: length of this memory failure event * @flags: action flags for memory failure handler */ int (*notify_failure)(struct dax_device *dax_dev, u64 offset, u64 len, int mf_flags); }; #if IS_ENABLED(CONFIG_DAX) struct dax_device *alloc_dax(void *private, const struct dax_operations *ops); void *dax_holder(struct dax_device *dax_dev); void put_dax(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); bool dax_write_cache_enabled(struct dax_device *dax_dev); bool dax_synchronous(struct dax_device *dax_dev); void set_dax_nocache(struct dax_device *dax_dev); void set_dax_nomc(struct dax_device *dax_dev); void set_dax_synchronous(struct dax_device *dax_dev); size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); /* * Check if given mapping is supported by the file / underlying device. */ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, struct dax_device *dax_dev) { if (!(vma->vm_flags & VM_SYNC)) return true; if (!IS_DAX(file_inode(vma->vm_file))) return false; return dax_synchronous(dax_dev); } #else static inline void *dax_holder(struct dax_device *dax_dev) { return NULL; } static inline struct dax_device *alloc_dax(void *private, const struct dax_operations *ops) { return ERR_PTR(-EOPNOTSUPP); } static inline void put_dax(struct dax_device *dax_dev) { } static inline void kill_dax(struct dax_device *dax_dev) { } static inline void dax_write_cache(struct dax_device *dax_dev, bool wc) { } static inline bool dax_write_cache_enabled(struct dax_device *dax_dev) { return false; } static inline bool dax_synchronous(struct dax_device *dax_dev) { return true; } static inline void set_dax_nocache(struct dax_device *dax_dev) { } static inline void set_dax_nomc(struct dax_device *dax_dev) { } static inline void set_dax_synchronous(struct dax_device *dax_dev) { } static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, struct dax_device *dax_dev) { return !(vma->vm_flags & VM_SYNC); } static inline size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { return 0; } #endif struct writeback_control; #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); void dax_remove_host(struct gendisk *disk); struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off, void *holder, const struct dax_holder_operations *ops); void fs_put_dax(struct dax_device *dax_dev, void *holder); #else static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) { return 0; } static inline void dax_remove_host(struct gendisk *disk) { } static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off, void *holder, const struct dax_holder_operations *ops) { return NULL; } static inline void fs_put_dax(struct dax_device *dax_dev, void *holder) { } #endif /* CONFIG_BLOCK && CONFIG_FS_DAX */ #if IS_ENABLED(CONFIG_FS_DAX) int dax_writeback_mapping_range(struct address_space *mapping, struct dax_device *dax_dev, struct writeback_control *wbc); struct page *dax_layout_busy_page(struct address_space *mapping); struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); dax_entry_t dax_lock_folio(struct folio *folio); void dax_unlock_folio(struct folio *folio, dax_entry_t cookie); dax_entry_t dax_lock_mapping_entry(struct address_space *mapping, unsigned long index, struct page **page); void dax_unlock_mapping_entry(struct address_space *mapping, unsigned long index, dax_entry_t cookie); #else static inline struct page *dax_layout_busy_page(struct address_space *mapping) { return NULL; } static inline struct page *dax_layout_busy_page_range(struct address_space *mapping, pgoff_t start, pgoff_t nr_pages) { return NULL; } static inline int dax_writeback_mapping_range(struct address_space *mapping, struct dax_device *dax_dev, struct writeback_control *wbc) { return -EOPNOTSUPP; } static inline dax_entry_t dax_lock_folio(struct folio *folio) { if (IS_DAX(folio->mapping->host)) return ~0UL; return 0; } static inline void dax_unlock_folio(struct folio *folio, dax_entry_t cookie) { } static inline dax_entry_t dax_lock_mapping_entry(struct address_space *mapping, unsigned long index, struct page **page) { return 0; } static inline void dax_unlock_mapping_entry(struct address_space *mapping, unsigned long index, dax_entry_t cookie) { } #endif int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops); int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, const struct iomap_ops *ops); #if IS_ENABLED(CONFIG_DAX) int dax_read_lock(void); void dax_read_unlock(int id); #else static inline int dax_read_lock(void) { return 0; } static inline void dax_read_unlock(int id) { } #endif /* CONFIG_DAX */ bool dax_alive(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, enum dax_access_mode mode, void **kaddr, pfn_t *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, size_t nr_pages); int dax_holder_notify_failure(struct dax_device *dax_dev, u64 off, u64 len, int mf_flags); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order, pfn_t *pfnp, int *errp, const struct iomap_ops *ops); vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, unsigned int order, pfn_t pfn); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same, const struct iomap_ops *ops); int dax_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags, const struct iomap_ops *ops); static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); } /* * Due to dax's memory and block duo personalities, hwpoison reporting * takes into consideration which personality is presently visible. * When dax acts like a block device, such as in block IO, an encounter of * dax hwpoison is reported as -EIO. * When dax acts like memory, such as in page fault, a detection of hwpoison * is reported as -EHWPOISON which leads to VM_FAULT_HWPOISON. */ static inline int dax_mem2blk_err(int err) { return (err == -EHWPOISON) ? -EIO : err; } #ifdef CONFIG_DEV_DAX_HMEM_DEVICES void hmem_register_resource(int target_nid, struct resource *r); #else static inline void hmem_register_resource(int target_nid, struct resource *r) { } #endif typedef int (*walk_hmem_fn)(struct device *dev, int target_nid, const struct resource *res); int walk_hmem_resources(struct device *dev, walk_hmem_fn fn); #endif
70 69 20 20 5 5 5 5 70 69 69 70 70 70 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 // SPDX-License-Identifier: GPL-2.0 /* * HugeTLB Vmemmap Optimization (HVO) * * Copyright (c) 2020, ByteDance. All rights reserved. * * Author: Muchun Song <songmuchun@bytedance.com> * * See Documentation/mm/vmemmap_dedup.rst */ #define pr_fmt(fmt) "HugeTLB: " fmt #include <linux/pgtable.h> #include <linux/moduleparam.h> #include <linux/bootmem_info.h> #include <linux/mmdebug.h> #include <linux/pagewalk.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include "hugetlb_vmemmap.h" /** * struct vmemmap_remap_walk - walk vmemmap page table * * @remap_pte: called for each lowest-level entry (PTE). * @nr_walked: the number of walked pte. * @reuse_page: the page which is reused for the tail vmemmap pages. * @reuse_addr: the virtual address of the @reuse_page page. * @vmemmap_pages: the list head of the vmemmap pages that can be freed * or is mapped from. * @flags: used to modify behavior in vmemmap page table walking * operations. */ struct vmemmap_remap_walk { void (*remap_pte)(pte_t *pte, unsigned long addr, struct vmemmap_remap_walk *walk); unsigned long nr_walked; struct page *reuse_page; unsigned long reuse_addr; struct list_head *vmemmap_pages; /* Skip the TLB flush when we split the PMD */ #define VMEMMAP_SPLIT_NO_TLB_FLUSH BIT(0) /* Skip the TLB flush when we remap the PTE */ #define VMEMMAP_REMAP_NO_TLB_FLUSH BIT(1) /* synchronize_rcu() to avoid writes from page_ref_add_unless() */ #define VMEMMAP_SYNCHRONIZE_RCU BIT(2) unsigned long flags; }; static int vmemmap_split_pmd(pmd_t *pmd, struct page *head, unsigned long start, struct vmemmap_remap_walk *walk) { pmd_t __pmd; int i; unsigned long addr = start; pte_t *pgtable; pgtable = pte_alloc_one_kernel(&init_mm); if (!pgtable) return -ENOMEM; pmd_populate_kernel(&init_mm, &__pmd, pgtable); for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { pte_t entry, *pte; pgprot_t pgprot = PAGE_KERNEL; entry = mk_pte(head + i, pgprot); pte = pte_offset_kernel(&__pmd, addr); set_pte_at(&init_mm, addr, pte, entry); } spin_lock(&init_mm.page_table_lock); if (likely(pmd_leaf(*pmd))) { /* * Higher order allocations from buddy allocator must be able to * be treated as indepdenent small pages (as they can be freed * individually). */ if (!PageReserved(head)) split_page(head, get_order(PMD_SIZE)); /* Make pte visible before pmd. See comment in pmd_install(). */ smp_wmb(); pmd_populate_kernel(&init_mm, pmd, pgtable); if (!(walk->flags & VMEMMAP_SPLIT_NO_TLB_FLUSH)) flush_tlb_kernel_range(start, start + PMD_SIZE); } else { pte_free_kernel(&init_mm, pgtable); } spin_unlock(&init_mm.page_table_lock); return 0; } static int vmemmap_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { int ret = 0; struct page *head; struct vmemmap_remap_walk *vmemmap_walk = walk->private; /* Only splitting, not remapping the vmemmap pages. */ if (!vmemmap_walk->remap_pte) walk->action = ACTION_CONTINUE; spin_lock(&init_mm.page_table_lock); head = pmd_leaf(*pmd) ? pmd_page(*pmd) : NULL; /* * Due to HugeTLB alignment requirements and the vmemmap * pages being at the start of the hotplugged memory * region in memory_hotplug.memmap_on_memory case. Checking * the vmemmap page associated with the first vmemmap page * if it is self-hosted is sufficient. * * [ hotplugged memory ] * [ section ][...][ section ] * [ vmemmap ][ usable memory ] * ^ | ^ | * +--+ | | * +------------------------+ */ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG) && unlikely(!vmemmap_walk->nr_walked)) { struct page *page = head ? head + pte_index(addr) : pte_page(ptep_get(pte_offset_kernel(pmd, addr))); if (PageVmemmapSelfHosted(page)) ret = -ENOTSUPP; } spin_unlock(&init_mm.page_table_lock); if (!head || ret) return ret; return vmemmap_split_pmd(pmd, head, addr & PMD_MASK, vmemmap_walk); } static int vmemmap_pte_entry(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { struct vmemmap_remap_walk *vmemmap_walk = walk->private; /* * The reuse_page is found 'first' in page table walking before * starting remapping. */ if (!vmemmap_walk->reuse_page) vmemmap_walk->reuse_page = pte_page(ptep_get(pte)); else vmemmap_walk->remap_pte(pte, addr, vmemmap_walk); vmemmap_walk->nr_walked++; return 0; } static const struct mm_walk_ops vmemmap_remap_ops = { .pmd_entry = vmemmap_pmd_entry, .pte_entry = vmemmap_pte_entry, }; static int vmemmap_remap_range(unsigned long start, unsigned long end, struct vmemmap_remap_walk *walk) { int ret; VM_BUG_ON(!PAGE_ALIGNED(start | end)); mmap_read_lock(&init_mm); ret = walk_page_range_novma(&init_mm, start, end, &vmemmap_remap_ops, NULL, walk); mmap_read_unlock(&init_mm); if (ret) return ret; if (walk->remap_pte && !(walk->flags & VMEMMAP_REMAP_NO_TLB_FLUSH)) flush_tlb_kernel_range(start, end); return 0; } /* * Free a vmemmap page. A vmemmap page can be allocated from the memblock * allocator or buddy allocator. If the PG_reserved flag is set, it means * that it allocated from the memblock allocator, just free it via the * free_bootmem_page(). Otherwise, use __free_page(). */ static inline void free_vmemmap_page(struct page *page) { if (PageReserved(page)) { memmap_boot_pages_add(-1); free_bootmem_page(page); } else { memmap_pages_add(-1); __free_page(page); } } /* Free a list of the vmemmap pages */ static void free_vmemmap_page_list(struct list_head *list) { struct page *page, *next; list_for_each_entry_safe(page, next, list, lru) free_vmemmap_page(page); } static void vmemmap_remap_pte(pte_t *pte, unsigned long addr, struct vmemmap_remap_walk *walk) { /* * Remap the tail pages as read-only to catch illegal write operation * to the tail pages. */ pgprot_t pgprot = PAGE_KERNEL_RO; struct page *page = pte_page(ptep_get(pte)); pte_t entry; /* Remapping the head page requires r/w */ if (unlikely(addr == walk->reuse_addr)) { pgprot = PAGE_KERNEL; list_del(&walk->reuse_page->lru); /* * Makes sure that preceding stores to the page contents from * vmemmap_remap_free() become visible before the set_pte_at() * write. */ smp_wmb(); } entry = mk_pte(walk->reuse_page, pgprot); list_add(&page->lru, walk->vmemmap_pages); set_pte_at(&init_mm, addr, pte, entry); } /* * How many struct page structs need to be reset. When we reuse the head * struct page, the special metadata (e.g. page->flags or page->mapping) * cannot copy to the tail struct page structs. The invalid value will be * checked in the free_tail_page_prepare(). In order to avoid the message * of "corrupted mapping in tail page". We need to reset at least 3 (one * head struct page struct and two tail struct page structs) struct page * structs. */ #define NR_RESET_STRUCT_PAGE 3 static inline void reset_struct_pages(struct page *start) { struct page *from = start + NR_RESET_STRUCT_PAGE; BUILD_BUG_ON(NR_RESET_STRUCT_PAGE * 2 > PAGE_SIZE / sizeof(struct page)); memcpy(start, from, sizeof(*from) * NR_RESET_STRUCT_PAGE); } static void vmemmap_restore_pte(pte_t *pte, unsigned long addr, struct vmemmap_remap_walk *walk) { pgprot_t pgprot = PAGE_KERNEL; struct page *page; void *to; BUG_ON(pte_page(ptep_get(pte)) != walk->reuse_page); page = list_first_entry(walk->vmemmap_pages, struct page, lru); list_del(&page->lru); to = page_to_virt(page); copy_page(to, (void *)walk->reuse_addr); reset_struct_pages(to); /* * Makes sure that preceding stores to the page contents become visible * before the set_pte_at() write. */ smp_wmb(); set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot)); } /** * vmemmap_remap_split - split the vmemmap virtual address range [@start, @end) * backing PMDs of the directmap into PTEs * @start: start address of the vmemmap virtual address range that we want * to remap. * @end: end address of the vmemmap virtual address range that we want to * remap. * @reuse: reuse address. * * Return: %0 on success, negative error code otherwise. */ static int vmemmap_remap_split(unsigned long start, unsigned long end, unsigned long reuse) { struct vmemmap_remap_walk walk = { .remap_pte = NULL, .flags = VMEMMAP_SPLIT_NO_TLB_FLUSH, }; /* See the comment in the vmemmap_remap_free(). */ BUG_ON(start - reuse != PAGE_SIZE); return vmemmap_remap_range(reuse, end, &walk); } /** * vmemmap_remap_free - remap the vmemmap virtual address range [@start, @end) * to the page which @reuse is mapped to, then free vmemmap * which the range are mapped to. * @start: start address of the vmemmap virtual address range that we want * to remap. * @end: end address of the vmemmap virtual address range that we want to * remap. * @reuse: reuse address. * @vmemmap_pages: list to deposit vmemmap pages to be freed. It is callers * responsibility to free pages. * @flags: modifications to vmemmap_remap_walk flags * * Return: %0 on success, negative error code otherwise. */ static int vmemmap_remap_free(unsigned long start, unsigned long end, unsigned long reuse, struct list_head *vmemmap_pages, unsigned long flags) { int ret; struct vmemmap_remap_walk walk = { .remap_pte = vmemmap_remap_pte, .reuse_addr = reuse, .vmemmap_pages = vmemmap_pages, .flags = flags, }; int nid = page_to_nid((struct page *)reuse); gfp_t gfp_mask = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; /* * Allocate a new head vmemmap page to avoid breaking a contiguous * block of struct page memory when freeing it back to page allocator * in free_vmemmap_page_list(). This will allow the likely contiguous * struct page backing memory to be kept contiguous and allowing for * more allocations of hugepages. Fallback to the currently * mapped head page in case should it fail to allocate. */ walk.reuse_page = alloc_pages_node(nid, gfp_mask, 0); if (walk.reuse_page) { copy_page(page_to_virt(walk.reuse_page), (void *)walk.reuse_addr); list_add(&walk.reuse_page->lru, vmemmap_pages); memmap_pages_add(1); } /* * In order to make remapping routine most efficient for the huge pages, * the routine of vmemmap page table walking has the following rules * (see more details from the vmemmap_pte_range()): * * - The range [@start, @end) and the range [@reuse, @reuse + PAGE_SIZE) * should be continuous. * - The @reuse address is part of the range [@reuse, @end) that we are * walking which is passed to vmemmap_remap_range(). * - The @reuse address is the first in the complete range. * * So we need to make sure that @start and @reuse meet the above rules. */ BUG_ON(start - reuse != PAGE_SIZE); ret = vmemmap_remap_range(reuse, end, &walk); if (ret && walk.nr_walked) { end = reuse + walk.nr_walked * PAGE_SIZE; /* * vmemmap_pages contains pages from the previous * vmemmap_remap_range call which failed. These * are pages which were removed from the vmemmap. * They will be restored in the following call. */ walk = (struct vmemmap_remap_walk) { .remap_pte = vmemmap_restore_pte, .reuse_addr = reuse, .vmemmap_pages = vmemmap_pages, .flags = 0, }; vmemmap_remap_range(reuse, end, &walk); } return ret; } static int alloc_vmemmap_page_list(unsigned long start, unsigned long end, struct list_head *list) { gfp_t gfp_mask = GFP_KERNEL | __GFP_RETRY_MAYFAIL; unsigned long nr_pages = (end - start) >> PAGE_SHIFT; int nid = page_to_nid((struct page *)start); struct page *page, *next; int i; for (i = 0; i < nr_pages; i++) { page = alloc_pages_node(nid, gfp_mask, 0); if (!page) goto out; list_add(&page->lru, list); } memmap_pages_add(nr_pages); return 0; out: list_for_each_entry_safe(page, next, list, lru) __free_page(page); return -ENOMEM; } /** * vmemmap_remap_alloc - remap the vmemmap virtual address range [@start, end) * to the page which is from the @vmemmap_pages * respectively. * @start: start address of the vmemmap virtual address range that we want * to remap. * @end: end address of the vmemmap virtual address range that we want to * remap. * @reuse: reuse address. * @flags: modifications to vmemmap_remap_walk flags * * Return: %0 on success, negative error code otherwise. */ static int vmemmap_remap_alloc(unsigned long start, unsigned long end, unsigned long reuse, unsigned long flags) { LIST_HEAD(vmemmap_pages); struct vmemmap_remap_walk walk = { .remap_pte = vmemmap_restore_pte, .reuse_addr = reuse, .vmemmap_pages = &vmemmap_pages, .flags = flags, }; /* See the comment in the vmemmap_remap_free(). */ BUG_ON(start - reuse != PAGE_SIZE); if (alloc_vmemmap_page_list(start, end, &vmemmap_pages)) return -ENOMEM; return vmemmap_remap_range(reuse, end, &walk); } DEFINE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key); EXPORT_SYMBOL(hugetlb_optimize_vmemmap_key); static bool vmemmap_optimize_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON); core_param(hugetlb_free_vmemmap, vmemmap_optimize_enabled, bool, 0); static int __hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio, unsigned long flags) { int ret; unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end; unsigned long vmemmap_reuse; VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio); VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio); if (!folio_test_hugetlb_vmemmap_optimized(folio)) return 0; if (flags & VMEMMAP_SYNCHRONIZE_RCU) synchronize_rcu(); vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h); vmemmap_reuse = vmemmap_start; vmemmap_start += HUGETLB_VMEMMAP_RESERVE_SIZE; /* * The pages which the vmemmap virtual address range [@vmemmap_start, * @vmemmap_end) are mapped to are freed to the buddy allocator, and * the range is mapped to the page which @vmemmap_reuse is mapped to. * When a HugeTLB page is freed to the buddy allocator, previously * discarded vmemmap pages must be allocated and remapping. */ ret = vmemmap_remap_alloc(vmemmap_start, vmemmap_end, vmemmap_reuse, flags); if (!ret) { folio_clear_hugetlb_vmemmap_optimized(folio); static_branch_dec(&hugetlb_optimize_vmemmap_key); } return ret; } /** * hugetlb_vmemmap_restore_folio - restore previously optimized (by * hugetlb_vmemmap_optimize_folio()) vmemmap pages which * will be reallocated and remapped. * @h: struct hstate. * @folio: the folio whose vmemmap pages will be restored. * * Return: %0 if @folio's vmemmap pages have been reallocated and remapped, * negative error code otherwise. */ int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio) { return __hugetlb_vmemmap_restore_folio(h, folio, VMEMMAP_SYNCHRONIZE_RCU); } /** * hugetlb_vmemmap_restore_folios - restore vmemmap for every folio on the list. * @h: hstate. * @folio_list: list of folios. * @non_hvo_folios: Output list of folios for which vmemmap exists. * * Return: number of folios for which vmemmap was restored, or an error code * if an error was encountered restoring vmemmap for a folio. * Folios that have vmemmap are moved to the non_hvo_folios * list. Processing of entries stops when the first error is * encountered. The folio that experienced the error and all * non-processed folios will remain on folio_list. */ long hugetlb_vmemmap_restore_folios(const struct hstate *h, struct list_head *folio_list, struct list_head *non_hvo_folios) { struct folio *folio, *t_folio; long restored = 0; long ret = 0; unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU; list_for_each_entry_safe(folio, t_folio, folio_list, lru) { if (folio_test_hugetlb_vmemmap_optimized(folio)) { ret = __hugetlb_vmemmap_restore_folio(h, folio, flags); /* only need to synchronize_rcu() once for each batch */ flags &= ~VMEMMAP_SYNCHRONIZE_RCU; if (ret) break; restored++; } /* Add non-optimized folios to output list */ list_move(&folio->lru, non_hvo_folios); } if (restored) flush_tlb_all(); if (!ret) ret = restored; return ret; } /* Return true iff a HugeTLB whose vmemmap should and can be optimized. */ static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *folio) { if (folio_test_hugetlb_vmemmap_optimized(folio)) return false; if (!READ_ONCE(vmemmap_optimize_enabled)) return false; if (!hugetlb_vmemmap_optimizable(h)) return false; return true; } static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio, struct list_head *vmemmap_pages, unsigned long flags) { int ret = 0; unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end; unsigned long vmemmap_reuse; VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio); VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio); if (!vmemmap_should_optimize_folio(h, folio)) return ret; static_branch_inc(&hugetlb_optimize_vmemmap_key); if (flags & VMEMMAP_SYNCHRONIZE_RCU) synchronize_rcu(); /* * Very Subtle * If VMEMMAP_REMAP_NO_TLB_FLUSH is set, TLB flushing is not performed * immediately after remapping. As a result, subsequent accesses * and modifications to struct pages associated with the hugetlb * page could be to the OLD struct pages. Set the vmemmap optimized * flag here so that it is copied to the new head page. This keeps * the old and new struct pages in sync. * If there is an error during optimization, we will immediately FLUSH * the TLB and clear the flag below. */ folio_set_hugetlb_vmemmap_optimized(folio); vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h); vmemmap_reuse = vmemmap_start; vmemmap_start += HUGETLB_VMEMMAP_RESERVE_SIZE; /* * Remap the vmemmap virtual address range [@vmemmap_start, @vmemmap_end) * to the page which @vmemmap_reuse is mapped to. Add pages previously * mapping the range to vmemmap_pages list so that they can be freed by * the caller. */ ret = vmemmap_remap_free(vmemmap_start, vmemmap_end, vmemmap_reuse, vmemmap_pages, flags); if (ret) { static_branch_dec(&hugetlb_optimize_vmemmap_key); folio_clear_hugetlb_vmemmap_optimized(folio); } return ret; } /** * hugetlb_vmemmap_optimize_folio - optimize @folio's vmemmap pages. * @h: struct hstate. * @folio: the folio whose vmemmap pages will be optimized. * * This function only tries to optimize @folio's vmemmap pages and does not * guarantee that the optimization will succeed after it returns. The caller * can use folio_test_hugetlb_vmemmap_optimized(@folio) to detect if @folio's * vmemmap pages have been optimized. */ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio) { LIST_HEAD(vmemmap_pages); __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, VMEMMAP_SYNCHRONIZE_RCU); free_vmemmap_page_list(&vmemmap_pages); } static int hugetlb_vmemmap_split_folio(const struct hstate *h, struct folio *folio) { unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end; unsigned long vmemmap_reuse; if (!vmemmap_should_optimize_folio(h, folio)) return 0; vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h); vmemmap_reuse = vmemmap_start; vmemmap_start += HUGETLB_VMEMMAP_RESERVE_SIZE; /* * Split PMDs on the vmemmap virtual address range [@vmemmap_start, * @vmemmap_end] */ return vmemmap_remap_split(vmemmap_start, vmemmap_end, vmemmap_reuse); } void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list) { struct folio *folio; LIST_HEAD(vmemmap_pages); unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU; list_for_each_entry(folio, folio_list, lru) { int ret = hugetlb_vmemmap_split_folio(h, folio); /* * Spliting the PMD requires allocating a page, thus lets fail * early once we encounter the first OOM. No point in retrying * as it can be dynamically done on remap with the memory * we get back from the vmemmap deduplication. */ if (ret == -ENOMEM) break; } flush_tlb_all(); list_for_each_entry(folio, folio_list, lru) { int ret; ret = __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, flags); /* only need to synchronize_rcu() once for each batch */ flags &= ~VMEMMAP_SYNCHRONIZE_RCU; /* * Pages to be freed may have been accumulated. If we * encounter an ENOMEM, free what we have and try again. * This can occur in the case that both spliting fails * halfway and head page allocation also failed. In this * case __hugetlb_vmemmap_optimize_folio() would free memory * allowing more vmemmap remaps to occur. */ if (ret == -ENOMEM && !list_empty(&vmemmap_pages)) { flush_tlb_all(); free_vmemmap_page_list(&vmemmap_pages); INIT_LIST_HEAD(&vmemmap_pages); __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, flags); } } flush_tlb_all(); free_vmemmap_page_list(&vmemmap_pages); } static const struct ctl_table hugetlb_vmemmap_sysctls[] = { { .procname = "hugetlb_optimize_vmemmap", .data = &vmemmap_optimize_enabled, .maxlen = sizeof(vmemmap_optimize_enabled), .mode = 0644, .proc_handler = proc_dobool, }, }; static int __init hugetlb_vmemmap_init(void) { const struct hstate *h; /* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */ BUILD_BUG_ON(__NR_USED_SUBPAGE > HUGETLB_VMEMMAP_RESERVE_PAGES); for_each_hstate(h) { if (hugetlb_vmemmap_optimizable(h)) { register_sysctl_init("vm", hugetlb_vmemmap_sysctls); break; } } return 0; } late_initcall(hugetlb_vmemmap_init);
2 2 2 2 2 39 5 5 137 2 2 3 3 2 2 3 3 2 2 1 1 6 6 1 1 134 133 134 134 133 8 8 8 8 8 8 8 31 31 31 31 31 31 31 1 1 1 1 25 25 25 25 26 25 24 25 25 25 26 26 26 26 26 26 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 /* * Copyright (C) 2017 Netronome Systems, Inc. * * This software is licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this * source tree. * * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. */ #include <linux/debugfs.h> #include <linux/etherdevice.h> #include <linux/ethtool_netlink.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <net/netdev_queues.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> #include <net/netlink.h> #include <net/net_shaper.h> #include <net/netdev_lock.h> #include <net/pkt_cls.h> #include <net/rtnetlink.h> #include <net/udp_tunnel.h> #include "netdevsim.h" MODULE_IMPORT_NS("NETDEV_INTERNAL"); #define NSIM_RING_SIZE 256 static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb) { if (skb_queue_len(&rq->skb_queue) > NSIM_RING_SIZE) { dev_kfree_skb_any(skb); return NET_RX_DROP; } skb_queue_tail(&rq->skb_queue, skb); return NET_RX_SUCCESS; } static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb, struct nsim_rq *rq) { return __dev_forward_skb(dev, skb) ?: nsim_napi_rx(rq, skb); } static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); struct net_device *peer_dev; unsigned int len = skb->len; struct netdevsim *peer_ns; struct netdev_config *cfg; struct nsim_rq *rq; int rxq; rcu_read_lock(); if (!nsim_ipsec_tx(ns, skb)) goto out_drop_free; peer_ns = rcu_dereference(ns->peer); if (!peer_ns) goto out_drop_free; peer_dev = peer_ns->netdev; rxq = skb_get_queue_mapping(skb); if (rxq >= peer_dev->num_rx_queues) rxq = rxq % peer_dev->num_rx_queues; rq = peer_ns->rq[rxq]; cfg = peer_dev->cfg; if (skb_is_nonlinear(skb) && (cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED || (cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && cfg->hds_thresh > len))) skb_linearize(skb); skb_tx_timestamp(skb); if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP)) goto out_drop_cnt; if (!hrtimer_active(&rq->napi_timer)) hrtimer_start(&rq->napi_timer, us_to_ktime(5), HRTIMER_MODE_REL); rcu_read_unlock(); u64_stats_update_begin(&ns->syncp); ns->tx_packets++; ns->tx_bytes += len; u64_stats_update_end(&ns->syncp); return NETDEV_TX_OK; out_drop_free: dev_kfree_skb(skb); out_drop_cnt: rcu_read_unlock(); u64_stats_update_begin(&ns->syncp); ns->tx_dropped++; u64_stats_update_end(&ns->syncp); return NETDEV_TX_OK; } static void nsim_set_rx_mode(struct net_device *dev) { } static int nsim_change_mtu(struct net_device *dev, int new_mtu) { struct netdevsim *ns = netdev_priv(dev); if (ns->xdp.prog && !ns->xdp.prog->aux->xdp_has_frags && new_mtu > NSIM_XDP_MAX_MTU) return -EBUSY; WRITE_ONCE(dev->mtu, new_mtu); return 0; } static void nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct netdevsim *ns = netdev_priv(dev); unsigned int start; do { start = u64_stats_fetch_begin(&ns->syncp); stats->tx_bytes = ns->tx_bytes; stats->tx_packets = ns->tx_packets; stats->tx_dropped = ns->tx_dropped; } while (u64_stats_fetch_retry(&ns->syncp, start)); } static int nsim_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { return nsim_bpf_setup_tc_block_cb(type, type_data, cb_priv); } static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct netdevsim *ns = netdev_priv(dev); struct nsim_dev *nsim_dev = ns->nsim_dev; /* Only refuse multicast addresses, zero address can mean unset/any. */ if (vf >= nsim_dev_get_vfs(nsim_dev) || is_multicast_ether_addr(mac)) return -EINVAL; memcpy(nsim_dev->vfconfigs[vf].vf_mac, mac, ETH_ALEN); return 0; } static int nsim_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, __be16 vlan_proto) { struct netdevsim *ns = netdev_priv(dev); struct nsim_dev *nsim_dev = ns->nsim_dev; if (vf >= nsim_dev_get_vfs(nsim_dev) || vlan > 4095 || qos > 7) return -EINVAL; nsim_dev->vfconfigs[vf].vlan = vlan; nsim_dev->vfconfigs[vf].qos = qos; nsim_dev->vfconfigs[vf].vlan_proto = vlan_proto; return 0; } static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max) { struct netdevsim *ns = netdev_priv(dev); struct nsim_dev *nsim_dev = ns->nsim_dev; if (nsim_esw_mode_is_switchdev(ns->nsim_dev)) { pr_err("Not supported in switchdev mode. Please use devlink API.\n"); return -EOPNOTSUPP; } if (vf >= nsim_dev_get_vfs(nsim_dev)) return -EINVAL; nsim_dev->vfconfigs[vf].min_tx_rate = min; nsim_dev->vfconfigs[vf].max_tx_rate = max; return 0; } static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val) { struct netdevsim *ns = netdev_priv(dev); struct nsim_dev *nsim_dev = ns->nsim_dev; if (vf >= nsim_dev_get_vfs(nsim_dev)) return -EINVAL; nsim_dev->vfconfigs[vf].spoofchk_enabled = val; return 0; } static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val) { struct netdevsim *ns = netdev_priv(dev); struct nsim_dev *nsim_dev = ns->nsim_dev; if (vf >= nsim_dev_get_vfs(nsim_dev)) return -EINVAL; nsim_dev->vfconfigs[vf].rss_query_enabled = val; return 0; } static int nsim_set_vf_trust(struct net_device *dev, int vf, bool val) { struct netdevsim *ns = netdev_priv(dev); struct nsim_dev *nsim_dev = ns->nsim_dev; if (vf >= nsim_dev_get_vfs(nsim_dev)) return -EINVAL; nsim_dev->vfconfigs[vf].trusted = val; return 0; } static int nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi) { struct netdevsim *ns = netdev_priv(dev); struct nsim_dev *nsim_dev = ns->nsim_dev; if (vf >= nsim_dev_get_vfs(nsim_dev)) return -EINVAL; ivi->vf = vf; ivi->linkstate = nsim_dev->vfconfigs[vf].link_state; ivi->min_tx_rate = nsim_dev->vfconfigs[vf].min_tx_rate; ivi->max_tx_rate = nsim_dev->vfconfigs[vf].max_tx_rate; ivi->vlan = nsim_dev->vfconfigs[vf].vlan; ivi->vlan_proto = nsim_dev->vfconfigs[vf].vlan_proto; ivi->qos = nsim_dev->vfconfigs[vf].qos; memcpy(&ivi->mac, nsim_dev->vfconfigs[vf].vf_mac, ETH_ALEN); ivi->spoofchk = nsim_dev->vfconfigs[vf].spoofchk_enabled; ivi->trusted = nsim_dev->vfconfigs[vf].trusted; ivi->rss_query_en = nsim_dev->vfconfigs[vf].rss_query_enabled; return 0; } static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state) { struct netdevsim *ns = netdev_priv(dev); struct nsim_dev *nsim_dev = ns->nsim_dev; if (vf >= nsim_dev_get_vfs(nsim_dev)) return -EINVAL; switch (state) { case IFLA_VF_LINK_STATE_AUTO: case IFLA_VF_LINK_STATE_ENABLE: case IFLA_VF_LINK_STATE_DISABLE: break; default: return -EINVAL; } nsim_dev->vfconfigs[vf].link_state = state; return 0; } static void nsim_taprio_stats(struct tc_taprio_qopt_stats *stats) { stats->window_drops = 0; stats->tx_overruns = 0; } static int nsim_setup_tc_taprio(struct net_device *dev, struct tc_taprio_qopt_offload *offload) { int err = 0; switch (offload->cmd) { case TAPRIO_CMD_REPLACE: case TAPRIO_CMD_DESTROY: break; case TAPRIO_CMD_STATS: nsim_taprio_stats(&offload->stats); break; default: err = -EOPNOTSUPP; } return err; } static LIST_HEAD(nsim_block_cb_list); static int nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct netdevsim *ns = netdev_priv(dev); switch (type) { case TC_SETUP_QDISC_TAPRIO: return nsim_setup_tc_taprio(dev, type_data); case TC_SETUP_BLOCK: return flow_block_cb_setup_simple(type_data, &nsim_block_cb_list, nsim_setup_tc_block_cb, ns, ns, true); default: return -EOPNOTSUPP; } } static int nsim_set_features(struct net_device *dev, netdev_features_t features) { struct netdevsim *ns = netdev_priv(dev); if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC)) return nsim_bpf_disable_tc(ns); return 0; } static int nsim_get_iflink(const struct net_device *dev) { struct netdevsim *nsim, *peer; int iflink; nsim = netdev_priv(dev); rcu_read_lock(); peer = rcu_dereference(nsim->peer); iflink = peer ? READ_ONCE(peer->netdev->ifindex) : READ_ONCE(dev->ifindex); rcu_read_unlock(); return iflink; } static int nsim_rcv(struct nsim_rq *rq, int budget) { struct sk_buff *skb; int i; for (i = 0; i < budget; i++) { if (skb_queue_empty(&rq->skb_queue)) break; skb = skb_dequeue(&rq->skb_queue); netif_receive_skb(skb); } return i; } static int nsim_poll(struct napi_struct *napi, int budget) { struct nsim_rq *rq = container_of(napi, struct nsim_rq, napi); int done; done = nsim_rcv(rq, budget); napi_complete(napi); return done; } static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi) { struct page_pool_params params = { .order = 0, .pool_size = NSIM_RING_SIZE, .nid = NUMA_NO_NODE, .dev = &napi->dev->dev, .napi = napi, .dma_dir = DMA_BIDIRECTIONAL, .netdev = napi->dev, }; struct page_pool *pool; pool = page_pool_create(&params); if (IS_ERR(pool)) return PTR_ERR(pool); *p = pool; return 0; } static int nsim_init_napi(struct netdevsim *ns) { struct net_device *dev = ns->netdev; struct nsim_rq *rq; int err, i; for (i = 0; i < dev->num_rx_queues; i++) { rq = ns->rq[i]; netif_napi_add_config_locked(dev, &rq->napi, nsim_poll, i); } for (i = 0; i < dev->num_rx_queues; i++) { rq = ns->rq[i]; err = nsim_create_page_pool(&rq->page_pool, &rq->napi); if (err) goto err_pp_destroy; } return 0; err_pp_destroy: while (i--) { page_pool_destroy(ns->rq[i]->page_pool); ns->rq[i]->page_pool = NULL; } for (i = 0; i < dev->num_rx_queues; i++) __netif_napi_del_locked(&ns->rq[i]->napi); return err; } static enum hrtimer_restart nsim_napi_schedule(struct hrtimer *timer) { struct nsim_rq *rq; rq = container_of(timer, struct nsim_rq, napi_timer); napi_schedule(&rq->napi); return HRTIMER_NORESTART; } static void nsim_rq_timer_init(struct nsim_rq *rq) { hrtimer_init(&rq->napi_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rq->napi_timer.function = nsim_napi_schedule; } static void nsim_enable_napi(struct netdevsim *ns) { struct net_device *dev = ns->netdev; int i; for (i = 0; i < dev->num_rx_queues; i++) { struct nsim_rq *rq = ns->rq[i]; netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi); napi_enable_locked(&rq->napi); } } static int nsim_open(struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); int err; netdev_assert_locked(dev); err = nsim_init_napi(ns); if (err) return err; nsim_enable_napi(ns); return 0; } static void nsim_del_napi(struct netdevsim *ns) { struct net_device *dev = ns->netdev; int i; for (i = 0; i < dev->num_rx_queues; i++) { struct nsim_rq *rq = ns->rq[i]; napi_disable_locked(&rq->napi); __netif_napi_del_locked(&rq->napi); } synchronize_net(); for (i = 0; i < dev->num_rx_queues; i++) { page_pool_destroy(ns->rq[i]->page_pool); ns->rq[i]->page_pool = NULL; } } static int nsim_stop(struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); struct netdevsim *peer; netdev_assert_locked(dev); netif_carrier_off(dev); peer = rtnl_dereference(ns->peer); if (peer) netif_carrier_off(peer->netdev); nsim_del_napi(ns); return 0; } static int nsim_shaper_set(struct net_shaper_binding *binding, const struct net_shaper *shaper, struct netlink_ext_ack *extack) { return 0; } static int nsim_shaper_del(struct net_shaper_binding *binding, const struct net_shaper_handle *handle, struct netlink_ext_ack *extack) { return 0; } static int nsim_shaper_group(struct net_shaper_binding *binding, int leaves_count, const struct net_shaper *leaves, const struct net_shaper *root, struct netlink_ext_ack *extack) { return 0; } static void nsim_shaper_cap(struct net_shaper_binding *binding, enum net_shaper_scope scope, unsigned long *flags) { *flags = ULONG_MAX; } static const struct net_shaper_ops nsim_shaper_ops = { .set = nsim_shaper_set, .delete = nsim_shaper_del, .group = nsim_shaper_group, .capabilities = nsim_shaper_cap, }; static const struct net_device_ops nsim_netdev_ops = { .ndo_start_xmit = nsim_start_xmit, .ndo_set_rx_mode = nsim_set_rx_mode, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = nsim_change_mtu, .ndo_get_stats64 = nsim_get_stats64, .ndo_set_vf_mac = nsim_set_vf_mac, .ndo_set_vf_vlan = nsim_set_vf_vlan, .ndo_set_vf_rate = nsim_set_vf_rate, .ndo_set_vf_spoofchk = nsim_set_vf_spoofchk, .ndo_set_vf_trust = nsim_set_vf_trust, .ndo_get_vf_config = nsim_get_vf_config, .ndo_set_vf_link_state = nsim_set_vf_link_state, .ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en, .ndo_setup_tc = nsim_setup_tc, .ndo_set_features = nsim_set_features, .ndo_get_iflink = nsim_get_iflink, .ndo_bpf = nsim_bpf, .ndo_open = nsim_open, .ndo_stop = nsim_stop, .net_shaper_ops = &nsim_shaper_ops, }; static const struct net_device_ops nsim_vf_netdev_ops = { .ndo_start_xmit = nsim_start_xmit, .ndo_set_rx_mode = nsim_set_rx_mode, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = nsim_change_mtu, .ndo_get_stats64 = nsim_get_stats64, .ndo_setup_tc = nsim_setup_tc, .ndo_set_features = nsim_set_features, }; /* We don't have true per-queue stats, yet, so do some random fakery here. * Only report stuff for queue 0. */ static void nsim_get_queue_stats_rx(struct net_device *dev, int idx, struct netdev_queue_stats_rx *stats) { struct rtnl_link_stats64 rtstats = {}; if (!idx) nsim_get_stats64(dev, &rtstats); stats->packets = rtstats.rx_packets - !!rtstats.rx_packets; stats->bytes = rtstats.rx_bytes; } static void nsim_get_queue_stats_tx(struct net_device *dev, int idx, struct netdev_queue_stats_tx *stats) { struct rtnl_link_stats64 rtstats = {}; if (!idx) nsim_get_stats64(dev, &rtstats); stats->packets = rtstats.tx_packets - !!rtstats.tx_packets; stats->bytes = rtstats.tx_bytes; } static void nsim_get_base_stats(struct net_device *dev, struct netdev_queue_stats_rx *rx, struct netdev_queue_stats_tx *tx) { struct rtnl_link_stats64 rtstats = {}; nsim_get_stats64(dev, &rtstats); rx->packets = !!rtstats.rx_packets; rx->bytes = 0; tx->packets = !!rtstats.tx_packets; tx->bytes = 0; } static const struct netdev_stat_ops nsim_stat_ops = { .get_queue_stats_tx = nsim_get_queue_stats_tx, .get_queue_stats_rx = nsim_get_queue_stats_rx, .get_base_stats = nsim_get_base_stats, }; static struct nsim_rq *nsim_queue_alloc(void) { struct nsim_rq *rq; rq = kzalloc(sizeof(*rq), GFP_KERNEL_ACCOUNT); if (!rq) return NULL; skb_queue_head_init(&rq->skb_queue); nsim_rq_timer_init(rq); return rq; } static void nsim_queue_free(struct nsim_rq *rq) { hrtimer_cancel(&rq->napi_timer); skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE); kfree(rq); } /* Queue reset mode is controlled by ns->rq_reset_mode. * - normal - new NAPI new pool (old NAPI enabled when new added) * - mode 1 - allocate new pool (NAPI is only disabled / enabled) * - mode 2 - new NAPI new pool (old NAPI removed before new added) * - mode 3 - new NAPI new pool (old NAPI disabled when new added) */ struct nsim_queue_mem { struct nsim_rq *rq; struct page_pool *pp; }; static int nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx) { struct nsim_queue_mem *qmem = per_queue_mem; struct netdevsim *ns = netdev_priv(dev); int err; if (ns->rq_reset_mode > 3) return -EINVAL; if (ns->rq_reset_mode == 1) { if (!netif_running(ns->netdev)) return -ENETDOWN; return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi); } qmem->rq = nsim_queue_alloc(); if (!qmem->rq) return -ENOMEM; err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi); if (err) goto err_free; if (!ns->rq_reset_mode) netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll, idx); return 0; err_free: nsim_queue_free(qmem->rq); return err; } static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem) { struct nsim_queue_mem *qmem = per_queue_mem; struct netdevsim *ns = netdev_priv(dev); page_pool_destroy(qmem->pp); if (qmem->rq) { if (!ns->rq_reset_mode) netif_napi_del_locked(&qmem->rq->napi); page_pool_destroy(qmem->rq->page_pool); nsim_queue_free(qmem->rq); } } static int nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx) { struct nsim_queue_mem *qmem = per_queue_mem; struct netdevsim *ns = netdev_priv(dev); netdev_assert_locked(dev); if (ns->rq_reset_mode == 1) { ns->rq[idx]->page_pool = qmem->pp; napi_enable_locked(&ns->rq[idx]->napi); return 0; } /* netif_napi_add()/_del() should normally be called from alloc/free, * here we want to test various call orders. */ if (ns->rq_reset_mode == 2) { netif_napi_del_locked(&ns->rq[idx]->napi); netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll, idx); } else if (ns->rq_reset_mode == 3) { netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll, idx); netif_napi_del_locked(&ns->rq[idx]->napi); } ns->rq[idx] = qmem->rq; napi_enable_locked(&ns->rq[idx]->napi); return 0; } static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx) { struct nsim_queue_mem *qmem = per_queue_mem; struct netdevsim *ns = netdev_priv(dev); netdev_assert_locked(dev); napi_disable_locked(&ns->rq[idx]->napi); if (ns->rq_reset_mode == 1) { qmem->pp = ns->rq[idx]->page_pool; page_pool_disable_direct_recycling(qmem->pp); } else { qmem->rq = ns->rq[idx]; } return 0; } static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = { .ndo_queue_mem_size = sizeof(struct nsim_queue_mem), .ndo_queue_mem_alloc = nsim_queue_mem_alloc, .ndo_queue_mem_free = nsim_queue_mem_free, .ndo_queue_start = nsim_queue_start, .ndo_queue_stop = nsim_queue_stop, }; static ssize_t nsim_qreset_write(struct file *file, const char __user *data, size_t count, loff_t *ppos) { struct netdevsim *ns = file->private_data; unsigned int queue, mode; char buf[32]; ssize_t ret; if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, data, count)) return -EFAULT; buf[count] = '\0'; ret = sscanf(buf, "%u %u", &queue, &mode); if (ret != 2) return -EINVAL; netdev_lock(ns->netdev); if (queue >= ns->netdev->real_num_rx_queues) { ret = -EINVAL; goto exit_unlock; } ns->rq_reset_mode = mode; ret = netdev_rx_queue_restart(ns->netdev, queue); ns->rq_reset_mode = 0; if (ret) goto exit_unlock; ret = count; exit_unlock: netdev_unlock(ns->netdev); return ret; } static const struct file_operations nsim_qreset_fops = { .open = simple_open, .write = nsim_qreset_write, .owner = THIS_MODULE, }; static ssize_t nsim_pp_hold_read(struct file *file, char __user *data, size_t count, loff_t *ppos) { struct netdevsim *ns = file->private_data; char buf[3] = "n\n"; if (ns->page) buf[0] = 'y'; return simple_read_from_buffer(data, count, ppos, buf, 2); } static ssize_t nsim_pp_hold_write(struct file *file, const char __user *data, size_t count, loff_t *ppos) { struct netdevsim *ns = file->private_data; ssize_t ret; bool val; ret = kstrtobool_from_user(data, count, &val); if (ret) return ret; rtnl_lock(); ret = count; if (val == !!ns->page) goto exit; if (!netif_running(ns->netdev) && val) { ret = -ENETDOWN; } else if (val) { ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool); if (!ns->page) ret = -ENOMEM; } else { page_pool_put_full_page(ns->page->pp, ns->page, false); ns->page = NULL; } exit: rtnl_unlock(); return ret; } static const struct file_operations nsim_pp_hold_fops = { .open = simple_open, .read = nsim_pp_hold_read, .write = nsim_pp_hold_write, .llseek = generic_file_llseek, .owner = THIS_MODULE, }; static void nsim_setup(struct net_device *dev) { ether_setup(dev); eth_hw_addr_random(dev); dev->tx_queue_len = 0; dev->flags &= ~IFF_MULTICAST; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; dev->features |= NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | NETIF_F_TSO; dev->hw_features |= NETIF_F_HW_TC | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | NETIF_F_TSO; dev->max_mtu = ETH_MAX_MTU; dev->xdp_features = NETDEV_XDP_ACT_HW_OFFLOAD; } static int nsim_queue_init(struct netdevsim *ns) { struct net_device *dev = ns->netdev; int i; ns->rq = kcalloc(dev->num_rx_queues, sizeof(*ns->rq), GFP_KERNEL_ACCOUNT); if (!ns->rq) return -ENOMEM; for (i = 0; i < dev->num_rx_queues; i++) { ns->rq[i] = nsim_queue_alloc(); if (!ns->rq[i]) goto err_free_prev; } return 0; err_free_prev: while (i--) kfree(ns->rq[i]); kfree(ns->rq); return -ENOMEM; } static void nsim_queue_uninit(struct netdevsim *ns) { struct net_device *dev = ns->netdev; int i; for (i = 0; i < dev->num_rx_queues; i++) nsim_queue_free(ns->rq[i]); kfree(ns->rq); ns->rq = NULL; } static int nsim_init_netdevsim(struct netdevsim *ns) { struct mock_phc *phc; int err; phc = mock_phc_create(&ns->nsim_bus_dev->dev); if (IS_ERR(phc)) return PTR_ERR(phc); ns->phc = phc; ns->netdev->netdev_ops = &nsim_netdev_ops; ns->netdev->stat_ops = &nsim_stat_ops; ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops; err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev); if (err) goto err_phc_destroy; rtnl_lock(); err = nsim_queue_init(ns); if (err) goto err_utn_destroy; err = nsim_bpf_init(ns); if (err) goto err_rq_destroy; nsim_macsec_init(ns); nsim_ipsec_init(ns); err = register_netdevice(ns->netdev); if (err) goto err_ipsec_teardown; rtnl_unlock(); return 0; err_ipsec_teardown: nsim_ipsec_teardown(ns); nsim_macsec_teardown(ns); nsim_bpf_uninit(ns); err_rq_destroy: nsim_queue_uninit(ns); err_utn_destroy: rtnl_unlock(); nsim_udp_tunnels_info_destroy(ns->netdev); err_phc_destroy: mock_phc_destroy(ns->phc); return err; } static int nsim_init_netdevsim_vf(struct netdevsim *ns) { int err; ns->netdev->netdev_ops = &nsim_vf_netdev_ops; rtnl_lock(); err = register_netdevice(ns->netdev); rtnl_unlock(); return err; } static void nsim_exit_netdevsim(struct netdevsim *ns) { nsim_udp_tunnels_info_destroy(ns->netdev); mock_phc_destroy(ns->phc); } struct netdevsim * nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) { struct net_device *dev; struct netdevsim *ns; int err; dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup, nsim_dev->nsim_bus_dev->num_queues); if (!dev) return ERR_PTR(-ENOMEM); dev_net_set(dev, nsim_dev_net(nsim_dev)); ns = netdev_priv(dev); ns->netdev = dev; u64_stats_init(&ns->syncp); ns->nsim_dev = nsim_dev; ns->nsim_dev_port = nsim_dev_port; ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port); nsim_ethtool_init(ns); if (nsim_dev_port_is_pf(nsim_dev_port)) err = nsim_init_netdevsim(ns); else err = nsim_init_netdevsim_vf(ns); if (err) goto err_free_netdev; ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir, ns, &nsim_pp_hold_fops); ns->qr_dfs = debugfs_create_file("queue_reset", 0200, nsim_dev_port->ddir, ns, &nsim_qreset_fops); return ns; err_free_netdev: free_netdev(dev); return ERR_PTR(err); } void nsim_destroy(struct netdevsim *ns) { struct net_device *dev = ns->netdev; struct netdevsim *peer; debugfs_remove(ns->qr_dfs); debugfs_remove(ns->pp_dfs); rtnl_lock(); peer = rtnl_dereference(ns->peer); if (peer) RCU_INIT_POINTER(peer->peer, NULL); RCU_INIT_POINTER(ns->peer, NULL); unregister_netdevice(dev); if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { nsim_macsec_teardown(ns); nsim_ipsec_teardown(ns); nsim_bpf_uninit(ns); nsim_queue_uninit(ns); } rtnl_unlock(); if (nsim_dev_port_is_pf(ns->nsim_dev_port)) nsim_exit_netdevsim(ns); /* Put this intentionally late to exercise the orphaning path */ if (ns->page) { page_pool_put_full_page(ns->page->pp, ns->page, false); ns->page = NULL; } free_netdev(dev); } bool netdev_is_nsim(struct net_device *dev) { return dev->netdev_ops == &nsim_netdev_ops; } static int nsim_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { NL_SET_ERR_MSG_MOD(extack, "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device"); return -EOPNOTSUPP; } static struct rtnl_link_ops nsim_link_ops __read_mostly = { .kind = DRV_NAME, .validate = nsim_validate, }; static int __init nsim_module_init(void) { int err; err = nsim_dev_init(); if (err) return err; err = nsim_bus_init(); if (err) goto err_dev_exit; err = rtnl_link_register(&nsim_link_ops); if (err) goto err_bus_exit; return 0; err_bus_exit: nsim_bus_exit(); err_dev_exit: nsim_dev_exit(); return err; } static void __exit nsim_module_exit(void) { rtnl_link_unregister(&nsim_link_ops); nsim_bus_exit(); nsim_dev_exit(); } module_init(nsim_module_init); module_exit(nsim_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Simulated networking device for testing"); MODULE_ALIAS_RTNL_LINK(DRV_NAME);
4 5 4 1 5 5 8 1 2 4 1 9 5 3 1 2 19 1 1 28 1 1 2 24 2 1 2 5 11 12 5 2 4 1 5 5 2 17 19 19 131 131 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> */ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/filter.h> #include <linux/bpf.h> #include <net/netlink.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <linux/tc_act/tc_bpf.h> #include <net/tc_act/tc_bpf.h> #include <net/tc_wrapper.h> #define ACT_BPF_NAME_LEN 256 struct tcf_bpf_cfg { struct bpf_prog *filter; struct sock_filter *bpf_ops; const char *bpf_name; u16 bpf_num_ops; bool is_ebpf; }; static struct tc_action_ops act_bpf_ops; TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { bool at_ingress = skb_at_tc_ingress(skb); struct tcf_bpf *prog = to_bpf(act); struct bpf_prog *filter; int action, filter_res; tcf_lastuse_update(&prog->tcf_tm); bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb); filter = rcu_dereference(prog->filter); if (at_ingress) { __skb_push(skb, skb->mac_len); bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(filter, skb); __skb_pull(skb, skb->mac_len); } else { bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(filter, skb); } if (unlikely(!skb->tstamp && skb->tstamp_type)) skb->tstamp_type = SKB_CLOCK_REALTIME; if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK) skb_orphan(skb); /* A BPF program may overwrite the default action opcode. * Similarly as in cls_bpf, if filter_res == -1 we use the * default action specified from tc. * * In case a different well-known TC_ACT opcode has been * returned, it will overwrite the default one. * * For everything else that is unknown, TC_ACT_UNSPEC is * returned. */ switch (filter_res) { case TC_ACT_PIPE: case TC_ACT_RECLASSIFY: case TC_ACT_OK: case TC_ACT_REDIRECT: action = filter_res; break; case TC_ACT_SHOT: action = filter_res; qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats)); break; case TC_ACT_UNSPEC: action = prog->tcf_action; break; default: action = TC_ACT_UNSPEC; break; } return action; } static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog) { return !prog->bpf_ops; } static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog, struct sk_buff *skb) { struct nlattr *nla; if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops)) return -EMSGSIZE; nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops * sizeof(struct sock_filter)); if (nla == NULL) return -EMSGSIZE; memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); return 0; } static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, struct sk_buff *skb) { struct nlattr *nla; if (prog->bpf_name && nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) return -EMSGSIZE; if (nla_put_u32(skb, TCA_ACT_BPF_ID, prog->filter->aux->id)) return -EMSGSIZE; nla = nla_reserve(skb, TCA_ACT_BPF_TAG, sizeof(prog->filter->tag)); if (nla == NULL) return -EMSGSIZE; memcpy(nla_data(nla), prog->filter->tag, nla_len(nla)); return 0; } static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) { unsigned char *tp = skb_tail_pointer(skb); struct tcf_bpf *prog = to_bpf(act); struct tc_act_bpf opt = { .index = prog->tcf_index, .refcnt = refcount_read(&prog->tcf_refcnt) - ref, .bindcnt = atomic_read(&prog->tcf_bindcnt) - bind, }; struct tcf_t tm; int ret; spin_lock_bh(&prog->tcf_lock); opt.action = prog->tcf_action; if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if (tcf_bpf_is_ebpf(prog)) ret = tcf_bpf_dump_ebpf_info(prog, skb); else ret = tcf_bpf_dump_bpf_info(prog, skb); if (ret) goto nla_put_failure; tcf_tm_dump(&tm, &prog->tcf_tm); if (nla_put_64bit(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm, TCA_ACT_BPF_PAD)) goto nla_put_failure; spin_unlock_bh(&prog->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&prog->tcf_lock); nlmsg_trim(skb, tp); return -1; } static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, [TCA_ACT_BPF_FD] = { .type = NLA_U32 }, [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN }, [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, }; static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) { struct sock_filter *bpf_ops; struct sock_fprog_kern fprog_tmp; struct bpf_prog *fp; u16 bpf_size, bpf_num_ops; int ret; bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) return -EINVAL; bpf_size = bpf_num_ops * sizeof(*bpf_ops); if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS])) return -EINVAL; bpf_ops = kmemdup(nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size, GFP_KERNEL); if (bpf_ops == NULL) return -ENOMEM; fprog_tmp.len = bpf_num_ops; fprog_tmp.filter = bpf_ops; ret = bpf_prog_create(&fp, &fprog_tmp); if (ret < 0) { kfree(bpf_ops); return ret; } cfg->bpf_ops = bpf_ops; cfg->bpf_num_ops = bpf_num_ops; cfg->filter = fp; cfg->is_ebpf = false; return 0; } static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) { struct bpf_prog *fp; char *name = NULL; u32 bpf_fd; bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]); fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_ACT); if (IS_ERR(fp)) return PTR_ERR(fp); if (tb[TCA_ACT_BPF_NAME]) { name = nla_memdup(tb[TCA_ACT_BPF_NAME], GFP_KERNEL); if (!name) { bpf_prog_put(fp); return -ENOMEM; } } cfg->bpf_name = name; cfg->filter = fp; cfg->is_ebpf = true; return 0; } static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg) { struct bpf_prog *filter = cfg->filter; if (filter) { if (cfg->is_ebpf) bpf_prog_put(filter); else bpf_prog_destroy(filter); } kfree(cfg->bpf_ops); kfree(cfg->bpf_name); } static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, struct tcf_bpf_cfg *cfg) { cfg->is_ebpf = tcf_bpf_is_ebpf(prog); /* updates to prog->filter are prevented, since it's called either * with tcf lock or during final cleanup in rcu callback */ cfg->filter = rcu_dereference_protected(prog->filter, 1); cfg->bpf_ops = prog->bpf_ops; cfg->bpf_name = prog->bpf_name; } static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tcf_bpf_cfg cfg, old; struct tc_act_bpf *parm; struct tcf_bpf *prog; bool is_bpf, is_ebpf; int ret, res = 0; u32 index; if (!nla) return -EINVAL; ret = nla_parse_nested_deprecated(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy, NULL); if (ret < 0) return ret; if (!tb[TCA_ACT_BPF_PARMS]) return -EINVAL; parm = nla_data(tb[TCA_ACT_BPF_PARMS]); index = parm->index; ret = tcf_idr_check_alloc(tn, &index, act, bind); if (!ret) { ret = tcf_idr_create(tn, index, est, act, &act_bpf_ops, bind, true, flags); if (ret < 0) { tcf_idr_cleanup(tn, index); return ret; } res = ACT_P_CREATED; } else if (ret > 0) { /* Don't override defaults. */ if (bind) return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*act, bind); return -EEXIST; } } else { return ret; } ret = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (ret < 0) goto release_idr; is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; is_ebpf = tb[TCA_ACT_BPF_FD]; if (is_bpf == is_ebpf) { ret = -EINVAL; goto put_chain; } memset(&cfg, 0, sizeof(cfg)); ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) : tcf_bpf_init_from_efd(tb, &cfg); if (ret < 0) goto put_chain; prog = to_bpf(*act); spin_lock_bh(&prog->tcf_lock); if (res != ACT_P_CREATED) tcf_bpf_prog_fill_cfg(prog, &old); prog->bpf_ops = cfg.bpf_ops; prog->bpf_name = cfg.bpf_name; if (cfg.bpf_num_ops) prog->bpf_num_ops = cfg.bpf_num_ops; goto_ch = tcf_action_set_ctrlact(*act, parm->action, goto_ch); rcu_assign_pointer(prog->filter, cfg.filter); spin_unlock_bh(&prog->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); if (res != ACT_P_CREATED) { /* make sure the program being replaced is no longer executing */ synchronize_rcu(); tcf_bpf_cfg_cleanup(&old); } return res; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*act, bind); return ret; } static void tcf_bpf_cleanup(struct tc_action *act) { struct tcf_bpf_cfg tmp; tcf_bpf_prog_fill_cfg(to_bpf(act), &tmp); tcf_bpf_cfg_cleanup(&tmp); } static struct tc_action_ops act_bpf_ops __read_mostly = { .kind = "bpf", .id = TCA_ID_BPF, .owner = THIS_MODULE, .act = tcf_bpf_act, .dump = tcf_bpf_dump, .cleanup = tcf_bpf_cleanup, .init = tcf_bpf_init, .size = sizeof(struct tcf_bpf), }; MODULE_ALIAS_NET_ACT("bpf"); static __net_init int bpf_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id); return tc_action_net_init(net, tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_bpf_ops.net_id); } static struct pernet_operations bpf_net_ops = { .init = bpf_init_net, .exit_batch = bpf_exit_net, .id = &act_bpf_ops.net_id, .size = sizeof(struct tc_action_net), }; static int __init bpf_init_module(void) { return tcf_register_action(&act_bpf_ops, &bpf_net_ops); } static void __exit bpf_cleanup_module(void) { tcf_unregister_action(&act_bpf_ops, &bpf_net_ops); } module_init(bpf_init_module); module_exit(bpf_cleanup_module); MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); MODULE_DESCRIPTION("TC BPF based action"); MODULE_LICENSE("GPL v2");
20 55 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * inet6 interface/address list definitions * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> */ #ifndef _NET_IF_INET6_H #define _NET_IF_INET6_H #include <net/snmp.h> #include <linux/ipv6.h> #include <linux/refcount.h> /* inet6_dev.if_flags */ #define IF_RA_OTHERCONF 0x80 #define IF_RA_MANAGED 0x40 #define IF_RA_RCVD 0x20 #define IF_RS_SENT 0x10 #define IF_READY 0x80000000 enum { INET6_IFADDR_STATE_PREDAD, INET6_IFADDR_STATE_DAD, INET6_IFADDR_STATE_POSTDAD, INET6_IFADDR_STATE_ERRDAD, INET6_IFADDR_STATE_DEAD, }; struct inet6_ifaddr { struct in6_addr addr; __u32 prefix_len; __u32 rt_priority; /* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */ __u32 valid_lft; __u32 prefered_lft; refcount_t refcnt; spinlock_t lock; int state; __u32 flags; __u8 dad_probes; __u8 stable_privacy_retry; __u16 scope; __u64 dad_nonce; unsigned long cstamp; /* created timestamp */ unsigned long tstamp; /* updated timestamp */ struct delayed_work dad_work; struct inet6_dev *idev; struct fib6_info *rt; struct hlist_node addr_lst; struct list_head if_list; /* * Used to safely traverse idev->addr_list in process context * if the idev->lock needed to protect idev->addr_list cannot be held. * In that case, add the items to this list temporarily and iterate * without holding idev->lock. * See addrconf_ifdown and dev_forward_change. */ struct list_head if_list_aux; struct list_head tmp_list; struct inet6_ifaddr *ifpub; int regen_count; bool tokenized; u8 ifa_proto; struct rcu_head rcu; struct in6_addr peer_addr; }; struct ip6_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; struct in6_addr sl_addr[] __counted_by(sl_max); }; #define IP6_SFBLOCK 10 /* allocate this many at once */ struct ipv6_mc_socklist { struct in6_addr addr; int ifindex; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ipv6_mc_socklist __rcu *next; struct ip6_sf_socklist __rcu *sflist; struct rcu_head rcu; }; struct ip6_sf_list { struct ip6_sf_list __rcu *sf_next; struct in6_addr sf_addr; unsigned long sf_count[2]; /* include/exclude counts */ unsigned char sf_gsresp; /* include in g & s response? */ unsigned char sf_oldin; /* change state */ unsigned char sf_crcount; /* retrans. left to send */ struct rcu_head rcu; }; #define MAF_TIMER_RUNNING 0x01 #define MAF_LAST_REPORTER 0x02 #define MAF_LOADED 0x04 #define MAF_NOREPORT 0x08 #define MAF_GSQUERY 0x10 struct ifmcaddr6 { struct in6_addr mca_addr; struct inet6_dev *idev; struct ifmcaddr6 __rcu *next; struct ip6_sf_list __rcu *mca_sources; struct ip6_sf_list __rcu *mca_tomb; unsigned int mca_sfmode; unsigned char mca_crcount; unsigned long mca_sfcount[2]; struct delayed_work mca_work; unsigned int mca_flags; int mca_users; refcount_t mca_refcnt; unsigned long mca_cstamp; unsigned long mca_tstamp; struct rcu_head rcu; }; /* Anycast stuff */ struct ipv6_ac_socklist { struct in6_addr acl_addr; int acl_ifindex; struct ipv6_ac_socklist *acl_next; }; struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; struct ifacaddr6 __rcu *aca_next; struct hlist_node aca_addr_lst; int aca_users; refcount_t aca_refcnt; unsigned long aca_cstamp; unsigned long aca_tstamp; struct rcu_head rcu; }; #define IFA_HOST IPV6_ADDR_LOOPBACK #define IFA_LINK IPV6_ADDR_LINKLOCAL #define IFA_SITE IPV6_ADDR_SITELOCAL struct ipv6_devstat { struct proc_dir_entry *proc_dir_entry; DEFINE_SNMP_STAT(struct ipstats_mib, ipv6); DEFINE_SNMP_STAT_ATOMIC(struct icmpv6_mib_device, icmpv6dev); DEFINE_SNMP_STAT_ATOMIC(struct icmpv6msg_mib_device, icmpv6msgdev); }; struct inet6_dev { struct net_device *dev; netdevice_tracker dev_tracker; struct list_head addr_list; struct ifmcaddr6 __rcu *mc_list; struct ifmcaddr6 __rcu *mc_tomb; unsigned char mc_qrv; /* Query Robustness Variable */ unsigned char mc_gq_running; unsigned char mc_ifc_count; unsigned char mc_dad_count; unsigned long mc_v1_seen; /* Max time we stay in MLDv1 mode */ unsigned long mc_qi; /* Query Interval */ unsigned long mc_qri; /* Query Response Interval */ unsigned long mc_maxdelay; struct delayed_work mc_gq_work; /* general query work */ struct delayed_work mc_ifc_work; /* interface change work */ struct delayed_work mc_dad_work; /* dad complete mc work */ struct delayed_work mc_query_work; /* mld query work */ struct delayed_work mc_report_work; /* mld report work */ struct sk_buff_head mc_query_queue; /* mld query queue */ struct sk_buff_head mc_report_queue; /* mld report queue */ spinlock_t mc_query_lock; /* mld query queue lock */ spinlock_t mc_report_lock; /* mld query report lock */ struct mutex mc_lock; /* mld global lock */ struct ifacaddr6 __rcu *ac_list; rwlock_t lock; refcount_t refcnt; __u32 if_flags; int dead; u32 desync_factor; struct list_head tempaddr_list; struct in6_addr token; struct neigh_parms *nd_parms; struct ipv6_devconf cnf; struct ipv6_devstat stats; struct timer_list rs_timer; __s32 rs_interval; /* in jiffies */ __u8 rs_probes; unsigned long tstamp; /* ipv6InterfaceTable update timestamp */ struct rcu_head rcu; unsigned int ra_mtu; }; static inline void ipv6_eth_mc_map(const struct in6_addr *addr, char *buf) { /* * +-------+-------+-------+-------+-------+-------+ * | 33 | 33 | DST13 | DST14 | DST15 | DST16 | * +-------+-------+-------+-------+-------+-------+ */ buf[0]= 0x33; buf[1]= 0x33; memcpy(buf + 2, &addr->s6_addr32[3], sizeof(__u32)); } static inline void ipv6_arcnet_mc_map(const struct in6_addr *addr, char *buf) { buf[0] = 0x00; } static inline void ipv6_ib_mc_map(const struct in6_addr *addr, const unsigned char *broadcast, char *buf) { unsigned char scope = broadcast[5] & 0xF; buf[0] = 0; /* Reserved */ buf[1] = 0xff; /* Multicast QPN */ buf[2] = 0xff; buf[3] = 0xff; buf[4] = 0xff; buf[5] = 0x10 | scope; /* scope from broadcast address */ buf[6] = 0x60; /* IPv6 signature */ buf[7] = 0x1b; buf[8] = broadcast[8]; /* P_Key */ buf[9] = broadcast[9]; memcpy(buf + 10, addr->s6_addr + 6, 10); } static inline int ipv6_ipgre_mc_map(const struct in6_addr *addr, const unsigned char *broadcast, char *buf) { if ((broadcast[0] | broadcast[1] | broadcast[2] | broadcast[3]) != 0) { memcpy(buf, broadcast, 4); } else { /* v4mapped? */ if ((addr->s6_addr32[0] | addr->s6_addr32[1] | (addr->s6_addr32[2] ^ htonl(0x0000ffff))) != 0) return -EINVAL; memcpy(buf, &addr->s6_addr32[3], 4); } return 0; } #endif
8 8 8 8 1 1 8 8 8 8 8 8 8 3 8 8 1 4 9 1 9 8 8 5 3 8 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 // SPDX-License-Identifier: GPL-2.0 /* * main.c - Multi purpose firmware loading support * * Copyright (c) 2003 Manuel Estrada Sainz * * Please see Documentation/driver-api/firmware/ for more information. * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/capability.h> #include <linux/device.h> #include <linux/kernel_read_file.h> #include <linux/module.h> #include <linux/init.h> #include <linux/initrd.h> #include <linux/timer.h> #include <linux/vmalloc.h> #include <linux/interrupt.h> #include <linux/bitops.h> #include <linux/mutex.h> #include <linux/workqueue.h> #include <linux/highmem.h> #include <linux/firmware.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/file.h> #include <linux/list.h> #include <linux/fs.h> #include <linux/async.h> #include <linux/pm.h> #include <linux/suspend.h> #include <linux/syscore_ops.h> #include <linux/reboot.h> #include <linux/security.h> #include <linux/zstd.h> #include <linux/xz.h> #include <generated/utsrelease.h> #include "../base.h" #include "firmware.h" #include "fallback.h" MODULE_AUTHOR("Manuel Estrada Sainz"); MODULE_DESCRIPTION("Multi purpose firmware loading support"); MODULE_LICENSE("GPL"); struct firmware_cache { /* firmware_buf instance will be added into the below list */ spinlock_t lock; struct list_head head; int state; #ifdef CONFIG_FW_CACHE /* * Names of firmware images which have been cached successfully * will be added into the below list so that device uncache * helper can trace which firmware images have been cached * before. */ spinlock_t name_lock; struct list_head fw_names; struct delayed_work work; struct notifier_block pm_notify; #endif }; struct fw_cache_entry { struct list_head list; const char *name; }; struct fw_name_devm { unsigned long magic; const char *name; }; static inline struct fw_priv *to_fw_priv(struct kref *ref) { return container_of(ref, struct fw_priv, ref); } #define FW_LOADER_NO_CACHE 0 #define FW_LOADER_START_CACHE 1 /* fw_lock could be moved to 'struct fw_sysfs' but since it is just * guarding for corner cases a global lock should be OK */ DEFINE_MUTEX(fw_lock); struct firmware_cache fw_cache; bool fw_load_abort_all; void fw_state_init(struct fw_priv *fw_priv) { struct fw_state *fw_st = &fw_priv->fw_st; init_completion(&fw_st->completion); fw_st->status = FW_STATUS_UNKNOWN; } static inline int fw_state_wait(struct fw_priv *fw_priv) { return __fw_state_wait_common(fw_priv, MAX_SCHEDULE_TIMEOUT); } static void fw_cache_piggyback_on_request(struct fw_priv *fw_priv); static struct fw_priv *__allocate_fw_priv(const char *fw_name, struct firmware_cache *fwc, void *dbuf, size_t size, size_t offset, u32 opt_flags) { struct fw_priv *fw_priv; /* For a partial read, the buffer must be preallocated. */ if ((opt_flags & FW_OPT_PARTIAL) && !dbuf) return NULL; /* Only partial reads are allowed to use an offset. */ if (offset != 0 && !(opt_flags & FW_OPT_PARTIAL)) return NULL; fw_priv = kzalloc(sizeof(*fw_priv), GFP_ATOMIC); if (!fw_priv) return NULL; fw_priv->fw_name = kstrdup_const(fw_name, GFP_ATOMIC); if (!fw_priv->fw_name) { kfree(fw_priv); return NULL; } kref_init(&fw_priv->ref); fw_priv->fwc = fwc; fw_priv->data = dbuf; fw_priv->allocated_size = size; fw_priv->offset = offset; fw_priv->opt_flags = opt_flags; fw_state_init(fw_priv); #ifdef CONFIG_FW_LOADER_USER_HELPER INIT_LIST_HEAD(&fw_priv->pending_list); #endif pr_debug("%s: fw-%s fw_priv=%p\n", __func__, fw_name, fw_priv); return fw_priv; } static struct fw_priv *__lookup_fw_priv(const char *fw_name) { struct fw_priv *tmp; struct firmware_cache *fwc = &fw_cache; list_for_each_entry(tmp, &fwc->head, list) if (!strcmp(tmp->fw_name, fw_name)) return tmp; return NULL; } /* Returns 1 for batching firmware requests with the same name */ int alloc_lookup_fw_priv(const char *fw_name, struct firmware_cache *fwc, struct fw_priv **fw_priv, void *dbuf, size_t size, size_t offset, u32 opt_flags) { struct fw_priv *tmp; spin_lock(&fwc->lock); /* * Do not merge requests that are marked to be non-cached or * are performing partial reads. */ if (!(opt_flags & (FW_OPT_NOCACHE | FW_OPT_PARTIAL))) { tmp = __lookup_fw_priv(fw_name); if (tmp) { kref_get(&tmp->ref); spin_unlock(&fwc->lock); *fw_priv = tmp; pr_debug("batched request - sharing the same struct fw_priv and lookup for multiple requests\n"); return 1; } } tmp = __allocate_fw_priv(fw_name, fwc, dbuf, size, offset, opt_flags); if (tmp) { INIT_LIST_HEAD(&tmp->list); if (!(opt_flags & FW_OPT_NOCACHE)) list_add(&tmp->list, &fwc->head); } spin_unlock(&fwc->lock); *fw_priv = tmp; return tmp ? 0 : -ENOMEM; } static void __free_fw_priv(struct kref *ref) __releases(&fwc->lock) { struct fw_priv *fw_priv = to_fw_priv(ref); struct firmware_cache *fwc = fw_priv->fwc; pr_debug("%s: fw-%s fw_priv=%p data=%p size=%u\n", __func__, fw_priv->fw_name, fw_priv, fw_priv->data, (unsigned int)fw_priv->size); list_del(&fw_priv->list); spin_unlock(&fwc->lock); if (fw_is_paged_buf(fw_priv)) fw_free_paged_buf(fw_priv); else if (!fw_priv->allocated_size) vfree(fw_priv->data); kfree_const(fw_priv->fw_name); kfree(fw_priv); } void free_fw_priv(struct fw_priv *fw_priv) { struct firmware_cache *fwc = fw_priv->fwc; spin_lock(&fwc->lock); if (!kref_put(&fw_priv->ref, __free_fw_priv)) spin_unlock(&fwc->lock); } #ifdef CONFIG_FW_LOADER_PAGED_BUF bool fw_is_paged_buf(struct fw_priv *fw_priv) { return fw_priv->is_paged_buf; } void fw_free_paged_buf(struct fw_priv *fw_priv) { int i; if (!fw_priv->pages) return; vunmap(fw_priv->data); for (i = 0; i < fw_priv->nr_pages; i++) __free_page(fw_priv->pages[i]); kvfree(fw_priv->pages); fw_priv->pages = NULL; fw_priv->page_array_size = 0; fw_priv->nr_pages = 0; fw_priv->data = NULL; fw_priv->size = 0; } int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed) { /* If the array of pages is too small, grow it */ if (fw_priv->page_array_size < pages_needed) { int new_array_size = max(pages_needed, fw_priv->page_array_size * 2); struct page **new_pages; new_pages = kvmalloc_array(new_array_size, sizeof(void *), GFP_KERNEL); if (!new_pages) return -ENOMEM; memcpy(new_pages, fw_priv->pages, fw_priv->page_array_size * sizeof(void *)); memset(&new_pages[fw_priv->page_array_size], 0, sizeof(void *) * (new_array_size - fw_priv->page_array_size)); kvfree(fw_priv->pages); fw_priv->pages = new_pages; fw_priv->page_array_size = new_array_size; } while (fw_priv->nr_pages < pages_needed) { fw_priv->pages[fw_priv->nr_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); if (!fw_priv->pages[fw_priv->nr_pages]) return -ENOMEM; fw_priv->nr_pages++; } return 0; } int fw_map_paged_buf(struct fw_priv *fw_priv) { /* one pages buffer should be mapped/unmapped only once */ if (!fw_priv->pages) return 0; vunmap(fw_priv->data); fw_priv->data = vmap(fw_priv->pages, fw_priv->nr_pages, 0, PAGE_KERNEL_RO); if (!fw_priv->data) return -ENOMEM; return 0; } #endif /* * ZSTD-compressed firmware support */ #ifdef CONFIG_FW_LOADER_COMPRESS_ZSTD static int fw_decompress_zstd(struct device *dev, struct fw_priv *fw_priv, size_t in_size, const void *in_buffer) { size_t len, out_size, workspace_size; void *workspace, *out_buf; zstd_dctx *ctx; int err; if (fw_priv->allocated_size) { out_size = fw_priv->allocated_size; out_buf = fw_priv->data; } else { zstd_frame_header params; if (zstd_get_frame_header(&params, in_buffer, in_size) || params.frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN) { dev_dbg(dev, "%s: invalid zstd header\n", __func__); return -EINVAL; } out_size = params.frameContentSize; out_buf = vzalloc(out_size); if (!out_buf) return -ENOMEM; } workspace_size = zstd_dctx_workspace_bound(); workspace = kvzalloc(workspace_size, GFP_KERNEL); if (!workspace) { err = -ENOMEM; goto error; } ctx = zstd_init_dctx(workspace, workspace_size); if (!ctx) { dev_dbg(dev, "%s: failed to initialize context\n", __func__); err = -EINVAL; goto error; } len = zstd_decompress_dctx(ctx, out_buf, out_size, in_buffer, in_size); if (zstd_is_error(len)) { dev_dbg(dev, "%s: failed to decompress: %d\n", __func__, zstd_get_error_code(len)); err = -EINVAL; goto error; } if (!fw_priv->allocated_size) fw_priv->data = out_buf; fw_priv->size = len; err = 0; error: kvfree(workspace); if (err && !fw_priv->allocated_size) vfree(out_buf); return err; } #endif /* CONFIG_FW_LOADER_COMPRESS_ZSTD */ /* * XZ-compressed firmware support */ #ifdef CONFIG_FW_LOADER_COMPRESS_XZ /* show an error and return the standard error code */ static int fw_decompress_xz_error(struct device *dev, enum xz_ret xz_ret) { if (xz_ret != XZ_STREAM_END) { dev_warn(dev, "xz decompression failed (xz_ret=%d)\n", xz_ret); return xz_ret == XZ_MEM_ERROR ? -ENOMEM : -EINVAL; } return 0; } /* single-shot decompression onto the pre-allocated buffer */ static int fw_decompress_xz_single(struct device *dev, struct fw_priv *fw_priv, size_t in_size, const void *in_buffer) { struct xz_dec *xz_dec; struct xz_buf xz_buf; enum xz_ret xz_ret; xz_dec = xz_dec_init(XZ_SINGLE, (u32)-1); if (!xz_dec) return -ENOMEM; xz_buf.in_size = in_size; xz_buf.in = in_buffer; xz_buf.in_pos = 0; xz_buf.out_size = fw_priv->allocated_size; xz_buf.out = fw_priv->data; xz_buf.out_pos = 0; xz_ret = xz_dec_run(xz_dec, &xz_buf); xz_dec_end(xz_dec); fw_priv->size = xz_buf.out_pos; return fw_decompress_xz_error(dev, xz_ret); } /* decompression on paged buffer and map it */ static int fw_decompress_xz_pages(struct device *dev, struct fw_priv *fw_priv, size_t in_size, const void *in_buffer) { struct xz_dec *xz_dec; struct xz_buf xz_buf; enum xz_ret xz_ret; struct page *page; int err = 0; xz_dec = xz_dec_init(XZ_DYNALLOC, (u32)-1); if (!xz_dec) return -ENOMEM; xz_buf.in_size = in_size; xz_buf.in = in_buffer; xz_buf.in_pos = 0; fw_priv->is_paged_buf = true; fw_priv->size = 0; do { if (fw_grow_paged_buf(fw_priv, fw_priv->nr_pages + 1)) { err = -ENOMEM; goto out; } /* decompress onto the new allocated page */ page = fw_priv->pages[fw_priv->nr_pages - 1]; xz_buf.out = kmap_local_page(page); xz_buf.out_pos = 0; xz_buf.out_size = PAGE_SIZE; xz_ret = xz_dec_run(xz_dec, &xz_buf); kunmap_local(xz_buf.out); fw_priv->size += xz_buf.out_pos; /* partial decompression means either end or error */ if (xz_buf.out_pos != PAGE_SIZE) break; } while (xz_ret == XZ_OK); err = fw_decompress_xz_error(dev, xz_ret); if (!err) err = fw_map_paged_buf(fw_priv); out: xz_dec_end(xz_dec); return err; } static int fw_decompress_xz(struct device *dev, struct fw_priv *fw_priv, size_t in_size, const void *in_buffer) { /* if the buffer is pre-allocated, we can perform in single-shot mode */ if (fw_priv->data) return fw_decompress_xz_single(dev, fw_priv, in_size, in_buffer); else return fw_decompress_xz_pages(dev, fw_priv, in_size, in_buffer); } #endif /* CONFIG_FW_LOADER_COMPRESS_XZ */ /* direct firmware loading support */ static char fw_path_para[256]; static const char * const fw_path[] = { fw_path_para, "/lib/firmware/updates/" UTS_RELEASE, "/lib/firmware/updates", "/lib/firmware/" UTS_RELEASE, "/lib/firmware" }; /* * Typical usage is that passing 'firmware_class.path=$CUSTOMIZED_PATH' * from kernel command line because firmware_class is generally built in * kernel instead of module. */ module_param_string(path, fw_path_para, sizeof(fw_path_para), 0644); MODULE_PARM_DESC(path, "customized firmware image search path with a higher priority than default path"); static int fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv, const char *suffix, int (*decompress)(struct device *dev, struct fw_priv *fw_priv, size_t in_size, const void *in_buffer)) { size_t size; int i, len, maxlen = 0; int rc = -ENOENT; char *path, *nt = NULL; size_t msize = INT_MAX; void *buffer = NULL; /* Already populated data member means we're loading into a buffer */ if (!decompress && fw_priv->data) { buffer = fw_priv->data; msize = fw_priv->allocated_size; } path = __getname(); if (!path) return -ENOMEM; wait_for_initramfs(); for (i = 0; i < ARRAY_SIZE(fw_path); i++) { size_t file_size = 0; size_t *file_size_ptr = NULL; /* skip the unset customized path */ if (!fw_path[i][0]) continue; /* strip off \n from customized path */ maxlen = strlen(fw_path[i]); if (i == 0) { nt = strchr(fw_path[i], '\n'); if (nt) maxlen = nt - fw_path[i]; } len = snprintf(path, PATH_MAX, "%.*s/%s%s", maxlen, fw_path[i], fw_priv->fw_name, suffix); if (len >= PATH_MAX) { rc = -ENAMETOOLONG; break; } fw_priv->size = 0; /* * The total file size is only examined when doing a partial * read; the "full read" case needs to fail if the whole * firmware was not completely loaded. */ if ((fw_priv->opt_flags & FW_OPT_PARTIAL) && buffer) file_size_ptr = &file_size; /* load firmware files from the mount namespace of init */ rc = kernel_read_file_from_path_initns(path, fw_priv->offset, &buffer, msize, file_size_ptr, READING_FIRMWARE); if (rc < 0) { if (!(fw_priv->opt_flags & FW_OPT_NO_WARN)) { if (rc != -ENOENT) dev_warn(device, "loading %s failed with error %d\n", path, rc); else dev_dbg(device, "loading %s failed for no such file or directory.\n", path); } continue; } size = rc; rc = 0; dev_dbg(device, "Loading firmware from %s\n", path); if (decompress) { dev_dbg(device, "f/w decompressing %s\n", fw_priv->fw_name); rc = decompress(device, fw_priv, size, buffer); /* discard the superfluous original content */ vfree(buffer); buffer = NULL; if (rc) { fw_free_paged_buf(fw_priv); continue; } } else { dev_dbg(device, "direct-loading %s\n", fw_priv->fw_name); if (!fw_priv->data) fw_priv->data = buffer; fw_priv->size = size; } fw_state_done(fw_priv); break; } __putname(path); return rc; } /* firmware holds the ownership of pages */ static void firmware_free_data(const struct firmware *fw) { /* Loaded directly? */ if (!fw->priv) { vfree(fw->data); return; } free_fw_priv(fw->priv); } /* store the pages buffer info firmware from buf */ static void fw_set_page_data(struct fw_priv *fw_priv, struct firmware *fw) { fw->priv = fw_priv; fw->size = fw_priv->size; fw->data = fw_priv->data; pr_debug("%s: fw-%s fw_priv=%p data=%p size=%u\n", __func__, fw_priv->fw_name, fw_priv, fw_priv->data, (unsigned int)fw_priv->size); } #ifdef CONFIG_FW_CACHE static void fw_name_devm_release(struct device *dev, void *res) { struct fw_name_devm *fwn = res; if (fwn->magic == (unsigned long)&fw_cache) pr_debug("%s: fw_name-%s devm-%p released\n", __func__, fwn->name, res); kfree_const(fwn->name); } static int fw_devm_match(struct device *dev, void *res, void *match_data) { struct fw_name_devm *fwn = res; return (fwn->magic == (unsigned long)&fw_cache) && !strcmp(fwn->name, match_data); } static struct fw_name_devm *fw_find_devm_name(struct device *dev, const char *name) { struct fw_name_devm *fwn; fwn = devres_find(dev, fw_name_devm_release, fw_devm_match, (void *)name); return fwn; } static bool fw_cache_is_setup(struct device *dev, const char *name) { struct fw_name_devm *fwn; fwn = fw_find_devm_name(dev, name); if (fwn) return true; return false; } /* add firmware name into devres list */ static int fw_add_devm_name(struct device *dev, const char *name) { struct fw_name_devm *fwn; if (fw_cache_is_setup(dev, name)) return 0; fwn = devres_alloc(fw_name_devm_release, sizeof(struct fw_name_devm), GFP_KERNEL); if (!fwn) return -ENOMEM; fwn->name = kstrdup_const(name, GFP_KERNEL); if (!fwn->name) { devres_free(fwn); return -ENOMEM; } fwn->magic = (unsigned long)&fw_cache; devres_add(dev, fwn); return 0; } #else static bool fw_cache_is_setup(struct device *dev, const char *name) { return false; } static int fw_add_devm_name(struct device *dev, const char *name) { return 0; } #endif int assign_fw(struct firmware *fw, struct device *device) { struct fw_priv *fw_priv = fw->priv; int ret; mutex_lock(&fw_lock); if (!fw_priv->size || fw_state_is_aborted(fw_priv)) { mutex_unlock(&fw_lock); return -ENOENT; } /* * add firmware name into devres list so that we can auto cache * and uncache firmware for device. * * device may has been deleted already, but the problem * should be fixed in devres or driver core. */ /* don't cache firmware handled without uevent */ if (device && (fw_priv->opt_flags & FW_OPT_UEVENT) && !(fw_priv->opt_flags & FW_OPT_NOCACHE)) { ret = fw_add_devm_name(device, fw_priv->fw_name); if (ret) { mutex_unlock(&fw_lock); return ret; } } /* * After caching firmware image is started, let it piggyback * on request firmware. */ if (!(fw_priv->opt_flags & FW_OPT_NOCACHE) && fw_priv->fwc->state == FW_LOADER_START_CACHE) fw_cache_piggyback_on_request(fw_priv); /* pass the pages buffer to driver at the last minute */ fw_set_page_data(fw_priv, fw); mutex_unlock(&fw_lock); return 0; } /* prepare firmware and firmware_buf structs; * return 0 if a firmware is already assigned, 1 if need to load one, * or a negative error code */ static int _request_firmware_prepare(struct firmware **firmware_p, const char *name, struct device *device, void *dbuf, size_t size, size_t offset, u32 opt_flags) { struct firmware *firmware; struct fw_priv *fw_priv; int ret; *firmware_p = firmware = kzalloc(sizeof(*firmware), GFP_KERNEL); if (!firmware) { dev_err(device, "%s: kmalloc(struct firmware) failed\n", __func__); return -ENOMEM; } if (firmware_request_builtin_buf(firmware, name, dbuf, size)) { dev_dbg(device, "using built-in %s\n", name); return 0; /* assigned */ } ret = alloc_lookup_fw_priv(name, &fw_cache, &fw_priv, dbuf, size, offset, opt_flags); /* * bind with 'priv' now to avoid warning in failure path * of requesting firmware. */ firmware->priv = fw_priv; if (ret > 0) { ret = fw_state_wait(fw_priv); if (!ret) { fw_set_page_data(fw_priv, firmware); return 0; /* assigned */ } } if (ret < 0) return ret; return 1; /* need to load */ } /* * Batched requests need only one wake, we need to do this step last due to the * fallback mechanism. The buf is protected with kref_get(), and it won't be * released until the last user calls release_firmware(). * * Failed batched requests are possible as well, in such cases we just share * the struct fw_priv and won't release it until all requests are woken * and have gone through this same path. */ static void fw_abort_batch_reqs(struct firmware *fw) { struct fw_priv *fw_priv; /* Loaded directly? */ if (!fw || !fw->priv) return; fw_priv = fw->priv; mutex_lock(&fw_lock); if (!fw_state_is_aborted(fw_priv)) fw_state_aborted(fw_priv); mutex_unlock(&fw_lock); } #if defined(CONFIG_FW_LOADER_DEBUG) #include <crypto/hash.h> #include <crypto/sha2.h> static void fw_log_firmware_info(const struct firmware *fw, const char *name, struct device *device) { struct shash_desc *shash; struct crypto_shash *alg; u8 *sha256buf; char *outbuf; alg = crypto_alloc_shash("sha256", 0, 0); if (IS_ERR(alg)) return; sha256buf = kmalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); outbuf = kmalloc(SHA256_BLOCK_SIZE + 1, GFP_KERNEL); shash = kmalloc(sizeof(*shash) + crypto_shash_descsize(alg), GFP_KERNEL); if (!sha256buf || !outbuf || !shash) goto out_free; shash->tfm = alg; if (crypto_shash_digest(shash, fw->data, fw->size, sha256buf) < 0) goto out_free; for (int i = 0; i < SHA256_DIGEST_SIZE; i++) sprintf(&outbuf[i * 2], "%02x", sha256buf[i]); outbuf[SHA256_BLOCK_SIZE] = 0; dev_dbg(device, "Loaded FW: %s, sha256: %s\n", name, outbuf); out_free: kfree(shash); kfree(outbuf); kfree(sha256buf); crypto_free_shash(alg); } #else static void fw_log_firmware_info(const struct firmware *fw, const char *name, struct device *device) {} #endif /* * Reject firmware file names with ".." path components. * There are drivers that construct firmware file names from device-supplied * strings, and we don't want some device to be able to tell us "I would like to * be sent my firmware from ../../../etc/shadow, please". * * Search for ".." surrounded by either '/' or start/end of string. * * This intentionally only looks at the firmware name, not at the firmware base * directory or at symlink contents. */ static bool name_contains_dotdot(const char *name) { size_t name_len = strlen(name); return strcmp(name, "..") == 0 || strncmp(name, "../", 3) == 0 || strstr(name, "/../") != NULL || (name_len >= 3 && strcmp(name+name_len-3, "/..") == 0); } /* called from request_firmware() and request_firmware_work_func() */ static int _request_firmware(const struct firmware **firmware_p, const char *name, struct device *device, void *buf, size_t size, size_t offset, u32 opt_flags) { struct firmware *fw = NULL; struct cred *kern_cred = NULL; const struct cred *old_cred; bool nondirect = false; int ret; if (!firmware_p) return -EINVAL; if (!name || name[0] == '\0') { ret = -EINVAL; goto out; } if (name_contains_dotdot(name)) { dev_warn(device, "Firmware load for '%s' refused, path contains '..' component\n", name); ret = -EINVAL; goto out; } ret = _request_firmware_prepare(&fw, name, device, buf, size, offset, opt_flags); if (ret <= 0) /* error or already assigned */ goto out; /* * We are about to try to access the firmware file. Because we may have been * called by a driver when serving an unrelated request from userland, we use * the kernel credentials to read the file. */ kern_cred = prepare_kernel_cred(&init_task); if (!kern_cred) { ret = -ENOMEM; goto out; } old_cred = override_creds(kern_cred); ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); /* Only full reads can support decompression, platform, and sysfs. */ if (!(opt_flags & FW_OPT_PARTIAL)) nondirect = true; #ifdef CONFIG_FW_LOADER_COMPRESS_ZSTD if (ret == -ENOENT && nondirect) ret = fw_get_filesystem_firmware(device, fw->priv, ".zst", fw_decompress_zstd); #endif #ifdef CONFIG_FW_LOADER_COMPRESS_XZ if (ret == -ENOENT && nondirect) ret = fw_get_filesystem_firmware(device, fw->priv, ".xz", fw_decompress_xz); #endif if (ret == -ENOENT && nondirect) ret = firmware_fallback_platform(fw->priv); if (ret) { if (!(opt_flags & FW_OPT_NO_WARN)) dev_warn(device, "Direct firmware load for %s failed with error %d\n", name, ret); if (nondirect) ret = firmware_fallback_sysfs(fw, name, device, opt_flags, ret); } else ret = assign_fw(fw, device); revert_creds(old_cred); put_cred(kern_cred); out: if (ret < 0) { fw_abort_batch_reqs(fw); release_firmware(fw); fw = NULL; } else { fw_log_firmware_info(fw, name, device); } *firmware_p = fw; return ret; } /** * request_firmware() - send firmware request and wait for it * @firmware_p: pointer to firmware image * @name: name of firmware file * @device: device for which firmware is being loaded * * @firmware_p will be used to return a firmware image by the name * of @name for device @device. * * Should be called from user context where sleeping is allowed. * * @name will be used as $FIRMWARE in the uevent environment and * should be distinctive enough not to be confused with any other * firmware image for this or any other device. * It must not contain any ".." path components - "foo/bar..bin" is * allowed, but "foo/../bar.bin" is not. * * Caller must hold the reference count of @device. * * The function can be called safely inside device's suspend and * resume callback. **/ int request_firmware(const struct firmware **firmware_p, const char *name, struct device *device) { int ret; /* Need to pin this module until return */ __module_get(THIS_MODULE); ret = _request_firmware(firmware_p, name, device, NULL, 0, 0, FW_OPT_UEVENT); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL(request_firmware); /** * firmware_request_nowarn() - request for an optional fw module * @firmware: pointer to firmware image * @name: name of firmware file * @device: device for which firmware is being loaded * * This function is similar in behaviour to request_firmware(), except it * doesn't produce warning messages when the file is not found. The sysfs * fallback mechanism is enabled if direct filesystem lookup fails. However, * failures to find the firmware file with it are still suppressed. It is * therefore up to the driver to check for the return value of this call and to * decide when to inform the users of errors. **/ int firmware_request_nowarn(const struct firmware **firmware, const char *name, struct device *device) { int ret; /* Need to pin this module until return */ __module_get(THIS_MODULE); ret = _request_firmware(firmware, name, device, NULL, 0, 0, FW_OPT_UEVENT | FW_OPT_NO_WARN); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL_GPL(firmware_request_nowarn); /** * request_firmware_direct() - load firmware directly without usermode helper * @firmware_p: pointer to firmware image * @name: name of firmware file * @device: device for which firmware is being loaded * * This function works pretty much like request_firmware(), but this doesn't * fall back to usermode helper even if the firmware couldn't be loaded * directly from fs. Hence it's useful for loading optional firmwares, which * aren't always present, without extra long timeouts of udev. **/ int request_firmware_direct(const struct firmware **firmware_p, const char *name, struct device *device) { int ret; __module_get(THIS_MODULE); ret = _request_firmware(firmware_p, name, device, NULL, 0, 0, FW_OPT_UEVENT | FW_OPT_NO_WARN | FW_OPT_NOFALLBACK_SYSFS); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL_GPL(request_firmware_direct); /** * firmware_request_platform() - request firmware with platform-fw fallback * @firmware: pointer to firmware image * @name: name of firmware file * @device: device for which firmware is being loaded * * This function is similar in behaviour to request_firmware, except that if * direct filesystem lookup fails, it will fallback to looking for a copy of the * requested firmware embedded in the platform's main (e.g. UEFI) firmware. **/ int firmware_request_platform(const struct firmware **firmware, const char *name, struct device *device) { int ret; /* Need to pin this module until return */ __module_get(THIS_MODULE); ret = _request_firmware(firmware, name, device, NULL, 0, 0, FW_OPT_UEVENT | FW_OPT_FALLBACK_PLATFORM); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL_GPL(firmware_request_platform); /** * firmware_request_cache() - cache firmware for suspend so resume can use it * @device: device for which firmware should be cached for * @name: name of firmware file * * There are some devices with an optimization that enables the device to not * require loading firmware on system reboot. This optimization may still * require the firmware present on resume from suspend. This routine can be * used to ensure the firmware is present on resume from suspend in these * situations. This helper is not compatible with drivers which use * request_firmware_into_buf() or request_firmware_nowait() with no uevent set. **/ int firmware_request_cache(struct device *device, const char *name) { int ret; mutex_lock(&fw_lock); ret = fw_add_devm_name(device, name); mutex_unlock(&fw_lock); return ret; } EXPORT_SYMBOL_GPL(firmware_request_cache); /** * request_firmware_into_buf() - load firmware into a previously allocated buffer * @firmware_p: pointer to firmware image * @name: name of firmware file * @device: device for which firmware is being loaded and DMA region allocated * @buf: address of buffer to load firmware into * @size: size of buffer * * This function works pretty much like request_firmware(), but it doesn't * allocate a buffer to hold the firmware data. Instead, the firmware * is loaded directly into the buffer pointed to by @buf and the @firmware_p * data member is pointed at @buf. * * This function doesn't cache firmware either. */ int request_firmware_into_buf(const struct firmware **firmware_p, const char *name, struct device *device, void *buf, size_t size) { int ret; if (fw_cache_is_setup(device, name)) return -EOPNOTSUPP; __module_get(THIS_MODULE); ret = _request_firmware(firmware_p, name, device, buf, size, 0, FW_OPT_UEVENT | FW_OPT_NOCACHE); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL(request_firmware_into_buf); /** * request_partial_firmware_into_buf() - load partial firmware into a previously allocated buffer * @firmware_p: pointer to firmware image * @name: name of firmware file * @device: device for which firmware is being loaded and DMA region allocated * @buf: address of buffer to load firmware into * @size: size of buffer * @offset: offset into file to read * * This function works pretty much like request_firmware_into_buf except * it allows a partial read of the file. */ int request_partial_firmware_into_buf(const struct firmware **firmware_p, const char *name, struct device *device, void *buf, size_t size, size_t offset) { int ret; if (fw_cache_is_setup(device, name)) return -EOPNOTSUPP; __module_get(THIS_MODULE); ret = _request_firmware(firmware_p, name, device, buf, size, offset, FW_OPT_UEVENT | FW_OPT_NOCACHE | FW_OPT_PARTIAL); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL(request_partial_firmware_into_buf); /** * release_firmware() - release the resource associated with a firmware image * @fw: firmware resource to release **/ void release_firmware(const struct firmware *fw) { if (fw) { if (!firmware_is_builtin(fw)) firmware_free_data(fw); kfree(fw); } } EXPORT_SYMBOL(release_firmware); /* Async support */ struct firmware_work { struct work_struct work; struct module *module; const char *name; struct device *device; void *context; void (*cont)(const struct firmware *fw, void *context); u32 opt_flags; }; static void request_firmware_work_func(struct work_struct *work) { struct firmware_work *fw_work; const struct firmware *fw; fw_work = container_of(work, struct firmware_work, work); _request_firmware(&fw, fw_work->name, fw_work->device, NULL, 0, 0, fw_work->opt_flags); fw_work->cont(fw, fw_work->context); put_device(fw_work->device); /* taken in request_firmware_nowait() */ module_put(fw_work->module); kfree_const(fw_work->name); kfree(fw_work); } static int _request_firmware_nowait( struct module *module, bool uevent, const char *name, struct device *device, gfp_t gfp, void *context, void (*cont)(const struct firmware *fw, void *context), bool nowarn) { struct firmware_work *fw_work; fw_work = kzalloc(sizeof(struct firmware_work), gfp); if (!fw_work) return -ENOMEM; fw_work->module = module; fw_work->name = kstrdup_const(name, gfp); if (!fw_work->name) { kfree(fw_work); return -ENOMEM; } fw_work->device = device; fw_work->context = context; fw_work->cont = cont; fw_work->opt_flags = FW_OPT_NOWAIT | (uevent ? FW_OPT_UEVENT : FW_OPT_USERHELPER) | (nowarn ? FW_OPT_NO_WARN : 0); if (!uevent && fw_cache_is_setup(device, name)) { kfree_const(fw_work->name); kfree(fw_work); return -EOPNOTSUPP; } if (!try_module_get(module)) { kfree_const(fw_work->name); kfree(fw_work); return -EFAULT; } get_device(fw_work->device); INIT_WORK(&fw_work->work, request_firmware_work_func); schedule_work(&fw_work->work); return 0; } /** * request_firmware_nowait() - asynchronous version of request_firmware * @module: module requesting the firmware * @uevent: sends uevent to copy the firmware image if this flag * is non-zero else the firmware copy must be done manually. * @name: name of firmware file * @device: device for which firmware is being loaded * @gfp: allocation flags * @context: will be passed over to @cont, and * @fw may be %NULL if firmware request fails. * @cont: function will be called asynchronously when the firmware * request is over. * * Caller must hold the reference count of @device. * * Asynchronous variant of request_firmware() for user contexts: * - sleep for as small periods as possible since it may * increase kernel boot time of built-in device drivers * requesting firmware in their ->probe() methods, if * @gfp is GFP_KERNEL. * * - can't sleep at all if @gfp is GFP_ATOMIC. **/ int request_firmware_nowait( struct module *module, bool uevent, const char *name, struct device *device, gfp_t gfp, void *context, void (*cont)(const struct firmware *fw, void *context)) { return _request_firmware_nowait(module, uevent, name, device, gfp, context, cont, false); } EXPORT_SYMBOL(request_firmware_nowait); /** * firmware_request_nowait_nowarn() - async version of request_firmware_nowarn * @module: module requesting the firmware * @name: name of firmware file * @device: device for which firmware is being loaded * @gfp: allocation flags * @context: will be passed over to @cont, and * @fw may be %NULL if firmware request fails. * @cont: function will be called asynchronously when the firmware * request is over. * * Similar in function to request_firmware_nowait(), but doesn't print a warning * when the firmware file could not be found and always sends a uevent to copy * the firmware image. */ int firmware_request_nowait_nowarn( struct module *module, const char *name, struct device *device, gfp_t gfp, void *context, void (*cont)(const struct firmware *fw, void *context)) { return _request_firmware_nowait(module, FW_ACTION_UEVENT, name, device, gfp, context, cont, true); } EXPORT_SYMBOL_GPL(firmware_request_nowait_nowarn); #ifdef CONFIG_FW_CACHE static ASYNC_DOMAIN_EXCLUSIVE(fw_cache_domain); /** * cache_firmware() - cache one firmware image in kernel memory space * @fw_name: the firmware image name * * Cache firmware in kernel memory so that drivers can use it when * system isn't ready for them to request firmware image from userspace. * Once it returns successfully, driver can use request_firmware or its * nowait version to get the cached firmware without any interacting * with userspace * * Return 0 if the firmware image has been cached successfully * Return !0 otherwise * */ static int cache_firmware(const char *fw_name) { int ret; const struct firmware *fw; pr_debug("%s: %s\n", __func__, fw_name); ret = request_firmware(&fw, fw_name, NULL); if (!ret) kfree(fw); pr_debug("%s: %s ret=%d\n", __func__, fw_name, ret); return ret; } static struct fw_priv *lookup_fw_priv(const char *fw_name) { struct fw_priv *tmp; struct firmware_cache *fwc = &fw_cache; spin_lock(&fwc->lock); tmp = __lookup_fw_priv(fw_name); spin_unlock(&fwc->lock); return tmp; } /** * uncache_firmware() - remove one cached firmware image * @fw_name: the firmware image name * * Uncache one firmware image which has been cached successfully * before. * * Return 0 if the firmware cache has been removed successfully * Return !0 otherwise * */ static int uncache_firmware(const char *fw_name) { struct fw_priv *fw_priv; struct firmware fw; pr_debug("%s: %s\n", __func__, fw_name); if (firmware_request_builtin(&fw, fw_name)) return 0; fw_priv = lookup_fw_priv(fw_name); if (fw_priv) { free_fw_priv(fw_priv); return 0; } return -EINVAL; } static struct fw_cache_entry *alloc_fw_cache_entry(const char *name) { struct fw_cache_entry *fce; fce = kzalloc(sizeof(*fce), GFP_ATOMIC); if (!fce) goto exit; fce->name = kstrdup_const(name, GFP_ATOMIC); if (!fce->name) { kfree(fce); fce = NULL; goto exit; } exit: return fce; } static int __fw_entry_found(const char *name) { struct firmware_cache *fwc = &fw_cache; struct fw_cache_entry *fce; list_for_each_entry(fce, &fwc->fw_names, list) { if (!strcmp(fce->name, name)) return 1; } return 0; } static void fw_cache_piggyback_on_request(struct fw_priv *fw_priv) { const char *name = fw_priv->fw_name; struct firmware_cache *fwc = fw_priv->fwc; struct fw_cache_entry *fce; spin_lock(&fwc->name_lock); if (__fw_entry_found(name)) goto found; fce = alloc_fw_cache_entry(name); if (fce) { list_add(&fce->list, &fwc->fw_names); kref_get(&fw_priv->ref); pr_debug("%s: fw: %s\n", __func__, name); } found: spin_unlock(&fwc->name_lock); } static void free_fw_cache_entry(struct fw_cache_entry *fce) { kfree_const(fce->name); kfree(fce); } static void __async_dev_cache_fw_image(void *fw_entry, async_cookie_t cookie) { struct fw_cache_entry *fce = fw_entry; struct firmware_cache *fwc = &fw_cache; int ret; ret = cache_firmware(fce->name); if (ret) { spin_lock(&fwc->name_lock); list_del(&fce->list); spin_unlock(&fwc->name_lock); free_fw_cache_entry(fce); } } /* called with dev->devres_lock held */ static void dev_create_fw_entry(struct device *dev, void *res, void *data) { struct fw_name_devm *fwn = res; const char *fw_name = fwn->name; struct list_head *head = data; struct fw_cache_entry *fce; fce = alloc_fw_cache_entry(fw_name); if (fce) list_add(&fce->list, head); } static int devm_name_match(struct device *dev, void *res, void *match_data) { struct fw_name_devm *fwn = res; return (fwn->magic == (unsigned long)match_data); } static void dev_cache_fw_image(struct device *dev, void *data) { LIST_HEAD(todo); struct fw_cache_entry *fce; struct fw_cache_entry *fce_next; struct firmware_cache *fwc = &fw_cache; devres_for_each_res(dev, fw_name_devm_release, devm_name_match, &fw_cache, dev_create_fw_entry, &todo); list_for_each_entry_safe(fce, fce_next, &todo, list) { list_del(&fce->list); spin_lock(&fwc->name_lock); /* only one cache entry for one firmware */ if (!__fw_entry_found(fce->name)) { list_add(&fce->list, &fwc->fw_names); } else { free_fw_cache_entry(fce); fce = NULL; } spin_unlock(&fwc->name_lock); if (fce) async_schedule_domain(__async_dev_cache_fw_image, (void *)fce, &fw_cache_domain); } } static void __device_uncache_fw_images(void) { struct firmware_cache *fwc = &fw_cache; struct fw_cache_entry *fce; spin_lock(&fwc->name_lock); while (!list_empty(&fwc->fw_names)) { fce = list_entry(fwc->fw_names.next, struct fw_cache_entry, list); list_del(&fce->list); spin_unlock(&fwc->name_lock); uncache_firmware(fce->name); free_fw_cache_entry(fce); spin_lock(&fwc->name_lock); } spin_unlock(&fwc->name_lock); } /** * device_cache_fw_images() - cache devices' firmware * * If one device called request_firmware or its nowait version * successfully before, the firmware names are recored into the * device's devres link list, so device_cache_fw_images can call * cache_firmware() to cache these firmwares for the device, * then the device driver can load its firmwares easily at * time when system is not ready to complete loading firmware. */ static void device_cache_fw_images(void) { struct firmware_cache *fwc = &fw_cache; DEFINE_WAIT(wait); pr_debug("%s\n", __func__); /* cancel uncache work */ cancel_delayed_work_sync(&fwc->work); fw_fallback_set_cache_timeout(); mutex_lock(&fw_lock); fwc->state = FW_LOADER_START_CACHE; dpm_for_each_dev(NULL, dev_cache_fw_image); mutex_unlock(&fw_lock); /* wait for completion of caching firmware for all devices */ async_synchronize_full_domain(&fw_cache_domain); fw_fallback_set_default_timeout(); } /** * device_uncache_fw_images() - uncache devices' firmware * * uncache all firmwares which have been cached successfully * by device_uncache_fw_images earlier */ static void device_uncache_fw_images(void) { pr_debug("%s\n", __func__); __device_uncache_fw_images(); } static void device_uncache_fw_images_work(struct work_struct *work) { device_uncache_fw_images(); } /** * device_uncache_fw_images_delay() - uncache devices firmwares * @delay: number of milliseconds to delay uncache device firmwares * * uncache all devices's firmwares which has been cached successfully * by device_cache_fw_images after @delay milliseconds. */ static void device_uncache_fw_images_delay(unsigned long delay) { queue_delayed_work(system_power_efficient_wq, &fw_cache.work, msecs_to_jiffies(delay)); } static int fw_pm_notify(struct notifier_block *notify_block, unsigned long mode, void *unused) { switch (mode) { case PM_HIBERNATION_PREPARE: case PM_SUSPEND_PREPARE: case PM_RESTORE_PREPARE: /* * Here, kill pending fallback requests will only kill * non-uevent firmware request to avoid stalling suspend. */ kill_pending_fw_fallback_reqs(false); device_cache_fw_images(); break; case PM_POST_SUSPEND: case PM_POST_HIBERNATION: case PM_POST_RESTORE: /* * In case that system sleep failed and syscore_suspend is * not called. */ mutex_lock(&fw_lock); fw_cache.state = FW_LOADER_NO_CACHE; mutex_unlock(&fw_lock); device_uncache_fw_images_delay(10 * MSEC_PER_SEC); break; } return 0; } /* stop caching firmware once syscore_suspend is reached */ static int fw_suspend(void) { fw_cache.state = FW_LOADER_NO_CACHE; return 0; } static struct syscore_ops fw_syscore_ops = { .suspend = fw_suspend, }; static int __init register_fw_pm_ops(void) { int ret; spin_lock_init(&fw_cache.name_lock); INIT_LIST_HEAD(&fw_cache.fw_names); INIT_DELAYED_WORK(&fw_cache.work, device_uncache_fw_images_work); fw_cache.pm_notify.notifier_call = fw_pm_notify; ret = register_pm_notifier(&fw_cache.pm_notify); if (ret) return ret; register_syscore_ops(&fw_syscore_ops); return ret; } static inline void unregister_fw_pm_ops(void) { unregister_syscore_ops(&fw_syscore_ops); unregister_pm_notifier(&fw_cache.pm_notify); } #else static void fw_cache_piggyback_on_request(struct fw_priv *fw_priv) { } static inline int register_fw_pm_ops(void) { return 0; } static inline void unregister_fw_pm_ops(void) { } #endif static void __init fw_cache_init(void) { spin_lock_init(&fw_cache.lock); INIT_LIST_HEAD(&fw_cache.head); fw_cache.state = FW_LOADER_NO_CACHE; } static int fw_shutdown_notify(struct notifier_block *unused1, unsigned long unused2, void *unused3) { /* * Kill all pending fallback requests to avoid both stalling shutdown, * and avoid a deadlock with the usermode_lock. */ kill_pending_fw_fallback_reqs(true); return NOTIFY_DONE; } static struct notifier_block fw_shutdown_nb = { .notifier_call = fw_shutdown_notify, }; static int __init firmware_class_init(void) { int ret; /* No need to unfold these on exit */ fw_cache_init(); ret = register_fw_pm_ops(); if (ret) return ret; ret = register_reboot_notifier(&fw_shutdown_nb); if (ret) goto out; return register_sysfs_loader(); out: unregister_fw_pm_ops(); return ret; } static void __exit firmware_class_exit(void) { unregister_fw_pm_ops(); unregister_reboot_notifier(&fw_shutdown_nb); unregister_sysfs_loader(); } fs_initcall(firmware_class_init); module_exit(firmware_class_exit);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 /* SPDX-License-Identifier: GPL-2.0+ */ /* * Driver for 8250/16550-type serial ports * * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. * * Copyright (C) 2001 Russell King. */ #include <linux/bits.h> #include <linux/serial_8250.h> #include <linux/serial_core.h> #include <linux/dmaengine.h> #include "../serial_mctrl_gpio.h" struct uart_8250_dma { int (*tx_dma)(struct uart_8250_port *p); int (*rx_dma)(struct uart_8250_port *p); void (*prepare_tx_dma)(struct uart_8250_port *p); void (*prepare_rx_dma)(struct uart_8250_port *p); /* Filter function */ dma_filter_fn fn; /* Parameter to the filter function */ void *rx_param; void *tx_param; struct dma_slave_config rxconf; struct dma_slave_config txconf; struct dma_chan *rxchan; struct dma_chan *txchan; /* Device address base for DMA operations */ phys_addr_t rx_dma_addr; phys_addr_t tx_dma_addr; /* DMA address of the buffer in memory */ dma_addr_t rx_addr; dma_addr_t tx_addr; dma_cookie_t rx_cookie; dma_cookie_t tx_cookie; void *rx_buf; size_t rx_size; size_t tx_size; unsigned char tx_running; unsigned char tx_err; unsigned char rx_running; }; struct old_serial_port { unsigned int uart; unsigned int baud_base; unsigned int port; unsigned int irq; upf_t flags; unsigned char io_type; unsigned char __iomem *iomem_base; unsigned short iomem_reg_shift; }; struct serial8250_config { const char *name; unsigned short fifo_size; unsigned short tx_loadsz; unsigned char fcr; unsigned char rxtrig_bytes[UART_FCR_R_TRIG_MAX_STATE]; unsigned int flags; }; #define UART_CAP_FIFO BIT(8) /* UART has FIFO */ #define UART_CAP_EFR BIT(9) /* UART has EFR */ #define UART_CAP_SLEEP BIT(10) /* UART has IER sleep */ #define UART_CAP_AFE BIT(11) /* MCR-based hw flow control */ #define UART_CAP_UUE BIT(12) /* UART needs IER bit 6 set (Xscale) */ #define UART_CAP_RTOIE BIT(13) /* UART needs IER bit 4 set (Xscale, Tegra) */ #define UART_CAP_HFIFO BIT(14) /* UART has a "hidden" FIFO */ #define UART_CAP_RPM BIT(15) /* Runtime PM is active while idle */ #define UART_CAP_IRDA BIT(16) /* UART supports IrDA line discipline */ #define UART_CAP_MINI BIT(17) /* Mini UART on BCM283X family lacks: * STOP PARITY EPAR SPAR WLEN5 WLEN6 */ #define UART_CAP_NOTEMT BIT(18) /* UART without interrupt on TEMT available */ #define UART_BUG_QUOT BIT(0) /* UART has buggy quot LSB */ #define UART_BUG_TXEN BIT(1) /* UART has buggy TX IIR status */ #define UART_BUG_NOMSR BIT(2) /* UART has buggy MSR status bits (Au1x00) */ #define UART_BUG_THRE BIT(3) /* UART has buggy THRE reassertion */ #define UART_BUG_TXRACE BIT(5) /* UART Tx fails to set remote DR */ /* Module parameters */ #define UART_NR CONFIG_SERIAL_8250_NR_UARTS extern unsigned int nr_uarts; #ifdef CONFIG_SERIAL_8250_SHARE_IRQ #define SERIAL8250_SHARE_IRQS 1 #else #define SERIAL8250_SHARE_IRQS 0 #endif extern unsigned int share_irqs; extern unsigned int skip_txen_test; #define SERIAL8250_PORT_FLAGS(_base, _irq, _flags) \ { \ .iobase = _base, \ .irq = _irq, \ .uartclk = 1843200, \ .iotype = UPIO_PORT, \ .flags = UPF_BOOT_AUTOCONF | (_flags), \ } #define SERIAL8250_PORT(_base, _irq) SERIAL8250_PORT_FLAGS(_base, _irq, 0) extern struct uart_driver serial8250_reg; void serial8250_register_ports(struct uart_driver *drv, struct device *dev); /* Legacy ISA bus related APIs */ typedef void (*serial8250_isa_config_fn)(int, struct uart_port *, u32 *); extern serial8250_isa_config_fn serial8250_isa_config; void serial8250_isa_init_ports(void); extern struct platform_device *serial8250_isa_devs; extern const struct uart_ops *univ8250_port_base_ops; extern struct uart_ops univ8250_port_ops; static inline int serial_in(struct uart_8250_port *up, int offset) { return up->port.serial_in(&up->port, offset); } static inline void serial_out(struct uart_8250_port *up, int offset, int value) { up->port.serial_out(&up->port, offset, value); } /** * serial_lsr_in - Read LSR register and preserve flags across reads * @up: uart 8250 port * * Read LSR register and handle saving non-preserved flags across reads. * The flags that are not preserved across reads are stored into * up->lsr_saved_flags. * * Returns LSR value or'ed with the preserved flags (if any). */ static inline u16 serial_lsr_in(struct uart_8250_port *up) { u16 lsr = up->lsr_saved_flags; lsr |= serial_in(up, UART_LSR); up->lsr_saved_flags = lsr & up->lsr_save_mask; return lsr; } /* * For the 16C950 */ static void serial_icr_write(struct uart_8250_port *up, int offset, int value) { serial_out(up, UART_SCR, offset); serial_out(up, UART_ICR, value); } static unsigned int __maybe_unused serial_icr_read(struct uart_8250_port *up, int offset) { unsigned int value; serial_icr_write(up, UART_ACR, up->acr | UART_ACR_ICRRD); serial_out(up, UART_SCR, offset); value = serial_in(up, UART_ICR); serial_icr_write(up, UART_ACR, up->acr); return value; } void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p); static inline u32 serial_dl_read(struct uart_8250_port *up) { return up->dl_read(up); } static inline void serial_dl_write(struct uart_8250_port *up, u32 value) { up->dl_write(up, value); } static inline bool serial8250_set_THRI(struct uart_8250_port *up) { /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&up->port.lock); if (up->ier & UART_IER_THRI) return false; up->ier |= UART_IER_THRI; serial_out(up, UART_IER, up->ier); return true; } static inline bool serial8250_clear_THRI(struct uart_8250_port *up) { /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&up->port.lock); if (!(up->ier & UART_IER_THRI)) return false; up->ier &= ~UART_IER_THRI; serial_out(up, UART_IER, up->ier); return true; } struct uart_8250_port *serial8250_setup_port(int index); struct uart_8250_port *serial8250_get_port(int line); void serial8250_rpm_get(struct uart_8250_port *p); void serial8250_rpm_put(struct uart_8250_port *p); void serial8250_rpm_get_tx(struct uart_8250_port *p); void serial8250_rpm_put_tx(struct uart_8250_port *p); int serial8250_em485_config(struct uart_port *port, struct ktermios *termios, struct serial_rs485 *rs485); void serial8250_em485_start_tx(struct uart_8250_port *p, bool toggle_ier); void serial8250_em485_stop_tx(struct uart_8250_port *p, bool toggle_ier); void serial8250_em485_destroy(struct uart_8250_port *p); extern struct serial_rs485 serial8250_em485_supported; /* MCR <-> TIOCM conversion */ static inline int serial8250_TIOCM_to_MCR(int tiocm) { int mcr = 0; if (tiocm & TIOCM_RTS) mcr |= UART_MCR_RTS; if (tiocm & TIOCM_DTR) mcr |= UART_MCR_DTR; if (tiocm & TIOCM_OUT1) mcr |= UART_MCR_OUT1; if (tiocm & TIOCM_OUT2) mcr |= UART_MCR_OUT2; if (tiocm & TIOCM_LOOP) mcr |= UART_MCR_LOOP; return mcr; } static inline int serial8250_MCR_to_TIOCM(int mcr) { int tiocm = 0; if (mcr & UART_MCR_RTS) tiocm |= TIOCM_RTS; if (mcr & UART_MCR_DTR) tiocm |= TIOCM_DTR; if (mcr & UART_MCR_OUT1) tiocm |= TIOCM_OUT1; if (mcr & UART_MCR_OUT2) tiocm |= TIOCM_OUT2; if (mcr & UART_MCR_LOOP) tiocm |= TIOCM_LOOP; return tiocm; } /* MSR <-> TIOCM conversion */ static inline int serial8250_MSR_to_TIOCM(int msr) { int tiocm = 0; if (msr & UART_MSR_DCD) tiocm |= TIOCM_CAR; if (msr & UART_MSR_RI) tiocm |= TIOCM_RNG; if (msr & UART_MSR_DSR) tiocm |= TIOCM_DSR; if (msr & UART_MSR_CTS) tiocm |= TIOCM_CTS; return tiocm; } static inline void serial8250_out_MCR(struct uart_8250_port *up, int value) { serial_out(up, UART_MCR, value); if (up->gpios) mctrl_gpio_set(up->gpios, serial8250_MCR_to_TIOCM(value)); } static inline int serial8250_in_MCR(struct uart_8250_port *up) { int mctrl; mctrl = serial_in(up, UART_MCR); if (up->gpios) { unsigned int mctrl_gpio = 0; mctrl_gpio = mctrl_gpio_get_outputs(up->gpios, &mctrl_gpio); mctrl |= serial8250_TIOCM_to_MCR(mctrl_gpio); } return mctrl; } #ifdef CONFIG_SERIAL_8250_PNP int serial8250_pnp_init(void); void serial8250_pnp_exit(void); #else static inline int serial8250_pnp_init(void) { return 0; } static inline void serial8250_pnp_exit(void) { } #endif #ifdef CONFIG_SERIAL_8250_RSA void univ8250_rsa_support(struct uart_ops *ops); #else static inline void univ8250_rsa_support(struct uart_ops *ops) { } #endif #ifdef CONFIG_SERIAL_8250_FINTEK int fintek_8250_probe(struct uart_8250_port *uart); #else static inline int fintek_8250_probe(struct uart_8250_port *uart) { return 0; } #endif #ifdef CONFIG_ARCH_OMAP1 #include <linux/soc/ti/omap1-soc.h> static inline int is_omap1_8250(struct uart_8250_port *pt) { int res; switch (pt->port.mapbase) { case OMAP1_UART1_BASE: case OMAP1_UART2_BASE: case OMAP1_UART3_BASE: res = 1; break; default: res = 0; break; } return res; } static inline int is_omap1510_8250(struct uart_8250_port *pt) { if (!cpu_is_omap1510()) return 0; return is_omap1_8250(pt); } #else static inline int is_omap1_8250(struct uart_8250_port *pt) { return 0; } static inline int is_omap1510_8250(struct uart_8250_port *pt) { return 0; } #endif #ifdef CONFIG_SERIAL_8250_DMA extern int serial8250_tx_dma(struct uart_8250_port *); extern void serial8250_tx_dma_flush(struct uart_8250_port *); extern int serial8250_rx_dma(struct uart_8250_port *); extern void serial8250_rx_dma_flush(struct uart_8250_port *); extern int serial8250_request_dma(struct uart_8250_port *); extern void serial8250_release_dma(struct uart_8250_port *); static inline void serial8250_do_prepare_tx_dma(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; if (dma->prepare_tx_dma) dma->prepare_tx_dma(p); } static inline void serial8250_do_prepare_rx_dma(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; if (dma->prepare_rx_dma) dma->prepare_rx_dma(p); } static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; return dma && dma->tx_running; } #else static inline int serial8250_tx_dma(struct uart_8250_port *p) { return -1; } static inline void serial8250_tx_dma_flush(struct uart_8250_port *p) { } static inline int serial8250_rx_dma(struct uart_8250_port *p) { return -1; } static inline void serial8250_rx_dma_flush(struct uart_8250_port *p) { } static inline int serial8250_request_dma(struct uart_8250_port *p) { return -1; } static inline void serial8250_release_dma(struct uart_8250_port *p) { } static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) { return false; } #endif static inline int ns16550a_goto_highspeed(struct uart_8250_port *up) { unsigned char status; status = serial_in(up, 0x04); /* EXCR2 */ #define PRESL(x) ((x) & 0x30) if (PRESL(status) == 0x10) { /* already in high speed mode */ return 0; } else { status &= ~0xB0; /* Disable LOCK, mask out PRESL[01] */ status |= 0x10; /* 1.625 divisor for baud_base --> 921600 */ serial_out(up, 0x04, status); } return 1; } static inline int serial_index(struct uart_port *port) { return port->minor - 64; }
4 4 4 4 3 3 1 1 1 1 4 4 1 1 4 4 3 4 2 1 4 1 4 4 1 4 2 3 3 1 2 1 3 2 2 1 1 1 1 1 1 1 1 3 1 1 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 // SPDX-License-Identifier: GPL-2.0-only /* * TCP NV: TCP with Congestion Avoidance * * TCP-NV is a successor of TCP-Vegas that has been developed to * deal with the issues that occur in modern networks. * Like TCP-Vegas, TCP-NV supports true congestion avoidance, * the ability to detect congestion before packet losses occur. * When congestion (queue buildup) starts to occur, TCP-NV * predicts what the cwnd size should be for the current * throughput and it reduces the cwnd proportionally to * the difference between the current cwnd and the predicted cwnd. * * NV is only recommeneded for traffic within a data center, and when * all the flows are NV (at least those within the data center). This * is due to the inherent unfairness between flows using losses to * detect congestion (congestion control) and those that use queue * buildup to detect congestion (congestion avoidance). * * Note: High NIC coalescence values may lower the performance of NV * due to the increased noise in RTT values. In particular, we have * seen issues with rx-frames values greater than 8. * * TODO: * 1) Add mechanism to deal with reverse congestion. */ #include <linux/module.h> #include <linux/math64.h> #include <net/tcp.h> #include <linux/inet_diag.h> /* TCP NV parameters * * nv_pad Max number of queued packets allowed in network * nv_pad_buffer Do not grow cwnd if this closed to nv_pad * nv_reset_period How often (in) seconds)to reset min_rtt * nv_min_cwnd Don't decrease cwnd below this if there are no losses * nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected * nv_ssthresh_factor On congestion set ssthresh to this * <desired cwnd> / 8 * nv_rtt_factor RTT averaging factor * nv_loss_dec_factor Decrease cwnd to this (80%) when losses occur * nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd * nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd * nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping * slow-start due to congestion * nv_stop_rtt_cnt Only grow cwnd for this many RTTs after non-congestion * nv_rtt_min_cnt Wait these many RTTs before making congesion decision * nv_cwnd_growth_rate_neg * nv_cwnd_growth_rate_pos * How quickly to double growth rate (not rate) of cwnd when not * congested. One value (nv_cwnd_growth_rate_neg) for when * rate < 1 pkt/RTT (after losses). The other (nv_cwnd_growth_rate_pos) * otherwise. */ static int nv_pad __read_mostly = 10; static int nv_pad_buffer __read_mostly = 2; static int nv_reset_period __read_mostly = 5; /* in seconds */ static int nv_min_cwnd __read_mostly = 2; static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */ static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */ static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */ static int nv_loss_dec_factor __read_mostly = 819; /* => 80% */ static int nv_cwnd_growth_rate_neg __read_mostly = 8; static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */ static int nv_dec_eval_min_calls __read_mostly = 60; static int nv_inc_eval_min_calls __read_mostly = 20; static int nv_ssthresh_eval_min_calls __read_mostly = 30; static int nv_stop_rtt_cnt __read_mostly = 10; static int nv_rtt_min_cnt __read_mostly = 2; module_param(nv_pad, int, 0644); MODULE_PARM_DESC(nv_pad, "max queued packets allowed in network"); module_param(nv_reset_period, int, 0644); MODULE_PARM_DESC(nv_reset_period, "nv_min_rtt reset period (secs)"); module_param(nv_min_cwnd, int, 0644); MODULE_PARM_DESC(nv_min_cwnd, "NV will not decrease cwnd below this value" " without losses"); /* TCP NV Parameters */ struct tcpnv { unsigned long nv_min_rtt_reset_jiffies; /* when to switch to * nv_min_rtt_new */ s8 cwnd_growth_factor; /* Current cwnd growth factor, * < 0 => less than 1 packet/RTT */ u8 available8; u16 available16; u8 nv_allow_cwnd_growth:1, /* whether cwnd can grow */ nv_reset:1, /* whether to reset values */ nv_catchup:1; /* whether we are growing because * of temporary cwnd decrease */ u8 nv_eval_call_cnt; /* call count since last eval */ u8 nv_min_cwnd; /* nv won't make a ca decision if cwnd is * smaller than this. It may grow to handle * TSO, LRO and interrupt coalescence because * with these a small cwnd cannot saturate * the link. Note that this is different from * the file local nv_min_cwnd */ u8 nv_rtt_cnt; /* RTTs without making ca decision */; u32 nv_last_rtt; /* last rtt */ u32 nv_min_rtt; /* active min rtt. Used to determine slope */ u32 nv_min_rtt_new; /* min rtt for future use */ u32 nv_base_rtt; /* If non-zero it represents the threshold for * congestion */ u32 nv_lower_bound_rtt; /* Used in conjunction with nv_base_rtt. It is * set to 80% of nv_base_rtt. It helps reduce * unfairness between flows */ u32 nv_rtt_max_rate; /* max rate seen during current RTT */ u32 nv_rtt_start_seq; /* current RTT ends when packet arrives * acking beyond nv_rtt_start_seq */ u32 nv_last_snd_una; /* Previous value of tp->snd_una. It is * used to determine bytes acked since last * call to bictcp_acked */ u32 nv_no_cong_cnt; /* Consecutive no congestion decisions */ }; #define NV_INIT_RTT U32_MAX #define NV_MIN_CWND 4 #define NV_MIN_CWND_GROW 2 #define NV_TSO_CWND_BOUND 80 static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); ca->nv_reset = 0; ca->nv_no_cong_cnt = 0; ca->nv_rtt_cnt = 0; ca->nv_last_rtt = 0; ca->nv_rtt_max_rate = 0; ca->nv_rtt_start_seq = tp->snd_una; ca->nv_eval_call_cnt = 0; ca->nv_last_snd_una = tp->snd_una; } static void tcpnv_init(struct sock *sk) { struct tcpnv *ca = inet_csk_ca(sk); int base_rtt; tcpnv_reset(ca, sk); /* See if base_rtt is available from socket_ops bpf program. * It is meant to be used in environments, such as communication * within a datacenter, where we have reasonable estimates of * RTTs */ base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT, 0, NULL); if (base_rtt > 0) { ca->nv_base_rtt = base_rtt; ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */ } else { ca->nv_base_rtt = 0; ca->nv_lower_bound_rtt = 0; } ca->nv_allow_cwnd_growth = 1; ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ; ca->nv_min_rtt = NV_INIT_RTT; ca->nv_min_rtt_new = NV_INIT_RTT; ca->nv_min_cwnd = NV_MIN_CWND; ca->nv_catchup = 0; ca->cwnd_growth_factor = 0; } /* If provided, apply upper (base_rtt) and lower (lower_bound_rtt) * bounds to RTT. */ inline u32 nv_get_bounded_rtt(struct tcpnv *ca, u32 val) { if (ca->nv_lower_bound_rtt > 0 && val < ca->nv_lower_bound_rtt) return ca->nv_lower_bound_rtt; else if (ca->nv_base_rtt > 0 && val > ca->nv_base_rtt) return ca->nv_base_rtt; else return val; } static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct tcpnv *ca = inet_csk_ca(sk); u32 cnt; if (!tcp_is_cwnd_limited(sk)) return; /* Only grow cwnd if NV has not detected congestion */ if (!ca->nv_allow_cwnd_growth) return; if (tcp_in_slow_start(tp)) { acked = tcp_slow_start(tp, acked); if (!acked) return; } if (ca->cwnd_growth_factor < 0) { cnt = tcp_snd_cwnd(tp) << -ca->cwnd_growth_factor; tcp_cong_avoid_ai(tp, cnt, acked); } else { cnt = max(4U, tcp_snd_cwnd(tp) >> ca->cwnd_growth_factor); tcp_cong_avoid_ai(tp, cnt, acked); } } static u32 tcpnv_recalc_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); return max((tcp_snd_cwnd(tp) * nv_loss_dec_factor) >> 10, 2U); } static void tcpnv_state(struct sock *sk, u8 new_state) { struct tcpnv *ca = inet_csk_ca(sk); if (new_state == TCP_CA_Open && ca->nv_reset) { tcpnv_reset(ca, sk); } else if (new_state == TCP_CA_Loss || new_state == TCP_CA_CWR || new_state == TCP_CA_Recovery) { ca->nv_reset = 1; ca->nv_allow_cwnd_growth = 0; if (new_state == TCP_CA_Loss) { /* Reset cwnd growth factor to Reno value */ if (ca->cwnd_growth_factor > 0) ca->cwnd_growth_factor = 0; /* Decrease growth rate if allowed */ if (nv_cwnd_growth_rate_neg > 0 && ca->cwnd_growth_factor > -8) ca->cwnd_growth_factor--; } } } /* Do congestion avoidance calculations for TCP-NV */ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcpnv *ca = inet_csk_ca(sk); unsigned long now = jiffies; u64 rate64; u32 rate, max_win, cwnd_by_slope; u32 avg_rtt; u32 bytes_acked = 0; /* Some calls are for duplicates without timetamps */ if (sample->rtt_us < 0) return; /* If not in TCP_CA_Open or TCP_CA_Disorder states, skip. */ if (icsk->icsk_ca_state != TCP_CA_Open && icsk->icsk_ca_state != TCP_CA_Disorder) return; /* Stop cwnd growth if we were in catch up mode */ if (ca->nv_catchup && tcp_snd_cwnd(tp) >= nv_min_cwnd) { ca->nv_catchup = 0; ca->nv_allow_cwnd_growth = 0; } bytes_acked = tp->snd_una - ca->nv_last_snd_una; ca->nv_last_snd_una = tp->snd_una; if (sample->in_flight == 0) return; /* Calculate moving average of RTT */ if (nv_rtt_factor > 0) { if (ca->nv_last_rtt > 0) { avg_rtt = (((u64)sample->rtt_us) * nv_rtt_factor + ((u64)ca->nv_last_rtt) * (256 - nv_rtt_factor)) >> 8; } else { avg_rtt = sample->rtt_us; ca->nv_min_rtt = avg_rtt << 1; } ca->nv_last_rtt = avg_rtt; } else { avg_rtt = sample->rtt_us; } /* rate in 100's bits per second */ rate64 = ((u64)sample->in_flight) * 80000; do_div(rate64, avg_rtt ?: 1); rate = (u32)rate64; /* Remember the maximum rate seen during this RTT * Note: It may be more than one RTT. This function should be * called at least nv_dec_eval_min_calls times. */ if (ca->nv_rtt_max_rate < rate) ca->nv_rtt_max_rate = rate; /* We have valid information, increment counter */ if (ca->nv_eval_call_cnt < 255) ca->nv_eval_call_cnt++; /* Apply bounds to rtt. Only used to update min_rtt */ avg_rtt = nv_get_bounded_rtt(ca, avg_rtt); /* update min rtt if necessary */ if (avg_rtt < ca->nv_min_rtt) ca->nv_min_rtt = avg_rtt; /* update future min_rtt if necessary */ if (avg_rtt < ca->nv_min_rtt_new) ca->nv_min_rtt_new = avg_rtt; /* nv_min_rtt is updated with the minimum (possibley averaged) rtt * seen in the last sysctl_tcp_nv_reset_period seconds (i.e. a * warm reset). This new nv_min_rtt will be continued to be updated * and be used for another sysctl_tcp_nv_reset_period seconds, * when it will be updated again. * In practice we introduce some randomness, so the actual period used * is chosen randomly from the range: * [sysctl_tcp_nv_reset_period*3/4, sysctl_tcp_nv_reset_period*5/4) */ if (time_after_eq(now, ca->nv_min_rtt_reset_jiffies)) { unsigned char rand; ca->nv_min_rtt = ca->nv_min_rtt_new; ca->nv_min_rtt_new = NV_INIT_RTT; get_random_bytes(&rand, 1); ca->nv_min_rtt_reset_jiffies = now + ((nv_reset_period * (384 + rand) * HZ) >> 9); /* Every so often we decrease ca->nv_min_cwnd in case previous * value is no longer accurate. */ ca->nv_min_cwnd = max(ca->nv_min_cwnd / 2, NV_MIN_CWND); } /* Once per RTT check if we need to do congestion avoidance */ if (before(ca->nv_rtt_start_seq, tp->snd_una)) { ca->nv_rtt_start_seq = tp->snd_nxt; if (ca->nv_rtt_cnt < 0xff) /* Increase counter for RTTs without CA decision */ ca->nv_rtt_cnt++; /* If this function is only called once within an RTT * the cwnd is probably too small (in some cases due to * tso, lro or interrupt coalescence), so we increase * ca->nv_min_cwnd. */ if (ca->nv_eval_call_cnt == 1 && bytes_acked >= (ca->nv_min_cwnd - 1) * tp->mss_cache && ca->nv_min_cwnd < (NV_TSO_CWND_BOUND + 1)) { ca->nv_min_cwnd = min(ca->nv_min_cwnd + NV_MIN_CWND_GROW, NV_TSO_CWND_BOUND + 1); ca->nv_rtt_start_seq = tp->snd_nxt + ca->nv_min_cwnd * tp->mss_cache; ca->nv_eval_call_cnt = 0; ca->nv_allow_cwnd_growth = 1; return; } /* Find the ideal cwnd for current rate from slope * slope = 80000.0 * mss / nv_min_rtt * cwnd_by_slope = nv_rtt_max_rate / slope */ cwnd_by_slope = (u32) div64_u64(((u64)ca->nv_rtt_max_rate) * ca->nv_min_rtt, 80000ULL * tp->mss_cache); max_win = cwnd_by_slope + nv_pad; /* If cwnd > max_win, decrease cwnd * if cwnd < max_win, grow cwnd * else leave the same */ if (tcp_snd_cwnd(tp) > max_win) { /* there is congestion, check that it is ok * to make a CA decision * 1. We should have at least nv_dec_eval_min_calls * data points before making a CA decision * 2. We only make a congesion decision after * nv_rtt_min_cnt RTTs */ if (ca->nv_rtt_cnt < nv_rtt_min_cnt) { return; } else if (tp->snd_ssthresh == TCP_INFINITE_SSTHRESH) { if (ca->nv_eval_call_cnt < nv_ssthresh_eval_min_calls) return; /* otherwise we will decrease cwnd */ } else if (ca->nv_eval_call_cnt < nv_dec_eval_min_calls) { if (ca->nv_allow_cwnd_growth && ca->nv_rtt_cnt > nv_stop_rtt_cnt) ca->nv_allow_cwnd_growth = 0; return; } /* We have enough data to determine we are congested */ ca->nv_allow_cwnd_growth = 0; tp->snd_ssthresh = (nv_ssthresh_factor * max_win) >> 3; if (tcp_snd_cwnd(tp) - max_win > 2) { /* gap > 2, we do exponential cwnd decrease */ int dec; dec = max(2U, ((tcp_snd_cwnd(tp) - max_win) * nv_cong_dec_mult) >> 7); tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - dec); } else if (nv_cong_dec_mult > 0) { tcp_snd_cwnd_set(tp, max_win); } if (ca->cwnd_growth_factor > 0) ca->cwnd_growth_factor = 0; ca->nv_no_cong_cnt = 0; } else if (tcp_snd_cwnd(tp) <= max_win - nv_pad_buffer) { /* There is no congestion, grow cwnd if allowed*/ if (ca->nv_eval_call_cnt < nv_inc_eval_min_calls) return; ca->nv_allow_cwnd_growth = 1; ca->nv_no_cong_cnt++; if (ca->cwnd_growth_factor < 0 && nv_cwnd_growth_rate_neg > 0 && ca->nv_no_cong_cnt > nv_cwnd_growth_rate_neg) { ca->cwnd_growth_factor++; ca->nv_no_cong_cnt = 0; } else if (ca->cwnd_growth_factor >= 0 && nv_cwnd_growth_rate_pos > 0 && ca->nv_no_cong_cnt > nv_cwnd_growth_rate_pos) { ca->cwnd_growth_factor++; ca->nv_no_cong_cnt = 0; } } else { /* cwnd is in-between, so do nothing */ return; } /* update state */ ca->nv_eval_call_cnt = 0; ca->nv_rtt_cnt = 0; ca->nv_rtt_max_rate = 0; /* Don't want to make cwnd < nv_min_cwnd * (it wasn't before, if it is now is because nv * decreased it). */ if (tcp_snd_cwnd(tp) < nv_min_cwnd) tcp_snd_cwnd_set(tp, nv_min_cwnd); } } /* Extract info for Tcp socket info provided via netlink */ static size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { const struct tcpnv *ca = inet_csk_ca(sk); if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { info->vegas.tcpv_enabled = 1; info->vegas.tcpv_rttcnt = ca->nv_rtt_cnt; info->vegas.tcpv_rtt = ca->nv_last_rtt; info->vegas.tcpv_minrtt = ca->nv_min_rtt; *attr = INET_DIAG_VEGASINFO; return sizeof(struct tcpvegas_info); } return 0; } static struct tcp_congestion_ops tcpnv __read_mostly = { .init = tcpnv_init, .ssthresh = tcpnv_recalc_ssthresh, .cong_avoid = tcpnv_cong_avoid, .set_state = tcpnv_state, .undo_cwnd = tcp_reno_undo_cwnd, .pkts_acked = tcpnv_acked, .get_info = tcpnv_get_info, .owner = THIS_MODULE, .name = "nv", }; static int __init tcpnv_register(void) { BUILD_BUG_ON(sizeof(struct tcpnv) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcpnv); } static void __exit tcpnv_unregister(void) { tcp_unregister_congestion_control(&tcpnv); } module_init(tcpnv_register); module_exit(tcpnv_unregister); MODULE_AUTHOR("Lawrence Brakmo"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("TCP NV"); MODULE_VERSION("1.0");
26 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * NET An implementation of the SOCKET network access protocol. * This is the master header file for the Linux NET layer, * or, in plain English: the networking handling part of the * kernel. * * Version: @(#)net.h 1.0.3 05/25/93 * * Authors: Orest Zborowski, <obz@Kodak.COM> * Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> */ #ifndef _LINUX_NET_H #define _LINUX_NET_H #include <linux/stringify.h> #include <linux/random.h> #include <linux/wait.h> #include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */ #include <linux/rcupdate.h> #include <linux/once.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/sockptr.h> #include <uapi/linux/net.h> struct poll_table_struct; struct pipe_inode_info; struct inode; struct file; struct net; /* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected. * Eventually all flags will be in sk->sk_wq->flags. */ #define SOCKWQ_ASYNC_NOSPACE 0 #define SOCKWQ_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 #define SOCK_SUPPORT_ZC 5 #define SOCK_CUSTOM_SOCKOPT 6 #define SOCK_PASSPIDFD 7 #ifndef ARCH_HAS_SOCKET_TYPES /** * enum sock_type - Socket types * @SOCK_STREAM: stream (connection) socket * @SOCK_DGRAM: datagram (conn.less) socket * @SOCK_RAW: raw socket * @SOCK_RDM: reliably-delivered message * @SOCK_SEQPACKET: sequential packet socket * @SOCK_DCCP: Datagram Congestion Control Protocol socket * @SOCK_PACKET: linux specific way of getting packets at the dev level. * For writing rarp and other similar things on the user level. * * When adding some new socket type please * grep ARCH_HAS_SOCKET_TYPE include/asm-* /socket.h, at least MIPS * overrides this enum for binary compat reasons. */ enum sock_type { SOCK_STREAM = 1, SOCK_DGRAM = 2, SOCK_RAW = 3, SOCK_RDM = 4, SOCK_SEQPACKET = 5, SOCK_DCCP = 6, SOCK_PACKET = 10, }; #define SOCK_MAX (SOCK_PACKET + 1) /* Mask which covers at least up to SOCK_MASK-1. The * remaining bits are used as flags. */ #define SOCK_TYPE_MASK 0xf /* Flags for socket, socketpair, accept4 */ #define SOCK_CLOEXEC O_CLOEXEC #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif #endif /* ARCH_HAS_SOCKET_TYPES */ /** * enum sock_shutdown_cmd - Shutdown types * @SHUT_RD: shutdown receptions * @SHUT_WR: shutdown transmissions * @SHUT_RDWR: shutdown receptions/transmissions */ enum sock_shutdown_cmd { SHUT_RD, SHUT_WR, SHUT_RDWR, }; struct socket_wq { /* Note: wait MUST be first field of socket_wq */ wait_queue_head_t wait; struct fasync_struct *fasync_list; unsigned long flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */ struct rcu_head rcu; } ____cacheline_aligned_in_smp; /** * struct socket - general BSD socket * @state: socket state (%SS_CONNECTED, etc) * @type: socket type (%SOCK_STREAM, etc) * @flags: socket flags (%SOCK_NOSPACE, etc) * @ops: protocol specific socket operations * @file: File back pointer for gc * @sk: internal networking protocol agnostic socket representation * @wq: wait queue for several uses */ struct socket { socket_state state; short type; unsigned long flags; struct file *file; struct sock *sk; const struct proto_ops *ops; /* Might change with IPV6_ADDRFORM or MPTCP. */ struct socket_wq wq; }; /* * "descriptor" for what we're up to with a read. * This allows us to use the same read code yet * have multiple different users of the data that * we read from a file. * * The simplest case just copies the data to user * mode. */ typedef struct { size_t written; size_t count; union { char __user *buf; void *data; } arg; int error; } read_descriptor_t; struct vm_area_struct; struct page; struct sockaddr; struct msghdr; struct module; struct sk_buff; struct proto_accept_arg; typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, unsigned int, size_t); typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *); struct proto_ops { int family; struct module *owner; int (*release) (struct socket *sock); int (*bind) (struct socket *sock, struct sockaddr *myaddr, int sockaddr_len); int (*connect) (struct socket *sock, struct sockaddr *vaddr, int sockaddr_len, int flags); int (*socketpair)(struct socket *sock1, struct socket *sock2); int (*accept) (struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg); int (*getname) (struct socket *sock, struct sockaddr *addr, int peer); __poll_t (*poll) (struct file *file, struct socket *sock, struct poll_table_struct *wait); int (*ioctl) (struct socket *sock, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT int (*compat_ioctl) (struct socket *sock, unsigned int cmd, unsigned long arg); #endif int (*gettstamp) (struct socket *sock, void __user *userstamp, bool timeval, bool time32); int (*listen) (struct socket *sock, int len); int (*shutdown) (struct socket *sock, int flags); int (*setsockopt)(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen); int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); void (*show_fdinfo)(struct seq_file *m, struct socket *sock); int (*sendmsg) (struct socket *sock, struct msghdr *m, size_t total_len); /* Notes for implementing recvmsg: * =============================== * msg->msg_namelen should get updated by the recvmsg handlers * iff msg_name != NULL. It is by default 0 to prevent * returning uninitialized memory to user space. The recvfrom * handlers can assume that msg.msg_name is either NULL or has * a minimum size of sizeof(struct sockaddr_storage). */ int (*recvmsg) (struct socket *sock, struct msghdr *m, size_t total_len, int flags); int (*mmap) (struct file *file, struct socket *sock, struct vm_area_struct * vma); ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); void (*splice_eof)(struct socket *sock); int (*set_peek_off)(struct sock *sk, int val); int (*peek_len)(struct socket *sock); /* The following functions are called internally by kernel with * sock lock already held. */ int (*read_sock)(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); /* This is different from read_sock(), it reads an entire skb at a time. */ int (*read_skb)(struct sock *sk, skb_read_actor_t recv_actor); int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, size_t size); int (*set_rcvlowat)(struct sock *sk, int val); }; #define DECLARE_SOCKADDR(type, dst, src) \ type dst = ({ __sockaddr_check_size(sizeof(*dst)); (type) src; }) struct net_proto_family { int family; int (*create)(struct net *net, struct socket *sock, int protocol, int kern); struct module *owner; }; struct iovec; struct kvec; enum { SOCK_WAKE_IO, SOCK_WAKE_WAITD, SOCK_WAKE_SPACE, SOCK_WAKE_URG, }; int sock_wake_async(struct socket_wq *sk_wq, int how, int band); int sock_register(const struct net_proto_family *fam); void sock_unregister(int family); bool sock_is_registered(int family); int __sock_create(struct net *net, int family, int type, int proto, struct socket **res, int kern); int sock_create(int family, int type, int proto, struct socket **res); int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res); int sock_create_lite(int family, int type, int proto, struct socket **res); struct socket *sock_alloc(void); void sock_release(struct socket *sock); int sock_sendmsg(struct socket *sock, struct msghdr *msg); int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags); struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname); struct socket *sockfd_lookup(int fd, int *err); struct socket *sock_from_file(struct file *file); #define sockfd_put(sock) fput(sock->file) int net_ratelimit(void); #define net_ratelimited_function(function, ...) \ do { \ if (net_ratelimit()) \ function(__VA_ARGS__); \ } while (0) #define net_emerg_ratelimited(fmt, ...) \ net_ratelimited_function(pr_emerg, fmt, ##__VA_ARGS__) #define net_alert_ratelimited(fmt, ...) \ net_ratelimited_function(pr_alert, fmt, ##__VA_ARGS__) #define net_crit_ratelimited(fmt, ...) \ net_ratelimited_function(pr_crit, fmt, ##__VA_ARGS__) #define net_err_ratelimited(fmt, ...) \ net_ratelimited_function(pr_err, fmt, ##__VA_ARGS__) #define net_notice_ratelimited(fmt, ...) \ net_ratelimited_function(pr_notice, fmt, ##__VA_ARGS__) #define net_warn_ratelimited(fmt, ...) \ net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__) #define net_info_ratelimited(fmt, ...) \ net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__) #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) #define net_dbg_ratelimited(fmt, ...) \ do { \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ if (DYNAMIC_DEBUG_BRANCH(descriptor) && \ net_ratelimit()) \ __dynamic_pr_debug(&descriptor, pr_fmt(fmt), \ ##__VA_ARGS__); \ } while (0) #elif defined(DEBUG) #define net_dbg_ratelimited(fmt, ...) \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) #else #define net_dbg_ratelimited(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif #define net_get_random_once(buf, nbytes) \ get_random_once((buf), (nbytes)) /* * E.g. XFS meta- & log-data is in slab pages, or bcache meta * data pages, or other high order pages allocated by * __get_free_pages() without __GFP_COMP, which have a page_count * of 0 and/or have PageSlab() set. We cannot use send_page for * those, as that does get_page(); put_page(); and would cause * either a VM_BUG directly, or __page_cache_release a page that * would actually still be referenced by someone, leading to some * obscure delayed Oops somewhere else. */ static inline bool sendpage_ok(struct page *page) { return !PageSlab(page) && page_count(page) >= 1; } /* * Check sendpage_ok on contiguous pages. */ static inline bool sendpages_ok(struct page *page, size_t len, size_t offset) { struct page *p = page + (offset >> PAGE_SHIFT); size_t count = 0; while (count < len) { if (!sendpage_ok(p)) return false; p++; count += PAGE_SIZE; } return true; } int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len, int flags); int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen); int kernel_listen(struct socket *sock, int backlog); int kernel_accept(struct socket *sock, struct socket **newsock, int flags); int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags); int kernel_getsockname(struct socket *sock, struct sockaddr *addr); int kernel_getpeername(struct socket *sock, struct sockaddr *addr); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how); /* Routine returns the IP overhead imposed by a (caller-protected) socket. */ u32 kernel_sock_ip_overhead(struct sock *sk); #define MODULE_ALIAS_NETPROTO(proto) \ MODULE_ALIAS("net-pf-" __stringify(proto)) #define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \ MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto)) #define MODULE_ALIAS_NET_PF_PROTO_TYPE(pf, proto, type) \ MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \ "-type-" __stringify(type)) #define MODULE_ALIAS_NET_PF_PROTO_NAME(pf, proto, name) \ MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \ name) #endif /* _LINUX_NET_H */
1 1 3 2 1 1 3 1 2 4 4 4 3 3 4 3 1 3 1 1 3 1 1 1 2 1 1 1 3 1 1 1 6 6 2 3 1 4 3 1 1 3 3 1 1 1 4 1 1 1 1 4 2 1 1 5 5 2 2 1 1 1 1 3 1 1 1 1 3 1 1 1 1 5 2 3 3 3 1 2 2 2 2 1 1 3 3 5 2 1 2 4 2 1 1 1 1 1 1 1 1 2 1 1 2 1 1 404 70 338 338 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2011 Instituto Nokia de Tecnologia * * Authors: * Lauro Ramos Venancio <lauro.venancio@openbossa.org> * Aloisio Almeida Jr <aloisio.almeida@openbossa.org> * * Vendor commands implementation based on net/wireless/nl80211.c * which is: * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH */ #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ #include <net/genetlink.h> #include <linux/nfc.h> #include <linux/slab.h> #include "nfc.h" #include "llcp.h" static const struct genl_multicast_group nfc_genl_mcgrps[] = { { .name = NFC_GENL_MCAST_EVENT_NAME, }, }; static struct genl_family nfc_genl_family; static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_DEVICE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING, .len = NFC_DEVICE_NAME_MAXSIZE }, [NFC_ATTR_PROTOCOLS] = { .type = NLA_U32 }, [NFC_ATTR_TARGET_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_COMM_MODE] = { .type = NLA_U8 }, [NFC_ATTR_RF_MODE] = { .type = NLA_U8 }, [NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 }, [NFC_ATTR_IM_PROTOCOLS] = { .type = NLA_U32 }, [NFC_ATTR_TM_PROTOCOLS] = { .type = NLA_U32 }, [NFC_ATTR_LLC_PARAM_LTO] = { .type = NLA_U8 }, [NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 }, [NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 }, [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, [NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING, .len = NFC_FIRMWARE_NAME_MAXSIZE }, [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_SE_APDU] = { .type = NLA_BINARY }, [NFC_ATTR_VENDOR_ID] = { .type = NLA_U32 }, [NFC_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 }, [NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, }; static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = { [NFC_SDP_ATTR_URI] = { .type = NLA_STRING, .len = U8_MAX - 4 }, [NFC_SDP_ATTR_SAP] = { .type = NLA_U8 }, }; static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, struct netlink_callback *cb, int flags) { void *hdr; hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &nfc_genl_family, flags, NFC_CMD_GET_TARGET); if (!hdr) return -EMSGSIZE; genl_dump_check_consistent(cb, hdr); if (nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, target->supported_protocols) || nla_put_u16(msg, NFC_ATTR_TARGET_SENS_RES, target->sens_res) || nla_put_u8(msg, NFC_ATTR_TARGET_SEL_RES, target->sel_res)) goto nla_put_failure; if (target->nfcid1_len > 0 && nla_put(msg, NFC_ATTR_TARGET_NFCID1, target->nfcid1_len, target->nfcid1)) goto nla_put_failure; if (target->sensb_res_len > 0 && nla_put(msg, NFC_ATTR_TARGET_SENSB_RES, target->sensb_res_len, target->sensb_res)) goto nla_put_failure; if (target->sensf_res_len > 0 && nla_put(msg, NFC_ATTR_TARGET_SENSF_RES, target->sensf_res_len, target->sensf_res)) goto nla_put_failure; if (target->is_iso15693) { if (nla_put_u8(msg, NFC_ATTR_TARGET_ISO15693_DSFID, target->iso15693_dsfid) || nla_put(msg, NFC_ATTR_TARGET_ISO15693_UID, sizeof(target->iso15693_uid), target->iso15693_uid)) goto nla_put_failure; } if (target->ats_len > 0 && nla_put(msg, NFC_ATTR_TARGET_ATS, target->ats_len, target->ats)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb) { const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct nfc_dev *dev; u32 idx; if (!info->info.attrs[NFC_ATTR_DEVICE_INDEX]) return ERR_PTR(-EINVAL); idx = nla_get_u32(info->info.attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return ERR_PTR(-ENODEV); return dev; } static int nfc_genl_dump_targets(struct sk_buff *skb, struct netlink_callback *cb) { int i = cb->args[0]; struct nfc_dev *dev = (struct nfc_dev *) cb->args[1]; int rc; if (!dev) { dev = __get_device_from_cb(cb); if (IS_ERR(dev)) return PTR_ERR(dev); cb->args[1] = (long) dev; } device_lock(&dev->dev); cb->seq = dev->targets_generation; while (i < dev->n_targets) { rc = nfc_genl_send_target(skb, &dev->targets[i], cb, NLM_F_MULTI); if (rc < 0) break; i++; } device_unlock(&dev->dev); cb->args[0] = i; return skb->len; } static int nfc_genl_dump_targets_done(struct netlink_callback *cb) { struct nfc_dev *dev = (struct nfc_dev *) cb->args[1]; if (dev) nfc_put_device(dev); return 0; } int nfc_genl_targets_found(struct nfc_dev *dev) { struct sk_buff *msg; void *hdr; dev->genl_data.poll_req_portid = 0; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_TARGETS_FOUND); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; genlmsg_end(msg, hdr); return genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_target_lost(struct nfc_dev *dev, u32 target_idx) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_TARGET_LOST); if (!hdr) goto free_msg; if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target_idx)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_TM_ACTIVATED); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; if (nla_put_u32(msg, NFC_ATTR_TM_PROTOCOLS, protocol)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_tm_deactivated(struct nfc_dev *dev) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_TM_DEACTIVATED); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } static int nfc_genl_setup_device_added(struct nfc_dev *dev, struct sk_buff *msg) { if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) return -1; return 0; } int nfc_genl_device_added(struct nfc_dev *dev) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_DEVICE_ADDED); if (!hdr) goto free_msg; if (nfc_genl_setup_device_added(dev, msg)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_device_removed(struct nfc_dev *dev) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_DEVICE_REMOVED); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list) { struct sk_buff *msg; struct nlattr *sdp_attr, *uri_attr; struct nfc_llcp_sdp_tlv *sdres; struct hlist_node *n; void *hdr; int rc = -EMSGSIZE; int i; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_LLC_SDRES); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; sdp_attr = nla_nest_start_noflag(msg, NFC_ATTR_LLC_SDP); if (sdp_attr == NULL) { rc = -ENOMEM; goto nla_put_failure; } i = 1; hlist_for_each_entry_safe(sdres, n, sdres_list, node) { pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap); uri_attr = nla_nest_start_noflag(msg, i++); if (uri_attr == NULL) { rc = -ENOMEM; goto nla_put_failure; } if (nla_put_u8(msg, NFC_SDP_ATTR_SAP, sdres->sap)) goto nla_put_failure; if (nla_put_string(msg, NFC_SDP_ATTR_URI, sdres->uri)) goto nla_put_failure; nla_nest_end(msg, uri_attr); hlist_del(&sdres->node); nfc_llcp_free_sdp_tlv(sdres); } nla_nest_end(msg, sdp_attr); genlmsg_end(msg, hdr); return genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); nla_put_failure: free_msg: nlmsg_free(msg); nfc_llcp_free_sdp_tlv_list(sdres_list); return rc; } int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_SE_ADDED); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || nla_put_u8(msg, NFC_ATTR_SE_TYPE, type)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_SE_REMOVED); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, struct nfc_evt_transaction *evt_transaction) { struct nfc_se *se; struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_SE_TRANSACTION); if (!hdr) goto free_msg; se = nfc_find_se(dev, se_idx); if (!se) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type) || nla_put(msg, NFC_ATTR_SE_AID, evt_transaction->aid_len, evt_transaction->aid) || nla_put(msg, NFC_ATTR_SE_PARAMS, evt_transaction->params_len, evt_transaction->params)) goto nla_put_failure; /* evt_transaction is no more used */ devm_kfree(&dev->dev, evt_transaction); genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: /* evt_transaction is no more used */ devm_kfree(&dev->dev, evt_transaction); nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx) { const struct nfc_se *se; struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_SE_CONNECTIVITY); if (!hdr) goto free_msg; se = nfc_find_se(dev, se_idx); if (!se) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, int flags) { void *hdr; hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags, NFC_CMD_GET_DEVICE); if (!hdr) return -EMSGSIZE; if (cb) genl_dump_check_consistent(cb, hdr); if (nfc_genl_setup_device_added(dev, msg)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nfc_genl_dump_devices(struct sk_buff *skb, struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; struct nfc_dev *dev = (struct nfc_dev *) cb->args[1]; bool first_call = false; if (!iter) { first_call = true; iter = kmalloc(sizeof(struct class_dev_iter), GFP_KERNEL); if (!iter) return -ENOMEM; cb->args[0] = (long) iter; } mutex_lock(&nfc_devlist_mutex); cb->seq = nfc_devlist_generation; if (first_call) { nfc_device_iter_init(iter); dev = nfc_device_iter_next(iter); } while (dev) { int rc; rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb, NLM_F_MULTI); if (rc < 0) break; dev = nfc_device_iter_next(iter); } mutex_unlock(&nfc_devlist_mutex); cb->args[1] = (long) dev; return skb->len; } static int nfc_genl_dump_devices_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; if (iter) { nfc_device_iter_exit(iter); kfree(iter); } return 0; } int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode) { struct sk_buff *msg; void *hdr; pr_debug("DEP link is up\n"); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_CMD_DEP_LINK_UP); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; if (rf_mode == NFC_RF_INITIATOR && nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target_idx)) goto nla_put_failure; if (nla_put_u8(msg, NFC_ATTR_COMM_MODE, comm_mode) || nla_put_u8(msg, NFC_ATTR_RF_MODE, rf_mode)) goto nla_put_failure; genlmsg_end(msg, hdr); dev->dep_link_up = true; genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_dep_link_down_event(struct nfc_dev *dev) { struct sk_buff *msg; void *hdr; pr_debug("DEP link is down\n"); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_CMD_DEP_LINK_DOWN); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; struct nfc_dev *dev; u32 idx; int rc = -ENOBUFS; if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { rc = -ENOMEM; goto out_putdev; } rc = nfc_genl_send_device(msg, dev, info->snd_portid, info->snd_seq, NULL, 0); if (rc < 0) goto out_free; nfc_put_device(dev); return genlmsg_reply(msg, info); out_free: nlmsg_free(msg); out_putdev: nfc_put_device(dev); return rc; } static int nfc_genl_dev_up(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_dev_up(dev); nfc_put_device(dev); return rc; } static int nfc_genl_dev_down(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_dev_down(dev); nfc_put_device(dev); return rc; } static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; u32 im_protocols = 0, tm_protocols = 0; pr_debug("Poll start\n"); if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || ((!info->attrs[NFC_ATTR_IM_PROTOCOLS] && !info->attrs[NFC_ATTR_PROTOCOLS]) && !info->attrs[NFC_ATTR_TM_PROTOCOLS])) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); if (info->attrs[NFC_ATTR_TM_PROTOCOLS]) tm_protocols = nla_get_u32(info->attrs[NFC_ATTR_TM_PROTOCOLS]); if (info->attrs[NFC_ATTR_IM_PROTOCOLS]) im_protocols = nla_get_u32(info->attrs[NFC_ATTR_IM_PROTOCOLS]); else if (info->attrs[NFC_ATTR_PROTOCOLS]) im_protocols = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; mutex_lock(&dev->genl_data.genl_data_mutex); rc = nfc_start_poll(dev, im_protocols, tm_protocols); if (!rc) dev->genl_data.poll_req_portid = info->snd_portid; mutex_unlock(&dev->genl_data.genl_data_mutex); nfc_put_device(dev); return rc; } static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; device_lock(&dev->dev); if (!dev->polling) { device_unlock(&dev->dev); nfc_put_device(dev); return -EINVAL; } device_unlock(&dev->dev); mutex_lock(&dev->genl_data.genl_data_mutex); if (dev->genl_data.poll_req_portid != info->snd_portid) { rc = -EBUSY; goto out; } rc = nfc_stop_poll(dev); dev->genl_data.poll_req_portid = 0; out: mutex_unlock(&dev->genl_data.genl_data_mutex); nfc_put_device(dev); return rc; } static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; u32 device_idx, target_idx, protocol; int rc; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_TARGET_INDEX] || !info->attrs[NFC_ATTR_PROTOCOLS]) return -EINVAL; device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(device_idx); if (!dev) return -ENODEV; target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); protocol = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP); rc = nfc_activate_target(dev, target_idx, protocol); nfc_put_device(dev); return rc; } static int nfc_genl_deactivate_target(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; u32 device_idx, target_idx; int rc; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_TARGET_INDEX]) return -EINVAL; device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(device_idx); if (!dev) return -ENODEV; target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); rc = nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP); nfc_put_device(dev); return rc; } static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc, tgt_idx; u32 idx; u8 comm; pr_debug("DEP link up\n"); if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_COMM_MODE]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); if (!info->attrs[NFC_ATTR_TARGET_INDEX]) tgt_idx = NFC_TARGET_IDX_ANY; else tgt_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); comm = nla_get_u8(info->attrs[NFC_ATTR_COMM_MODE]); if (comm != NFC_COMM_ACTIVE && comm != NFC_COMM_PASSIVE) return -EINVAL; dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_dep_link_up(dev, tgt_idx, comm); nfc_put_device(dev); return rc; } static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_dep_link_down(dev); nfc_put_device(dev); return rc; } static int nfc_genl_send_params(struct sk_buff *msg, struct nfc_llcp_local *local, u32 portid, u32 seq) { void *hdr; hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, 0, NFC_CMD_LLC_GET_PARAMS); if (!hdr) return -EMSGSIZE; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, local->dev->idx) || nla_put_u8(msg, NFC_ATTR_LLC_PARAM_LTO, local->lto) || nla_put_u8(msg, NFC_ATTR_LLC_PARAM_RW, local->rw) || nla_put_u16(msg, NFC_ATTR_LLC_PARAM_MIUX, be16_to_cpu(local->miux))) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; struct nfc_llcp_local *local; int rc = 0; struct sk_buff *msg = NULL; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; device_lock(&dev->dev); local = nfc_llcp_find_local(dev); if (!local) { rc = -ENODEV; goto exit; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { rc = -ENOMEM; goto put_local; } rc = nfc_genl_send_params(msg, local, info->snd_portid, info->snd_seq); put_local: nfc_llcp_local_put(local); exit: device_unlock(&dev->dev); nfc_put_device(dev); if (rc < 0) { if (msg) nlmsg_free(msg); return rc; } return genlmsg_reply(msg, info); } static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; struct nfc_llcp_local *local; u8 rw = 0; u16 miux = 0; u32 idx; int rc = 0; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || (!info->attrs[NFC_ATTR_LLC_PARAM_LTO] && !info->attrs[NFC_ATTR_LLC_PARAM_RW] && !info->attrs[NFC_ATTR_LLC_PARAM_MIUX])) return -EINVAL; if (info->attrs[NFC_ATTR_LLC_PARAM_RW]) { rw = nla_get_u8(info->attrs[NFC_ATTR_LLC_PARAM_RW]); if (rw > LLCP_MAX_RW) return -EINVAL; } if (info->attrs[NFC_ATTR_LLC_PARAM_MIUX]) { miux = nla_get_u16(info->attrs[NFC_ATTR_LLC_PARAM_MIUX]); if (miux > LLCP_MAX_MIUX) return -EINVAL; } idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; device_lock(&dev->dev); local = nfc_llcp_find_local(dev); if (!local) { rc = -ENODEV; goto exit; } if (info->attrs[NFC_ATTR_LLC_PARAM_LTO]) { if (dev->dep_link_up) { rc = -EINPROGRESS; goto put_local; } local->lto = nla_get_u8(info->attrs[NFC_ATTR_LLC_PARAM_LTO]); } if (info->attrs[NFC_ATTR_LLC_PARAM_RW]) local->rw = rw; if (info->attrs[NFC_ATTR_LLC_PARAM_MIUX]) local->miux = cpu_to_be16(miux); put_local: nfc_llcp_local_put(local); exit: device_unlock(&dev->dev); nfc_put_device(dev); return rc; } static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; struct nfc_llcp_local *local; struct nlattr *attr, *sdp_attrs[NFC_SDP_ATTR_MAX+1]; u32 idx; u8 tid; char *uri; int rc = 0, rem; size_t uri_len, tlvs_len; struct hlist_head sdreq_list; struct nfc_llcp_sdp_tlv *sdreq; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_LLC_SDP]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; device_lock(&dev->dev); if (dev->dep_link_up == false) { rc = -ENOLINK; goto exit; } local = nfc_llcp_find_local(dev); if (!local) { rc = -ENODEV; goto exit; } INIT_HLIST_HEAD(&sdreq_list); tlvs_len = 0; nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) { rc = nla_parse_nested_deprecated(sdp_attrs, NFC_SDP_ATTR_MAX, attr, nfc_sdp_genl_policy, info->extack); if (rc != 0) { rc = -EINVAL; goto put_local; } if (!sdp_attrs[NFC_SDP_ATTR_URI]) continue; uri_len = nla_len(sdp_attrs[NFC_SDP_ATTR_URI]); if (uri_len == 0) continue; uri = nla_data(sdp_attrs[NFC_SDP_ATTR_URI]); if (uri == NULL || *uri == 0) continue; tid = local->sdreq_next_tid++; sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len); if (sdreq == NULL) { rc = -ENOMEM; goto put_local; } tlvs_len += sdreq->tlv_len; hlist_add_head(&sdreq->node, &sdreq_list); } if (hlist_empty(&sdreq_list)) { rc = -EINVAL; goto put_local; } rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len); put_local: nfc_llcp_local_put(local); exit: device_unlock(&dev->dev); nfc_put_device(dev); return rc; } static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; char firmware_name[NFC_FIRMWARE_NAME_MAXSIZE + 1]; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_FIRMWARE_NAME]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; nla_strscpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME], sizeof(firmware_name)); rc = nfc_fw_download(dev, firmware_name); nfc_put_device(dev); return rc; } int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, u32 result) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_CMD_FW_DOWNLOAD); if (!hdr) goto free_msg; if (nla_put_string(msg, NFC_ATTR_FIRMWARE_NAME, firmware_name) || nla_put_u32(msg, NFC_ATTR_FIRMWARE_DOWNLOAD_STATUS, result) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } static int nfc_genl_enable_se(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx, se_idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_SE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_enable_se(dev, se_idx); nfc_put_device(dev); return rc; } static int nfc_genl_disable_se(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx, se_idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_SE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_disable_se(dev, se_idx); nfc_put_device(dev); return rc; } static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, int flags) { void *hdr; struct nfc_se *se, *n; list_for_each_entry_safe(se, n, &dev->secure_elements, list) { hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags, NFC_CMD_GET_SE); if (!hdr) goto nla_put_failure; if (cb) genl_dump_check_consistent(cb, hdr); if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) || nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type)) goto nla_put_failure; genlmsg_end(msg, hdr); } return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nfc_genl_dump_ses(struct sk_buff *skb, struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; struct nfc_dev *dev = (struct nfc_dev *) cb->args[1]; bool first_call = false; if (!iter) { first_call = true; iter = kmalloc(sizeof(struct class_dev_iter), GFP_KERNEL); if (!iter) return -ENOMEM; cb->args[0] = (long) iter; } mutex_lock(&nfc_devlist_mutex); cb->seq = nfc_devlist_generation; if (first_call) { nfc_device_iter_init(iter); dev = nfc_device_iter_next(iter); } while (dev) { int rc; rc = nfc_genl_send_se(skb, dev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb, NLM_F_MULTI); if (rc < 0) break; dev = nfc_device_iter_next(iter); } mutex_unlock(&nfc_devlist_mutex); cb->args[1] = (long) dev; return skb->len; } static int nfc_genl_dump_ses_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; if (iter) { nfc_device_iter_exit(iter); kfree(iter); } return 0; } static int nfc_se_io(struct nfc_dev *dev, u32 se_idx, u8 *apdu, size_t apdu_length, se_io_cb_t cb, void *cb_context) { struct nfc_se *se; int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); device_lock(&dev->dev); if (!device_is_registered(&dev->dev)) { rc = -ENODEV; goto error; } if (!dev->dev_up) { rc = -ENODEV; goto error; } if (!dev->ops->se_io) { rc = -EOPNOTSUPP; goto error; } se = nfc_find_se(dev, se_idx); if (!se) { rc = -EINVAL; goto error; } if (se->state != NFC_SE_ENABLED) { rc = -ENODEV; goto error; } rc = dev->ops->se_io(dev, se_idx, apdu, apdu_length, cb, cb_context); device_unlock(&dev->dev); return rc; error: device_unlock(&dev->dev); kfree(cb_context); return rc; } struct se_io_ctx { u32 dev_idx; u32 se_idx; }; static void se_io_cb(void *context, u8 *apdu, size_t apdu_len, int err) { struct se_io_ctx *ctx = context; struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { kfree(ctx); return; } hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_CMD_SE_IO); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, ctx->dev_idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, ctx->se_idx) || nla_put(msg, NFC_ATTR_SE_APDU, apdu_len, apdu)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); kfree(ctx); return; nla_put_failure: free_msg: nlmsg_free(msg); kfree(ctx); return; } static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; struct se_io_ctx *ctx; u32 dev_idx, se_idx; u8 *apdu; size_t apdu_len; int rc; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_SE_INDEX] || !info->attrs[NFC_ATTR_SE_APDU]) return -EINVAL; dev_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); dev = nfc_get_device(dev_idx); if (!dev) return -ENODEV; if (!dev->ops || !dev->ops->se_io) { rc = -EOPNOTSUPP; goto put_dev; } apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]); if (apdu_len == 0) { rc = -EINVAL; goto put_dev; } apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]); if (!apdu) { rc = -EINVAL; goto put_dev; } ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL); if (!ctx) { rc = -ENOMEM; goto put_dev; } ctx->dev_idx = dev_idx; ctx->se_idx = se_idx; rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); put_dev: nfc_put_device(dev); return rc; } static int nfc_genl_vendor_cmd(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; const struct nfc_vendor_cmd *cmd; u32 dev_idx, vid, subcmd; u8 *data; size_t data_len; int i, err; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_VENDOR_ID] || !info->attrs[NFC_ATTR_VENDOR_SUBCMD]) return -EINVAL; dev_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); vid = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_ID]); subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]); dev = nfc_get_device(dev_idx); if (!dev) return -ENODEV; if (!dev->vendor_cmds || !dev->n_vendor_cmds) { err = -ENODEV; goto put_dev; } if (info->attrs[NFC_ATTR_VENDOR_DATA]) { data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]); data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]); if (data_len == 0) { err = -EINVAL; goto put_dev; } } else { data = NULL; data_len = 0; } for (i = 0; i < dev->n_vendor_cmds; i++) { cmd = &dev->vendor_cmds[i]; if (cmd->vendor_id != vid || cmd->subcmd != subcmd) continue; dev->cur_cmd_info = info; err = cmd->doit(dev, data, data_len); dev->cur_cmd_info = NULL; goto put_dev; } err = -EOPNOTSUPP; put_dev: nfc_put_device(dev); return err; } /* message building helper */ static inline void *nfc_hdr_put(struct sk_buff *skb, u32 portid, u32 seq, int flags, u8 cmd) { /* since there is no private header just add the generic one */ return genlmsg_put(skb, portid, seq, &nfc_genl_family, flags, cmd); } static struct sk_buff * __nfc_alloc_vendor_cmd_skb(struct nfc_dev *dev, int approxlen, u32 portid, u32 seq, enum nfc_attrs attr, u32 oui, u32 subcmd, gfp_t gfp) { struct sk_buff *skb; void *hdr; skb = nlmsg_new(approxlen + 100, gfp); if (!skb) return NULL; hdr = nfc_hdr_put(skb, portid, seq, 0, NFC_CMD_VENDOR); if (!hdr) { kfree_skb(skb); return NULL; } if (nla_put_u32(skb, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; if (nla_put_u32(skb, NFC_ATTR_VENDOR_ID, oui)) goto nla_put_failure; if (nla_put_u32(skb, NFC_ATTR_VENDOR_SUBCMD, subcmd)) goto nla_put_failure; ((void **)skb->cb)[0] = dev; ((void **)skb->cb)[1] = hdr; return skb; nla_put_failure: kfree_skb(skb); return NULL; } struct sk_buff *__nfc_alloc_vendor_cmd_reply_skb(struct nfc_dev *dev, enum nfc_attrs attr, u32 oui, u32 subcmd, int approxlen) { if (WARN_ON(!dev->cur_cmd_info)) return NULL; return __nfc_alloc_vendor_cmd_skb(dev, approxlen, dev->cur_cmd_info->snd_portid, dev->cur_cmd_info->snd_seq, attr, oui, subcmd, GFP_KERNEL); } EXPORT_SYMBOL(__nfc_alloc_vendor_cmd_reply_skb); int nfc_vendor_cmd_reply(struct sk_buff *skb) { struct nfc_dev *dev = ((void **)skb->cb)[0]; void *hdr = ((void **)skb->cb)[1]; /* clear CB data for netlink core to own from now on */ memset(skb->cb, 0, sizeof(skb->cb)); if (WARN_ON(!dev->cur_cmd_info)) { kfree_skb(skb); return -EINVAL; } genlmsg_end(skb, hdr); return genlmsg_reply(skb, dev->cur_cmd_info); } EXPORT_SYMBOL(nfc_vendor_cmd_reply); static const struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_get_device, .dumpit = nfc_genl_dump_devices, .done = nfc_genl_dump_devices_done, }, { .cmd = NFC_CMD_DEV_UP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_up, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEV_DOWN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_down, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_START_POLL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_start_poll, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_STOP_POLL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_stop_poll, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEP_LINK_UP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_up, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEP_LINK_DOWN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_down, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_GET_TARGET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = nfc_genl_dump_targets, .done = nfc_genl_dump_targets_done, }, { .cmd = NFC_CMD_LLC_GET_PARAMS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_get_params, }, { .cmd = NFC_CMD_LLC_SET_PARAMS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_set_params, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_LLC_SDREQ, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_sdreq, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_FW_DOWNLOAD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_fw_download, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_ENABLE_SE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_enable_se, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DISABLE_SE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_disable_se, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_GET_SE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = nfc_genl_dump_ses, .done = nfc_genl_dump_ses_done, }, { .cmd = NFC_CMD_SE_IO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_se_io, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_ACTIVATE_TARGET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_activate_target, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_VENDOR, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_vendor_cmd, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEACTIVATE_TARGET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_deactivate_target, .flags = GENL_ADMIN_PERM, }, }; static struct genl_family nfc_genl_family __ro_after_init = { .hdrsize = 0, .name = NFC_GENL_NAME, .version = NFC_GENL_VERSION, .maxattr = NFC_ATTR_MAX, .policy = nfc_genl_policy, .module = THIS_MODULE, .ops = nfc_genl_ops, .n_ops = ARRAY_SIZE(nfc_genl_ops), .resv_start_op = NFC_CMD_DEACTIVATE_TARGET + 1, .mcgrps = nfc_genl_mcgrps, .n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps), }; struct urelease_work { struct work_struct w; u32 portid; }; static void nfc_urelease_event_work(struct work_struct *work) { struct urelease_work *w = container_of(work, struct urelease_work, w); struct class_dev_iter iter; struct nfc_dev *dev; pr_debug("portid %d\n", w->portid); mutex_lock(&nfc_devlist_mutex); nfc_device_iter_init(&iter); dev = nfc_device_iter_next(&iter); while (dev) { mutex_lock(&dev->genl_data.genl_data_mutex); if (dev->genl_data.poll_req_portid == w->portid) { nfc_stop_poll(dev); dev->genl_data.poll_req_portid = 0; } mutex_unlock(&dev->genl_data.genl_data_mutex); dev = nfc_device_iter_next(&iter); } nfc_device_iter_exit(&iter); mutex_unlock(&nfc_devlist_mutex); kfree(w); } static int nfc_genl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netlink_notify *n = ptr; struct urelease_work *w; if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC) goto out; pr_debug("NETLINK_URELEASE event from id %d\n", n->portid); w = kmalloc(sizeof(*w), GFP_ATOMIC); if (w) { INIT_WORK(&w->w, nfc_urelease_event_work); w->portid = n->portid; schedule_work(&w->w); } out: return NOTIFY_DONE; } void nfc_genl_data_init(struct nfc_genl_data *genl_data) { genl_data->poll_req_portid = 0; mutex_init(&genl_data->genl_data_mutex); } void nfc_genl_data_exit(struct nfc_genl_data *genl_data) { mutex_destroy(&genl_data->genl_data_mutex); } static struct notifier_block nl_notifier = { .notifier_call = nfc_genl_rcv_nl_event, }; /** * nfc_genl_init() - Initialize netlink interface * * This initialization function registers the nfc netlink family. */ int __init nfc_genl_init(void) { int rc; rc = genl_register_family(&nfc_genl_family); if (rc) return rc; netlink_register_notifier(&nl_notifier); return 0; } /** * nfc_genl_exit() - Deinitialize netlink interface * * This exit function unregisters the nfc netlink family. */ void nfc_genl_exit(void) { netlink_unregister_notifier(&nl_notifier); genl_unregister_family(&nfc_genl_family); }
520 519 520 520 519 12 12 5 518 518 517 518 517 505 437 438 16 426 517 517 517 517 497 436 6 431 6 432 436 449 446 446 442 435 449 446 1 1 11 9 2 1 2 1 1 17 15 15 17 17 15 17 15 15 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/proc/base.c * * Copyright (C) 1991, 1992 Linus Torvalds * * proc base directory handling functions * * 1999, Al Viro. Rewritten. Now it covers the whole per-process part. * Instead of using magical inumbers to determine the kind of object * we allocate and fill in-core inodes upon lookup. They don't even * go into icache. We cache the reference to task_struct upon lookup too. * Eventually it should become a filesystem in its own. We don't use the * rest of procfs anymore. * * * Changelog: * 17-Jan-2005 * Allan Bezerra * Bruna Moreira <bruna.moreira@indt.org.br> * Edjard Mota <edjard.mota@indt.org.br> * Ilias Biris <ilias.biris@indt.org.br> * Mauricio Lin <mauricio.lin@indt.org.br> * * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT * * A new process specific entry (smaps) included in /proc. It shows the * size of rss for each memory area. The maps entry lacks information * about physical memory size (rss) for each mapped file, i.e., * rss information for executables and library files. * This additional information is useful for any tools that need to know * about physical memory consumption for a process specific library. * * Changelog: * 21-Feb-2005 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT * Pud inclusion in the page table walking. * * ChangeLog: * 10-Mar-2005 * 10LE Instituto Nokia de Tecnologia - INdT: * A better way to walks through the page table as suggested by Hugh Dickins. * * Simo Piiroinen <simo.piiroinen@nokia.com>: * Smaps information related to shared, private, clean and dirty pages. * * Paul Mundt <paul.mundt@nokia.com>: * Overall revision about smaps. */ #include <linux/uaccess.h> #include <linux/errno.h> #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/task_io_accounting_ops.h> #include <linux/init.h> #include <linux/capability.h> #include <linux/file.h> #include <linux/generic-radix-tree.h> #include <linux/string.h> #include <linux/seq_file.h> #include <linux/namei.h> #include <linux/mnt_namespace.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/rcupdate.h> #include <linux/kallsyms.h> #include <linux/stacktrace.h> #include <linux/resource.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/security.h> #include <linux/ptrace.h> #include <linux/printk.h> #include <linux/cache.h> #include <linux/cgroup.h> #include <linux/cpuset.h> #include <linux/audit.h> #include <linux/poll.h> #include <linux/nsproxy.h> #include <linux/oom.h> #include <linux/elf.h> #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <linux/fs_parser.h> #include <linux/fs_struct.h> #include <linux/slab.h> #include <linux/sched/autogroup.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/debug.h> #include <linux/sched/stat.h> #include <linux/posix-timers.h> #include <linux/time_namespace.h> #include <linux/resctrl.h> #include <linux/cn_proc.h> #include <linux/ksm.h> #include <uapi/linux/lsm.h> #include <trace/events/oom.h> #include "internal.h" #include "fd.h" #include "../../lib/kstrtox.h" /* NOTE: * Implementing inode permission operations in /proc is almost * certainly an error. Permission checks need to happen during * each system call not at open time. The reason is that most of * what we wish to check for permissions in /proc varies at runtime. * * The classic example of a problem is opening file descriptors * in /proc for a task before it execs a suid executable. */ static u8 nlink_tid __ro_after_init; static u8 nlink_tgid __ro_after_init; enum proc_mem_force { PROC_MEM_FORCE_ALWAYS, PROC_MEM_FORCE_PTRACE, PROC_MEM_FORCE_NEVER }; static enum proc_mem_force proc_mem_force_override __ro_after_init = IS_ENABLED(CONFIG_PROC_MEM_NO_FORCE) ? PROC_MEM_FORCE_NEVER : IS_ENABLED(CONFIG_PROC_MEM_FORCE_PTRACE) ? PROC_MEM_FORCE_PTRACE : PROC_MEM_FORCE_ALWAYS; static const struct constant_table proc_mem_force_table[] __initconst = { { "always", PROC_MEM_FORCE_ALWAYS }, { "ptrace", PROC_MEM_FORCE_PTRACE }, { "never", PROC_MEM_FORCE_NEVER }, { } }; static int __init early_proc_mem_force_override(char *buf) { if (!buf) return -EINVAL; /* * lookup_constant() defaults to proc_mem_force_override to preseve * the initial Kconfig choice in case an invalid param gets passed. */ proc_mem_force_override = lookup_constant(proc_mem_force_table, buf, proc_mem_force_override); return 0; } early_param("proc_mem.force_override", early_proc_mem_force_override); struct pid_entry { const char *name; unsigned int len; umode_t mode; const struct inode_operations *iop; const struct file_operations *fop; union proc_op op; }; #define NOD(NAME, MODE, IOP, FOP, OP) { \ .name = (NAME), \ .len = sizeof(NAME) - 1, \ .mode = MODE, \ .iop = IOP, \ .fop = FOP, \ .op = OP, \ } #define DIR(NAME, MODE, iops, fops) \ NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} ) #define LNK(NAME, get_link) \ NOD(NAME, (S_IFLNK|S_IRWXUGO), \ &proc_pid_link_inode_operations, NULL, \ { .proc_get_link = get_link } ) #define REG(NAME, MODE, fops) \ NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) #define ONE(NAME, MODE, show) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_single_file_operations, \ { .proc_show = show } ) #define ATTR(LSMID, NAME, MODE) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_pid_attr_operations, \ { .lsmid = LSMID }) /* * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. */ static unsigned int __init pid_entry_nlink(const struct pid_entry *entries, unsigned int n) { unsigned int i; unsigned int count; count = 2; for (i = 0; i < n; ++i) { if (S_ISDIR(entries[i].mode)) ++count; } return count; } static int get_task_root(struct task_struct *task, struct path *root) { int result = -ENOENT; task_lock(task); if (task->fs) { get_fs_root(task->fs, root); result = 0; } task_unlock(task); return result; } static int proc_cwd_link(struct dentry *dentry, struct path *path) { struct task_struct *task = get_proc_task(d_inode(dentry)); int result = -ENOENT; if (task) { task_lock(task); if (task->fs) { get_fs_pwd(task->fs, path); result = 0; } task_unlock(task); put_task_struct(task); } return result; } static int proc_root_link(struct dentry *dentry, struct path *path) { struct task_struct *task = get_proc_task(d_inode(dentry)); int result = -ENOENT; if (task) { result = get_task_root(task, path); put_task_struct(task); } return result; } /* * If the user used setproctitle(), we just get the string from * user space at arg_start, and limit it to a maximum of one page. */ static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf, size_t count, unsigned long pos, unsigned long arg_start) { char *page; int ret, got; if (pos >= PAGE_SIZE) return 0; page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; ret = 0; got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON); if (got > 0) { int len = strnlen(page, got); /* Include the NUL character if it was found */ if (len < got) len++; if (len > pos) { len -= pos; if (len > count) len = count; len -= copy_to_user(buf, page+pos, len); if (!len) len = -EFAULT; ret = len; } } free_page((unsigned long)page); return ret; } static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, size_t count, loff_t *ppos) { unsigned long arg_start, arg_end, env_start, env_end; unsigned long pos, len; char *page, c; /* Check if process spawned far enough to have cmdline. */ if (!mm->env_end) return 0; spin_lock(&mm->arg_lock); arg_start = mm->arg_start; arg_end = mm->arg_end; env_start = mm->env_start; env_end = mm->env_end; spin_unlock(&mm->arg_lock); if (arg_start >= arg_end) return 0; /* * We allow setproctitle() to overwrite the argument * strings, and overflow past the original end. But * only when it overflows into the environment area. */ if (env_start != arg_end || env_end < env_start) env_start = env_end = arg_end; len = env_end - arg_start; /* We're not going to care if "*ppos" has high bits set */ pos = *ppos; if (pos >= len) return 0; if (count > len - pos) count = len - pos; if (!count) return 0; /* * Magical special case: if the argv[] end byte is not * zero, the user has overwritten it with setproctitle(3). * * Possible future enhancement: do this only once when * pos is 0, and set a flag in the 'struct file'. */ if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c) return get_mm_proctitle(mm, buf, count, pos, arg_start); /* * For the non-setproctitle() case we limit things strictly * to the [arg_start, arg_end[ range. */ pos += arg_start; if (pos < arg_start || pos >= arg_end) return 0; if (count > arg_end - pos) count = arg_end - pos; page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; len = 0; while (count) { int got; size_t size = min_t(size_t, PAGE_SIZE, count); got = access_remote_vm(mm, pos, page, size, FOLL_ANON); if (got <= 0) break; got -= copy_to_user(buf, page, got); if (unlikely(!got)) { if (!len) len = -EFAULT; break; } pos += got; buf += got; len += got; count -= got; } free_page((unsigned long)page); return len; } static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf, size_t count, loff_t *pos) { struct mm_struct *mm; ssize_t ret; mm = get_task_mm(tsk); if (!mm) return 0; ret = get_mm_cmdline(mm, buf, count, pos); mmput(mm); return ret; } static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { struct task_struct *tsk; ssize_t ret; BUG_ON(*pos < 0); tsk = get_proc_task(file_inode(file)); if (!tsk) return -ESRCH; ret = get_task_cmdline(tsk, buf, count, pos); put_task_struct(tsk); if (ret > 0) *pos += ret; return ret; } static const struct file_operations proc_pid_cmdline_ops = { .read = proc_pid_cmdline_read, .llseek = generic_file_llseek, }; #ifdef CONFIG_KALLSYMS /* * Provides a wchan file via kallsyms in a proper one-value-per-file format. * Returns the resolved symbol. If that fails, simply return the address. */ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long wchan; char symname[KSYM_NAME_LEN]; if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto print0; wchan = get_wchan(task); if (wchan && !lookup_symbol_name(wchan, symname)) { seq_puts(m, symname); return 0; } print0: seq_putc(m, '0'); return 0; } #endif /* CONFIG_KALLSYMS */ static int lock_trace(struct task_struct *task) { int err = down_read_killable(&task->signal->exec_update_lock); if (err) return err; if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { up_read(&task->signal->exec_update_lock); return -EPERM; } return 0; } static void unlock_trace(struct task_struct *task) { up_read(&task->signal->exec_update_lock); } #ifdef CONFIG_STACKTRACE #define MAX_STACK_TRACE_DEPTH 64 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long *entries; int err; /* * The ability to racily run the kernel stack unwinder on a running task * and then observe the unwinder output is scary; while it is useful for * debugging kernel issues, it can also allow an attacker to leak kernel * stack contents. * Doing this in a manner that is at least safe from races would require * some work to ensure that the remote task can not be scheduled; and * even then, this would still expose the unwinder as local attack * surface. * Therefore, this interface is restricted to root. */ if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) return -EACCES; entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries), GFP_KERNEL); if (!entries) return -ENOMEM; err = lock_trace(task); if (!err) { unsigned int i, nr_entries; nr_entries = stack_trace_save_tsk(task, entries, MAX_STACK_TRACE_DEPTH, 0); for (i = 0; i < nr_entries; i++) { seq_printf(m, "[<0>] %pB\n", (void *)entries[i]); } unlock_trace(task); } kfree(entries); return err; } #endif #ifdef CONFIG_SCHED_INFO /* * Provides /proc/PID/schedstat */ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { if (unlikely(!sched_info_on())) seq_puts(m, "0 0 0\n"); else seq_printf(m, "%llu %llu %lu\n", (unsigned long long)task->se.sum_exec_runtime, (unsigned long long)task->sched_info.run_delay, task->sched_info.pcount); return 0; } #endif #ifdef CONFIG_LATENCYTOP static int lstats_show_proc(struct seq_file *m, void *v) { int i; struct inode *inode = m->private; struct task_struct *task = get_proc_task(inode); if (!task) return -ESRCH; seq_puts(m, "Latency Top version : v0.1\n"); for (i = 0; i < LT_SAVECOUNT; i++) { struct latency_record *lr = &task->latency_record[i]; if (lr->backtrace[0]) { int q; seq_printf(m, "%i %li %li", lr->count, lr->time, lr->max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { unsigned long bt = lr->backtrace[q]; if (!bt) break; seq_printf(m, " %ps", (void *)bt); } seq_putc(m, '\n'); } } put_task_struct(task); return 0; } static int lstats_open(struct inode *inode, struct file *file) { return single_open(file, lstats_show_proc, inode); } static ssize_t lstats_write(struct file *file, const char __user *buf, size_t count, loff_t *offs) { struct task_struct *task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; clear_tsk_latency_tracing(task); put_task_struct(task); return count; } static const struct file_operations proc_lstats_operations = { .open = lstats_open, .read = seq_read, .write = lstats_write, .llseek = seq_lseek, .release = single_release, }; #endif static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long totalpages = totalram_pages() + total_swap_pages; unsigned long points = 0; long badness; badness = oom_badness(task, totalpages); /* * Special case OOM_SCORE_ADJ_MIN for all others scale the * badness value into [0, 2000] range which we have been * exporting for a long time so userspace might depend on it. */ if (badness != LONG_MIN) points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3; seq_printf(m, "%lu\n", points); return 0; } struct limit_names { const char *name; const char *unit; }; static const struct limit_names lnames[RLIM_NLIMITS] = { [RLIMIT_CPU] = {"Max cpu time", "seconds"}, [RLIMIT_FSIZE] = {"Max file size", "bytes"}, [RLIMIT_DATA] = {"Max data size", "bytes"}, [RLIMIT_STACK] = {"Max stack size", "bytes"}, [RLIMIT_CORE] = {"Max core file size", "bytes"}, [RLIMIT_RSS] = {"Max resident set", "bytes"}, [RLIMIT_NPROC] = {"Max processes", "processes"}, [RLIMIT_NOFILE] = {"Max open files", "files"}, [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, [RLIMIT_AS] = {"Max address space", "bytes"}, [RLIMIT_LOCKS] = {"Max file locks", "locks"}, [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, [RLIMIT_NICE] = {"Max nice priority", NULL}, [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, }; /* Display limits for a process */ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned int i; unsigned long flags; struct rlimit rlim[RLIM_NLIMITS]; if (!lock_task_sighand(task, &flags)) return 0; memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); unlock_task_sighand(task, &flags); /* * print the file header */ seq_puts(m, "Limit " "Soft Limit " "Hard Limit " "Units \n"); for (i = 0; i < RLIM_NLIMITS; i++) { if (rlim[i].rlim_cur == RLIM_INFINITY) seq_printf(m, "%-25s %-20s ", lnames[i].name, "unlimited"); else seq_printf(m, "%-25s %-20lu ", lnames[i].name, rlim[i].rlim_cur); if (rlim[i].rlim_max == RLIM_INFINITY) seq_printf(m, "%-20s ", "unlimited"); else seq_printf(m, "%-20lu ", rlim[i].rlim_max); if (lnames[i].unit) seq_printf(m, "%-10s\n", lnames[i].unit); else seq_putc(m, '\n'); } return 0; } #ifdef CONFIG_HAVE_ARCH_TRACEHOOK static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { struct syscall_info info; u64 *args = &info.data.args[0]; int res; res = lock_trace(task); if (res) return res; if (task_current_syscall(task, &info)) seq_puts(m, "running\n"); else if (info.data.nr < 0) seq_printf(m, "%d 0x%llx 0x%llx\n", info.data.nr, info.sp, info.data.instruction_pointer); else seq_printf(m, "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n", info.data.nr, args[0], args[1], args[2], args[3], args[4], args[5], info.sp, info.data.instruction_pointer); unlock_trace(task); return 0; } #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ /************************************************************************/ /* Here the fs part begins */ /************************************************************************/ /* permission checks */ static bool proc_fd_access_allowed(struct inode *inode) { struct task_struct *task; bool allowed = false; /* Allow access to a task's file descriptors if it is us or we * may use ptrace attach to the process and find out that * information. */ task = get_proc_task(inode); if (task) { allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); put_task_struct(task); } return allowed; } int proc_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { int error; struct inode *inode = d_inode(dentry); if (attr->ia_valid & ATTR_MODE) return -EPERM; error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; setattr_copy(&nop_mnt_idmap, inode, attr); return 0; } /* * May current process learn task's sched/cmdline info (for hide_pid_min=1) * or euid/egid (for hide_pid_min=2)? */ static bool has_pid_permissions(struct proc_fs_info *fs_info, struct task_struct *task, enum proc_hidepid hide_pid_min) { /* * If 'hidpid' mount option is set force a ptrace check, * we indicate that we are using a filesystem syscall * by passing PTRACE_MODE_READ_FSCREDS */ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); if (fs_info->hide_pid < hide_pid_min) return true; if (in_group_p(fs_info->pid_gid)) return true; return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); } static int proc_pid_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); struct task_struct *task; bool has_perms; task = get_proc_task(inode); if (!task) return -ESRCH; has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS); put_task_struct(task); if (!has_perms) { if (fs_info->hide_pid == HIDEPID_INVISIBLE) { /* * Let's make getdents(), stat(), and open() * consistent with each other. If a process * may not stat() a file, it shouldn't be seen * in procfs at all. */ return -ENOENT; } return -EPERM; } return generic_permission(&nop_mnt_idmap, inode, mask); } static const struct inode_operations proc_def_inode_operations = { .setattr = proc_setattr, }; static int proc_single_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct pid_namespace *ns = proc_pid_ns(inode->i_sb); struct pid *pid = proc_pid(inode); struct task_struct *task; int ret; task = get_pid_task(pid, PIDTYPE_PID); if (!task) return -ESRCH; ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); put_task_struct(task); return ret; } static int proc_single_open(struct inode *inode, struct file *filp) { return single_open(filp, proc_single_show, inode); } static const struct file_operations proc_single_file_operations = { .open = proc_single_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) { struct task_struct *task = get_proc_task(inode); struct mm_struct *mm; if (!task) return ERR_PTR(-ESRCH); mm = mm_access(task, mode | PTRACE_MODE_FSCREDS); put_task_struct(task); if (IS_ERR(mm)) return mm == ERR_PTR(-ESRCH) ? NULL : mm; /* ensure this mm_struct can't be freed */ mmgrab(mm); /* but do not pin its memory */ mmput(mm); return mm; } static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) { struct mm_struct *mm = proc_mem_open(inode, mode); if (IS_ERR(mm)) return PTR_ERR(mm); file->private_data = mm; return 0; } static int mem_open(struct inode *inode, struct file *file) { if (WARN_ON_ONCE(!(file->f_op->fop_flags & FOP_UNSIGNED_OFFSET))) return -EINVAL; return __mem_open(inode, file, PTRACE_MODE_ATTACH); } static bool proc_mem_foll_force(struct file *file, struct mm_struct *mm) { struct task_struct *task; bool ptrace_active = false; switch (proc_mem_force_override) { case PROC_MEM_FORCE_NEVER: return false; case PROC_MEM_FORCE_PTRACE: task = get_proc_task(file_inode(file)); if (task) { ptrace_active = READ_ONCE(task->ptrace) && READ_ONCE(task->mm) == mm && READ_ONCE(task->parent) == current; put_task_struct(task); } return ptrace_active; default: return true; } } static ssize_t mem_rw(struct file *file, char __user *buf, size_t count, loff_t *ppos, int write) { struct mm_struct *mm = file->private_data; unsigned long addr = *ppos; ssize_t copied; char *page; unsigned int flags; if (!mm) return 0; page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; copied = 0; if (!mmget_not_zero(mm)) goto free; flags = write ? FOLL_WRITE : 0; if (proc_mem_foll_force(file, mm)) flags |= FOLL_FORCE; while (count > 0) { size_t this_len = min_t(size_t, count, PAGE_SIZE); if (write && copy_from_user(page, buf, this_len)) { copied = -EFAULT; break; } this_len = access_remote_vm(mm, addr, page, this_len, flags); if (!this_len) { if (!copied) copied = -EIO; break; } if (!write && copy_to_user(buf, page, this_len)) { copied = -EFAULT; break; } buf += this_len; addr += this_len; copied += this_len; count -= this_len; } *ppos = addr; mmput(mm); free: free_page((unsigned long) page); return copied; } static ssize_t mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { return mem_rw(file, buf, count, ppos, 0); } static ssize_t mem_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { return mem_rw(file, (char __user*)buf, count, ppos, 1); } loff_t mem_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { case 0: file->f_pos = offset; break; case 1: file->f_pos += offset; break; default: return -EINVAL; } force_successful_syscall_return(); return file->f_pos; } static int mem_release(struct inode *inode, struct file *file) { struct mm_struct *mm = file->private_data; if (mm) mmdrop(mm); return 0; } static const struct file_operations proc_mem_operations = { .llseek = mem_lseek, .read = mem_read, .write = mem_write, .open = mem_open, .release = mem_release, .fop_flags = FOP_UNSIGNED_OFFSET, }; static int environ_open(struct inode *inode, struct file *file) { return __mem_open(inode, file, PTRACE_MODE_READ); } static ssize_t environ_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { char *page; unsigned long src = *ppos; int ret = 0; struct mm_struct *mm = file->private_data; unsigned long env_start, env_end; /* Ensure the process spawned far enough to have an environment. */ if (!mm || !mm->env_end) return 0; page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; ret = 0; if (!mmget_not_zero(mm)) goto free; spin_lock(&mm->arg_lock); env_start = mm->env_start; env_end = mm->env_end; spin_unlock(&mm->arg_lock); while (count > 0) { size_t this_len, max_len; int retval; if (src >= (env_end - env_start)) break; this_len = env_end - (env_start + src); max_len = min_t(size_t, PAGE_SIZE, count); this_len = min(max_len, this_len); retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON); if (retval <= 0) { ret = retval; break; } if (copy_to_user(buf, page, retval)) { ret = -EFAULT; break; } ret += retval; src += retval; buf += retval; count -= retval; } *ppos = src; mmput(mm); free: free_page((unsigned long) page); return ret; } static const struct file_operations proc_environ_operations = { .open = environ_open, .read = environ_read, .llseek = generic_file_llseek, .release = mem_release, }; static int auxv_open(struct inode *inode, struct file *file) { return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); } static ssize_t auxv_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct mm_struct *mm = file->private_data; unsigned int nwords = 0; if (!mm) return 0; do { nwords += 2; } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0])); } static const struct file_operations proc_auxv_operations = { .open = auxv_open, .read = auxv_read, .llseek = generic_file_llseek, .release = mem_release, }; static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; int oom_adj = OOM_ADJUST_MIN; size_t len; if (!task) return -ESRCH; if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) oom_adj = OOM_ADJUST_MAX; else oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX; put_task_struct(task); if (oom_adj > OOM_ADJUST_MAX) oom_adj = OOM_ADJUST_MAX; len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); return simple_read_from_buffer(buf, count, ppos, buffer, len); } static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) { struct mm_struct *mm = NULL; struct task_struct *task; int err = 0; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; mutex_lock(&oom_adj_mutex); if (legacy) { if (oom_adj < task->signal->oom_score_adj && !capable(CAP_SYS_RESOURCE)) { err = -EACCES; goto err_unlock; } /* * /proc/pid/oom_adj is provided for legacy purposes, ask users to use * /proc/pid/oom_score_adj instead. */ pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", current->comm, task_pid_nr(current), task_pid_nr(task), task_pid_nr(task)); } else { if ((short)oom_adj < task->signal->oom_score_adj_min && !capable(CAP_SYS_RESOURCE)) { err = -EACCES; goto err_unlock; } } /* * Make sure we will check other processes sharing the mm if this is * not vfrok which wants its own oom_score_adj. * pin the mm so it doesn't go away and get reused after task_unlock */ if (!task->vfork_done) { struct task_struct *p = find_lock_task_mm(task); if (p) { if (test_bit(MMF_MULTIPROCESS, &p->mm->flags)) { mm = p->mm; mmgrab(mm); } task_unlock(p); } } task->signal->oom_score_adj = oom_adj; if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) task->signal->oom_score_adj_min = (short)oom_adj; trace_oom_score_adj_update(task); if (mm) { struct task_struct *p; rcu_read_lock(); for_each_process(p) { if (same_thread_group(task, p)) continue; /* do not touch kernel threads or the global init */ if (p->flags & PF_KTHREAD || is_global_init(p)) continue; task_lock(p); if (!p->vfork_done && process_shares_mm(p, mm)) { p->signal->oom_score_adj = oom_adj; if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) p->signal->oom_score_adj_min = (short)oom_adj; } task_unlock(p); } rcu_read_unlock(); mmdrop(mm); } err_unlock: mutex_unlock(&oom_adj_mutex); put_task_struct(task); return err; } /* * /proc/pid/oom_adj exists solely for backwards compatibility with previous * kernels. The effective policy is defined by oom_score_adj, which has a * different scale: oom_adj grew exponentially and oom_score_adj grows linearly. * Values written to oom_adj are simply mapped linearly to oom_score_adj. * Processes that become oom disabled via oom_adj will still be oom disabled * with this implementation. * * oom_adj cannot be removed since existing userspace binaries use it. */ static ssize_t oom_adj_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char buffer[PROC_NUMBUF] = {}; int oom_adj; int err; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) { err = -EFAULT; goto out; } err = kstrtoint(strstrip(buffer), 0, &oom_adj); if (err) goto out; if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && oom_adj != OOM_DISABLE) { err = -EINVAL; goto out; } /* * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum * value is always attainable. */ if (oom_adj == OOM_ADJUST_MAX) oom_adj = OOM_SCORE_ADJ_MAX; else oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; err = __set_oom_adj(file, oom_adj, true); out: return err < 0 ? err : count; } static const struct file_operations proc_oom_adj_operations = { .read = oom_adj_read, .write = oom_adj_write, .llseek = generic_file_llseek, }; static ssize_t oom_score_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; short oom_score_adj = OOM_SCORE_ADJ_MIN; size_t len; if (!task) return -ESRCH; oom_score_adj = task->signal->oom_score_adj; put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj); return simple_read_from_buffer(buf, count, ppos, buffer, len); } static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char buffer[PROC_NUMBUF] = {}; int oom_score_adj; int err; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) { err = -EFAULT; goto out; } err = kstrtoint(strstrip(buffer), 0, &oom_score_adj); if (err) goto out; if (oom_score_adj < OOM_SCORE_ADJ_MIN || oom_score_adj > OOM_SCORE_ADJ_MAX) { err = -EINVAL; goto out; } err = __set_oom_adj(file, oom_score_adj, false); out: return err < 0 ? err : count; } static const struct file_operations proc_oom_score_adj_operations = { .read = oom_score_adj_read, .write = oom_score_adj_write, .llseek = default_llseek, }; #ifdef CONFIG_AUDIT #define TMPBUFLEN 11 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); struct task_struct *task = get_proc_task(inode); ssize_t length; char tmpbuf[TMPBUFLEN]; if (!task) return -ESRCH; length = scnprintf(tmpbuf, TMPBUFLEN, "%u", from_kuid(file->f_cred->user_ns, audit_get_loginuid(task))); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); uid_t loginuid; kuid_t kloginuid; int rv; /* Don't let kthreads write their own loginuid */ if (current->flags & PF_KTHREAD) return -EPERM; rcu_read_lock(); if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { rcu_read_unlock(); return -EPERM; } rcu_read_unlock(); if (*ppos != 0) { /* No partial writes. */ return -EINVAL; } rv = kstrtou32_from_user(buf, count, 10, &loginuid); if (rv < 0) return rv; /* is userspace tring to explicitly UNSET the loginuid? */ if (loginuid == AUDIT_UID_UNSET) { kloginuid = INVALID_UID; } else { kloginuid = make_kuid(file->f_cred->user_ns, loginuid); if (!uid_valid(kloginuid)) return -EINVAL; } rv = audit_set_loginuid(kloginuid); if (rv < 0) return rv; return count; } static const struct file_operations proc_loginuid_operations = { .read = proc_loginuid_read, .write = proc_loginuid_write, .llseek = generic_file_llseek, }; static ssize_t proc_sessionid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); struct task_struct *task = get_proc_task(inode); ssize_t length; char tmpbuf[TMPBUFLEN]; if (!task) return -ESRCH; length = scnprintf(tmpbuf, TMPBUFLEN, "%u", audit_get_sessionid(task)); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } static const struct file_operations proc_sessionid_operations = { .read = proc_sessionid_read, .llseek = generic_file_llseek, }; #endif #ifdef CONFIG_FAULT_INJECTION static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; size_t len; int make_it_fail; if (!task) return -ESRCH; make_it_fail = task->make_it_fail; put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); return simple_read_from_buffer(buf, count, ppos, buffer, len); } static ssize_t proc_fault_inject_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct task_struct *task; char buffer[PROC_NUMBUF] = {}; int make_it_fail; int rv; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; rv = kstrtoint(strstrip(buffer), 0, &make_it_fail); if (rv < 0) return rv; if (make_it_fail < 0 || make_it_fail > 1) return -EINVAL; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; task->make_it_fail = make_it_fail; put_task_struct(task); return count; } static const struct file_operations proc_fault_inject_operations = { .read = proc_fault_inject_read, .write = proc_fault_inject_write, .llseek = generic_file_llseek, }; static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; int err; unsigned int n; err = kstrtouint_from_user(buf, count, 0, &n); if (err) return err; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; task->fail_nth = n; put_task_struct(task); return count; } static ssize_t proc_fail_nth_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; char numbuf[PROC_NUMBUF]; ssize_t len; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, numbuf, len); } static const struct file_operations proc_fail_nth_operations = { .read = proc_fail_nth_read, .write = proc_fail_nth_write, }; #endif #ifdef CONFIG_SCHED_DEBUG /* * Print out various scheduling related per-task fields: */ static int sched_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct pid_namespace *ns = proc_pid_ns(inode->i_sb); struct task_struct *p; p = get_proc_task(inode); if (!p) return -ESRCH; proc_sched_show_task(p, ns, m); put_task_struct(p); return 0; } static ssize_t sched_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct inode *inode = file_inode(file); struct task_struct *p; p = get_proc_task(inode); if (!p) return -ESRCH; proc_sched_set_task(p); put_task_struct(p); return count; } static int sched_open(struct inode *inode, struct file *filp) { return single_open(filp, sched_show, inode); } static const struct file_operations proc_pid_sched_operations = { .open = sched_open, .read = seq_read, .write = sched_write, .llseek = seq_lseek, .release = single_release, }; #endif #ifdef CONFIG_SCHED_AUTOGROUP /* * Print out autogroup related information: */ static int sched_autogroup_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct task_struct *p; p = get_proc_task(inode); if (!p) return -ESRCH; proc_sched_autogroup_show_task(p, m); put_task_struct(p); return 0; } static ssize_t sched_autogroup_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct inode *inode = file_inode(file); struct task_struct *p; char buffer[PROC_NUMBUF] = {}; int nice; int err; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; err = kstrtoint(strstrip(buffer), 0, &nice); if (err < 0) return err; p = get_proc_task(inode); if (!p) return -ESRCH; err = proc_sched_autogroup_set_nice(p, nice); if (err) count = err; put_task_struct(p); return count; } static int sched_autogroup_open(struct inode *inode, struct file *filp) { int ret; ret = single_open(filp, sched_autogroup_show, NULL); if (!ret) { struct seq_file *m = filp->private_data; m->private = inode; } return ret; } static const struct file_operations proc_pid_sched_autogroup_operations = { .open = sched_autogroup_open, .read = seq_read, .write = sched_autogroup_write, .llseek = seq_lseek, .release = single_release, }; #endif /* CONFIG_SCHED_AUTOGROUP */ #ifdef CONFIG_TIME_NS static int timens_offsets_show(struct seq_file *m, void *v) { struct task_struct *p; p = get_proc_task(file_inode(m->file)); if (!p) return -ESRCH; proc_timens_show_offsets(p, m); put_task_struct(p); return 0; } static ssize_t timens_offsets_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct inode *inode = file_inode(file); struct proc_timens_offset offsets[2]; char *kbuf = NULL, *pos, *next_line; struct task_struct *p; int ret, noffsets; /* Only allow < page size writes at the beginning of the file */ if ((*ppos != 0) || (count >= PAGE_SIZE)) return -EINVAL; /* Slurp in the user data */ kbuf = memdup_user_nul(buf, count); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); /* Parse the user data */ ret = -EINVAL; noffsets = 0; for (pos = kbuf; pos; pos = next_line) { struct proc_timens_offset *off = &offsets[noffsets]; char clock[10]; int err; /* Find the end of line and ensure we don't look past it */ next_line = strchr(pos, '\n'); if (next_line) { *next_line = '\0'; next_line++; if (*next_line == '\0') next_line = NULL; } err = sscanf(pos, "%9s %lld %lu", clock, &off->val.tv_sec, &off->val.tv_nsec); if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC) goto out; clock[sizeof(clock) - 1] = 0; if (strcmp(clock, "monotonic") == 0 || strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0) off->clockid = CLOCK_MONOTONIC; else if (strcmp(clock, "boottime") == 0 || strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0) off->clockid = CLOCK_BOOTTIME; else goto out; noffsets++; if (noffsets == ARRAY_SIZE(offsets)) { if (next_line) count = next_line - kbuf; break; } } ret = -ESRCH; p = get_proc_task(inode); if (!p) goto out; ret = proc_timens_set_offset(file, p, offsets, noffsets); put_task_struct(p); if (ret) goto out; ret = count; out: kfree(kbuf); return ret; } static int timens_offsets_open(struct inode *inode, struct file *filp) { return single_open(filp, timens_offsets_show, inode); } static const struct file_operations proc_timens_offsets_operations = { .open = timens_offsets_open, .read = seq_read, .write = timens_offsets_write, .llseek = seq_lseek, .release = single_release, }; #endif /* CONFIG_TIME_NS */ static ssize_t comm_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct inode *inode = file_inode(file); struct task_struct *p; char buffer[TASK_COMM_LEN] = {}; const size_t maxlen = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count)) return -EFAULT; p = get_proc_task(inode); if (!p) return -ESRCH; if (same_thread_group(current, p)) { set_task_comm(p, buffer); proc_comm_connector(p); } else count = -EINVAL; put_task_struct(p); return count; } static int comm_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct task_struct *p; p = get_proc_task(inode); if (!p) return -ESRCH; proc_task_name(m, p, false); seq_putc(m, '\n'); put_task_struct(p); return 0; } static int comm_open(struct inode *inode, struct file *filp) { return single_open(filp, comm_show, inode); } static const struct file_operations proc_pid_set_comm_operations = { .open = comm_open, .read = seq_read, .write = comm_write, .llseek = seq_lseek, .release = single_release, }; static int proc_exe_link(struct dentry *dentry, struct path *exe_path) { struct task_struct *task; struct file *exe_file; task = get_proc_task(d_inode(dentry)); if (!task) return -ENOENT; exe_file = get_task_exe_file(task); put_task_struct(task); if (exe_file) { *exe_path = exe_file->f_path; path_get(&exe_file->f_path); fput(exe_file); return 0; } else return -ENOENT; } static const char *proc_pid_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct path path; int error = -EACCES; if (!dentry) return ERR_PTR(-ECHILD); /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; error = PROC_I(inode)->op.proc_get_link(dentry, &path); if (error) goto out; error = nd_jump_link(&path); out: return ERR_PTR(error); } static int do_proc_readlink(const struct path *path, char __user *buffer, int buflen) { char *tmp = kmalloc(PATH_MAX, GFP_KERNEL); char *pathname; int len; if (!tmp) return -ENOMEM; pathname = d_path(path, tmp, PATH_MAX); len = PTR_ERR(pathname); if (IS_ERR(pathname)) goto out; len = tmp + PATH_MAX - 1 - pathname; if (len > buflen) len = buflen; if (copy_to_user(buffer, pathname, len)) len = -EFAULT; out: kfree(tmp); return len; } static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) { int error = -EACCES; struct inode *inode = d_inode(dentry); struct path path; /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; error = PROC_I(inode)->op.proc_get_link(dentry, &path); if (error) goto out; error = do_proc_readlink(&path, buffer, buflen); path_put(&path); out: return error; } const struct inode_operations proc_pid_link_inode_operations = { .readlink = proc_pid_readlink, .get_link = proc_pid_get_link, .setattr = proc_setattr, }; /* building an inode */ void task_dump_owner(struct task_struct *task, umode_t mode, kuid_t *ruid, kgid_t *rgid) { /* Depending on the state of dumpable compute who should own a * proc file for a task. */ const struct cred *cred; kuid_t uid; kgid_t gid; if (unlikely(task->flags & PF_KTHREAD)) { *ruid = GLOBAL_ROOT_UID; *rgid = GLOBAL_ROOT_GID; return; } /* Default to the tasks effective ownership */ rcu_read_lock(); cred = __task_cred(task); uid = cred->euid; gid = cred->egid; rcu_read_unlock(); /* * Before the /proc/pid/status file was created the only way to read * the effective uid of a /process was to stat /proc/pid. Reading * /proc/pid/status is slow enough that procps and other packages * kept stating /proc/pid. To keep the rules in /proc simple I have * made this apply to all per process world readable and executable * directories. */ if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) { struct mm_struct *mm; task_lock(task); mm = task->mm; /* Make non-dumpable tasks owned by some root */ if (mm) { if (get_dumpable(mm) != SUID_DUMP_USER) { struct user_namespace *user_ns = mm->user_ns; uid = make_kuid(user_ns, 0); if (!uid_valid(uid)) uid = GLOBAL_ROOT_UID; gid = make_kgid(user_ns, 0); if (!gid_valid(gid)) gid = GLOBAL_ROOT_GID; } } else { uid = GLOBAL_ROOT_UID; gid = GLOBAL_ROOT_GID; } task_unlock(task); } *ruid = uid; *rgid = gid; } void proc_pid_evict_inode(struct proc_inode *ei) { struct pid *pid = ei->pid; if (S_ISDIR(ei->vfs_inode.i_mode)) { spin_lock(&pid->lock); hlist_del_init_rcu(&ei->sibling_inodes); spin_unlock(&pid->lock); } } struct inode *proc_pid_make_inode(struct super_block *sb, struct task_struct *task, umode_t mode) { struct inode * inode; struct proc_inode *ei; struct pid *pid; /* We need a new inode */ inode = new_inode(sb); if (!inode) goto out; /* Common stuff */ ei = PROC_I(inode); inode->i_mode = mode; inode->i_ino = get_next_ino(); simple_inode_init_ts(inode); inode->i_op = &proc_def_inode_operations; /* * grab the reference to task. */ pid = get_task_pid(task, PIDTYPE_PID); if (!pid) goto out_unlock; /* Let the pid remember us for quick removal */ ei->pid = pid; task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); security_task_to_inode(task, inode); out: return inode; out_unlock: iput(inode); return NULL; } /* * Generating an inode and adding it into @pid->inodes, so that task will * invalidate inode's dentry before being released. * * This helper is used for creating dir-type entries under '/proc' and * '/proc/<tgid>/task'. Other entries(eg. fd, stat) under '/proc/<tgid>' * can be released by invalidating '/proc/<tgid>' dentry. * In theory, dentries under '/proc/<tgid>/task' can also be released by * invalidating '/proc/<tgid>' dentry, we reserve it to handle single * thread exiting situation: Any one of threads should invalidate its * '/proc/<tgid>/task/<pid>' dentry before released. */ static struct inode *proc_pid_make_base_inode(struct super_block *sb, struct task_struct *task, umode_t mode) { struct inode *inode; struct proc_inode *ei; struct pid *pid; inode = proc_pid_make_inode(sb, task, mode); if (!inode) return NULL; /* Let proc_flush_pid find this directory inode */ ei = PROC_I(inode); pid = ei->pid; spin_lock(&pid->lock); hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); spin_unlock(&pid->lock); return inode; } int pid_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); struct task_struct *task; generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->uid = GLOBAL_ROOT_UID; stat->gid = GLOBAL_ROOT_GID; rcu_read_lock(); task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) { rcu_read_unlock(); /* * This doesn't prevent learning whether PID exists, * it only makes getattr() consistent with readdir(). */ return -ENOENT; } task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid); } rcu_read_unlock(); return 0; } /* dentry stuff */ /* * Set <pid>/... inode ownership (can change due to setuid(), etc.) */ void pid_update_inode(struct task_struct *task, struct inode *inode) { task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); inode->i_mode &= ~(S_ISUID | S_ISGID); security_task_to_inode(task, inode); } /* * Rewrite the inode's ownerships here because the owning task may have * performed a setuid(), etc. * */ static int pid_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; int ret = 0; rcu_read_lock(); inode = d_inode_rcu(dentry); if (!inode) goto out; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { pid_update_inode(task, inode); ret = 1; } out: rcu_read_unlock(); return ret; } static inline bool proc_inode_is_dead(struct inode *inode) { return !proc_pid(inode)->tasks[PIDTYPE_PID].first; } int pid_delete_dentry(const struct dentry *dentry) { /* Is the task we represent dead? * If so, then don't put the dentry on the lru list, * kill it immediately. */ return proc_inode_is_dead(d_inode(dentry)); } const struct dentry_operations pid_dentry_operations = { .d_revalidate = pid_revalidate, .d_delete = pid_delete_dentry, }; /* Lookups */ /* * Fill a directory entry. * * If possible create the dcache entry and derive our inode number and * file type from dcache entry. * * Since all of the proc inode numbers are dynamically generated, the inode * numbers do not exist until the inode is cache. This means creating * the dcache entry in readdir is necessary to keep the inode numbers * reported by readdir in sync with the inode numbers reported * by stat. */ bool proc_fill_cache(struct file *file, struct dir_context *ctx, const char *name, unsigned int len, instantiate_t instantiate, struct task_struct *task, const void *ptr) { struct dentry *child, *dir = file->f_path.dentry; struct qstr qname = QSTR_INIT(name, len); struct inode *inode; unsigned type = DT_UNKNOWN; ino_t ino = 1; child = d_hash_and_lookup(dir, &qname); if (!child) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); child = d_alloc_parallel(dir, &qname, &wq); if (IS_ERR(child)) goto end_instantiate; if (d_in_lookup(child)) { struct dentry *res; res = instantiate(child, task, ptr); d_lookup_done(child); if (unlikely(res)) { dput(child); child = res; if (IS_ERR(child)) goto end_instantiate; } } } inode = d_inode(child); ino = inode->i_ino; type = inode->i_mode >> 12; dput(child); end_instantiate: return dir_emit(ctx, name, len, ino, type); } /* * dname_to_vma_addr - maps a dentry name into two unsigned longs * which represent vma start and end addresses. */ static int dname_to_vma_addr(struct dentry *dentry, unsigned long *start, unsigned long *end) { const char *str = dentry->d_name.name; unsigned long long sval, eval; unsigned int len; if (str[0] == '0' && str[1] != '-') return -EINVAL; len = _parse_integer(str, 16, &sval); if (len & KSTRTOX_OVERFLOW) return -EINVAL; if (sval != (unsigned long)sval) return -EINVAL; str += len; if (*str != '-') return -EINVAL; str++; if (str[0] == '0' && str[1]) return -EINVAL; len = _parse_integer(str, 16, &eval); if (len & KSTRTOX_OVERFLOW) return -EINVAL; if (eval != (unsigned long)eval) return -EINVAL; str += len; if (*str != '\0') return -EINVAL; *start = sval; *end = eval; return 0; } static int map_files_d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; bool exact_vma_exists = false; struct mm_struct *mm = NULL; struct task_struct *task; struct inode *inode; int status = 0; if (flags & LOOKUP_RCU) return -ECHILD; inode = d_inode(dentry); task = get_proc_task(inode); if (!task) goto out_notask; mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); if (IS_ERR(mm)) goto out; if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { status = mmap_read_lock_killable(mm); if (!status) { exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); mmap_read_unlock(mm); } } mmput(mm); if (exact_vma_exists) { task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); security_task_to_inode(task, inode); status = 1; } out: put_task_struct(task); out_notask: return status; } static const struct dentry_operations tid_map_files_dentry_operations = { .d_revalidate = map_files_d_revalidate, .d_delete = pid_delete_dentry, }; static int map_files_get_link(struct dentry *dentry, struct path *path) { unsigned long vm_start, vm_end; struct vm_area_struct *vma; struct task_struct *task; struct mm_struct *mm; int rc; rc = -ENOENT; task = get_proc_task(d_inode(dentry)); if (!task) goto out; mm = get_task_mm(task); put_task_struct(task); if (!mm) goto out; rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); if (rc) goto out_mmput; rc = mmap_read_lock_killable(mm); if (rc) goto out_mmput; rc = -ENOENT; vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { *path = *file_user_path(vma->vm_file); path_get(path); rc = 0; } mmap_read_unlock(mm); out_mmput: mmput(mm); out: return rc; } struct map_files_info { unsigned long start; unsigned long end; fmode_t mode; }; /* * Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due * to concerns about how the symlinks may be used to bypass permissions on * ancestor directories in the path to the file in question. */ static const char * proc_map_files_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { if (!checkpoint_restore_ns_capable(&init_user_ns)) return ERR_PTR(-EPERM); return proc_pid_get_link(dentry, inode, done); } /* * Identical to proc_pid_link_inode_operations except for get_link() */ static const struct inode_operations proc_map_files_link_inode_operations = { .readlink = proc_pid_readlink, .get_link = proc_map_files_get_link, .setattr = proc_setattr, }; static struct dentry * proc_map_files_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { fmode_t mode = (fmode_t)(unsigned long)ptr; struct proc_inode *ei; struct inode *inode; inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | ((mode & FMODE_READ ) ? S_IRUSR : 0) | ((mode & FMODE_WRITE) ? S_IWUSR : 0)); if (!inode) return ERR_PTR(-ENOENT); ei = PROC_I(inode); ei->op.proc_get_link = map_files_get_link; inode->i_op = &proc_map_files_link_inode_operations; inode->i_size = 64; return proc_splice_unmountable(inode, dentry, &tid_map_files_dentry_operations); } static struct dentry *proc_map_files_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; struct vm_area_struct *vma; struct task_struct *task; struct dentry *result; struct mm_struct *mm; result = ERR_PTR(-ENOENT); task = get_proc_task(dir); if (!task) goto out; result = ERR_PTR(-EACCES); if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; result = ERR_PTR(-ENOENT); if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) goto out_put_task; mm = get_task_mm(task); if (!mm) goto out_put_task; result = ERR_PTR(-EINTR); if (mmap_read_lock_killable(mm)) goto out_put_mm; result = ERR_PTR(-ENOENT); vma = find_exact_vma(mm, vm_start, vm_end); if (!vma) goto out_no_vma; if (vma->vm_file) result = proc_map_files_instantiate(dentry, task, (void *)(unsigned long)vma->vm_file->f_mode); out_no_vma: mmap_read_unlock(mm); out_put_mm: mmput(mm); out_put_task: put_task_struct(task); out: return result; } static const struct inode_operations proc_map_files_inode_operations = { .lookup = proc_map_files_lookup, .permission = proc_fd_permission, .setattr = proc_setattr, }; static int proc_map_files_readdir(struct file *file, struct dir_context *ctx) { struct vm_area_struct *vma; struct task_struct *task; struct mm_struct *mm; unsigned long nr_files, pos, i; GENRADIX(struct map_files_info) fa; struct map_files_info *p; int ret; struct vma_iterator vmi; genradix_init(&fa); ret = -ENOENT; task = get_proc_task(file_inode(file)); if (!task) goto out; ret = -EACCES; if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; ret = 0; if (!dir_emit_dots(file, ctx)) goto out_put_task; mm = get_task_mm(task); if (!mm) goto out_put_task; ret = mmap_read_lock_killable(mm); if (ret) { mmput(mm); goto out_put_task; } nr_files = 0; /* * We need two passes here: * * 1) Collect vmas of mapped files with mmap_lock taken * 2) Release mmap_lock and instantiate entries * * otherwise we get lockdep complained, since filldir() * routine might require mmap_lock taken in might_fault(). */ pos = 2; vma_iter_init(&vmi, mm, 0); for_each_vma(vmi, vma) { if (!vma->vm_file) continue; if (++pos <= ctx->pos) continue; p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL); if (!p) { ret = -ENOMEM; mmap_read_unlock(mm); mmput(mm); goto out_put_task; } p->start = vma->vm_start; p->end = vma->vm_end; p->mode = vma->vm_file->f_mode; } mmap_read_unlock(mm); mmput(mm); for (i = 0; i < nr_files; i++) { char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */ unsigned int len; p = genradix_ptr(&fa, i); len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end); if (!proc_fill_cache(file, ctx, buf, len, proc_map_files_instantiate, task, (void *)(unsigned long)p->mode)) break; ctx->pos++; } out_put_task: put_task_struct(task); out: genradix_free(&fa); return ret; } static const struct file_operations proc_map_files_operations = { .read = generic_read_dir, .iterate_shared = proc_map_files_readdir, .llseek = generic_file_llseek, }; #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) struct timers_private { struct pid *pid; struct task_struct *task; struct sighand_struct *sighand; struct pid_namespace *ns; unsigned long flags; }; static void *timers_start(struct seq_file *m, loff_t *pos) { struct timers_private *tp = m->private; tp->task = get_pid_task(tp->pid, PIDTYPE_PID); if (!tp->task) return ERR_PTR(-ESRCH); tp->sighand = lock_task_sighand(tp->task, &tp->flags); if (!tp->sighand) return ERR_PTR(-ESRCH); return seq_hlist_start(&tp->task->signal->posix_timers, *pos); } static void *timers_next(struct seq_file *m, void *v, loff_t *pos) { struct timers_private *tp = m->private; return seq_hlist_next(v, &tp->task->signal->posix_timers, pos); } static void timers_stop(struct seq_file *m, void *v) { struct timers_private *tp = m->private; if (tp->sighand) { unlock_task_sighand(tp->task, &tp->flags); tp->sighand = NULL; } if (tp->task) { put_task_struct(tp->task); tp->task = NULL; } } static int show_timer(struct seq_file *m, void *v) { struct k_itimer *timer; struct timers_private *tp = m->private; int notify; static const char * const nstr[] = { [SIGEV_SIGNAL] = "signal", [SIGEV_NONE] = "none", [SIGEV_THREAD] = "thread", }; timer = hlist_entry((struct hlist_node *)v, struct k_itimer, list); notify = timer->it_sigev_notify; seq_printf(m, "ID: %d\n", timer->it_id); seq_printf(m, "signal: %d/%px\n", timer->sigq.info.si_signo, timer->sigq.info.si_value.sival_ptr); seq_printf(m, "notify: %s/%s.%d\n", nstr[notify & ~SIGEV_THREAD_ID], (notify & SIGEV_THREAD_ID) ? "tid" : "pid", pid_nr_ns(timer->it_pid, tp->ns)); seq_printf(m, "ClockID: %d\n", timer->it_clock); return 0; } static const struct seq_operations proc_timers_seq_ops = { .start = timers_start, .next = timers_next, .stop = timers_stop, .show = show_timer, }; static int proc_timers_open(struct inode *inode, struct file *file) { struct timers_private *tp; tp = __seq_open_private(file, &proc_timers_seq_ops, sizeof(struct timers_private)); if (!tp) return -ENOMEM; tp->pid = proc_pid(inode); tp->ns = proc_pid_ns(inode->i_sb); return 0; } static const struct file_operations proc_timers_operations = { .open = proc_timers_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, }; #endif static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct inode *inode = file_inode(file); struct task_struct *p; u64 slack_ns; int err; err = kstrtoull_from_user(buf, count, 10, &slack_ns); if (err < 0) return err; p = get_proc_task(inode); if (!p) return -ESRCH; if (p != current) { rcu_read_lock(); if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { rcu_read_unlock(); count = -EPERM; goto out; } rcu_read_unlock(); err = security_task_setscheduler(p); if (err) { count = err; goto out; } } task_lock(p); if (rt_or_dl_task_policy(p)) slack_ns = 0; else if (slack_ns == 0) slack_ns = p->default_timer_slack_ns; p->timer_slack_ns = slack_ns; task_unlock(p); out: put_task_struct(p); return count; } static int timerslack_ns_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct task_struct *p; int err = 0; p = get_proc_task(inode); if (!p) return -ESRCH; if (p != current) { rcu_read_lock(); if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { rcu_read_unlock(); err = -EPERM; goto out; } rcu_read_unlock(); err = security_task_getscheduler(p); if (err) goto out; } task_lock(p); seq_printf(m, "%llu\n", p->timer_slack_ns); task_unlock(p); out: put_task_struct(p); return err; } static int timerslack_ns_open(struct inode *inode, struct file *filp) { return single_open(filp, timerslack_ns_show, inode); } static const struct file_operations proc_pid_set_timerslack_ns_operations = { .open = timerslack_ns_open, .read = seq_read, .write = timerslack_ns_write, .llseek = seq_lseek, .release = single_release, }; static struct dentry *proc_pident_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; inode = proc_pid_make_inode(dentry->d_sb, task, p->mode); if (!inode) return ERR_PTR(-ENOENT); ei = PROC_I(inode); if (S_ISDIR(inode->i_mode)) set_nlink(inode, 2); /* Use getattr to fix if necessary */ if (p->iop) inode->i_op = p->iop; if (p->fop) inode->i_fop = p->fop; ei->op = p->op; pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); return d_splice_alias(inode, dentry); } static struct dentry *proc_pident_lookup(struct inode *dir, struct dentry *dentry, const struct pid_entry *p, const struct pid_entry *end) { struct task_struct *task = get_proc_task(dir); struct dentry *res = ERR_PTR(-ENOENT); if (!task) goto out_no_task; /* * Yes, it does not scale. And it should not. Don't add * new entries into /proc/<tgid>/ without very good reasons. */ for (; p < end; p++) { if (p->len != dentry->d_name.len) continue; if (!memcmp(dentry->d_name.name, p->name, p->len)) { res = proc_pident_instantiate(dentry, task, p); break; } } put_task_struct(task); out_no_task: return res; } static int proc_pident_readdir(struct file *file, struct dir_context *ctx, const struct pid_entry *ents, unsigned int nents) { struct task_struct *task = get_proc_task(file_inode(file)); const struct pid_entry *p; if (!task) return -ENOENT; if (!dir_emit_dots(file, ctx)) goto out; if (ctx->pos >= nents + 2) goto out; for (p = ents + (ctx->pos - 2); p < ents + nents; p++) { if (!proc_fill_cache(file, ctx, p->name, p->len, proc_pident_instantiate, task, p)) break; ctx->pos++; } out: put_task_struct(task); return 0; } #ifdef CONFIG_SECURITY static int proc_pid_attr_open(struct inode *inode, struct file *file) { file->private_data = NULL; __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); return 0; } static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); char *p = NULL; ssize_t length; struct task_struct *task = get_proc_task(inode); if (!task) return -ESRCH; length = security_getprocattr(task, PROC_I(inode)->op.lsmid, file->f_path.dentry->d_name.name, &p); put_task_struct(task); if (length > 0) length = simple_read_from_buffer(buf, count, ppos, p, length); kfree(p); return length; } static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); struct task_struct *task; void *page; int rv; /* A task may only write when it was the opener. */ if (file->private_data != current->mm) return -EPERM; rcu_read_lock(); task = pid_task(proc_pid(inode), PIDTYPE_PID); if (!task) { rcu_read_unlock(); return -ESRCH; } /* A task may only write its own attributes. */ if (current != task) { rcu_read_unlock(); return -EACCES; } /* Prevent changes to overridden credentials. */ if (current_cred() != current_real_cred()) { rcu_read_unlock(); return -EBUSY; } rcu_read_unlock(); if (count > PAGE_SIZE) count = PAGE_SIZE; /* No partial writes. */ if (*ppos != 0) return -EINVAL; page = memdup_user(buf, count); if (IS_ERR(page)) { rv = PTR_ERR(page); goto out; } /* Guard against adverse ptrace interaction */ rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex); if (rv < 0) goto out_free; rv = security_setprocattr(PROC_I(inode)->op.lsmid, file->f_path.dentry->d_name.name, page, count); mutex_unlock(&current->signal->cred_guard_mutex); out_free: kfree(page); out: return rv; } static const struct file_operations proc_pid_attr_operations = { .open = proc_pid_attr_open, .read = proc_pid_attr_read, .write = proc_pid_attr_write, .llseek = generic_file_llseek, .release = mem_release, }; #define LSM_DIR_OPS(LSM) \ static int proc_##LSM##_attr_dir_iterate(struct file *filp, \ struct dir_context *ctx) \ { \ return proc_pident_readdir(filp, ctx, \ LSM##_attr_dir_stuff, \ ARRAY_SIZE(LSM##_attr_dir_stuff)); \ } \ \ static const struct file_operations proc_##LSM##_attr_dir_ops = { \ .read = generic_read_dir, \ .iterate_shared = proc_##LSM##_attr_dir_iterate, \ .llseek = default_llseek, \ }; \ \ static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \ struct dentry *dentry, unsigned int flags) \ { \ return proc_pident_lookup(dir, dentry, \ LSM##_attr_dir_stuff, \ LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \ } \ \ static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \ .lookup = proc_##LSM##_attr_dir_lookup, \ .getattr = pid_getattr, \ .setattr = proc_setattr, \ } #ifdef CONFIG_SECURITY_SMACK static const struct pid_entry smack_attr_dir_stuff[] = { ATTR(LSM_ID_SMACK, "current", 0666), }; LSM_DIR_OPS(smack); #endif #ifdef CONFIG_SECURITY_APPARMOR static const struct pid_entry apparmor_attr_dir_stuff[] = { ATTR(LSM_ID_APPARMOR, "current", 0666), ATTR(LSM_ID_APPARMOR, "prev", 0444), ATTR(LSM_ID_APPARMOR, "exec", 0666), }; LSM_DIR_OPS(apparmor); #endif static const struct pid_entry attr_dir_stuff[] = { ATTR(LSM_ID_UNDEF, "current", 0666), ATTR(LSM_ID_UNDEF, "prev", 0444), ATTR(LSM_ID_UNDEF, "exec", 0666), ATTR(LSM_ID_UNDEF, "fscreate", 0666), ATTR(LSM_ID_UNDEF, "keycreate", 0666), ATTR(LSM_ID_UNDEF, "sockcreate", 0666), #ifdef CONFIG_SECURITY_SMACK DIR("smack", 0555, proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops), #endif #ifdef CONFIG_SECURITY_APPARMOR DIR("apparmor", 0555, proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops), #endif }; static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) { return proc_pident_readdir(file, ctx, attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); } static const struct file_operations proc_attr_dir_operations = { .read = generic_read_dir, .iterate_shared = proc_attr_dir_readdir, .llseek = generic_file_llseek, }; static struct dentry *proc_attr_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, attr_dir_stuff, attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff)); } static const struct inode_operations proc_attr_dir_inode_operations = { .lookup = proc_attr_dir_lookup, .getattr = pid_getattr, .setattr = proc_setattr, }; #endif #ifdef CONFIG_ELF_CORE static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file_inode(file)); struct mm_struct *mm; char buffer[PROC_NUMBUF]; size_t len; int ret; if (!task) return -ESRCH; ret = 0; mm = get_task_mm(task); if (mm) { len = snprintf(buffer, sizeof(buffer), "%08lx\n", ((mm->flags & MMF_DUMP_FILTER_MASK) >> MMF_DUMP_FILTER_SHIFT)); mmput(mm); ret = simple_read_from_buffer(buf, count, ppos, buffer, len); } put_task_struct(task); return ret; } static ssize_t proc_coredump_filter_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; struct mm_struct *mm; unsigned int val; int ret; int i; unsigned long mask; ret = kstrtouint_from_user(buf, count, 0, &val); if (ret < 0) return ret; ret = -ESRCH; task = get_proc_task(file_inode(file)); if (!task) goto out_no_task; mm = get_task_mm(task); if (!mm) goto out_no_mm; ret = 0; for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { if (val & mask) set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); else clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); } mmput(mm); out_no_mm: put_task_struct(task); out_no_task: if (ret < 0) return ret; return count; } static const struct file_operations proc_coredump_filter_operations = { .read = proc_coredump_filter_read, .write = proc_coredump_filter_write, .llseek = generic_file_llseek, }; #endif #ifdef CONFIG_TASK_IO_ACCOUNTING static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole) { struct task_io_accounting acct; int result; result = down_read_killable(&task->signal->exec_update_lock); if (result) return result; if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { result = -EACCES; goto out_unlock; } if (whole) { struct signal_struct *sig = task->signal; struct task_struct *t; unsigned int seq = 1; unsigned long flags; rcu_read_lock(); do { seq++; /* 2 on the 1st/lockless path, otherwise odd */ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); acct = sig->ioac; __for_each_thread(sig, t) task_io_accounting_add(&acct, &t->ioac); } while (need_seqretry(&sig->stats_lock, seq)); done_seqretry_irqrestore(&sig->stats_lock, seq, flags); rcu_read_unlock(); } else { acct = task->ioac; } seq_printf(m, "rchar: %llu\n" "wchar: %llu\n" "syscr: %llu\n" "syscw: %llu\n" "read_bytes: %llu\n" "write_bytes: %llu\n" "cancelled_write_bytes: %llu\n", (unsigned long long)acct.rchar, (unsigned long long)acct.wchar, (unsigned long long)acct.syscr, (unsigned long long)acct.syscw, (unsigned long long)acct.read_bytes, (unsigned long long)acct.write_bytes, (unsigned long long)acct.cancelled_write_bytes); result = 0; out_unlock: up_read(&task->signal->exec_update_lock); return result; } static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { return do_io_accounting(task, m, 0); } static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { return do_io_accounting(task, m, 1); } #endif /* CONFIG_TASK_IO_ACCOUNTING */ #ifdef CONFIG_USER_NS static int proc_id_map_open(struct inode *inode, struct file *file, const struct seq_operations *seq_ops) { struct user_namespace *ns = NULL; struct task_struct *task; struct seq_file *seq; int ret = -EINVAL; task = get_proc_task(inode); if (task) { rcu_read_lock(); ns = get_user_ns(task_cred_xxx(task, user_ns)); rcu_read_unlock(); put_task_struct(task); } if (!ns) goto err; ret = seq_open(file, seq_ops); if (ret) goto err_put_ns; seq = file->private_data; seq->private = ns; return 0; err_put_ns: put_user_ns(ns); err: return ret; } static int proc_id_map_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; put_user_ns(ns); return seq_release(inode, file); } static int proc_uid_map_open(struct inode *inode, struct file *file) { return proc_id_map_open(inode, file, &proc_uid_seq_operations); } static int proc_gid_map_open(struct inode *inode, struct file *file) { return proc_id_map_open(inode, file, &proc_gid_seq_operations); } static int proc_projid_map_open(struct inode *inode, struct file *file) { return proc_id_map_open(inode, file, &proc_projid_seq_operations); } static const struct file_operations proc_uid_map_operations = { .open = proc_uid_map_open, .write = proc_uid_map_write, .read = seq_read, .llseek = seq_lseek, .release = proc_id_map_release, }; static const struct file_operations proc_gid_map_operations = { .open = proc_gid_map_open, .write = proc_gid_map_write, .read = seq_read, .llseek = seq_lseek, .release = proc_id_map_release, }; static const struct file_operations proc_projid_map_operations = { .open = proc_projid_map_open, .write = proc_projid_map_write, .read = seq_read, .llseek = seq_lseek, .release = proc_id_map_release, }; static int proc_setgroups_open(struct inode *inode, struct file *file) { struct user_namespace *ns = NULL; struct task_struct *task; int ret; ret = -ESRCH; task = get_proc_task(inode); if (task) { rcu_read_lock(); ns = get_user_ns(task_cred_xxx(task, user_ns)); rcu_read_unlock(); put_task_struct(task); } if (!ns) goto err; if (file->f_mode & FMODE_WRITE) { ret = -EACCES; if (!ns_capable(ns, CAP_SYS_ADMIN)) goto err_put_ns; } ret = single_open(file, &proc_setgroups_show, ns); if (ret) goto err_put_ns; return 0; err_put_ns: put_user_ns(ns); err: return ret; } static int proc_setgroups_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; int ret = single_release(inode, file); put_user_ns(ns); return ret; } static const struct file_operations proc_setgroups_operations = { .open = proc_setgroups_open, .write = proc_setgroups_write, .read = seq_read, .llseek = seq_lseek, .release = proc_setgroups_release, }; #endif /* CONFIG_USER_NS */ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { int err = lock_trace(task); if (!err) { seq_printf(m, "%08x\n", task->personality); unlock_trace(task); } return err; } #ifdef CONFIG_LIVEPATCH static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { seq_printf(m, "%d\n", task->patch_state); return 0; } #endif /* CONFIG_LIVEPATCH */ #ifdef CONFIG_KSM static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { struct mm_struct *mm; mm = get_task_mm(task); if (mm) { seq_printf(m, "%lu\n", mm->ksm_merging_pages); mmput(mm); } return 0; } static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { struct mm_struct *mm; int ret = 0; mm = get_task_mm(task); if (mm) { seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items); seq_printf(m, "ksm_zero_pages %ld\n", mm_ksm_zero_pages(mm)); seq_printf(m, "ksm_merging_pages %lu\n", mm->ksm_merging_pages); seq_printf(m, "ksm_process_profit %ld\n", ksm_process_profit(mm)); seq_printf(m, "ksm_merge_any: %s\n", test_bit(MMF_VM_MERGE_ANY, &mm->flags) ? "yes" : "no"); ret = mmap_read_lock_killable(mm); if (ret) { mmput(mm); return ret; } seq_printf(m, "ksm_mergeable: %s\n", ksm_process_mergeable(mm) ? "yes" : "no"); mmap_read_unlock(mm); mmput(mm); } return 0; } #endif /* CONFIG_KSM */ #ifdef CONFIG_STACKLEAK_METRICS static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long prev_depth = THREAD_SIZE - (task->prev_lowest_stack & (THREAD_SIZE - 1)); unsigned long depth = THREAD_SIZE - (task->lowest_stack & (THREAD_SIZE - 1)); seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n", prev_depth, depth); return 0; } #endif /* CONFIG_STACKLEAK_METRICS */ /* * Thread groups */ static const struct file_operations proc_task_operations; static const struct inode_operations proc_task_inode_operations; static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), DIR("fdinfo", S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), #endif REG("environ", S_IRUSR, proc_environ_operations), REG("auxv", S_IRUSR, proc_auxv_operations), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), ONE("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif #ifdef CONFIG_SCHED_AUTOGROUP REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), #endif #ifdef CONFIG_TIME_NS REG("timens_offsets", S_IRUGO|S_IWUSR, proc_timens_offsets_operations), #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK ONE("syscall", S_IRUSR, proc_pid_syscall), #endif REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), ONE("stat", S_IRUGO, proc_tgid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_pid_maps_operations), #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), #endif REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), LNK("cwd", proc_cwd_link), LNK("root", proc_root_link), LNK("exe", proc_exe_link), REG("mounts", S_IRUGO, proc_mounts_operations), REG("mountinfo", S_IRUGO, proc_mountinfo_operations), REG("mountstats", S_IRUSR, proc_mountstats_operations), #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_pid_smaps_operations), REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS ONE("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHED_INFO ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif #ifdef CONFIG_PROC_CPU_RESCTRL ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDIT REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), REG("fail-nth", 0644, proc_fail_nth_operations), #endif #ifdef CONFIG_ELF_CORE REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING ONE("io", S_IRUSR, proc_tgid_io_accounting), #endif #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) REG("timers", S_IRUGO, proc_timers_operations), #endif REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif #ifdef CONFIG_STACKLEAK_METRICS ONE("stack_depth", S_IRUGO, proc_stack_depth), #endif #ifdef CONFIG_PROC_PID_ARCH_STATUS ONE("arch_status", S_IRUGO, proc_pid_arch_status), #endif #ifdef CONFIG_SECCOMP_CACHE_DEBUG ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), #endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) { return proc_pident_readdir(file, ctx, tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); } static const struct file_operations proc_tgid_base_operations = { .read = generic_read_dir, .iterate_shared = proc_tgid_base_readdir, .llseek = generic_file_llseek, }; struct pid *tgid_pidfd_to_pid(const struct file *file) { if (file->f_op != &proc_tgid_base_operations) return ERR_PTR(-EBADF); return proc_pid(file_inode(file)); } static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, tgid_base_stuff, tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff)); } static const struct inode_operations proc_tgid_base_inode_operations = { .lookup = proc_tgid_base_lookup, .getattr = pid_getattr, .setattr = proc_setattr, .permission = proc_pid_permission, }; /** * proc_flush_pid - Remove dcache entries for @pid from the /proc dcache. * @pid: pid that should be flushed. * * This function walks a list of inodes (that belong to any proc * filesystem) that are attached to the pid and flushes them from * the dentry cache. * * It is safe and reasonable to cache /proc entries for a task until * that task exits. After that they just clog up the dcache with * useless entries, possibly causing useful dcache entries to be * flushed instead. This routine is provided to flush those useless * dcache entries when a process is reaped. * * NOTE: This routine is just an optimization so it does not guarantee * that no dcache entries will exist after a process is reaped * it just makes it very unlikely that any will persist. */ void proc_flush_pid(struct pid *pid) { proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock); } static struct dentry *proc_pid_instantiate(struct dentry * dentry, struct task_struct *task, const void *ptr) { struct inode *inode; inode = proc_pid_make_base_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) return ERR_PTR(-ENOENT); inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; set_nlink(inode, nlink_tgid); pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); return d_splice_alias(inode, dentry); } struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags) { struct task_struct *task; unsigned tgid; struct proc_fs_info *fs_info; struct pid_namespace *ns; struct dentry *result = ERR_PTR(-ENOENT); tgid = name_to_int(&dentry->d_name); if (tgid == ~0U) goto out; fs_info = proc_sb_info(dentry->d_sb); ns = fs_info->pid_ns; rcu_read_lock(); task = find_task_by_pid_ns(tgid, ns); if (task) get_task_struct(task); rcu_read_unlock(); if (!task) goto out; /* Limit procfs to only ptraceable tasks */ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) { if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS)) goto out_put_task; } result = proc_pid_instantiate(dentry, task, NULL); out_put_task: put_task_struct(task); out: return result; } /* * Find the first task with tgid >= tgid * */ struct tgid_iter { unsigned int tgid; struct task_struct *task; }; static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) { struct pid *pid; if (iter.task) put_task_struct(iter.task); rcu_read_lock(); retry: iter.task = NULL; pid = find_ge_pid(iter.tgid, ns); if (pid) { iter.tgid = pid_nr_ns(pid, ns); iter.task = pid_task(pid, PIDTYPE_TGID); if (!iter.task) { iter.tgid += 1; goto retry; } get_task_struct(iter.task); } rcu_read_unlock(); return iter; } #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2) /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file *file, struct dir_context *ctx) { struct tgid_iter iter; struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb); struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb); loff_t pos = ctx->pos; if (pos >= PID_MAX_LIMIT + TGID_OFFSET) return 0; if (pos == TGID_OFFSET - 2) { struct inode *inode = d_inode(fs_info->proc_self); if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) return 0; ctx->pos = pos = pos + 1; } if (pos == TGID_OFFSET - 1) { struct inode *inode = d_inode(fs_info->proc_thread_self); if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) return 0; ctx->pos = pos = pos + 1; } iter.tgid = pos - TGID_OFFSET; iter.task = NULL; for (iter = next_tgid(ns, iter); iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { char name[10 + 1]; unsigned int len; cond_resched(); if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE)) continue; len = snprintf(name, sizeof(name), "%u", iter.tgid); ctx->pos = iter.tgid + TGID_OFFSET; if (!proc_fill_cache(file, ctx, name, len, proc_pid_instantiate, iter.task, NULL)) { put_task_struct(iter.task); return 0; } } ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; return 0; } /* * proc_tid_comm_permission is a special permission function exclusively * used for the node /proc/<pid>/task/<tid>/comm. * It bypasses generic permission checks in the case where a task of the same * task group attempts to access the node. * The rationale behind this is that glibc and bionic access this node for * cross thread naming (pthread_set/getname_np(!self)). However, if * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0, * which locks out the cross thread naming implementation. * This function makes sure that the node is always accessible for members of * same thread group. */ static int proc_tid_comm_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { bool is_same_tgroup; struct task_struct *task; task = get_proc_task(inode); if (!task) return -ESRCH; is_same_tgroup = same_thread_group(current, task); put_task_struct(task); if (likely(is_same_tgroup && !(mask & MAY_EXEC))) { /* This file (/proc/<pid>/task/<tid>/comm) can always be * read or written by the members of the corresponding * thread group. */ return 0; } return generic_permission(&nop_mnt_idmap, inode, mask); } static const struct inode_operations proc_tid_comm_inode_operations = { .setattr = proc_setattr, .permission = proc_tid_comm_permission, }; /* * Tasks */ static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), #endif REG("environ", S_IRUSR, proc_environ_operations), REG("auxv", S_IRUSR, proc_auxv_operations), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), ONE("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif NOD("comm", S_IFREG|S_IRUGO|S_IWUSR, &proc_tid_comm_inode_operations, &proc_pid_set_comm_operations, {}), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK ONE("syscall", S_IRUSR, proc_pid_syscall), #endif REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), ONE("stat", S_IRUGO, proc_tid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_pid_maps_operations), #ifdef CONFIG_PROC_CHILDREN REG("children", S_IRUGO, proc_tid_children_operations), #endif #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), #endif REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), LNK("cwd", proc_cwd_link), LNK("root", proc_root_link), LNK("exe", proc_exe_link), REG("mounts", S_IRUGO, proc_mounts_operations), REG("mountinfo", S_IRUGO, proc_mountinfo_operations), #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_pid_smaps_operations), REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS ONE("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHED_INFO ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif #ifdef CONFIG_PROC_CPU_RESCTRL ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDIT REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), REG("fail-nth", 0644, proc_fail_nth_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING ONE("io", S_IRUSR, proc_tid_io_accounting), #endif #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif #ifdef CONFIG_PROC_PID_ARCH_STATUS ONE("arch_status", S_IRUGO, proc_pid_arch_status), #endif #ifdef CONFIG_SECCOMP_CACHE_DEBUG ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), #endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) { return proc_pident_readdir(file, ctx, tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); } static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, tid_base_stuff, tid_base_stuff + ARRAY_SIZE(tid_base_stuff)); } static const struct file_operations proc_tid_base_operations = { .read = generic_read_dir, .iterate_shared = proc_tid_base_readdir, .llseek = generic_file_llseek, }; static const struct inode_operations proc_tid_base_inode_operations = { .lookup = proc_tid_base_lookup, .getattr = pid_getattr, .setattr = proc_setattr, }; static struct dentry *proc_task_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { struct inode *inode; inode = proc_pid_make_base_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) return ERR_PTR(-ENOENT); inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; inode->i_flags |= S_IMMUTABLE; set_nlink(inode, nlink_tid); pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); return d_splice_alias(inode, dentry); } static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct task_struct *task; struct task_struct *leader = get_proc_task(dir); unsigned tid; struct proc_fs_info *fs_info; struct pid_namespace *ns; struct dentry *result = ERR_PTR(-ENOENT); if (!leader) goto out_no_task; tid = name_to_int(&dentry->d_name); if (tid == ~0U) goto out; fs_info = proc_sb_info(dentry->d_sb); ns = fs_info->pid_ns; rcu_read_lock(); task = find_task_by_pid_ns(tid, ns); if (task) get_task_struct(task); rcu_read_unlock(); if (!task) goto out; if (!same_thread_group(leader, task)) goto out_drop_task; result = proc_task_instantiate(dentry, task, NULL); out_drop_task: put_task_struct(task); out: put_task_struct(leader); out_no_task: return result; } /* * Find the first tid of a thread group to return to user space. * * Usually this is just the thread group leader, but if the users * buffer was too small or there was a seek into the middle of the * directory we have more work todo. * * In the case of a short read we start with find_task_by_pid. * * In the case of a seek we start with the leader and walk nr * threads past it. */ static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, struct pid_namespace *ns) { struct task_struct *pos, *task; unsigned long nr = f_pos; if (nr != f_pos) /* 32bit overflow? */ return NULL; rcu_read_lock(); task = pid_task(pid, PIDTYPE_PID); if (!task) goto fail; /* Attempt to start with the tid of a thread */ if (tid && nr) { pos = find_task_by_pid_ns(tid, ns); if (pos && same_thread_group(pos, task)) goto found; } /* If nr exceeds the number of threads there is nothing todo */ if (nr >= get_nr_threads(task)) goto fail; /* If we haven't found our starting place yet start * with the leader and walk nr threads forward. */ for_each_thread(task, pos) { if (!nr--) goto found; } fail: pos = NULL; goto out; found: get_task_struct(pos); out: rcu_read_unlock(); return pos; } /* * Find the next thread in the thread list. * Return NULL if there is an error or no next thread. * * The reference to the input task_struct is released. */ static struct task_struct *next_tid(struct task_struct *start) { struct task_struct *pos = NULL; rcu_read_lock(); if (pid_alive(start)) { pos = __next_thread(start); if (pos) get_task_struct(pos); } rcu_read_unlock(); put_task_struct(start); return pos; } /* for the /proc/TGID/task/ directories */ static int proc_task_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct task_struct *task; struct pid_namespace *ns; int tid; if (proc_inode_is_dead(inode)) return -ENOENT; if (!dir_emit_dots(file, ctx)) return 0; /* We cache the tgid value that the last readdir call couldn't * return and lseek resets it to 0. */ ns = proc_pid_ns(inode->i_sb); tid = (int)(intptr_t)file->private_data; file->private_data = NULL; for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; task = next_tid(task), ctx->pos++) { char name[10 + 1]; unsigned int len; tid = task_pid_nr_ns(task, ns); if (!tid) continue; /* The task has just exited. */ len = snprintf(name, sizeof(name), "%u", tid); if (!proc_fill_cache(file, ctx, name, len, proc_task_instantiate, task, NULL)) { /* returning this tgid failed, save it as the first * pid for the next readir call */ file->private_data = (void *)(intptr_t)tid; put_task_struct(task); break; } } return 0; } static int proc_task_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct task_struct *p = get_proc_task(inode); generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); if (p) { stat->nlink += get_nr_threads(p); put_task_struct(p); } return 0; } /* * proc_task_readdir() set @file->private_data to a positive integer * value, so casting that to u64 is safe. generic_llseek_cookie() will * set @cookie to 0, so casting to an int is safe. The WARN_ON_ONCE() is * here to catch any unexpected change in behavior either in * proc_task_readdir() or generic_llseek_cookie(). */ static loff_t proc_dir_llseek(struct file *file, loff_t offset, int whence) { u64 cookie = (u64)(intptr_t)file->private_data; loff_t off; off = generic_llseek_cookie(file, offset, whence, &cookie); WARN_ON_ONCE(cookie > INT_MAX); file->private_data = (void *)(intptr_t)cookie; /* serialized by f_pos_lock */ return off; } static const struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, .getattr = proc_task_getattr, .setattr = proc_setattr, .permission = proc_pid_permission, }; static const struct file_operations proc_task_operations = { .read = generic_read_dir, .iterate_shared = proc_task_readdir, .llseek = proc_dir_llseek, }; void __init set_proc_pid_nlink(void) { nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); }
16 26 4 4 26 21 31 31 31 5 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org) */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <net/netrom.h> static void nr_heartbeat_expiry(struct timer_list *); static void nr_t1timer_expiry(struct timer_list *); static void nr_t2timer_expiry(struct timer_list *); static void nr_t4timer_expiry(struct timer_list *); static void nr_idletimer_expiry(struct timer_list *); void nr_init_timers(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); timer_setup(&nr->t1timer, nr_t1timer_expiry, 0); timer_setup(&nr->t2timer, nr_t2timer_expiry, 0); timer_setup(&nr->t4timer, nr_t4timer_expiry, 0); timer_setup(&nr->idletimer, nr_idletimer_expiry, 0); /* initialized by sock_init_data */ sk->sk_timer.function = nr_heartbeat_expiry; } void nr_start_t1timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1); } void nr_start_t2timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2); } void nr_start_t4timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4); } void nr_start_idletimer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); if (nr->idle > 0) sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle); } void nr_start_heartbeat(struct sock *sk) { sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ); } void nr_stop_t1timer(struct sock *sk) { sk_stop_timer(sk, &nr_sk(sk)->t1timer); } void nr_stop_t2timer(struct sock *sk) { sk_stop_timer(sk, &nr_sk(sk)->t2timer); } void nr_stop_t4timer(struct sock *sk) { sk_stop_timer(sk, &nr_sk(sk)->t4timer); } void nr_stop_idletimer(struct sock *sk) { sk_stop_timer(sk, &nr_sk(sk)->idletimer); } void nr_stop_heartbeat(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); } int nr_t1timer_running(struct sock *sk) { return timer_pending(&nr_sk(sk)->t1timer); } static void nr_heartbeat_expiry(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); struct nr_sock *nr = nr_sk(sk); bh_lock_sock(sk); switch (nr->state) { case NR_STATE_0: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { if (sk->sk_state == TCP_LISTEN) sock_hold(sk); bh_unlock_sock(sk); nr_destroy_socket(sk); goto out; } break; case NR_STATE_3: /* * Check for the state of the receive buffer. */ if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) && (nr->condition & NR_COND_OWN_RX_BUSY)) { nr->condition &= ~NR_COND_OWN_RX_BUSY; nr->condition &= ~NR_COND_ACK_PENDING; nr->vl = nr->vr; nr_write_internal(sk, NR_INFOACK); break; } break; } nr_start_heartbeat(sk); bh_unlock_sock(sk); out: sock_put(sk); } static void nr_t2timer_expiry(struct timer_list *t) { struct nr_sock *nr = from_timer(nr, t, t2timer); struct sock *sk = &nr->sock; bh_lock_sock(sk); if (nr->condition & NR_COND_ACK_PENDING) { nr->condition &= ~NR_COND_ACK_PENDING; nr_enquiry_response(sk); } bh_unlock_sock(sk); sock_put(sk); } static void nr_t4timer_expiry(struct timer_list *t) { struct nr_sock *nr = from_timer(nr, t, t4timer); struct sock *sk = &nr->sock; bh_lock_sock(sk); nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY; bh_unlock_sock(sk); sock_put(sk); } static void nr_idletimer_expiry(struct timer_list *t) { struct nr_sock *nr = from_timer(nr, t, idletimer); struct sock *sk = &nr->sock; bh_lock_sock(sk); nr_clear_queues(sk); nr->n2count = 0; nr_write_internal(sk, NR_DISCREQ); nr->state = NR_STATE_2; nr_start_t1timer(sk); nr_stop_t2timer(sk); nr_stop_t4timer(sk); sk->sk_state = TCP_CLOSE; sk->sk_err = 0; sk->sk_shutdown |= SEND_SHUTDOWN; if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); } bh_unlock_sock(sk); sock_put(sk); } static void nr_t1timer_expiry(struct timer_list *t) { struct nr_sock *nr = from_timer(nr, t, t1timer); struct sock *sk = &nr->sock; bh_lock_sock(sk); switch (nr->state) { case NR_STATE_1: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); goto out; } else { nr->n2count++; nr_write_internal(sk, NR_CONNREQ); } break; case NR_STATE_2: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); goto out; } else { nr->n2count++; nr_write_internal(sk, NR_DISCREQ); } break; case NR_STATE_3: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); goto out; } else { nr->n2count++; nr_requeue_frames(sk); } break; } nr_start_t1timer(sk); out: bh_unlock_sock(sk); sock_put(sk); }
20 96 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2005,2006,2007,2008 IBM Corporation * * Authors: * Reiner Sailer <sailer@watson.ibm.com> * Mimi Zohar <zohar@us.ibm.com> * * File: ima.h * internal Integrity Measurement Architecture (IMA) definitions */ #ifndef __LINUX_IMA_H #define __LINUX_IMA_H #include <linux/types.h> #include <linux/crypto.h> #include <linux/fs.h> #include <linux/security.h> #include <linux/hash.h> #include <linux/tpm.h> #include <linux/audit.h> #include <crypto/hash_info.h> #include "../integrity.h" enum ima_show_type { IMA_SHOW_BINARY, IMA_SHOW_BINARY_NO_FIELD_LEN, IMA_SHOW_BINARY_OLD_STRING_FMT, IMA_SHOW_ASCII }; enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8, TPM_PCR10 = 10 }; /* digest size for IMA, fits SHA1 or MD5 */ #define IMA_DIGEST_SIZE SHA1_DIGEST_SIZE #define IMA_EVENT_NAME_LEN_MAX 255 #define IMA_HASH_BITS 10 #define IMA_MEASURE_HTABLE_SIZE (1 << IMA_HASH_BITS) #define IMA_TEMPLATE_FIELD_ID_MAX_LEN 16 #define IMA_TEMPLATE_NUM_FIELDS_MAX 15 #define IMA_TEMPLATE_IMA_NAME "ima" #define IMA_TEMPLATE_IMA_FMT "d|n" #define NR_BANKS(chip) ((chip != NULL) ? chip->nr_allocated_banks : 0) /* current content of the policy */ extern int ima_policy_flag; /* bitset of digests algorithms allowed in the setxattr hook */ extern atomic_t ima_setxattr_allowed_hash_algorithms; /* IMA hash algorithm description */ struct ima_algo_desc { struct crypto_shash *tfm; enum hash_algo algo; }; /* set during initialization */ extern int ima_hash_algo __ro_after_init; extern int ima_sha1_idx __ro_after_init; extern int ima_hash_algo_idx __ro_after_init; extern int ima_extra_slots __ro_after_init; extern struct ima_algo_desc *ima_algo_array __ro_after_init; extern int ima_appraise; extern struct tpm_chip *ima_tpm_chip; extern const char boot_aggregate_name[]; /* IMA event related data */ struct ima_event_data { struct ima_iint_cache *iint; struct file *file; const unsigned char *filename; struct evm_ima_xattr_data *xattr_value; int xattr_len; const struct modsig *modsig; const char *violation; const void *buf; int buf_len; }; /* IMA template field data definition */ struct ima_field_data { u8 *data; u32 len; }; /* IMA template field definition */ struct ima_template_field { const char field_id[IMA_TEMPLATE_FIELD_ID_MAX_LEN]; int (*field_init)(struct ima_event_data *event_data, struct ima_field_data *field_data); void (*field_show)(struct seq_file *m, enum ima_show_type show, struct ima_field_data *field_data); }; /* IMA template descriptor definition */ struct ima_template_desc { struct list_head list; char *name; char *fmt; int num_fields; const struct ima_template_field **fields; }; struct ima_template_entry { int pcr; struct tpm_digest *digests; struct ima_template_desc *template_desc; /* template descriptor */ u32 template_data_len; struct ima_field_data template_data[]; /* template related data */ }; struct ima_queue_entry { struct hlist_node hnext; /* place in hash collision list */ struct list_head later; /* place in ima_measurements list */ struct ima_template_entry *entry; }; extern struct list_head ima_measurements; /* list of all measurements */ /* Some details preceding the binary serialized measurement list */ struct ima_kexec_hdr { u16 version; u16 _reserved0; u32 _reserved1; u64 buffer_size; u64 count; }; /* IMA iint action cache flags */ #define IMA_MEASURE 0x00000001 #define IMA_MEASURED 0x00000002 #define IMA_APPRAISE 0x00000004 #define IMA_APPRAISED 0x00000008 /*#define IMA_COLLECT 0x00000010 do not use this flag */ #define IMA_COLLECTED 0x00000020 #define IMA_AUDIT 0x00000040 #define IMA_AUDITED 0x00000080 #define IMA_HASH 0x00000100 #define IMA_HASHED 0x00000200 /* IMA iint policy rule cache flags */ #define IMA_NONACTION_FLAGS 0xff000000 #define IMA_DIGSIG_REQUIRED 0x01000000 #define IMA_PERMIT_DIRECTIO 0x02000000 #define IMA_NEW_FILE 0x04000000 #define IMA_FAIL_UNVERIFIABLE_SIGS 0x10000000 #define IMA_MODSIG_ALLOWED 0x20000000 #define IMA_CHECK_BLACKLIST 0x40000000 #define IMA_VERITY_REQUIRED 0x80000000 /* Exclude non-action flags which are not rule-specific. */ #define IMA_NONACTION_RULE_FLAGS (IMA_NONACTION_FLAGS & ~IMA_NEW_FILE) #define IMA_DO_MASK (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \ IMA_HASH | IMA_APPRAISE_SUBMASK) #define IMA_DONE_MASK (IMA_MEASURED | IMA_APPRAISED | IMA_AUDITED | \ IMA_HASHED | IMA_COLLECTED | \ IMA_APPRAISED_SUBMASK) /* IMA iint subaction appraise cache flags */ #define IMA_FILE_APPRAISE 0x00001000 #define IMA_FILE_APPRAISED 0x00002000 #define IMA_MMAP_APPRAISE 0x00004000 #define IMA_MMAP_APPRAISED 0x00008000 #define IMA_BPRM_APPRAISE 0x00010000 #define IMA_BPRM_APPRAISED 0x00020000 #define IMA_READ_APPRAISE 0x00040000 #define IMA_READ_APPRAISED 0x00080000 #define IMA_CREDS_APPRAISE 0x00100000 #define IMA_CREDS_APPRAISED 0x00200000 #define IMA_APPRAISE_SUBMASK (IMA_FILE_APPRAISE | IMA_MMAP_APPRAISE | \ IMA_BPRM_APPRAISE | IMA_READ_APPRAISE | \ IMA_CREDS_APPRAISE) #define IMA_APPRAISED_SUBMASK (IMA_FILE_APPRAISED | IMA_MMAP_APPRAISED | \ IMA_BPRM_APPRAISED | IMA_READ_APPRAISED | \ IMA_CREDS_APPRAISED) /* IMA iint cache atomic_flags */ #define IMA_CHANGE_XATTR 0 #define IMA_UPDATE_XATTR 1 #define IMA_CHANGE_ATTR 2 #define IMA_DIGSIG 3 #define IMA_MUST_MEASURE 4 /* IMA integrity metadata associated with an inode */ struct ima_iint_cache { struct mutex mutex; /* protects: version, flags, digest */ struct integrity_inode_attributes real_inode; unsigned long flags; unsigned long measured_pcrs; unsigned long atomic_flags; enum integrity_status ima_file_status:4; enum integrity_status ima_mmap_status:4; enum integrity_status ima_bprm_status:4; enum integrity_status ima_read_status:4; enum integrity_status ima_creds_status:4; struct ima_digest_data *ima_hash; }; extern struct lsm_blob_sizes ima_blob_sizes; static inline struct ima_iint_cache * ima_inode_get_iint(const struct inode *inode) { struct ima_iint_cache **iint_sec; if (unlikely(!inode->i_security)) return NULL; iint_sec = inode->i_security + ima_blob_sizes.lbs_inode; return *iint_sec; } static inline void ima_inode_set_iint(const struct inode *inode, struct ima_iint_cache *iint) { struct ima_iint_cache **iint_sec; if (unlikely(!inode->i_security)) return; iint_sec = inode->i_security + ima_blob_sizes.lbs_inode; *iint_sec = iint; } struct ima_iint_cache *ima_iint_find(struct inode *inode); struct ima_iint_cache *ima_inode_get(struct inode *inode); void ima_inode_free_rcu(void *inode_security); void __init ima_iintcache_init(void); extern const int read_idmap[]; #ifdef CONFIG_HAVE_IMA_KEXEC void ima_load_kexec_buffer(void); #else static inline void ima_load_kexec_buffer(void) {} #endif /* CONFIG_HAVE_IMA_KEXEC */ #ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS void ima_post_key_create_or_update(struct key *keyring, struct key *key, const void *payload, size_t plen, unsigned long flags, bool create); #endif /* * The default binary_runtime_measurements list format is defined as the * platform native format. The canonical format is defined as little-endian. */ extern bool ima_canonical_fmt; /* Internal IMA function definitions */ int ima_init(void); int ima_fs_init(void); int ima_add_template_entry(struct ima_template_entry *entry, int violation, const char *op, struct inode *inode, const unsigned char *filename); int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash); int ima_calc_buffer_hash(const void *buf, loff_t len, struct ima_digest_data *hash); int ima_calc_field_array_hash(struct ima_field_data *field_data, struct ima_template_entry *entry); int ima_calc_boot_aggregate(struct ima_digest_data *hash); void ima_add_violation(struct file *file, const unsigned char *filename, struct ima_iint_cache *iint, const char *op, const char *cause); int ima_init_crypto(void); void ima_putc(struct seq_file *m, void *data, int datalen); void ima_print_digest(struct seq_file *m, u8 *digest, u32 size); int template_desc_init_fields(const char *template_fmt, const struct ima_template_field ***fields, int *num_fields); struct ima_template_desc *ima_template_desc_current(void); struct ima_template_desc *ima_template_desc_buf(void); struct ima_template_desc *lookup_template_desc(const char *name); bool ima_template_has_modsig(const struct ima_template_desc *ima_template); int ima_restore_measurement_entry(struct ima_template_entry *entry); int ima_restore_measurement_list(loff_t bufsize, void *buf); int ima_measurements_show(struct seq_file *m, void *v); unsigned long ima_get_binary_runtime_size(void); int ima_init_template(void); void ima_init_template_list(void); int __init ima_init_digests(void); void __init ima_init_reboot_notifier(void); int ima_lsm_policy_change(struct notifier_block *nb, unsigned long event, void *lsm_data); /* * used to protect h_table and sha_table */ extern spinlock_t ima_queue_lock; struct ima_h_table { atomic_long_t len; /* number of stored measurements in the list */ atomic_long_t violations; struct hlist_head queue[IMA_MEASURE_HTABLE_SIZE]; }; extern struct ima_h_table ima_htable; static inline unsigned int ima_hash_key(u8 *digest) { /* there is no point in taking a hash of part of a digest */ return (digest[0] | digest[1] << 8) % IMA_MEASURE_HTABLE_SIZE; } #define __ima_hooks(hook) \ hook(NONE, none) \ hook(FILE_CHECK, file) \ hook(MMAP_CHECK, mmap) \ hook(MMAP_CHECK_REQPROT, mmap_reqprot) \ hook(BPRM_CHECK, bprm) \ hook(CREDS_CHECK, creds) \ hook(POST_SETATTR, post_setattr) \ hook(MODULE_CHECK, module) \ hook(FIRMWARE_CHECK, firmware) \ hook(KEXEC_KERNEL_CHECK, kexec_kernel) \ hook(KEXEC_INITRAMFS_CHECK, kexec_initramfs) \ hook(POLICY_CHECK, policy) \ hook(KEXEC_CMDLINE, kexec_cmdline) \ hook(KEY_CHECK, key) \ hook(CRITICAL_DATA, critical_data) \ hook(SETXATTR_CHECK, setxattr_check) \ hook(MAX_CHECK, none) #define __ima_hook_enumify(ENUM, str) ENUM, #define __ima_stringify(arg) (#arg) #define __ima_hook_measuring_stringify(ENUM, str) \ (__ima_stringify(measuring_ ##str)), enum ima_hooks { __ima_hooks(__ima_hook_enumify) }; static const char * const ima_hooks_measure_str[] = { __ima_hooks(__ima_hook_measuring_stringify) }; static inline const char *func_measure_str(enum ima_hooks func) { if (func >= MAX_CHECK) return ima_hooks_measure_str[NONE]; return ima_hooks_measure_str[func]; } extern const char *const func_tokens[]; struct modsig; #ifdef CONFIG_IMA_QUEUE_EARLY_BOOT_KEYS /* * To track keys that need to be measured. */ struct ima_key_entry { struct list_head list; void *payload; size_t payload_len; char *keyring_name; }; void ima_init_key_queue(void); bool ima_should_queue_key(void); bool ima_queue_key(struct key *keyring, const void *payload, size_t payload_len); void ima_process_queued_keys(void); #else static inline void ima_init_key_queue(void) {} static inline bool ima_should_queue_key(void) { return false; } static inline bool ima_queue_key(struct key *keyring, const void *payload, size_t payload_len) { return false; } static inline void ima_process_queued_keys(void) {} #endif /* CONFIG_IMA_QUEUE_EARLY_BOOT_KEYS */ /* LIM API function definitions */ int ima_get_action(struct mnt_idmap *idmap, struct inode *inode, const struct cred *cred, struct lsm_prop *prop, int mask, enum ima_hooks func, int *pcr, struct ima_template_desc **template_desc, const char *func_data, unsigned int *allowed_algos); int ima_must_measure(struct inode *inode, int mask, enum ima_hooks func); int ima_collect_measurement(struct ima_iint_cache *iint, struct file *file, void *buf, loff_t size, enum hash_algo algo, struct modsig *modsig); void ima_store_measurement(struct ima_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, int xattr_len, const struct modsig *modsig, int pcr, struct ima_template_desc *template_desc); int process_buffer_measurement(struct mnt_idmap *idmap, struct inode *inode, const void *buf, int size, const char *eventname, enum ima_hooks func, int pcr, const char *func_data, bool buf_hash, u8 *digest, size_t digest_len); void ima_audit_measurement(struct ima_iint_cache *iint, const unsigned char *filename); int ima_alloc_init_template(struct ima_event_data *event_data, struct ima_template_entry **entry, struct ima_template_desc *template_desc); int ima_store_template(struct ima_template_entry *entry, int violation, struct inode *inode, const unsigned char *filename, int pcr); void ima_free_template_entry(struct ima_template_entry *entry); const char *ima_d_path(const struct path *path, char **pathbuf, char *filename); /* IMA policy related functions */ int ima_match_policy(struct mnt_idmap *idmap, struct inode *inode, const struct cred *cred, struct lsm_prop *prop, enum ima_hooks func, int mask, int flags, int *pcr, struct ima_template_desc **template_desc, const char *func_data, unsigned int *allowed_algos); void ima_init_policy(void); void ima_update_policy(void); void ima_update_policy_flags(void); ssize_t ima_parse_add_rule(char *); void ima_delete_rules(void); int ima_check_policy(void); void *ima_policy_start(struct seq_file *m, loff_t *pos); void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos); void ima_policy_stop(struct seq_file *m, void *v); int ima_policy_show(struct seq_file *m, void *v); /* Appraise integrity measurements */ #define IMA_APPRAISE_ENFORCE 0x01 #define IMA_APPRAISE_FIX 0x02 #define IMA_APPRAISE_LOG 0x04 #define IMA_APPRAISE_MODULES 0x08 #define IMA_APPRAISE_FIRMWARE 0x10 #define IMA_APPRAISE_POLICY 0x20 #define IMA_APPRAISE_KEXEC 0x40 #ifdef CONFIG_IMA_APPRAISE int ima_check_blacklist(struct ima_iint_cache *iint, const struct modsig *modsig, int pcr); int ima_appraise_measurement(enum ima_hooks func, struct ima_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, int xattr_len, const struct modsig *modsig); int ima_must_appraise(struct mnt_idmap *idmap, struct inode *inode, int mask, enum ima_hooks func); void ima_update_xattr(struct ima_iint_cache *iint, struct file *file); enum integrity_status ima_get_cache_status(struct ima_iint_cache *iint, enum ima_hooks func); enum hash_algo ima_get_hash_algo(const struct evm_ima_xattr_data *xattr_value, int xattr_len); int ima_read_xattr(struct dentry *dentry, struct evm_ima_xattr_data **xattr_value, int xattr_len); void __init init_ima_appraise_lsm(const struct lsm_id *lsmid); #else static inline int ima_check_blacklist(struct ima_iint_cache *iint, const struct modsig *modsig, int pcr) { return 0; } static inline int ima_appraise_measurement(enum ima_hooks func, struct ima_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, int xattr_len, const struct modsig *modsig) { return INTEGRITY_UNKNOWN; } static inline int ima_must_appraise(struct mnt_idmap *idmap, struct inode *inode, int mask, enum ima_hooks func) { return 0; } static inline void ima_update_xattr(struct ima_iint_cache *iint, struct file *file) { } static inline enum integrity_status ima_get_cache_status(struct ima_iint_cache *iint, enum ima_hooks func) { return INTEGRITY_UNKNOWN; } static inline enum hash_algo ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len) { return ima_hash_algo; } static inline int ima_read_xattr(struct dentry *dentry, struct evm_ima_xattr_data **xattr_value, int xattr_len) { return 0; } static inline void __init init_ima_appraise_lsm(const struct lsm_id *lsmid) { } #endif /* CONFIG_IMA_APPRAISE */ #ifdef CONFIG_IMA_APPRAISE_MODSIG int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len, struct modsig **modsig); void ima_collect_modsig(struct modsig *modsig, const void *buf, loff_t size); int ima_get_modsig_digest(const struct modsig *modsig, enum hash_algo *algo, const u8 **digest, u32 *digest_size); int ima_get_raw_modsig(const struct modsig *modsig, const void **data, u32 *data_len); void ima_free_modsig(struct modsig *modsig); #else static inline int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len, struct modsig **modsig) { return -EOPNOTSUPP; } static inline void ima_collect_modsig(struct modsig *modsig, const void *buf, loff_t size) { } static inline int ima_get_modsig_digest(const struct modsig *modsig, enum hash_algo *algo, const u8 **digest, u32 *digest_size) { return -EOPNOTSUPP; } static inline int ima_get_raw_modsig(const struct modsig *modsig, const void **data, u32 *data_len) { return -EOPNOTSUPP; } static inline void ima_free_modsig(struct modsig *modsig) { } #endif /* CONFIG_IMA_APPRAISE_MODSIG */ /* LSM based policy rules require audit */ #ifdef CONFIG_IMA_LSM_RULES #define ima_filter_rule_init security_audit_rule_init #define ima_filter_rule_free security_audit_rule_free #define ima_filter_rule_match security_audit_rule_match #else static inline int ima_filter_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule, gfp_t gfp) { return -EINVAL; } static inline void ima_filter_rule_free(void *lsmrule) { } static inline int ima_filter_rule_match(struct lsm_prop *prop, u32 field, u32 op, void *lsmrule) { return -EINVAL; } #endif /* CONFIG_IMA_LSM_RULES */ #ifdef CONFIG_IMA_READ_POLICY #define POLICY_FILE_FLAGS (S_IWUSR | S_IRUSR) #else #define POLICY_FILE_FLAGS S_IWUSR #endif /* CONFIG_IMA_READ_POLICY */ #endif /* __LINUX_IMA_H */
18 17 47 2 2 2 57 2 2 28 26 57 39 3 33 36 4 14 18 22 40 30 1 2 13 15 15 12 20 8 11 17 14 7 2 3 15 12 20 8 1 3 14 17 4 14 3 14 11 7 17 17 31 1 47 47 27 9 20 27 13 18 2 4 4 6 9 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_tbf.c Token Bucket Filter queue. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs - * original idea by Martin Devera */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <net/gso.h> #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> #include <net/pkt_sched.h> /* Simple Token Bucket Filter. ======================================= SOURCE. ------- None. Description. ------------ A data flow obeys TBF with rate R and depth B, if for any time interval t_i...t_f the number of transmitted bits does not exceed B + R*(t_f-t_i). Packetized version of this definition: The sequence of packets of sizes s_i served at moments t_i obeys TBF, if for any i<=k: s_i+....+s_k <= B + R*(t_k - t_i) Algorithm. ---------- Let N(t_i) be B/R initially and N(t) grow continuously with time as: N(t+delta) = min{B/R, N(t) + delta} If the first packet in queue has length S, it may be transmitted only at the time t_* when S/R <= N(t_*), and in this case N(t) jumps: N(t_* + 0) = N(t_* - 0) - S/R. Actually, QoS requires two TBF to be applied to a data stream. One of them controls steady state burst size, another one with rate P (peak rate) and depth M (equal to link MTU) limits bursts at a smaller time scale. It is easy to see that P>R, and B>M. If P is infinity, this double TBF is equivalent to a single one. When TBF works in reshaping mode, latency is estimated as: lat = max ((L-B)/R, (L-M)/P) NOTES. ------ If TBF throttles, it starts a watchdog timer, which will wake it up when it is ready to transmit. Note that the minimal timer resolution is 1/HZ. If no new packets arrive during this period, or if the device is not awaken by EOI for some previous packet, TBF can stop its activity for 1/HZ. This means, that with depth B, the maximal rate is R_crit = B*HZ F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes. Note that the peak rate TBF is much more tough: with MTU 1500 P_crit = 150Kbytes/sec. So, if you need greater peak rates, use alpha with HZ=1000 :-) With classful TBF, limit is just kept for backwards compatibility. It is passed to the default bfifo qdisc - if the inner qdisc is changed the limit is not effective anymore. */ struct tbf_sched_data { /* Parameters */ u32 limit; /* Maximal length of backlog: bytes */ u32 max_size; s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ s64 mtu; struct psched_ratecfg rate; struct psched_ratecfg peak; /* Variables */ s64 tokens; /* Current number of B tokens */ s64 ptokens; /* Current number of P tokens */ s64 t_c; /* Time check-point */ struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */ struct qdisc_watchdog watchdog; /* Watchdog timer */ }; /* Time to Length, convert time in ns to length in bytes * to determinate how many bytes can be sent in given time. */ static u64 psched_ns_t2l(const struct psched_ratecfg *r, u64 time_in_ns) { /* The formula is : * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC */ u64 len = time_in_ns * r->rate_bytes_ps; do_div(len, NSEC_PER_SEC); if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) { do_div(len, 53); len = len * 48; } if (len > r->overhead) len -= r->overhead; else len = 0; return len; } static void tbf_offload_change(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_tbf_qopt_offload qopt; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; qopt.command = TC_TBF_REPLACE; qopt.handle = sch->handle; qopt.parent = sch->parent; qopt.replace_params.rate = q->rate; qopt.replace_params.max_size = q->max_size; qopt.replace_params.qstats = &sch->qstats; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt); } static void tbf_offload_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct tc_tbf_qopt_offload qopt; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; qopt.command = TC_TBF_DESTROY; qopt.handle = sch->handle; qopt.parent = sch->parent; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt); } static int tbf_offload_dump(struct Qdisc *sch) { struct tc_tbf_qopt_offload qopt; qopt.command = TC_TBF_STATS; qopt.handle = sch->handle; qopt.parent = sch->parent; qopt.stats.bstats = &sch->bstats; qopt.stats.qstats = &sch->qstats; return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt); } static void tbf_offload_graft(struct Qdisc *sch, struct Qdisc *new, struct Qdisc *old, struct netlink_ext_ack *extack) { struct tc_tbf_qopt_offload graft_offload = { .handle = sch->handle, .parent = sch->parent, .child_handle = new->handle, .command = TC_TBF_GRAFT, }; qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old, TC_SETUP_QDISC_TBF, &graft_offload, extack); } /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *segs, *nskb; netdev_features_t features = netif_skb_features(skb); unsigned int len = 0, prev_len = qdisc_pkt_len(skb), seg_len; int ret, nb; segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) return qdisc_drop(skb, sch, to_free); nb = 0; skb_list_walk_safe(segs, segs, nskb) { skb_mark_not_on_list(segs); seg_len = segs->len; qdisc_skb_cb(segs)->pkt_len = seg_len; ret = qdisc_enqueue(segs, q->qdisc, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); } else { nb++; len += seg_len; } } sch->q.qlen += nb; sch->qstats.backlog += len; if (nb > 0) { qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); consume_skb(skb); return NET_XMIT_SUCCESS; } kfree_skb(skb); return NET_XMIT_DROP; } static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); unsigned int len = qdisc_pkt_len(skb); int ret; if (qdisc_pkt_len(skb) > q->max_size) { if (skb_is_gso(skb) && skb_gso_validate_mac_len(skb, q->max_size)) return tbf_segment(skb, sch, to_free); return qdisc_drop(skb, sch, to_free); } ret = qdisc_enqueue(skb, q->qdisc, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); return ret; } sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } static bool tbf_peak_present(const struct tbf_sched_data *q) { return q->peak.rate_bytes_ps; } static struct sk_buff *tbf_dequeue(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; skb = q->qdisc->ops->peek(q->qdisc); if (skb) { s64 now; s64 toks; s64 ptoks = 0; unsigned int len = qdisc_pkt_len(skb); now = ktime_get_ns(); toks = min_t(s64, now - q->t_c, q->buffer); if (tbf_peak_present(q)) { ptoks = toks + q->ptokens; if (ptoks > q->mtu) ptoks = q->mtu; ptoks -= (s64) psched_l2t_ns(&q->peak, len); } toks += q->tokens; if (toks > q->buffer) toks = q->buffer; toks -= (s64) psched_l2t_ns(&q->rate, len); if ((toks|ptoks) >= 0) { skb = qdisc_dequeue_peeked(q->qdisc); if (unlikely(!skb)) return NULL; q->t_c = now; q->tokens = toks; q->ptokens = ptoks; qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; qdisc_bstats_update(sch, skb); return skb; } qdisc_watchdog_schedule_ns(&q->watchdog, now + max_t(long, -toks, -ptoks)); /* Maybe we have a shorter packet in the queue, which can be sent now. It sounds cool, but, however, this is wrong in principle. We MUST NOT reorder packets under these circumstances. Really, if we split the flow into independent subflows, it would be a very good solution. This is the main idea of all FQ algorithms (cf. CSZ, HPFQ, HFSC) */ qdisc_qstats_overlimit(sch); } return NULL; } static void tbf_reset(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); q->t_c = ktime_get_ns(); q->tokens = q->buffer; q->ptokens = q->mtu; qdisc_watchdog_cancel(&q->watchdog); } static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) }, [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_TBF_RATE64] = { .type = NLA_U64 }, [TCA_TBF_PRATE64] = { .type = NLA_U64 }, [TCA_TBF_BURST] = { .type = NLA_U32 }, [TCA_TBF_PBURST] = { .type = NLA_U32 }, }; static int tbf_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { int err; struct tbf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_TBF_MAX + 1]; struct tc_tbf_qopt *qopt; struct Qdisc *child = NULL; struct Qdisc *old = NULL; struct psched_ratecfg rate; struct psched_ratecfg peak; u64 max_size; s64 buffer, mtu; u64 rate64 = 0, prate64 = 0; err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy, NULL); if (err < 0) return err; err = -EINVAL; if (tb[TCA_TBF_PARMS] == NULL) goto done; qopt = nla_data(tb[TCA_TBF_PARMS]); if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) qdisc_put_rtab(qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB], NULL)); if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB], NULL)); buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); if (tb[TCA_TBF_RATE64]) rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); psched_ratecfg_precompute(&rate, &qopt->rate, rate64); if (tb[TCA_TBF_BURST]) { max_size = nla_get_u32(tb[TCA_TBF_BURST]); buffer = psched_l2t_ns(&rate, max_size); } else { max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); } if (qopt->peakrate.rate) { if (tb[TCA_TBF_PRATE64]) prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64); if (peak.rate_bytes_ps <= rate.rate_bytes_ps) { pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n", peak.rate_bytes_ps, rate.rate_bytes_ps); err = -EINVAL; goto done; } if (tb[TCA_TBF_PBURST]) { u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]); max_size = min_t(u32, max_size, pburst); mtu = psched_l2t_ns(&peak, pburst); } else { max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); } } else { memset(&peak, 0, sizeof(peak)); } if (max_size < psched_mtu(qdisc_dev(sch))) pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n", max_size, qdisc_dev(sch)->name, psched_mtu(qdisc_dev(sch))); if (!max_size) { err = -EINVAL; goto done; } if (q->qdisc != &noop_qdisc) { err = fifo_set_limit(q->qdisc, qopt->limit); if (err) goto done; } else if (qopt->limit > 0) { child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit, extack); if (IS_ERR(child)) { err = PTR_ERR(child); goto done; } /* child is fifo, no need to check for noop_qdisc */ qdisc_hash_add(child, true); } sch_tree_lock(sch); if (child) { qdisc_tree_flush_backlog(q->qdisc); old = q->qdisc; q->qdisc = child; } q->limit = qopt->limit; if (tb[TCA_TBF_PBURST]) q->mtu = mtu; else q->mtu = PSCHED_TICKS2NS(qopt->mtu); q->max_size = max_size; if (tb[TCA_TBF_BURST]) q->buffer = buffer; else q->buffer = PSCHED_TICKS2NS(qopt->buffer); q->tokens = q->buffer; q->ptokens = q->mtu; memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg)); memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); sch_tree_unlock(sch); qdisc_put(old); err = 0; tbf_offload_change(sch); done: return err; } static int tbf_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_init(&q->watchdog, sch); q->qdisc = &noop_qdisc; if (!opt) return -EINVAL; q->t_c = ktime_get_ns(); return tbf_change(sch, opt, extack); } static void tbf_destroy(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); tbf_offload_destroy(sch); qdisc_put(q->qdisc); } static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) { struct tbf_sched_data *q = qdisc_priv(sch); struct nlattr *nest; struct tc_tbf_qopt opt; int err; err = tbf_offload_dump(sch); if (err) return err; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; opt.limit = q->limit; psched_ratecfg_getrate(&opt.rate, &q->rate); if (tbf_peak_present(q)) psched_ratecfg_getrate(&opt.peakrate, &q->peak); else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); opt.mtu = PSCHED_NS2TICKS(q->mtu); opt.buffer = PSCHED_NS2TICKS(q->buffer); if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if (q->rate.rate_bytes_ps >= (1ULL << 32) && nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps, TCA_TBF_PAD)) goto nla_put_failure; if (tbf_peak_present(q) && q->peak.rate_bytes_ps >= (1ULL << 32) && nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps, TCA_TBF_PAD)) goto nla_put_failure; return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static int tbf_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { struct tbf_sched_data *q = qdisc_priv(sch); tcm->tcm_handle |= TC_H_MIN(1); tcm->tcm_info = q->qdisc->handle; return 0; } static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct tbf_sched_data *q = qdisc_priv(sch); if (new == NULL) new = &noop_qdisc; *old = qdisc_replace(sch, new, &q->qdisc); tbf_offload_graft(sch, new, *old, extack); return 0; } static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg) { struct tbf_sched_data *q = qdisc_priv(sch); return q->qdisc; } static unsigned long tbf_find(struct Qdisc *sch, u32 classid) { return 1; } static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { tc_qdisc_stats_dump(sch, 1, walker); } } static const struct Qdisc_class_ops tbf_class_ops = { .graft = tbf_graft, .leaf = tbf_leaf, .find = tbf_find, .walk = tbf_walk, .dump = tbf_dump_class, }; static struct Qdisc_ops tbf_qdisc_ops __read_mostly = { .next = NULL, .cl_ops = &tbf_class_ops, .id = "tbf", .priv_size = sizeof(struct tbf_sched_data), .enqueue = tbf_enqueue, .dequeue = tbf_dequeue, .peek = qdisc_peek_dequeued, .init = tbf_init, .reset = tbf_reset, .destroy = tbf_destroy, .change = tbf_change, .dump = tbf_dump, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("tbf"); static int __init tbf_module_init(void) { return register_qdisc(&tbf_qdisc_ops); } static void __exit tbf_module_exit(void) { unregister_qdisc(&tbf_qdisc_ops); } module_init(tbf_module_init) module_exit(tbf_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Token Bucket Filter qdisc");
8 2 2 2 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6/ip6t_NPT.h> #include <linux/netfilter/x_tables.h> static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) { struct ip6t_npt_tginfo *npt = par->targinfo; struct in6_addr pfx; __wsum src_sum, dst_sum; if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) return -EINVAL; /* Ensure that LSB of prefix is zero */ ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len); if (!ipv6_addr_equal(&pfx, &npt->src_pfx.in6)) return -EINVAL; ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len); if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6)) return -EINVAL; src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0); dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0); npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum)); return 0; } static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, struct in6_addr *addr) { unsigned int pfx_len; unsigned int i, idx; __be32 mask; __sum16 sum; pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len); for (i = 0; i < pfx_len; i += 32) { if (pfx_len - i >= 32) mask = 0; else mask = htonl((1 << (i - pfx_len + 32)) - 1); idx = i / 32; addr->s6_addr32[idx] &= mask; addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx]; } if (pfx_len <= 48) idx = 3; else { for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) { if ((__force __sum16)addr->s6_addr16[idx] != CSUM_MANGLED_0) break; } if (idx == ARRAY_SIZE(addr->s6_addr16)) return false; } sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]), csum_unfold(npt->adjustment))); if (sum == CSUM_MANGLED_0) sum = 0; *(__force __sum16 *)&addr->s6_addr16[idx] = sum; return true; } static struct ipv6hdr *icmpv6_bounced_ipv6hdr(struct sk_buff *skb, struct ipv6hdr *_bounced_hdr) { if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) return NULL; if (!icmpv6_is_err(icmp6_hdr(skb)->icmp6_type)) return NULL; return skb_header_pointer(skb, skb_transport_offset(skb) + sizeof(struct icmp6hdr), sizeof(struct ipv6hdr), _bounced_hdr); } static unsigned int ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_npt_tginfo *npt = par->targinfo; struct ipv6hdr _bounced_hdr; struct ipv6hdr *bounced_hdr; struct in6_addr bounced_pfx; if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) { icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, saddr)); return NF_DROP; } /* rewrite dst addr of bounced packet which was sent to dst range */ bounced_hdr = icmpv6_bounced_ipv6hdr(skb, &_bounced_hdr); if (bounced_hdr) { ipv6_addr_prefix(&bounced_pfx, &bounced_hdr->daddr, npt->src_pfx_len); if (ipv6_addr_cmp(&bounced_pfx, &npt->src_pfx.in6) == 0) ip6t_npt_map_pfx(npt, &bounced_hdr->daddr); } return XT_CONTINUE; } static unsigned int ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_npt_tginfo *npt = par->targinfo; struct ipv6hdr _bounced_hdr; struct ipv6hdr *bounced_hdr; struct in6_addr bounced_pfx; if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) { icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, daddr)); return NF_DROP; } /* rewrite src addr of bounced packet which was sent from dst range */ bounced_hdr = icmpv6_bounced_ipv6hdr(skb, &_bounced_hdr); if (bounced_hdr) { ipv6_addr_prefix(&bounced_pfx, &bounced_hdr->saddr, npt->src_pfx_len); if (ipv6_addr_cmp(&bounced_pfx, &npt->src_pfx.in6) == 0) ip6t_npt_map_pfx(npt, &bounced_hdr->saddr); } return XT_CONTINUE; } static struct xt_target ip6t_npt_target_reg[] __read_mostly = { { .name = "SNPT", .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_POST_ROUTING), .me = THIS_MODULE, }, { .name = "DNPT", .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), .me = THIS_MODULE, }, }; static int __init ip6t_npt_init(void) { return xt_register_targets(ip6t_npt_target_reg, ARRAY_SIZE(ip6t_npt_target_reg)); } static void __exit ip6t_npt_exit(void) { xt_unregister_targets(ip6t_npt_target_reg, ARRAY_SIZE(ip6t_npt_target_reg)); } module_init(ip6t_npt_init); module_exit(ip6t_npt_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS("ip6t_SNPT"); MODULE_ALIAS("ip6t_DNPT");
20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 // SPDX-License-Identifier: GPL-2.0-only /* * linux/net/sunrpc/timer.c * * Estimate RPC request round trip time. * * Based on packet round-trip and variance estimator algorithms described * in appendix A of "Congestion Avoidance and Control" by Van Jacobson * and Michael J. Karels (ACM Computer Communication Review; Proceedings * of the Sigcomm '88 Symposium in Stanford, CA, August, 1988). * * This RTT estimator is used only for RPC over datagram protocols. * * Copyright (C) 2002 Trond Myklebust <trond.myklebust@fys.uio.no> */ #include <asm/param.h> #include <linux/types.h> #include <linux/unistd.h> #include <linux/module.h> #include <linux/sunrpc/clnt.h> #define RPC_RTO_MAX (60*HZ) #define RPC_RTO_INIT (HZ/5) #define RPC_RTO_MIN (HZ/10) /** * rpc_init_rtt - Initialize an RPC RTT estimator context * @rt: context to initialize * @timeo: initial timeout value, in jiffies * */ void rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) { unsigned long init = 0; unsigned int i; rt->timeo = timeo; if (timeo > RPC_RTO_INIT) init = (timeo - RPC_RTO_INIT) << 3; for (i = 0; i < 5; i++) { rt->srtt[i] = init; rt->sdrtt[i] = RPC_RTO_INIT; rt->ntimeouts[i] = 0; } } EXPORT_SYMBOL_GPL(rpc_init_rtt); /** * rpc_update_rtt - Update an RPC RTT estimator context * @rt: context to update * @timer: timer array index (request type) * @m: recent actual RTT, in jiffies * * NB: When computing the smoothed RTT and standard deviation, * be careful not to produce negative intermediate results. */ void rpc_update_rtt(struct rpc_rtt *rt, unsigned int timer, long m) { long *srtt, *sdrtt; if (timer-- == 0) return; /* jiffies wrapped; ignore this one */ if (m < 0) return; if (m == 0) m = 1L; srtt = (long *)&rt->srtt[timer]; m -= *srtt >> 3; *srtt += m; if (m < 0) m = -m; sdrtt = (long *)&rt->sdrtt[timer]; m -= *sdrtt >> 2; *sdrtt += m; /* Set lower bound on the variance */ if (*sdrtt < RPC_RTO_MIN) *sdrtt = RPC_RTO_MIN; } EXPORT_SYMBOL_GPL(rpc_update_rtt); /** * rpc_calc_rto - Provide an estimated timeout value * @rt: context to use for calculation * @timer: timer array index (request type) * * Estimate RTO for an NFS RPC sent via an unreliable datagram. Use * the mean and mean deviation of RTT for the appropriate type of RPC * for frequently issued RPCs, and a fixed default for the others. * * The justification for doing "other" this way is that these RPCs * happen so infrequently that timer estimation would probably be * stale. Also, since many of these RPCs are non-idempotent, a * conservative timeout is desired. * * getattr, lookup, * read, write, commit - A+4D * other - timeo */ unsigned long rpc_calc_rto(struct rpc_rtt *rt, unsigned int timer) { unsigned long res; if (timer-- == 0) return rt->timeo; res = ((rt->srtt[timer] + 7) >> 3) + rt->sdrtt[timer]; if (res > RPC_RTO_MAX) res = RPC_RTO_MAX; return res; } EXPORT_SYMBOL_GPL(rpc_calc_rto);
18 14 3 9 9 7 7 2 9 9 9 4 5 1 1 1 1 6 6 3 3 6 4 4 3 6 2 1 2 1 3 3 3 1 13 1 2 1 1 1 27 2 2 24 21 2 15 3 4 3 10 10 3 1 4 5 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 /* net/tipc/udp_media.c: IP bearer support for TIPC * * Copyright (c) 2015, Ericsson AB * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <linux/socket.h> #include <linux/ip.h> #include <linux/udp.h> #include <linux/inet.h> #include <linux/inetdevice.h> #include <linux/igmp.h> #include <linux/kernel.h> #include <linux/workqueue.h> #include <linux/list.h> #include <net/sock.h> #include <net/ip.h> #include <net/udp_tunnel.h> #include <net/ipv6_stubs.h> #include <linux/tipc_netlink.h> #include "core.h" #include "addr.h" #include "net.h" #include "bearer.h" #include "netlink.h" #include "msg.h" #include "udp_media.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 #define UDP_MIN_HEADROOM 48 /** * struct udp_media_addr - IP/UDP addressing information * * This is the bearer level originating address used in neighbor discovery * messages, and all fields should be in network byte order * * @proto: Ethernet protocol in use * @port: port being used * @ipv4: IPv4 address of neighbor * @ipv6: IPv6 address of neighbor */ struct udp_media_addr { __be16 proto; __be16 port; union { struct in_addr ipv4; struct in6_addr ipv6; }; }; /* struct udp_replicast - container for UDP remote addresses */ struct udp_replicast { struct udp_media_addr addr; struct dst_cache dst_cache; struct rcu_head rcu; struct list_head list; }; /** * struct udp_bearer - ip/udp bearer data structure * @bearer: associated generic tipc bearer * @ubsock: bearer associated socket * @ifindex: local address scope * @work: used to schedule deferred work on a bearer * @rcast: associated udp_replicast container */ struct udp_bearer { struct tipc_bearer __rcu *bearer; struct socket *ubsock; u32 ifindex; struct work_struct work; struct udp_replicast rcast; }; static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr) { if (ntohs(addr->proto) == ETH_P_IP) return ipv4_is_multicast(addr->ipv4.s_addr); #if IS_ENABLED(CONFIG_IPV6) else return ipv6_addr_is_multicast(&addr->ipv6); #endif return 0; } /* udp_media_addr_set - convert a ip/udp address to a TIPC media address */ static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, struct udp_media_addr *ua) { memset(addr, 0, sizeof(struct tipc_media_addr)); addr->media_id = TIPC_MEDIA_TYPE_UDP; memcpy(addr->value, ua, sizeof(struct udp_media_addr)); if (tipc_udp_is_mcast_addr(ua)) addr->broadcast = TIPC_BROADCAST_SUPPORT; } /* tipc_udp_addr2str - convert ip/udp address to string */ static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size) { struct udp_media_addr *ua = (struct udp_media_addr *)&a->value; if (ntohs(ua->proto) == ETH_P_IP) snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->port)); else if (ntohs(ua->proto) == ETH_P_IPV6) snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->port)); else { pr_err("Invalid UDP media address\n"); return 1; } return 0; } /* tipc_udp_msg2addr - extract an ip/udp address from a TIPC ndisc message */ static int tipc_udp_msg2addr(struct tipc_bearer *b, struct tipc_media_addr *a, char *msg) { struct udp_media_addr *ua; ua = (struct udp_media_addr *) (msg + TIPC_MEDIA_ADDR_OFFSET); if (msg[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_UDP) return -EINVAL; tipc_udp_media_addr_set(a, ua); return 0; } /* tipc_udp_addr2msg - write an ip/udp address to a TIPC ndisc message */ static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a) { memset(msg, 0, TIPC_MEDIA_INFO_SIZE); msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_UDP; memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, a->value, sizeof(struct udp_media_addr)); return 0; } /* tipc_send_msg - enqueue a send request */ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, struct udp_bearer *ub, struct udp_media_addr *src, struct udp_media_addr *dst, struct dst_cache *cache) { struct dst_entry *ndst; int ttl, err = 0; local_bh_disable(); ndst = dst_cache_get(cache); if (dst->proto == htons(ETH_P_IP)) { struct rtable *rt = dst_rtable(ndst); if (!rt) { struct flowi4 fl = { .daddr = dst->ipv4.s_addr, .saddr = src->ipv4.s_addr, .flowi4_mark = skb->mark, .flowi4_proto = IPPROTO_UDP }; rt = ip_route_output_key(net, &fl); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto tx_error; } dst_cache_set_ip4(cache, &rt->dst, fl.saddr); } ttl = ip4_dst_hoplimit(&rt->dst); udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, dst->ipv4.s_addr, 0, ttl, 0, src->port, dst->port, false, true); #if IS_ENABLED(CONFIG_IPV6) } else { if (!ndst) { struct flowi6 fl6 = { .flowi6_oif = ub->ifindex, .daddr = dst->ipv6, .saddr = src->ipv6, .flowi6_proto = IPPROTO_UDP }; ndst = ipv6_stub->ipv6_dst_lookup_flow(net, ub->ubsock->sk, &fl6, NULL); if (IS_ERR(ndst)) { err = PTR_ERR(ndst); goto tx_error; } dst_cache_set_ip6(cache, ndst, &fl6.saddr); } ttl = ip6_dst_hoplimit(ndst); err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL, &src->ipv6, &dst->ipv6, 0, ttl, 0, src->port, dst->port, false); #endif } local_bh_enable(); return err; tx_error: local_bh_enable(); kfree_skb(skb); return err; } static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, struct tipc_bearer *b, struct tipc_media_addr *addr) { struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; struct udp_media_addr *dst = (struct udp_media_addr *)&addr->value; struct udp_replicast *rcast; struct udp_bearer *ub; int err = 0; if (skb_headroom(skb) < UDP_MIN_HEADROOM) { err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); if (err) goto out; } skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); ub = rcu_dereference(b->media_ptr); if (!ub) { err = -ENODEV; goto out; } if (addr->broadcast != TIPC_REPLICAST_SUPPORT) return tipc_udp_xmit(net, skb, ub, src, dst, &ub->rcast.dst_cache); /* Replicast, send an skb to each configured IP address */ list_for_each_entry_rcu(rcast, &ub->rcast.list, list) { struct sk_buff *_skb; _skb = pskb_copy(skb, GFP_ATOMIC); if (!_skb) { err = -ENOMEM; goto out; } err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr, &rcast->dst_cache); if (err) goto out; } err = 0; out: kfree_skb(skb); return err; } static bool tipc_udp_is_known_peer(struct tipc_bearer *b, struct udp_media_addr *addr) { struct udp_replicast *rcast, *tmp; struct udp_bearer *ub; ub = rcu_dereference_rtnl(b->media_ptr); if (!ub) { pr_err_ratelimited("UDP bearer instance not found\n"); return false; } list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { if (!memcmp(&rcast->addr, addr, sizeof(struct udp_media_addr))) return true; } return false; } static int tipc_udp_rcast_add(struct tipc_bearer *b, struct udp_media_addr *addr) { struct udp_replicast *rcast; struct udp_bearer *ub; ub = rcu_dereference_rtnl(b->media_ptr); if (!ub) return -ENODEV; rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC); if (!rcast) return -ENOMEM; if (dst_cache_init(&rcast->dst_cache, GFP_ATOMIC)) { kfree(rcast); return -ENOMEM; } memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr)); if (ntohs(addr->proto) == ETH_P_IP) pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4); #if IS_ENABLED(CONFIG_IPV6) else if (ntohs(addr->proto) == ETH_P_IPV6) pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6); #endif b->bcast_addr.broadcast = TIPC_REPLICAST_SUPPORT; list_add_rcu(&rcast->list, &ub->rcast.list); return 0; } static int tipc_udp_rcast_disc(struct tipc_bearer *b, struct sk_buff *skb) { struct udp_media_addr src = {0}; struct udp_media_addr *dst; dst = (struct udp_media_addr *)&b->bcast_addr.value; if (tipc_udp_is_mcast_addr(dst)) return 0; src.port = udp_hdr(skb)->source; if (ip_hdr(skb)->version == 4) { struct iphdr *iphdr = ip_hdr(skb); src.proto = htons(ETH_P_IP); src.ipv4.s_addr = iphdr->saddr; if (ipv4_is_multicast(iphdr->daddr)) return 0; #if IS_ENABLED(CONFIG_IPV6) } else if (ip_hdr(skb)->version == 6) { struct ipv6hdr *iphdr = ipv6_hdr(skb); src.proto = htons(ETH_P_IPV6); src.ipv6 = iphdr->saddr; if (ipv6_addr_is_multicast(&iphdr->daddr)) return 0; #endif } else { return 0; } if (likely(tipc_udp_is_known_peer(b, &src))) return 0; return tipc_udp_rcast_add(b, &src); } /* tipc_udp_recv - read data from bearer socket */ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { struct udp_bearer *ub; struct tipc_bearer *b; struct tipc_msg *hdr; int err; ub = rcu_dereference_sk_user_data(sk); if (!ub) { pr_err_ratelimited("Failed to get UDP bearer reference"); goto out; } skb_pull(skb, sizeof(struct udphdr)); hdr = buf_msg(skb); b = rcu_dereference(ub->bearer); if (!b) goto out; if (b && test_bit(0, &b->up)) { TIPC_SKB_CB(skb)->flags = 0; tipc_rcv(sock_net(sk), skb, b); return 0; } if (unlikely(msg_user(hdr) == LINK_CONFIG)) { err = tipc_udp_rcast_disc(b, skb); if (err) goto out; } out: kfree_skb(skb); return 0; } static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) { int err = 0; struct ip_mreqn mreqn; struct sock *sk = ub->ubsock->sk; if (ntohs(remote->proto) == ETH_P_IP) { mreqn.imr_multiaddr = remote->ipv4; mreqn.imr_ifindex = ub->ifindex; err = ip_mc_join_group(sk, &mreqn); #if IS_ENABLED(CONFIG_IPV6) } else { lock_sock(sk); err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); release_sock(sk); #endif } return err; } static int __tipc_nl_add_udp_addr(struct sk_buff *skb, struct udp_media_addr *addr, int nla_t) { if (ntohs(addr->proto) == ETH_P_IP) { struct sockaddr_in ip4; memset(&ip4, 0, sizeof(ip4)); ip4.sin_family = AF_INET; ip4.sin_port = addr->port; ip4.sin_addr.s_addr = addr->ipv4.s_addr; if (nla_put(skb, nla_t, sizeof(ip4), &ip4)) return -EMSGSIZE; #if IS_ENABLED(CONFIG_IPV6) } else if (ntohs(addr->proto) == ETH_P_IPV6) { struct sockaddr_in6 ip6; memset(&ip6, 0, sizeof(ip6)); ip6.sin6_family = AF_INET6; ip6.sin6_port = addr->port; memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr)); if (nla_put(skb, nla_t, sizeof(ip6), &ip6)) return -EMSGSIZE; #endif } return 0; } int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) { u32 bid = cb->args[0]; u32 skip_cnt = cb->args[1]; u32 portid = NETLINK_CB(cb->skb).portid; struct udp_replicast *rcast, *tmp; struct tipc_bearer *b; struct udp_bearer *ub; void *hdr; int err; int i; if (!bid && !skip_cnt) { struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs; struct net *net = sock_net(skb->sk); struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1]; char *bname; if (!attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(battrs, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, NULL); if (err) return err; if (!battrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; bname = nla_data(battrs[TIPC_NLA_BEARER_NAME]); rtnl_lock(); b = tipc_bearer_find(net, bname); if (!b) { rtnl_unlock(); return -EINVAL; } bid = b->identity; } else { struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); rtnl_lock(); b = rtnl_dereference(tn->bearer_list[bid]); if (!b) { rtnl_unlock(); return -EINVAL; } } ub = rtnl_dereference(b->media_ptr); if (!ub) { rtnl_unlock(); return -EINVAL; } i = 0; list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { if (i < skip_cnt) goto count; hdr = genlmsg_put(skb, portid, cb->nlh->nlmsg_seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_BEARER_GET); if (!hdr) goto done; err = __tipc_nl_add_udp_addr(skb, &rcast->addr, TIPC_NLA_UDP_REMOTE); if (err) { genlmsg_cancel(skb, hdr); goto done; } genlmsg_end(skb, hdr); count: i++; } done: rtnl_unlock(); cb->args[0] = bid; cb->args[1] = i; return skb->len; } int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b) { struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; struct udp_media_addr *dst; struct udp_bearer *ub; struct nlattr *nest; ub = rtnl_dereference(b->media_ptr); if (!ub) return -ENODEV; nest = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_UDP_OPTS); if (!nest) goto msg_full; if (__tipc_nl_add_udp_addr(msg->skb, src, TIPC_NLA_UDP_LOCAL)) goto msg_full; dst = (struct udp_media_addr *)&b->bcast_addr.value; if (__tipc_nl_add_udp_addr(msg->skb, dst, TIPC_NLA_UDP_REMOTE)) goto msg_full; if (!list_empty(&ub->rcast.list)) { if (nla_put_flag(msg->skb, TIPC_NLA_UDP_MULTI_REMOTEIP)) goto msg_full; } nla_nest_end(msg->skb, nest); return 0; msg_full: nla_nest_cancel(msg->skb, nest); return -EMSGSIZE; } /** * tipc_parse_udp_addr - build udp media address from netlink data * @nla: netlink attribute containing sockaddr storage aligned address * @addr: tipc media address to fill with address, port and protocol type * @scope_id: IPv6 scope id pointer, not NULL indicates it's required */ static int tipc_parse_udp_addr(struct nlattr *nla, struct udp_media_addr *addr, u32 *scope_id) { struct sockaddr_storage sa; nla_memcpy(&sa, nla, sizeof(sa)); if (sa.ss_family == AF_INET) { struct sockaddr_in *ip4 = (struct sockaddr_in *)&sa; addr->proto = htons(ETH_P_IP); addr->port = ip4->sin_port; addr->ipv4.s_addr = ip4->sin_addr.s_addr; return 0; #if IS_ENABLED(CONFIG_IPV6) } else if (sa.ss_family == AF_INET6) { struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)&sa; addr->proto = htons(ETH_P_IPV6); addr->port = ip6->sin6_port; memcpy(&addr->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); /* Scope ID is only interesting for local addresses */ if (scope_id) { int atype; atype = ipv6_addr_type(&ip6->sin6_addr); if (__ipv6_addr_needs_scope_id(atype) && !ip6->sin6_scope_id) { return -EINVAL; } *scope_id = ip6->sin6_scope_id ? : 0; } return 0; #endif } return -EADDRNOTAVAIL; } int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) { int err; struct udp_media_addr addr = {0}; struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; struct udp_media_addr *dst; if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy, NULL)) return -EINVAL; if (!opts[TIPC_NLA_UDP_REMOTE]) return -EINVAL; err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL); if (err) return err; dst = (struct udp_media_addr *)&b->bcast_addr.value; if (tipc_udp_is_mcast_addr(dst)) { pr_err("Can't add remote ip to TIPC UDP multicast bearer\n"); return -EINVAL; } if (tipc_udp_is_known_peer(b, &addr)) return 0; return tipc_udp_rcast_add(b, &addr); } /** * tipc_udp_enable - callback to create a new udp bearer instance * @net: network namespace * @b: pointer to generic tipc_bearer * @attrs: netlink bearer configuration * * validate the bearer parameters and initialize the udp bearer * rtnl_lock should be held */ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, struct nlattr *attrs[]) { int err = -EINVAL; struct udp_bearer *ub; struct udp_media_addr remote = {0}; struct udp_media_addr local = {0}; struct udp_port_cfg udp_conf = {0}; struct udp_tunnel_sock_cfg tuncfg = {NULL}; struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; u8 node_id[NODE_ID_LEN] = {0,}; struct net_device *dev; int rmcast = 0; ub = kzalloc(sizeof(*ub), GFP_ATOMIC); if (!ub) return -ENOMEM; INIT_LIST_HEAD(&ub->rcast.list); if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) goto err; if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX, attrs[TIPC_NLA_BEARER_UDP_OPTS], tipc_nl_udp_policy, NULL)) goto err; if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) { pr_err("Invalid UDP bearer configuration"); err = -EINVAL; goto err; } err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_LOCAL], &local, &ub->ifindex); if (err) goto err; err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL); if (err) goto err; if (remote.proto != local.proto) { err = -EINVAL; goto err; } /* Checking remote ip address */ rmcast = tipc_udp_is_mcast_addr(&remote); /* Autoconfigure own node identity if needed */ if (!tipc_own_id(net)) { memcpy(node_id, local.ipv6.in6_u.u6_addr8, 16); tipc_net_init(net, node_id, 0); } if (!tipc_own_id(net)) { pr_warn("Failed to set node id, please configure manually\n"); err = -EINVAL; goto err; } b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP; b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; rcu_assign_pointer(b->media_ptr, ub); rcu_assign_pointer(ub->bearer, b); tipc_udp_media_addr_set(&b->addr, &local); if (local.proto == htons(ETH_P_IP)) { dev = __ip_dev_find(net, local.ipv4.s_addr, false); if (!dev) { err = -ENODEV; goto err; } udp_conf.family = AF_INET; /* Switch to use ANY to receive packets from group */ if (rmcast) udp_conf.local_ip.s_addr = htonl(INADDR_ANY); else udp_conf.local_ip.s_addr = local.ipv4.s_addr; udp_conf.use_udp_checksums = false; ub->ifindex = dev->ifindex; b->encap_hlen = sizeof(struct iphdr) + sizeof(struct udphdr); b->mtu = b->media->mtu; #if IS_ENABLED(CONFIG_IPV6) } else if (local.proto == htons(ETH_P_IPV6)) { dev = ub->ifindex ? __dev_get_by_index(net, ub->ifindex) : NULL; dev = ipv6_dev_find(net, &local.ipv6, dev); if (!dev) { err = -ENODEV; goto err; } udp_conf.family = AF_INET6; udp_conf.use_udp6_tx_checksums = true; udp_conf.use_udp6_rx_checksums = true; if (rmcast) udp_conf.local_ip6 = in6addr_any; else udp_conf.local_ip6 = local.ipv6; ub->ifindex = dev->ifindex; b->encap_hlen = sizeof(struct ipv6hdr) + sizeof(struct udphdr); b->mtu = 1280; #endif } else { err = -EAFNOSUPPORT; goto err; } udp_conf.local_udp_port = local.port; err = udp_sock_create(net, &udp_conf, &ub->ubsock); if (err) goto err; tuncfg.sk_user_data = ub; tuncfg.encap_type = 1; tuncfg.encap_rcv = tipc_udp_recv; tuncfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); err = dst_cache_init(&ub->rcast.dst_cache, GFP_ATOMIC); if (err) goto free; /* * The bcast media address port is used for all peers and the ip * is used if it's a multicast address. */ memcpy(&b->bcast_addr.value, &remote, sizeof(remote)); if (rmcast) err = enable_mcast(ub, &remote); else err = tipc_udp_rcast_add(b, &remote); if (err) goto free; return 0; free: dst_cache_destroy(&ub->rcast.dst_cache); udp_tunnel_sock_release(ub->ubsock); err: kfree(ub); return err; } /* cleanup_bearer - break the socket/bearer association */ static void cleanup_bearer(struct work_struct *work) { struct udp_bearer *ub = container_of(work, struct udp_bearer, work); struct udp_replicast *rcast, *tmp; struct tipc_net *tn; list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { dst_cache_destroy(&rcast->dst_cache); list_del_rcu(&rcast->list); kfree_rcu(rcast, rcu); } tn = tipc_net(sock_net(ub->ubsock->sk)); dst_cache_destroy(&ub->rcast.dst_cache); udp_tunnel_sock_release(ub->ubsock); /* Note: could use a call_rcu() to avoid another synchronize_net() */ synchronize_net(); atomic_dec(&tn->wq_count); kfree(ub); } /* tipc_udp_disable - detach bearer from socket */ static void tipc_udp_disable(struct tipc_bearer *b) { struct udp_bearer *ub; ub = rtnl_dereference(b->media_ptr); if (!ub) { pr_err("UDP bearer instance not found\n"); return; } sock_set_flag(ub->ubsock->sk, SOCK_DEAD); RCU_INIT_POINTER(ub->bearer, NULL); /* sock_release need to be done outside of rtnl lock */ atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count); INIT_WORK(&ub->work, cleanup_bearer); schedule_work(&ub->work); } struct tipc_media udp_media_info = { .send_msg = tipc_udp_send_msg, .enable_media = tipc_udp_enable, .disable_media = tipc_udp_disable, .addr2str = tipc_udp_addr2str, .addr2msg = tipc_udp_addr2msg, .msg2addr = tipc_udp_msg2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .min_win = TIPC_DEF_LINK_WIN, .max_win = TIPC_DEF_LINK_WIN, .mtu = TIPC_DEF_LINK_UDP_MTU, .type_id = TIPC_MEDIA_TYPE_UDP, .hwaddr_len = 0, .name = "udp" };
28 49 21122 19606 19607 2731 21119 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 /* SPDX-License-Identifier: GPL-2.0-only */ /* * AppArmor security module * * This file contains AppArmor label definitions * * Copyright 2017 Canonical Ltd. */ #ifndef __AA_LABEL_H #define __AA_LABEL_H #include <linux/atomic.h> #include <linux/audit.h> #include <linux/rbtree.h> #include <linux/rcupdate.h> #include "apparmor.h" #include "lib.h" struct aa_ns; #define LOCAL_VEC_ENTRIES 8 #define DEFINE_VEC(T, V) \ struct aa_ ## T *(_ ## V ## _localtmp)[LOCAL_VEC_ENTRIES]; \ struct aa_ ## T **(V) #define vec_setup(T, V, N, GFP) \ ({ \ if ((N) <= LOCAL_VEC_ENTRIES) { \ typeof(N) i; \ (V) = (_ ## V ## _localtmp); \ for (i = 0; i < (N); i++) \ (V)[i] = NULL; \ } else \ (V) = kzalloc(sizeof(struct aa_ ## T *) * (N), (GFP)); \ (V) ? 0 : -ENOMEM; \ }) #define vec_cleanup(T, V, N) \ do { \ int i; \ for (i = 0; i < (N); i++) { \ if (!IS_ERR_OR_NULL((V)[i])) \ aa_put_ ## T((V)[i]); \ } \ if ((V) != _ ## V ## _localtmp) \ kfree(V); \ } while (0) #define vec_last(VEC, SIZE) ((VEC)[(SIZE) - 1]) #define vec_ns(VEC, SIZE) (vec_last((VEC), (SIZE))->ns) #define vec_labelset(VEC, SIZE) (&vec_ns((VEC), (SIZE))->labels) #define cleanup_domain_vec(V, L) cleanup_label_vec((V), (L)->size) struct aa_profile; #define VEC_FLAG_TERMINATE 1 int aa_vec_unique(struct aa_profile **vec, int n, int flags); struct aa_label *aa_vec_find_or_create_label(struct aa_profile **vec, int len, gfp_t gfp); #define aa_sort_and_merge_vec(N, V) \ aa_sort_and_merge_profiles((N), (struct aa_profile **)(V)) /* struct aa_labelset - set of labels for a namespace * * Labels are reference counted; aa_labelset does not contribute to label * reference counts. Once a label's last refcount is put it is removed from * the set. */ struct aa_labelset { rwlock_t lock; struct rb_root root; }; #define __labelset_for_each(LS, N) \ for ((N) = rb_first(&(LS)->root); (N); (N) = rb_next(N)) enum label_flags { FLAG_HAT = 1, /* profile is a hat */ FLAG_UNCONFINED = 2, /* label unconfined only if all */ FLAG_NULL = 4, /* profile is null learning profile */ FLAG_IX_ON_NAME_ERROR = 8, /* fallback to ix on name lookup fail */ FLAG_IMMUTIBLE = 0x10, /* don't allow changes/replacement */ FLAG_USER_DEFINED = 0x20, /* user based profile - lower privs */ FLAG_NO_LIST_REF = 0x40, /* list doesn't keep profile ref */ FLAG_NS_COUNT = 0x80, /* carries NS ref count */ FLAG_IN_TREE = 0x100, /* label is in tree */ FLAG_PROFILE = 0x200, /* label is a profile */ FLAG_EXPLICIT = 0x400, /* explicit static label */ FLAG_STALE = 0x800, /* replaced/removed */ FLAG_RENAMED = 0x1000, /* label has renaming in it */ FLAG_REVOKED = 0x2000, /* label has revocation in it */ FLAG_DEBUG1 = 0x4000, FLAG_DEBUG2 = 0x8000, /* These flags must correspond with PATH_flags */ /* TODO: add new path flags */ }; struct aa_label; struct aa_proxy { struct kref count; struct aa_label __rcu *label; }; struct label_it { int i, j; }; /* struct aa_label - lazy labeling struct * @count: ref count of active users * @node: rbtree position * @rcu: rcu callback struct * @proxy: is set to the label that replaced this label * @hname: text representation of the label (MAYBE_NULL) * @flags: stale and other flags - values may change under label set lock * @secid: secid that references this label * @size: number of entries in @ent[] * @ent: set of profiles for label, actual size determined by @size */ struct aa_label { struct kref count; struct rb_node node; struct rcu_head rcu; struct aa_proxy *proxy; __counted char *hname; long flags; u32 secid; int size; struct aa_profile *vec[]; }; #define last_error(E, FN) \ do { \ int __subE = (FN); \ if (__subE) \ (E) = __subE; \ } while (0) #define label_isprofile(X) ((X)->flags & FLAG_PROFILE) #define label_unconfined(X) ((X)->flags & FLAG_UNCONFINED) #define unconfined(X) label_unconfined(X) #define label_is_stale(X) ((X)->flags & FLAG_STALE) #define __label_make_stale(X) ((X)->flags |= FLAG_STALE) #define labels_ns(X) (vec_ns(&((X)->vec[0]), (X)->size)) #define labels_set(X) (&labels_ns(X)->labels) #define labels_view(X) labels_ns(X) #define labels_profile(X) ((X)->vec[(X)->size - 1]) int aa_label_next_confined(struct aa_label *l, int i); /* for each profile in a label */ #define label_for_each(I, L, P) \ for ((I).i = 0; ((P) = (L)->vec[(I).i]); ++((I).i)) /* assumes break/goto ended label_for_each */ #define label_for_each_cont(I, L, P) \ for (++((I).i); ((P) = (L)->vec[(I).i]); ++((I).i)) /* for each profile that is enforcing confinement in a label */ #define label_for_each_confined(I, L, P) \ for ((I).i = aa_label_next_confined((L), 0); \ ((P) = (L)->vec[(I).i]); \ (I).i = aa_label_next_confined((L), (I).i + 1)) #define label_for_each_in_merge(I, A, B, P) \ for ((I).i = (I).j = 0; \ ((P) = aa_label_next_in_merge(&(I), (A), (B))); \ ) #define label_for_each_not_in_set(I, SET, SUB, P) \ for ((I).i = (I).j = 0; \ ((P) = __aa_label_next_not_in_set(&(I), (SET), (SUB))); \ ) #define next_in_ns(i, NS, L) \ ({ \ typeof(i) ___i = (i); \ while ((L)->vec[___i] && (L)->vec[___i]->ns != (NS)) \ (___i)++; \ (___i); \ }) #define label_for_each_in_ns(I, NS, L, P) \ for ((I).i = next_in_ns(0, (NS), (L)); \ ((P) = (L)->vec[(I).i]); \ (I).i = next_in_ns((I).i + 1, (NS), (L))) #define fn_for_each_in_ns(L, P, FN) \ ({ \ struct label_it __i; \ struct aa_ns *__ns = labels_ns(L); \ int __E = 0; \ label_for_each_in_ns(__i, __ns, (L), (P)) { \ last_error(__E, (FN)); \ } \ __E; \ }) #define fn_for_each_XXX(L, P, FN, ...) \ ({ \ struct label_it i; \ int __E = 0; \ label_for_each ## __VA_ARGS__(i, (L), (P)) { \ last_error(__E, (FN)); \ } \ __E; \ }) #define fn_for_each(L, P, FN) fn_for_each_XXX(L, P, FN) #define fn_for_each_confined(L, P, FN) fn_for_each_XXX(L, P, FN, _confined) #define fn_for_each2_XXX(L1, L2, P, FN, ...) \ ({ \ struct label_it i; \ int __E = 0; \ label_for_each ## __VA_ARGS__(i, (L1), (L2), (P)) { \ last_error(__E, (FN)); \ } \ __E; \ }) #define fn_for_each_in_merge(L1, L2, P, FN) \ fn_for_each2_XXX((L1), (L2), P, FN, _in_merge) #define fn_for_each_not_in_set(L1, L2, P, FN) \ fn_for_each2_XXX((L1), (L2), P, FN, _not_in_set) #define LABEL_MEDIATES(L, C) \ ({ \ struct aa_profile *profile; \ struct label_it i; \ int ret = 0; \ label_for_each(i, (L), profile) { \ if (RULE_MEDIATES(&profile->rules, (C))) { \ ret = 1; \ break; \ } \ } \ ret; \ }) void aa_labelset_destroy(struct aa_labelset *ls); void aa_labelset_init(struct aa_labelset *ls); void __aa_labelset_update_subtree(struct aa_ns *ns); void aa_label_destroy(struct aa_label *label); void aa_label_free(struct aa_label *label); void aa_label_kref(struct kref *kref); bool aa_label_init(struct aa_label *label, int size, gfp_t gfp); struct aa_label *aa_label_alloc(int size, struct aa_proxy *proxy, gfp_t gfp); bool aa_label_is_subset(struct aa_label *set, struct aa_label *sub); bool aa_label_is_unconfined_subset(struct aa_label *set, struct aa_label *sub); struct aa_profile *__aa_label_next_not_in_set(struct label_it *I, struct aa_label *set, struct aa_label *sub); bool aa_label_remove(struct aa_label *label); struct aa_label *aa_label_insert(struct aa_labelset *ls, struct aa_label *l); bool aa_label_replace(struct aa_label *old, struct aa_label *new); bool aa_label_make_newest(struct aa_labelset *ls, struct aa_label *old, struct aa_label *new); struct aa_profile *aa_label_next_in_merge(struct label_it *I, struct aa_label *a, struct aa_label *b); struct aa_label *aa_label_find_merge(struct aa_label *a, struct aa_label *b); struct aa_label *aa_label_merge(struct aa_label *a, struct aa_label *b, gfp_t gfp); bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp); #define FLAGS_NONE 0 #define FLAG_SHOW_MODE 1 #define FLAG_VIEW_SUBNS 2 #define FLAG_HIDDEN_UNCONFINED 4 #define FLAG_ABS_ROOT 8 int aa_label_snxprint(char *str, size_t size, struct aa_ns *view, struct aa_label *label, int flags); int aa_label_asxprint(char **strp, struct aa_ns *ns, struct aa_label *label, int flags, gfp_t gfp); int aa_label_acntsxprint(char __counted **strp, struct aa_ns *ns, struct aa_label *label, int flags, gfp_t gfp); void aa_label_xaudit(struct audit_buffer *ab, struct aa_ns *ns, struct aa_label *label, int flags, gfp_t gfp); void aa_label_seq_xprint(struct seq_file *f, struct aa_ns *ns, struct aa_label *label, int flags, gfp_t gfp); void aa_label_xprintk(struct aa_ns *ns, struct aa_label *label, int flags, gfp_t gfp); void aa_label_printk(struct aa_label *label, gfp_t gfp); struct aa_label *aa_label_strn_parse(struct aa_label *base, const char *str, size_t n, gfp_t gfp, bool create, bool force_stack); struct aa_label *aa_label_parse(struct aa_label *base, const char *str, gfp_t gfp, bool create, bool force_stack); static inline const char *aa_label_strn_split(const char *str, int n) { const char *pos; aa_state_t state; state = aa_dfa_matchn_until(stacksplitdfa, DFA_START, str, n, &pos); if (!ACCEPT_TABLE(stacksplitdfa)[state]) return NULL; return pos - 3; } static inline const char *aa_label_str_split(const char *str) { const char *pos; aa_state_t state; state = aa_dfa_match_until(stacksplitdfa, DFA_START, str, &pos); if (!ACCEPT_TABLE(stacksplitdfa)[state]) return NULL; return pos - 3; } struct aa_perms; struct aa_ruleset; int aa_label_match(struct aa_profile *profile, struct aa_ruleset *rules, struct aa_label *label, aa_state_t state, bool subns, u32 request, struct aa_perms *perms); /** * __aa_get_label - get a reference count to uncounted label reference * @l: reference to get a count on * * Returns: pointer to reference OR NULL if race is lost and reference is * being repeated. * Requires: lock held, and the return code MUST be checked */ static inline struct aa_label *__aa_get_label(struct aa_label *l) { if (l && kref_get_unless_zero(&l->count)) return l; return NULL; } static inline struct aa_label *aa_get_label(struct aa_label *l) { if (l) kref_get(&(l->count)); return l; } /** * aa_get_label_rcu - increment refcount on a label that can be replaced * @l: pointer to label that can be replaced (NOT NULL) * * Returns: pointer to a refcounted label. * else NULL if no label */ static inline struct aa_label *aa_get_label_rcu(struct aa_label __rcu **l) { struct aa_label *c; rcu_read_lock(); do { c = rcu_dereference(*l); } while (c && !kref_get_unless_zero(&c->count)); rcu_read_unlock(); return c; } /** * aa_get_newest_label - find the newest version of @l * @l: the label to check for newer versions of * * Returns: refcounted newest version of @l taking into account * replacement, renames and removals * return @l. */ static inline struct aa_label *aa_get_newest_label(struct aa_label *l) { if (!l) return NULL; if (label_is_stale(l)) { struct aa_label *tmp; AA_BUG(!l->proxy); AA_BUG(!l->proxy->label); /* BUG: only way this can happen is @l ref count and its * replacement count have gone to 0 and are on their way * to destruction. ie. we have a refcounting error */ tmp = aa_get_label_rcu(&l->proxy->label); AA_BUG(!tmp); return tmp; } return aa_get_label(l); } static inline void aa_put_label(struct aa_label *l) { if (l) kref_put(&l->count, aa_label_kref); } struct aa_proxy *aa_alloc_proxy(struct aa_label *l, gfp_t gfp); void aa_proxy_kref(struct kref *kref); static inline struct aa_proxy *aa_get_proxy(struct aa_proxy *proxy) { if (proxy) kref_get(&(proxy->count)); return proxy; } static inline void aa_put_proxy(struct aa_proxy *proxy) { if (proxy) kref_put(&proxy->count, aa_proxy_kref); } void __aa_proxy_redirect(struct aa_label *orig, struct aa_label *new); #endif /* __AA_LABEL_H */
119 1863 1864 1859 125 4451 4448 4449 4452 4446 4452 3998 3751 312 1960 2176 2345 1834 3549 600 4000 4000 4000 31 31 1761 1758 1759 1743 11 7 531 1264 1701 58 1758 1758 1756 1762 325 34890 35161 119 341 1475 1482 1471 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/file_table.c * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) */ #include <linux/string.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/init.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/filelock.h> #include <linux/security.h> #include <linux/cred.h> #include <linux/eventpoll.h> #include <linux/rcupdate.h> #include <linux/mount.h> #include <linux/capability.h> #include <linux/cdev.h> #include <linux/fsnotify.h> #include <linux/sysctl.h> #include <linux/percpu_counter.h> #include <linux/percpu.h> #include <linux/task_work.h> #include <linux/swap.h> #include <linux/kmemleak.h> #include <linux/atomic.h> #include "internal.h" /* sysctl tunables... */ static struct files_stat_struct files_stat = { .max_files = NR_FILE }; /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __ro_after_init; static struct kmem_cache *bfilp_cachep __ro_after_init; static struct percpu_counter nr_files __cacheline_aligned_in_smp; /* Container for backing file with optional user path */ struct backing_file { struct file file; union { struct path user_path; freeptr_t bf_freeptr; }; }; static inline struct backing_file *backing_file(struct file *f) { return container_of(f, struct backing_file, file); } struct path *backing_file_user_path(struct file *f) { return &backing_file(f)->user_path; } EXPORT_SYMBOL_GPL(backing_file_user_path); static inline void file_free(struct file *f) { security_file_free(f); if (likely(!(f->f_mode & FMODE_NOACCOUNT))) percpu_counter_dec(&nr_files); put_cred(f->f_cred); if (unlikely(f->f_mode & FMODE_BACKING)) { path_put(backing_file_user_path(f)); kmem_cache_free(bfilp_cachep, backing_file(f)); } else { kmem_cache_free(filp_cachep, f); } } /* * Return the total number of open files in the system */ static long get_nr_files(void) { return percpu_counter_read_positive(&nr_files); } /* * Return the maximum number of open files in the system */ unsigned long get_max_files(void) { return files_stat.max_files; } EXPORT_SYMBOL_GPL(get_max_files); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) /* * Handle nr_files sysctl */ static int proc_nr_files(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } static const struct ctl_table fs_stat_sysctls[] = { { .procname = "file-nr", .data = &files_stat, .maxlen = sizeof(files_stat), .mode = 0444, .proc_handler = proc_nr_files, }, { .procname = "file-max", .data = &files_stat.max_files, .maxlen = sizeof(files_stat.max_files), .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra1 = SYSCTL_LONG_ZERO, .extra2 = SYSCTL_LONG_MAX, }, { .procname = "nr_open", .data = &sysctl_nr_open, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_douintvec_minmax, .extra1 = &sysctl_nr_open_min, .extra2 = &sysctl_nr_open_max, }, }; static int __init init_fs_stat_sysctls(void) { register_sysctl_init("fs", fs_stat_sysctls); if (IS_ENABLED(CONFIG_BINFMT_MISC)) { struct ctl_table_header *hdr; hdr = register_sysctl_mount_point("fs/binfmt_misc"); kmemleak_not_leak(hdr); } return 0; } fs_initcall(init_fs_stat_sysctls); #endif static int init_file(struct file *f, int flags, const struct cred *cred) { int error; f->f_cred = get_cred(cred); error = security_file_alloc(f); if (unlikely(error)) { put_cred(f->f_cred); return error; } spin_lock_init(&f->f_lock); /* * Note that f_pos_lock is only used for files raising * FMODE_ATOMIC_POS and directories. Other files such as pipes * don't need it and since f_pos_lock is in a union may reuse * the space for other purposes. They are expected to initialize * the respective member when opening the file. */ mutex_init(&f->f_pos_lock); memset(&f->f_path, 0, sizeof(f->f_path)); memset(&f->f_ra, 0, sizeof(f->f_ra)); f->f_flags = flags; f->f_mode = OPEN_FMODE(flags); f->f_op = NULL; f->f_mapping = NULL; f->private_data = NULL; f->f_inode = NULL; f->f_owner = NULL; #ifdef CONFIG_EPOLL f->f_ep = NULL; #endif f->f_iocb_flags = 0; f->f_pos = 0; f->f_wb_err = 0; f->f_sb_err = 0; /* * We're SLAB_TYPESAFE_BY_RCU so initialize f_count last. While * fget-rcu pattern users need to be able to handle spurious * refcount bumps we should reinitialize the reused file first. */ file_ref_init(&f->f_ref, 1); /* * Disable permission and pre-content events for all files by default. * They may be enabled later by file_set_fsnotify_mode_from_watchers(). */ file_set_fsnotify_mode(f, FMODE_NONOTIFY_PERM); return 0; } /* Find an unused file structure and return a pointer to it. * Returns an error pointer if some error happend e.g. we over file * structures limit, run out of memory or operation is not permitted. * * Be very careful using this. You are responsible for * getting write access to any mount that you might assign * to this filp, if it is opened for write. If this is not * done, you will imbalance int the mount's writer count * and a warning at __fput() time. */ struct file *alloc_empty_file(int flags, const struct cred *cred) { static long old_max; struct file *f; int error; /* * Privileged users can go above max_files */ if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) { /* * percpu_counters are inaccurate. Do an expensive check before * we go and fail. */ if (percpu_counter_sum_positive(&nr_files) >= files_stat.max_files) goto over; } f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); if (unlikely(!f)) return ERR_PTR(-ENOMEM); error = init_file(f, flags, cred); if (unlikely(error)) { kmem_cache_free(filp_cachep, f); return ERR_PTR(error); } percpu_counter_inc(&nr_files); return f; over: /* Ran out of filps - report that */ if (get_nr_files() > old_max) { pr_info("VFS: file-max limit %lu reached\n", get_max_files()); old_max = get_nr_files(); } return ERR_PTR(-ENFILE); } /* * Variant of alloc_empty_file() that doesn't check and modify nr_files. * * This is only for kernel internal use, and the allocate file must not be * installed into file tables or such. */ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) { struct file *f; int error; f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); if (unlikely(!f)) return ERR_PTR(-ENOMEM); error = init_file(f, flags, cred); if (unlikely(error)) { kmem_cache_free(filp_cachep, f); return ERR_PTR(error); } f->f_mode |= FMODE_NOACCOUNT; return f; } /* * Variant of alloc_empty_file() that allocates a backing_file container * and doesn't check and modify nr_files. * * This is only for kernel internal use, and the allocate file must not be * installed into file tables or such. */ struct file *alloc_empty_backing_file(int flags, const struct cred *cred) { struct backing_file *ff; int error; ff = kmem_cache_alloc(bfilp_cachep, GFP_KERNEL); if (unlikely(!ff)) return ERR_PTR(-ENOMEM); error = init_file(&ff->file, flags, cred); if (unlikely(error)) { kmem_cache_free(bfilp_cachep, ff); return ERR_PTR(error); } ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT; return &ff->file; } /** * file_init_path - initialize a 'struct file' based on path * * @file: the file to set up * @path: the (dentry, vfsmount) pair for the new file * @fop: the 'struct file_operations' for the new file */ static void file_init_path(struct file *file, const struct path *path, const struct file_operations *fop) { file->f_path = *path; file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; file->f_wb_err = filemap_sample_wb_err(file->f_mapping); file->f_sb_err = file_sample_sb_err(file); if (fop->llseek) file->f_mode |= FMODE_LSEEK; if ((file->f_mode & FMODE_READ) && likely(fop->read || fop->read_iter)) file->f_mode |= FMODE_CAN_READ; if ((file->f_mode & FMODE_WRITE) && likely(fop->write || fop->write_iter)) file->f_mode |= FMODE_CAN_WRITE; file->f_iocb_flags = iocb_flags(file); file->f_mode |= FMODE_OPENED; file->f_op = fop; if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(path->dentry->d_inode); } /** * alloc_file - allocate and initialize a 'struct file' * * @path: the (dentry, vfsmount) pair for the new file * @flags: O_... flags with which the new file will be opened * @fop: the 'struct file_operations' for the new file */ static struct file *alloc_file(const struct path *path, int flags, const struct file_operations *fop) { struct file *file; file = alloc_empty_file(flags, current_cred()); if (!IS_ERR(file)) file_init_path(file, path, fop); return file; } static inline int alloc_path_pseudo(const char *name, struct inode *inode, struct vfsmount *mnt, struct path *path) { path->dentry = d_alloc_pseudo(mnt->mnt_sb, &QSTR(name)); if (!path->dentry) return -ENOMEM; path->mnt = mntget(mnt); d_instantiate(path->dentry, inode); return 0; } struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt, const char *name, int flags, const struct file_operations *fops) { int ret; struct path path; struct file *file; ret = alloc_path_pseudo(name, inode, mnt, &path); if (ret) return ERR_PTR(ret); file = alloc_file(&path, flags, fops); if (IS_ERR(file)) { ihold(inode); path_put(&path); return file; } /* * Disable all fsnotify events for pseudo files by default. * They may be enabled by caller with file_set_fsnotify_mode(). */ file_set_fsnotify_mode(file, FMODE_NONOTIFY); return file; } EXPORT_SYMBOL(alloc_file_pseudo); struct file *alloc_file_pseudo_noaccount(struct inode *inode, struct vfsmount *mnt, const char *name, int flags, const struct file_operations *fops) { int ret; struct path path; struct file *file; ret = alloc_path_pseudo(name, inode, mnt, &path); if (ret) return ERR_PTR(ret); file = alloc_empty_file_noaccount(flags, current_cred()); if (IS_ERR(file)) { ihold(inode); path_put(&path); return file; } file_init_path(file, &path, fops); /* * Disable all fsnotify events for pseudo files by default. * They may be enabled by caller with file_set_fsnotify_mode(). */ file_set_fsnotify_mode(file, FMODE_NONOTIFY); return file; } EXPORT_SYMBOL_GPL(alloc_file_pseudo_noaccount); struct file *alloc_file_clone(struct file *base, int flags, const struct file_operations *fops) { struct file *f; f = alloc_file(&base->f_path, flags, fops); if (!IS_ERR(f)) { path_get(&f->f_path); f->f_mapping = base->f_mapping; } return f; } /* the real guts of fput() - releasing the last reference to file */ static void __fput(struct file *file) { struct dentry *dentry = file->f_path.dentry; struct vfsmount *mnt = file->f_path.mnt; struct inode *inode = file->f_inode; fmode_t mode = file->f_mode; if (unlikely(!(file->f_mode & FMODE_OPENED))) goto out; might_sleep(); fsnotify_close(file); /* * The function eventpoll_release() should be the first called * in the file cleanup chain. */ eventpoll_release(file); locks_remove_file(file); security_file_release(file); if (unlikely(file->f_flags & FASYNC)) { if (file->f_op->fasync) file->f_op->fasync(-1, file, 0); } if (file->f_op->release) file->f_op->release(inode, file); if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(mode & FMODE_PATH))) { cdev_put(inode->i_cdev); } fops_put(file->f_op); file_f_owner_release(file); put_file_access(file); dput(dentry); if (unlikely(mode & FMODE_NEED_UNMOUNT)) dissolve_on_fput(mnt); mntput(mnt); out: file_free(file); } static LLIST_HEAD(delayed_fput_list); static void delayed_fput(struct work_struct *unused) { struct llist_node *node = llist_del_all(&delayed_fput_list); struct file *f, *t; llist_for_each_entry_safe(f, t, node, f_llist) __fput(f); } static void ____fput(struct callback_head *work) { __fput(container_of(work, struct file, f_task_work)); } static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); /* * If kernel thread really needs to have the final fput() it has done * to complete, call this. The only user right now is the boot - we * *do* need to make sure our writes to binaries on initramfs has * not left us with opened struct file waiting for __fput() - execve() * won't work without that. Please, don't add more callers without * very good reasons; in particular, never call that with locks * held and never call that from a thread that might need to do * some work on any kind of umount. */ void flush_delayed_fput(void) { delayed_fput(NULL); flush_delayed_work(&delayed_fput_work); } EXPORT_SYMBOL_GPL(flush_delayed_fput); void fput(struct file *file) { if (file_ref_put(&file->f_ref)) { struct task_struct *task = current; if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) { file_free(file); return; } if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { init_task_work(&file->f_task_work, ____fput); if (!task_work_add(task, &file->f_task_work, TWA_RESUME)) return; /* * After this task has run exit_task_work(), * task_work_add() will fail. Fall through to delayed * fput to avoid leaking *file. */ } if (llist_add(&file->f_llist, &delayed_fput_list)) schedule_delayed_work(&delayed_fput_work, 1); } } /* * synchronous analog of fput(); for kernel threads that might be needed * in some umount() (and thus can't use flush_delayed_fput() without * risking deadlocks), need to wait for completion of __fput() and know * for this specific struct file it won't involve anything that would * need them. Use only if you really need it - at the very least, * don't blindly convert fput() by kernel thread to that. */ void __fput_sync(struct file *file) { if (file_ref_put(&file->f_ref)) __fput(file); } EXPORT_SYMBOL(fput); EXPORT_SYMBOL(__fput_sync); void __init files_init(void) { struct kmem_cache_args args = { .use_freeptr_offset = true, .freeptr_offset = offsetof(struct file, f_freeptr), }; filp_cachep = kmem_cache_create("filp", sizeof(struct file), &args, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU); args.freeptr_offset = offsetof(struct backing_file, bf_freeptr); bfilp_cachep = kmem_cache_create("bfilp", sizeof(struct backing_file), &args, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU); percpu_counter_init(&nr_files, 0, GFP_KERNEL); } /* * One file with associated inode and dcache is very roughly 1K. Per default * do not use more than 10% of our memory for files. */ void __init files_maxfiles_init(void) { unsigned long n; unsigned long nr_pages = totalram_pages(); unsigned long memreserve = (nr_pages - nr_free_pages()) * 3/2; memreserve = min(memreserve, nr_pages - 1); n = ((nr_pages - memreserve) * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); }
5 8 7 5 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 // SPDX-License-Identifier: GPL-2.0-or-later /* * SNAP data link layer. Derived from 802.2 * * Alan Cox <alan@lxorguk.ukuu.org.uk>, * from the 802.2 layer by Greg Page. * Merged in additions from Greg Page's psnap.c. */ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <net/datalink.h> #include <net/llc.h> #include <net/psnap.h> #include <linux/mm.h> #include <linux/in.h> #include <linux/init.h> #include <linux/rculist.h> static LIST_HEAD(snap_list); static DEFINE_SPINLOCK(snap_lock); static struct llc_sap *snap_sap; /* * Find a snap client by matching the 5 bytes. */ static struct datalink_proto *find_snap_client(const unsigned char *desc) { struct datalink_proto *proto = NULL, *p; list_for_each_entry_rcu(p, &snap_list, node, lockdep_is_held(&snap_lock)) { if (!memcmp(p->type, desc, 5)) { proto = p; break; } } return proto; } /* * A SNAP packet has arrived */ static int snap_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { int rc = 1; struct datalink_proto *proto; static struct packet_type snap_packet_type = { .type = cpu_to_be16(ETH_P_SNAP), }; if (unlikely(!pskb_may_pull(skb, 5))) goto drop; rcu_read_lock(); proto = find_snap_client(skb->data); if (proto) { /* Pass the frame on. */ skb_pull_rcsum(skb, 5); skb_reset_transport_header(skb); rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev); } rcu_read_unlock(); if (unlikely(!proto)) goto drop; out: return rc; drop: kfree_skb(skb); goto out; } /* * Put a SNAP header on a frame and pass to 802.2 */ static int snap_request(struct datalink_proto *dl, struct sk_buff *skb, const u8 *dest) { memcpy(skb_push(skb, 5), dl->type, 5); llc_build_and_send_ui_pkt(snap_sap, skb, dest, snap_sap->laddr.lsap); return 0; } /* * Set up the SNAP layer */ EXPORT_SYMBOL(register_snap_client); EXPORT_SYMBOL(unregister_snap_client); static const char snap_err_msg[] __initconst = KERN_CRIT "SNAP - unable to register with 802.2\n"; static int __init snap_init(void) { snap_sap = llc_sap_open(0xAA, snap_rcv); if (!snap_sap) { printk(snap_err_msg); return -EBUSY; } return 0; } module_init(snap_init); static void __exit snap_exit(void) { llc_sap_put(snap_sap); } module_exit(snap_exit); /* * Register SNAP clients. We don't yet use this for IP. */ struct datalink_proto *register_snap_client(const unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *)) { struct datalink_proto *proto = NULL; spin_lock_bh(&snap_lock); if (find_snap_client(desc)) goto out; proto = kmalloc(sizeof(*proto), GFP_ATOMIC); if (proto) { memcpy(proto->type, desc, 5); proto->rcvfunc = rcvfunc; proto->header_length = 5 + 3; /* snap + 802.2 */ proto->request = snap_request; list_add_rcu(&proto->node, &snap_list); } out: spin_unlock_bh(&snap_lock); return proto; } /* * Unregister SNAP clients. Protocols no longer want to play with us ... */ void unregister_snap_client(struct datalink_proto *proto) { spin_lock_bh(&snap_lock); list_del_rcu(&proto->node); spin_unlock_bh(&snap_lock); synchronize_net(); kfree(proto); } MODULE_DESCRIPTION("SNAP data link layer. Derived from 802.2"); MODULE_LICENSE("GPL");
10 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 // SPDX-License-Identifier: GPL-2.0-or-later /* * LAPB release 002 * * This code REQUIRES 2.1.15 or higher/ NET3.038 * * History * LAPB 001 Jonathan Naylor Started Coding */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/inet.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <net/lapb.h> /* * This routine purges all the queues of frames. */ void lapb_clear_queues(struct lapb_cb *lapb) { skb_queue_purge(&lapb->write_queue); skb_queue_purge(&lapb->ack_queue); } /* * This routine purges the input queue of those frames that have been * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the * SDL diagram. */ void lapb_frames_acked(struct lapb_cb *lapb, unsigned short nr) { struct sk_buff *skb; int modulus; modulus = (lapb->mode & LAPB_EXTENDED) ? LAPB_EMODULUS : LAPB_SMODULUS; /* * Remove all the ack-ed frames from the ack queue. */ if (lapb->va != nr) while (skb_peek(&lapb->ack_queue) && lapb->va != nr) { skb = skb_dequeue(&lapb->ack_queue); kfree_skb(skb); lapb->va = (lapb->va + 1) % modulus; } } void lapb_requeue_frames(struct lapb_cb *lapb) { struct sk_buff *skb, *skb_prev = NULL; /* * Requeue all the un-ack-ed frames on the output queue to be picked * up by lapb_kick called from the timer. This arrangement handles the * possibility of an empty output queue. */ while ((skb = skb_dequeue(&lapb->ack_queue)) != NULL) { if (!skb_prev) skb_queue_head(&lapb->write_queue, skb); else skb_append(skb_prev, skb, &lapb->write_queue); skb_prev = skb; } } /* * Validate that the value of nr is between va and vs. Return true or * false for testing. */ int lapb_validate_nr(struct lapb_cb *lapb, unsigned short nr) { unsigned short vc = lapb->va; int modulus; modulus = (lapb->mode & LAPB_EXTENDED) ? LAPB_EMODULUS : LAPB_SMODULUS; while (vc != lapb->vs) { if (nr == vc) return 1; vc = (vc + 1) % modulus; } return nr == lapb->vs; } /* * This routine is the centralised routine for parsing the control * information for the different frame formats. */ int lapb_decode(struct lapb_cb *lapb, struct sk_buff *skb, struct lapb_frame *frame) { frame->type = LAPB_ILLEGAL; lapb_dbg(2, "(%p) S%d RX %3ph\n", lapb->dev, lapb->state, skb->data); /* We always need to look at 2 bytes, sometimes we need * to look at 3 and those cases are handled below. */ if (!pskb_may_pull(skb, 2)) return -1; if (lapb->mode & LAPB_MLP) { if (lapb->mode & LAPB_DCE) { if (skb->data[0] == LAPB_ADDR_D) frame->cr = LAPB_COMMAND; if (skb->data[0] == LAPB_ADDR_C) frame->cr = LAPB_RESPONSE; } else { if (skb->data[0] == LAPB_ADDR_C) frame->cr = LAPB_COMMAND; if (skb->data[0] == LAPB_ADDR_D) frame->cr = LAPB_RESPONSE; } } else { if (lapb->mode & LAPB_DCE) { if (skb->data[0] == LAPB_ADDR_B) frame->cr = LAPB_COMMAND; if (skb->data[0] == LAPB_ADDR_A) frame->cr = LAPB_RESPONSE; } else { if (skb->data[0] == LAPB_ADDR_A) frame->cr = LAPB_COMMAND; if (skb->data[0] == LAPB_ADDR_B) frame->cr = LAPB_RESPONSE; } } skb_pull(skb, 1); if (lapb->mode & LAPB_EXTENDED) { if (!(skb->data[0] & LAPB_S)) { if (!pskb_may_pull(skb, 2)) return -1; /* * I frame - carries NR/NS/PF */ frame->type = LAPB_I; frame->ns = (skb->data[0] >> 1) & 0x7F; frame->nr = (skb->data[1] >> 1) & 0x7F; frame->pf = skb->data[1] & LAPB_EPF; frame->control[0] = skb->data[0]; frame->control[1] = skb->data[1]; skb_pull(skb, 2); } else if ((skb->data[0] & LAPB_U) == 1) { if (!pskb_may_pull(skb, 2)) return -1; /* * S frame - take out PF/NR */ frame->type = skb->data[0] & 0x0F; frame->nr = (skb->data[1] >> 1) & 0x7F; frame->pf = skb->data[1] & LAPB_EPF; frame->control[0] = skb->data[0]; frame->control[1] = skb->data[1]; skb_pull(skb, 2); } else if ((skb->data[0] & LAPB_U) == 3) { /* * U frame - take out PF */ frame->type = skb->data[0] & ~LAPB_SPF; frame->pf = skb->data[0] & LAPB_SPF; frame->control[0] = skb->data[0]; frame->control[1] = 0x00; skb_pull(skb, 1); } } else { if (!(skb->data[0] & LAPB_S)) { /* * I frame - carries NR/NS/PF */ frame->type = LAPB_I; frame->ns = (skb->data[0] >> 1) & 0x07; frame->nr = (skb->data[0] >> 5) & 0x07; frame->pf = skb->data[0] & LAPB_SPF; } else if ((skb->data[0] & LAPB_U) == 1) { /* * S frame - take out PF/NR */ frame->type = skb->data[0] & 0x0F; frame->nr = (skb->data[0] >> 5) & 0x07; frame->pf = skb->data[0] & LAPB_SPF; } else if ((skb->data[0] & LAPB_U) == 3) { /* * U frame - take out PF */ frame->type = skb->data[0] & ~LAPB_SPF; frame->pf = skb->data[0] & LAPB_SPF; } frame->control[0] = skb->data[0]; skb_pull(skb, 1); } return 0; } /* * This routine is called when the HDLC layer internally generates a * command or response for the remote machine ( eg. RR, UA etc. ). * Only supervisory or unnumbered frames are processed, FRMRs are handled * by lapb_transmit_frmr below. */ void lapb_send_control(struct lapb_cb *lapb, int frametype, int poll_bit, int type) { struct sk_buff *skb; unsigned char *dptr; if ((skb = alloc_skb(LAPB_HEADER_LEN + 3, GFP_ATOMIC)) == NULL) return; skb_reserve(skb, LAPB_HEADER_LEN + 1); if (lapb->mode & LAPB_EXTENDED) { if ((frametype & LAPB_U) == LAPB_U) { dptr = skb_put(skb, 1); *dptr = frametype; *dptr |= poll_bit ? LAPB_SPF : 0; } else { dptr = skb_put(skb, 2); dptr[0] = frametype; dptr[1] = (lapb->vr << 1); dptr[1] |= poll_bit ? LAPB_EPF : 0; } } else { dptr = skb_put(skb, 1); *dptr = frametype; *dptr |= poll_bit ? LAPB_SPF : 0; if ((frametype & LAPB_U) == LAPB_S) /* S frames carry NR */ *dptr |= (lapb->vr << 5); } lapb_transmit_buffer(lapb, skb, type); } /* * This routine generates FRMRs based on information previously stored in * the LAPB control block. */ void lapb_transmit_frmr(struct lapb_cb *lapb) { struct sk_buff *skb; unsigned char *dptr; if ((skb = alloc_skb(LAPB_HEADER_LEN + 7, GFP_ATOMIC)) == NULL) return; skb_reserve(skb, LAPB_HEADER_LEN + 1); if (lapb->mode & LAPB_EXTENDED) { dptr = skb_put(skb, 6); *dptr++ = LAPB_FRMR; *dptr++ = lapb->frmr_data.control[0]; *dptr++ = lapb->frmr_data.control[1]; *dptr++ = (lapb->vs << 1) & 0xFE; *dptr = (lapb->vr << 1) & 0xFE; if (lapb->frmr_data.cr == LAPB_RESPONSE) *dptr |= 0x01; dptr++; *dptr++ = lapb->frmr_type; lapb_dbg(1, "(%p) S%d TX FRMR %5ph\n", lapb->dev, lapb->state, &skb->data[1]); } else { dptr = skb_put(skb, 4); *dptr++ = LAPB_FRMR; *dptr++ = lapb->frmr_data.control[0]; *dptr = (lapb->vs << 1) & 0x0E; *dptr |= (lapb->vr << 5) & 0xE0; if (lapb->frmr_data.cr == LAPB_RESPONSE) *dptr |= 0x10; dptr++; *dptr++ = lapb->frmr_type; lapb_dbg(1, "(%p) S%d TX FRMR %3ph\n", lapb->dev, lapb->state, &skb->data[1]); } lapb_transmit_buffer(lapb, skb, LAPB_RESPONSE); }
52 57 57 40 3 2 38 22 1 3 32 13 20 13 13 110 4 59 71 74 4 37 59 71 99 17 17 60 1 6 60 58 6 3 61 59 70 11 64 63 64 84 12 12 5 5 1 17 19 20 20 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/seq_file.c * * helper functions for making synthetic files from sequences of records. * initial implementation -- AV, Oct 2001. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/cache.h> #include <linux/fs.h> #include <linux/export.h> #include <linux/seq_file.h> #include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/cred.h> #include <linux/mm.h> #include <linux/printk.h> #include <linux/string_helpers.h> #include <linux/uio.h> #include <linux/uaccess.h> #include <asm/page.h> static struct kmem_cache *seq_file_cache __ro_after_init; static void seq_set_overflow(struct seq_file *m) { m->count = m->size; } static void *seq_buf_alloc(unsigned long size) { if (unlikely(size > MAX_RW_COUNT)) return NULL; return kvmalloc(size, GFP_KERNEL_ACCOUNT); } /** * seq_open - initialize sequential file * @file: file we initialize * @op: method table describing the sequence * * seq_open() sets @file, associating it with a sequence described * by @op. @op->start() sets the iterator up and returns the first * element of sequence. @op->stop() shuts it down. @op->next() * returns the next element of sequence. @op->show() prints element * into the buffer. In case of error ->start() and ->next() return * ERR_PTR(error). In the end of sequence they return %NULL. ->show() * returns 0 in case of success and negative number in case of error. * Returning SEQ_SKIP means "discard this element and move on". * Note: seq_open() will allocate a struct seq_file and store its * pointer in @file->private_data. This pointer should not be modified. */ int seq_open(struct file *file, const struct seq_operations *op) { struct seq_file *p; WARN_ON(file->private_data); p = kmem_cache_zalloc(seq_file_cache, GFP_KERNEL); if (!p) return -ENOMEM; file->private_data = p; mutex_init(&p->lock); p->op = op; // No refcounting: the lifetime of 'p' is constrained // to the lifetime of the file. p->file = file; /* * seq_files support lseek() and pread(). They do not implement * write() at all, but we clear FMODE_PWRITE here for historical * reasons. * * If a client of seq_files a) implements file.write() and b) wishes to * support pwrite() then that client will need to implement its own * file.open() which calls seq_open() and then sets FMODE_PWRITE. */ file->f_mode &= ~FMODE_PWRITE; return 0; } EXPORT_SYMBOL(seq_open); static int traverse(struct seq_file *m, loff_t offset) { loff_t pos = 0; int error = 0; void *p; m->index = 0; m->count = m->from = 0; if (!offset) return 0; if (!m->buf) { m->buf = seq_buf_alloc(m->size = PAGE_SIZE); if (!m->buf) return -ENOMEM; } p = m->op->start(m, &m->index); while (p) { error = PTR_ERR(p); if (IS_ERR(p)) break; error = m->op->show(m, p); if (error < 0) break; if (unlikely(error)) { error = 0; m->count = 0; } if (seq_has_overflowed(m)) goto Eoverflow; p = m->op->next(m, p, &m->index); if (pos + m->count > offset) { m->from = offset - pos; m->count -= m->from; break; } pos += m->count; m->count = 0; if (pos == offset) break; } m->op->stop(m, p); return error; Eoverflow: m->op->stop(m, p); kvfree(m->buf); m->count = 0; m->buf = seq_buf_alloc(m->size <<= 1); return !m->buf ? -ENOMEM : -EAGAIN; } /** * seq_read - ->read() method for sequential files. * @file: the file to read from * @buf: the buffer to read to * @size: the maximum number of bytes to read * @ppos: the current position in the file * * Ready-made ->f_op->read() */ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct iovec iov = { .iov_base = buf, .iov_len = size}; struct kiocb kiocb; struct iov_iter iter; ssize_t ret; init_sync_kiocb(&kiocb, file); iov_iter_init(&iter, ITER_DEST, &iov, 1, size); kiocb.ki_pos = *ppos; ret = seq_read_iter(&kiocb, &iter); *ppos = kiocb.ki_pos; return ret; } EXPORT_SYMBOL(seq_read); /* * Ready-made ->f_op->read_iter() */ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct seq_file *m = iocb->ki_filp->private_data; size_t copied = 0; size_t n; void *p; int err = 0; if (!iov_iter_count(iter)) return 0; mutex_lock(&m->lock); /* * if request is to read from zero offset, reset iterator to first * record as it might have been already advanced by previous requests */ if (iocb->ki_pos == 0) { m->index = 0; m->count = 0; } /* Don't assume ki_pos is where we left it */ if (unlikely(iocb->ki_pos != m->read_pos)) { while ((err = traverse(m, iocb->ki_pos)) == -EAGAIN) ; if (err) { /* With prejudice... */ m->read_pos = 0; m->index = 0; m->count = 0; goto Done; } else { m->read_pos = iocb->ki_pos; } } /* grab buffer if we didn't have one */ if (!m->buf) { m->buf = seq_buf_alloc(m->size = PAGE_SIZE); if (!m->buf) goto Enomem; } // something left in the buffer - copy it out first if (m->count) { n = copy_to_iter(m->buf + m->from, m->count, iter); m->count -= n; m->from += n; copied += n; if (m->count) // hadn't managed to copy everything goto Done; } // get a non-empty record in the buffer m->from = 0; p = m->op->start(m, &m->index); while (1) { err = PTR_ERR(p); if (!p || IS_ERR(p)) // EOF or an error break; err = m->op->show(m, p); if (err < 0) // hard error break; if (unlikely(err)) // ->show() says "skip it" m->count = 0; if (unlikely(!m->count)) { // empty record p = m->op->next(m, p, &m->index); continue; } if (!seq_has_overflowed(m)) // got it goto Fill; // need a bigger buffer m->op->stop(m, p); kvfree(m->buf); m->count = 0; m->buf = seq_buf_alloc(m->size <<= 1); if (!m->buf) goto Enomem; p = m->op->start(m, &m->index); } // EOF or an error m->op->stop(m, p); m->count = 0; goto Done; Fill: // one non-empty record is in the buffer; if they want more, // try to fit more in, but in any case we need to advance // the iterator once for every record shown. while (1) { size_t offs = m->count; loff_t pos = m->index; p = m->op->next(m, p, &m->index); if (pos == m->index) { pr_info_ratelimited("buggy .next function %ps did not update position index\n", m->op->next); m->index++; } if (!p || IS_ERR(p)) // no next record for us break; if (m->count >= iov_iter_count(iter)) break; err = m->op->show(m, p); if (err > 0) { // ->show() says "skip it" m->count = offs; } else if (err || seq_has_overflowed(m)) { m->count = offs; break; } } m->op->stop(m, p); n = copy_to_iter(m->buf, m->count, iter); copied += n; m->count -= n; m->from = n; Done: if (unlikely(!copied)) { copied = m->count ? -EFAULT : err; } else { iocb->ki_pos += copied; m->read_pos += copied; } mutex_unlock(&m->lock); return copied; Enomem: err = -ENOMEM; goto Done; } EXPORT_SYMBOL(seq_read_iter); /** * seq_lseek - ->llseek() method for sequential files. * @file: the file in question * @offset: new position * @whence: 0 for absolute, 1 for relative position * * Ready-made ->f_op->llseek() */ loff_t seq_lseek(struct file *file, loff_t offset, int whence) { struct seq_file *m = file->private_data; loff_t retval = -EINVAL; mutex_lock(&m->lock); switch (whence) { case SEEK_CUR: offset += file->f_pos; fallthrough; case SEEK_SET: if (offset < 0) break; retval = offset; if (offset != m->read_pos) { while ((retval = traverse(m, offset)) == -EAGAIN) ; if (retval) { /* with extreme prejudice... */ file->f_pos = 0; m->read_pos = 0; m->index = 0; m->count = 0; } else { m->read_pos = offset; retval = file->f_pos = offset; } } else { file->f_pos = offset; } } mutex_unlock(&m->lock); return retval; } EXPORT_SYMBOL(seq_lseek); /** * seq_release - free the structures associated with sequential file. * @inode: its inode * @file: file in question * * Frees the structures associated with sequential file; can be used * as ->f_op->release() if you don't have private data to destroy. */ int seq_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; kvfree(m->buf); kmem_cache_free(seq_file_cache, m); return 0; } EXPORT_SYMBOL(seq_release); /** * seq_escape_mem - print data into buffer, escaping some characters * @m: target buffer * @src: source buffer * @len: size of source buffer * @flags: flags to pass to string_escape_mem() * @esc: set of characters that need escaping * * Puts data into buffer, replacing each occurrence of character from * given class (defined by @flags and @esc) with printable escaped sequence. * * Use seq_has_overflowed() to check for errors. */ void seq_escape_mem(struct seq_file *m, const char *src, size_t len, unsigned int flags, const char *esc) { char *buf; size_t size = seq_get_buf(m, &buf); int ret; ret = string_escape_mem(src, len, buf, size, flags, esc); seq_commit(m, ret < size ? ret : -1); } EXPORT_SYMBOL(seq_escape_mem); void seq_vprintf(struct seq_file *m, const char *f, va_list args) { int len; if (m->count < m->size) { len = vsnprintf(m->buf + m->count, m->size - m->count, f, args); if (m->count + len < m->size) { m->count += len; return; } } seq_set_overflow(m); } EXPORT_SYMBOL(seq_vprintf); void seq_printf(struct seq_file *m, const char *f, ...) { va_list args; va_start(args, f); seq_vprintf(m, f, args); va_end(args); } EXPORT_SYMBOL(seq_printf); #ifdef CONFIG_BINARY_PRINTF void seq_bprintf(struct seq_file *m, const char *f, const u32 *binary) { int len; if (m->count < m->size) { len = bstr_printf(m->buf + m->count, m->size - m->count, f, binary); if (m->count + len < m->size) { m->count += len; return; } } seq_set_overflow(m); } EXPORT_SYMBOL(seq_bprintf); #endif /* CONFIG_BINARY_PRINTF */ /** * mangle_path - mangle and copy path to buffer beginning * @s: buffer start * @p: beginning of path in above buffer * @esc: set of characters that need escaping * * Copy the path from @p to @s, replacing each occurrence of character from * @esc with usual octal escape. * Returns pointer past last written character in @s, or NULL in case of * failure. */ char *mangle_path(char *s, const char *p, const char *esc) { while (s <= p) { char c = *p++; if (!c) { return s; } else if (!strchr(esc, c)) { *s++ = c; } else if (s + 4 > p) { break; } else { *s++ = '\\'; *s++ = '0' + ((c & 0300) >> 6); *s++ = '0' + ((c & 070) >> 3); *s++ = '0' + (c & 07); } } return NULL; } EXPORT_SYMBOL(mangle_path); /** * seq_path - seq_file interface to print a pathname * @m: the seq_file handle * @path: the struct path to print * @esc: set of characters to escape in the output * * return the absolute path of 'path', as represented by the * dentry / mnt pair in the path parameter. */ int seq_path(struct seq_file *m, const struct path *path, const char *esc) { char *buf; size_t size = seq_get_buf(m, &buf); int res = -1; if (size) { char *p = d_path(path, buf, size); if (!IS_ERR(p)) { char *end = mangle_path(buf, p, esc); if (end) res = end - buf; } } seq_commit(m, res); return res; } EXPORT_SYMBOL(seq_path); /** * seq_file_path - seq_file interface to print a pathname of a file * @m: the seq_file handle * @file: the struct file to print * @esc: set of characters to escape in the output * * return the absolute path to the file. */ int seq_file_path(struct seq_file *m, struct file *file, const char *esc) { return seq_path(m, &file->f_path, esc); } EXPORT_SYMBOL(seq_file_path); /* * Same as seq_path, but relative to supplied root. */ int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc) { char *buf; size_t size = seq_get_buf(m, &buf); int res = -ENAMETOOLONG; if (size) { char *p; p = __d_path(path, root, buf, size); if (!p) return SEQ_SKIP; res = PTR_ERR(p); if (!IS_ERR(p)) { char *end = mangle_path(buf, p, esc); if (end) res = end - buf; else res = -ENAMETOOLONG; } } seq_commit(m, res); return res < 0 && res != -ENAMETOOLONG ? res : 0; } /* * returns the path of the 'dentry' from the root of its filesystem. */ int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc) { char *buf; size_t size = seq_get_buf(m, &buf); int res = -1; if (size) { char *p = dentry_path(dentry, buf, size); if (!IS_ERR(p)) { char *end = mangle_path(buf, p, esc); if (end) res = end - buf; } } seq_commit(m, res); return res; } EXPORT_SYMBOL(seq_dentry); void *single_start(struct seq_file *p, loff_t *pos) { return *pos ? NULL : SEQ_START_TOKEN; } static void *single_next(struct seq_file *p, void *v, loff_t *pos) { ++*pos; return NULL; } static void single_stop(struct seq_file *p, void *v) { } int single_open(struct file *file, int (*show)(struct seq_file *, void *), void *data) { struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL_ACCOUNT); int res = -ENOMEM; if (op) { op->start = single_start; op->next = single_next; op->stop = single_stop; op->show = show; res = seq_open(file, op); if (!res) ((struct seq_file *)file->private_data)->private = data; else kfree(op); } return res; } EXPORT_SYMBOL(single_open); int single_open_size(struct file *file, int (*show)(struct seq_file *, void *), void *data, size_t size) { char *buf = seq_buf_alloc(size); int ret; if (!buf) return -ENOMEM; ret = single_open(file, show, data); if (ret) { kvfree(buf); return ret; } ((struct seq_file *)file->private_data)->buf = buf; ((struct seq_file *)file->private_data)->size = size; return 0; } EXPORT_SYMBOL(single_open_size); int single_release(struct inode *inode, struct file *file) { const struct seq_operations *op = ((struct seq_file *)file->private_data)->op; int res = seq_release(inode, file); kfree(op); return res; } EXPORT_SYMBOL(single_release); int seq_release_private(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; kfree(seq->private); seq->private = NULL; return seq_release(inode, file); } EXPORT_SYMBOL(seq_release_private); void *__seq_open_private(struct file *f, const struct seq_operations *ops, int psize) { int rc; void *private; struct seq_file *seq; private = kzalloc(psize, GFP_KERNEL_ACCOUNT); if (private == NULL) goto out; rc = seq_open(f, ops); if (rc < 0) goto out_free; seq = f->private_data; seq->private = private; return private; out_free: kfree(private); out: return NULL; } EXPORT_SYMBOL(__seq_open_private); int seq_open_private(struct file *filp, const struct seq_operations *ops, int psize) { return __seq_open_private(filp, ops, psize) ? 0 : -ENOMEM; } EXPORT_SYMBOL(seq_open_private); void seq_putc(struct seq_file *m, char c) { if (m->count >= m->size) return; m->buf[m->count++] = c; } EXPORT_SYMBOL(seq_putc); void __seq_puts(struct seq_file *m, const char *s) { seq_write(m, s, strlen(s)); } EXPORT_SYMBOL(__seq_puts); /** * seq_put_decimal_ull_width - A helper routine for putting decimal numbers * without rich format of printf(). * only 'unsigned long long' is supported. * @m: seq_file identifying the buffer to which data should be written * @delimiter: a string which is printed before the number * @num: the number * @width: a minimum field width * * This routine will put strlen(delimiter) + number into seq_filed. * This routine is very quick when you show lots of numbers. * In usual cases, it will be better to use seq_printf(). It's easier to read. */ void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, unsigned long long num, unsigned int width) { int len; if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */ goto overflow; if (delimiter && delimiter[0]) { if (delimiter[1] == 0) seq_putc(m, delimiter[0]); else seq_puts(m, delimiter); } if (!width) width = 1; if (m->count + width >= m->size) goto overflow; len = num_to_str(m->buf + m->count, m->size - m->count, num, width); if (!len) goto overflow; m->count += len; return; overflow: seq_set_overflow(m); } void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, unsigned long long num) { return seq_put_decimal_ull_width(m, delimiter, num, 0); } EXPORT_SYMBOL(seq_put_decimal_ull); /** * seq_put_hex_ll - put a number in hexadecimal notation * @m: seq_file identifying the buffer to which data should be written * @delimiter: a string which is printed before the number * @v: the number * @width: a minimum field width * * seq_put_hex_ll(m, "", v, 8) is equal to seq_printf(m, "%08llx", v) * * This routine is very quick when you show lots of numbers. * In usual cases, it will be better to use seq_printf(). It's easier to read. */ void seq_put_hex_ll(struct seq_file *m, const char *delimiter, unsigned long long v, unsigned int width) { unsigned int len; int i; if (delimiter && delimiter[0]) { if (delimiter[1] == 0) seq_putc(m, delimiter[0]); else seq_puts(m, delimiter); } /* If x is 0, the result of __builtin_clzll is undefined */ if (v == 0) len = 1; else len = (sizeof(v) * 8 - __builtin_clzll(v) + 3) / 4; if (len < width) len = width; if (m->count + len > m->size) { seq_set_overflow(m); return; } for (i = len - 1; i >= 0; i--) { m->buf[m->count + i] = hex_asc[0xf & v]; v = v >> 4; } m->count += len; } void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num) { int len; if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */ goto overflow; if (delimiter && delimiter[0]) { if (delimiter[1] == 0) seq_putc(m, delimiter[0]); else seq_puts(m, delimiter); } if (m->count + 2 >= m->size) goto overflow; if (num < 0) { m->buf[m->count++] = '-'; num = -num; } if (num < 10) { m->buf[m->count++] = num + '0'; return; } len = num_to_str(m->buf + m->count, m->size - m->count, num, 0); if (!len) goto overflow; m->count += len; return; overflow: seq_set_overflow(m); } EXPORT_SYMBOL(seq_put_decimal_ll); /** * seq_write - write arbitrary data to buffer * @seq: seq_file identifying the buffer to which data should be written * @data: data address * @len: number of bytes * * Return 0 on success, non-zero otherwise. */ int seq_write(struct seq_file *seq, const void *data, size_t len) { if (seq->count + len < seq->size) { memcpy(seq->buf + seq->count, data, len); seq->count += len; return 0; } seq_set_overflow(seq); return -1; } EXPORT_SYMBOL(seq_write); /** * seq_pad - write padding spaces to buffer * @m: seq_file identifying the buffer to which data should be written * @c: the byte to append after padding if non-zero */ void seq_pad(struct seq_file *m, char c) { int size = m->pad_until - m->count; if (size > 0) { if (size + m->count > m->size) { seq_set_overflow(m); return; } memset(m->buf + m->count, ' ', size); m->count += size; } if (c) seq_putc(m, c); } EXPORT_SYMBOL(seq_pad); /* A complete analogue of print_hex_dump() */ void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { const u8 *ptr = buf; int i, linelen, remaining = len; char *buffer; size_t size; int ret; if (rowsize != 16 && rowsize != 32) rowsize = 16; for (i = 0; i < len && !seq_has_overflowed(m); i += rowsize) { linelen = min(remaining, rowsize); remaining -= rowsize; switch (prefix_type) { case DUMP_PREFIX_ADDRESS: seq_printf(m, "%s%p: ", prefix_str, ptr + i); break; case DUMP_PREFIX_OFFSET: seq_printf(m, "%s%.8x: ", prefix_str, i); break; default: seq_printf(m, "%s", prefix_str); break; } size = seq_get_buf(m, &buffer); ret = hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, buffer, size, ascii); seq_commit(m, ret < size ? ret : -1); seq_putc(m, '\n'); } } EXPORT_SYMBOL(seq_hex_dump); struct list_head *seq_list_start(struct list_head *head, loff_t pos) { struct list_head *lh; list_for_each(lh, head) if (pos-- == 0) return lh; return NULL; } EXPORT_SYMBOL(seq_list_start); struct list_head *seq_list_start_head(struct list_head *head, loff_t pos) { if (!pos) return head; return seq_list_start(head, pos - 1); } EXPORT_SYMBOL(seq_list_start_head); struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos) { struct list_head *lh; lh = ((struct list_head *)v)->next; ++*ppos; return lh == head ? NULL : lh; } EXPORT_SYMBOL(seq_list_next); struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos) { struct list_head *lh; list_for_each_rcu(lh, head) if (pos-- == 0) return lh; return NULL; } EXPORT_SYMBOL(seq_list_start_rcu); struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos) { if (!pos) return head; return seq_list_start_rcu(head, pos - 1); } EXPORT_SYMBOL(seq_list_start_head_rcu); struct list_head *seq_list_next_rcu(void *v, struct list_head *head, loff_t *ppos) { struct list_head *lh; lh = list_next_rcu((struct list_head *)v); ++*ppos; return lh == head ? NULL : lh; } EXPORT_SYMBOL(seq_list_next_rcu); /** * seq_hlist_start - start an iteration of a hlist * @head: the head of the hlist * @pos: the start position of the sequence * * Called at seq_file->op->start(). */ struct hlist_node *seq_hlist_start(struct hlist_head *head, loff_t pos) { struct hlist_node *node; hlist_for_each(node, head) if (pos-- == 0) return node; return NULL; } EXPORT_SYMBOL(seq_hlist_start); /** * seq_hlist_start_head - start an iteration of a hlist * @head: the head of the hlist * @pos: the start position of the sequence * * Called at seq_file->op->start(). Call this function if you want to * print a header at the top of the output. */ struct hlist_node *seq_hlist_start_head(struct hlist_head *head, loff_t pos) { if (!pos) return SEQ_START_TOKEN; return seq_hlist_start(head, pos - 1); } EXPORT_SYMBOL(seq_hlist_start_head); /** * seq_hlist_next - move to the next position of the hlist * @v: the current iterator * @head: the head of the hlist * @ppos: the current position * * Called at seq_file->op->next(). */ struct hlist_node *seq_hlist_next(void *v, struct hlist_head *head, loff_t *ppos) { struct hlist_node *node = v; ++*ppos; if (v == SEQ_START_TOKEN) return head->first; else return node->next; } EXPORT_SYMBOL(seq_hlist_next); /** * seq_hlist_start_rcu - start an iteration of a hlist protected by RCU * @head: the head of the hlist * @pos: the start position of the sequence * * Called at seq_file->op->start(). * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ struct hlist_node *seq_hlist_start_rcu(struct hlist_head *head, loff_t pos) { struct hlist_node *node; __hlist_for_each_rcu(node, head) if (pos-- == 0) return node; return NULL; } EXPORT_SYMBOL(seq_hlist_start_rcu); /** * seq_hlist_start_head_rcu - start an iteration of a hlist protected by RCU * @head: the head of the hlist * @pos: the start position of the sequence * * Called at seq_file->op->start(). Call this function if you want to * print a header at the top of the output. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ struct hlist_node *seq_hlist_start_head_rcu(struct hlist_head *head, loff_t pos) { if (!pos) return SEQ_START_TOKEN; return seq_hlist_start_rcu(head, pos - 1); } EXPORT_SYMBOL(seq_hlist_start_head_rcu); /** * seq_hlist_next_rcu - move to the next position of the hlist protected by RCU * @v: the current iterator * @head: the head of the hlist * @ppos: the current position * * Called at seq_file->op->next(). * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ struct hlist_node *seq_hlist_next_rcu(void *v, struct hlist_head *head, loff_t *ppos) { struct hlist_node *node = v; ++*ppos; if (v == SEQ_START_TOKEN) return rcu_dereference(head->first); else return rcu_dereference(node->next); } EXPORT_SYMBOL(seq_hlist_next_rcu); /** * seq_hlist_start_percpu - start an iteration of a percpu hlist array * @head: pointer to percpu array of struct hlist_heads * @cpu: pointer to cpu "cursor" * @pos: start position of sequence * * Called at seq_file->op->start(). */ struct hlist_node * seq_hlist_start_percpu(struct hlist_head __percpu *head, int *cpu, loff_t pos) { struct hlist_node *node; for_each_possible_cpu(*cpu) { hlist_for_each(node, per_cpu_ptr(head, *cpu)) { if (pos-- == 0) return node; } } return NULL; } EXPORT_SYMBOL(seq_hlist_start_percpu); /** * seq_hlist_next_percpu - move to the next position of the percpu hlist array * @v: pointer to current hlist_node * @head: pointer to percpu array of struct hlist_heads * @cpu: pointer to cpu "cursor" * @pos: start position of sequence * * Called at seq_file->op->next(). */ struct hlist_node * seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head, int *cpu, loff_t *pos) { struct hlist_node *node = v; ++*pos; if (node->next) return node->next; for (*cpu = cpumask_next(*cpu, cpu_possible_mask); *cpu < nr_cpu_ids; *cpu = cpumask_next(*cpu, cpu_possible_mask)) { struct hlist_head *bucket = per_cpu_ptr(head, *cpu); if (!hlist_empty(bucket)) return bucket->first; } return NULL; } EXPORT_SYMBOL(seq_hlist_next_percpu); void __init seq_file_init(void) { seq_file_cache = KMEM_CACHE(seq_file, SLAB_ACCOUNT|SLAB_PANIC); }
32 142 99 94 94 92 95 93 13 95 12 93 96 3 1 6 131 131 25 12 3 11 135 135 5 135 1 6 6 5 3 2 30 7 6 1 1 6 14 16 9 1 8 8 4 7 3 6 2 7 4 7 1 7 3 6 7 13 13 5 5 2 2 2 14 14 1 7 6 2 5 11 2 10 11 5 6 11 11 6 6 5 4 2 2 2 2109 2108 494 493 122 25 25 131 131 131 131 131 15 1 14 14 5 14 14 1 2 3 12 3 8 11 11 11 11 11 10 16 4 12 8 3 7 3 13 1 2 4 4 4 11 5 3 15 9 2 1 4 95 95 78 2 74 27 1 3 3 2 1 1 14 2 1 2 4 1 8 13 6 15 2 13 1 1 1 1 1 2 2 2 1 1 1 5 7 2 1 1 1 1 1 2 14 11 1 2 1 1 1 1 1 1 1 1 21 21 5 16 16 20 19 4 4 13 2 5 3 26 20 6 3 1 2 2 1 4 3 1 1 1 24 6 18 4 3 1 3 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 // SPDX-License-Identifier: GPL-2.0-or-later /* * Linux IPv6 multicast routing support for BSD pim6sd * Based on net/ipv4/ipmr.c. * * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr> * LSIIT Laboratory, Strasbourg, France * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com> * 6WIND, Paris, France * Copyright (C)2007,2008 USAGI/WIDE Project * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> */ #include <linux/uaccess.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/errno.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/socket.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/init.h> #include <linux/compat.h> #include <linux/rhashtable.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <net/raw.h> #include <linux/notifier.h> #include <linux/if_arp.h> #include <net/checksum.h> #include <net/netlink.h> #include <net/fib_rules.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <linux/mroute6.h> #include <linux/pim.h> #include <net/addrconf.h> #include <linux/netfilter_ipv6.h> #include <linux/export.h> #include <net/ip6_checksum.h> #include <linux/netconf.h> #include <net/ip_tunnels.h> #include <linux/nospec.h> struct ip6mr_rule { struct fib_rule common; }; struct ip6mr_result { struct mr_table *mrt; }; /* Big lock, protecting vif table, mrt cache and mroute socket state. Note that the changes are semaphored via rtnl_lock. */ static DEFINE_SPINLOCK(mrt_lock); static struct net_device *vif_dev_read(const struct vif_device *vif) { return rcu_dereference(vif->dev); } /* Multicast router control variables */ /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); /* We return to original Alan's scheme. Hash table of resolved entries is changed only in process context and protected with weak lock mrt_lock. Queue of unresolved entries is protected with strong spinlock mfc_unres_lock. In this case data path is free of exclusive locks at all. */ static struct kmem_cache *mrt_cachep __read_mostly; static struct mr_table *ip6mr_new_table(struct net *net, u32 id); static void ip6mr_free_table(struct mr_table *mrt); static void ip6_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc6_cache *cache); static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert); static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, int cmd); static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); static void mroute_clean_tables(struct mr_table *mrt, int flags); static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES #define ip6mr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \ lockdep_rtnl_is_held() || \ list_empty(&net->ipv6.mr6_tables)) static bool ip6mr_can_free_table(struct net *net) { return !check_net(net) || !net_initialized(net); } static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table *mrt) { struct mr_table *ret; if (!mrt) ret = list_entry_rcu(net->ipv6.mr6_tables.next, struct mr_table, list); else ret = list_entry_rcu(mrt->list.next, struct mr_table, list); if (&ret->list == &net->ipv6.mr6_tables) return NULL; return ret; } static struct mr_table *__ip6mr_get_table(struct net *net, u32 id) { struct mr_table *mrt; ip6mr_for_each_table(mrt, net) { if (mrt->id == id) return mrt; } return NULL; } static struct mr_table *ip6mr_get_table(struct net *net, u32 id) { struct mr_table *mrt; rcu_read_lock(); mrt = __ip6mr_get_table(net, id); rcu_read_unlock(); return mrt; } static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, struct mr_table **mrt) { int err; struct ip6mr_result res; struct fib_lookup_arg arg = { .result = &res, .flags = FIB_LOOKUP_NOREF, }; /* update flow if oif or iif point to device enslaved to l3mdev */ l3mdev_update_flow(net, flowi6_to_flowi(flp6)); err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flowi6_to_flowi(flp6), 0, &arg); if (err < 0) return err; *mrt = res.mrt; return 0; } static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { struct ip6mr_result *res = arg->result; struct mr_table *mrt; switch (rule->action) { case FR_ACT_TO_TBL: break; case FR_ACT_UNREACHABLE: return -ENETUNREACH; case FR_ACT_PROHIBIT: return -EACCES; case FR_ACT_BLACKHOLE: default: return -EINVAL; } arg->table = fib_rule_get_table(rule, arg); mrt = __ip6mr_get_table(rule->fr_net, arg->table); if (!mrt) return -EAGAIN; res->mrt = mrt; return 0; } static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags) { return 1; } static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, struct nlattr **tb, struct netlink_ext_ack *extack) { return 0; } static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, struct nlattr **tb) { return 1; } static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh) { frh->dst_len = 0; frh->src_len = 0; frh->tos = 0; return 0; } static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { .family = RTNL_FAMILY_IP6MR, .rule_size = sizeof(struct ip6mr_rule), .addr_size = sizeof(struct in6_addr), .action = ip6mr_rule_action, .match = ip6mr_rule_match, .configure = ip6mr_rule_configure, .compare = ip6mr_rule_compare, .fill = ip6mr_rule_fill, .nlgroup = RTNLGRP_IPV6_RULE, .owner = THIS_MODULE, }; static int __net_init ip6mr_rules_init(struct net *net) { struct fib_rules_ops *ops; struct mr_table *mrt; int err; ops = fib_rules_register(&ip6mr_rules_ops_template, net); if (IS_ERR(ops)) return PTR_ERR(ops); INIT_LIST_HEAD(&net->ipv6.mr6_tables); mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); if (IS_ERR(mrt)) { err = PTR_ERR(mrt); goto err1; } err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT); if (err < 0) goto err2; net->ipv6.mr6_rules_ops = ops; return 0; err2: rtnl_lock(); ip6mr_free_table(mrt); rtnl_unlock(); err1: fib_rules_unregister(ops); return err; } static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr_table *mrt, *next; ASSERT_RTNL(); list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { list_del(&mrt->list); ip6mr_free_table(mrt); } fib_rules_unregister(net->ipv6.mr6_rules_ops); } static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack); } static unsigned int ip6mr_rules_seq_read(const struct net *net) { return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR); } bool ip6mr_rule_default(const struct fib_rule *rule) { return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL && rule->table == RT6_TABLE_DFLT && !rule->l3mdev; } EXPORT_SYMBOL(ip6mr_rule_default); #else #define ip6mr_for_each_table(mrt, net) \ for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) static bool ip6mr_can_free_table(struct net *net) { return !check_net(net); } static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table *mrt) { if (!mrt) return net->ipv6.mrt6; return NULL; } static struct mr_table *ip6mr_get_table(struct net *net, u32 id) { return net->ipv6.mrt6; } #define __ip6mr_get_table ip6mr_get_table static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, struct mr_table **mrt) { *mrt = net->ipv6.mrt6; return 0; } static int __net_init ip6mr_rules_init(struct net *net) { struct mr_table *mrt; mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); if (IS_ERR(mrt)) return PTR_ERR(mrt); net->ipv6.mrt6 = mrt; return 0; } static void __net_exit ip6mr_rules_exit(struct net *net) { ASSERT_RTNL(); ip6mr_free_table(net->ipv6.mrt6); net->ipv6.mrt6 = NULL; } static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return 0; } static unsigned int ip6mr_rules_seq_read(const struct net *net) { return 0; } #endif static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { const struct mfc6_cache_cmp_arg *cmparg = arg->key; struct mfc6_cache *c = (struct mfc6_cache *)ptr; return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) || !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin); } static const struct rhashtable_params ip6mr_rht_params = { .head_offset = offsetof(struct mr_mfc, mnode), .key_offset = offsetof(struct mfc6_cache, cmparg), .key_len = sizeof(struct mfc6_cache_cmp_arg), .nelem_hint = 3, .obj_cmpfn = ip6mr_hash_cmp, .automatic_shrinking = true, }; static void ip6mr_new_table_set(struct mr_table *mrt, struct net *net) { #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); #endif } static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = { .mf6c_origin = IN6ADDR_ANY_INIT, .mf6c_mcastgrp = IN6ADDR_ANY_INIT, }; static struct mr_table_ops ip6mr_mr_table_ops = { .rht_params = &ip6mr_rht_params, .cmparg_any = &ip6mr_mr_table_ops_cmparg_any, }; static struct mr_table *ip6mr_new_table(struct net *net, u32 id) { struct mr_table *mrt; mrt = __ip6mr_get_table(net, id); if (mrt) return mrt; return mr_table_alloc(net, id, &ip6mr_mr_table_ops, ipmr_expire_process, ip6mr_new_table_set); } static void ip6mr_free_table(struct mr_table *mrt) { struct net *net = read_pnet(&mrt->net); WARN_ON_ONCE(!ip6mr_can_free_table(net)); timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC); rhltable_destroy(&mrt->mfc_hash); kfree(mrt); } #ifdef CONFIG_PROC_FS /* The /proc interfaces to multicast routing * /proc/ip6_mr_cache /proc/ip6_mr_vif */ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct mr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); struct mr_table *mrt; rcu_read_lock(); mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT); if (!mrt) { rcu_read_unlock(); return ERR_PTR(-ENOENT); } iter->mrt = mrt; return mr_vif_seq_start(seq, pos); } static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) { struct mr_vif_iter *iter = seq->private; struct mr_table *mrt = iter->mrt; if (v == SEQ_START_TOKEN) { seq_puts(seq, "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); } else { const struct vif_device *vif = v; const struct net_device *vif_dev; const char *name; vif_dev = vif_dev_read(vif); name = vif_dev ? vif_dev->name : "none"; seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", vif - mrt->vif_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags); } return 0; } static const struct seq_operations ip6mr_vif_seq_ops = { .start = ip6mr_vif_seq_start, .next = mr_vif_seq_next, .stop = ip6mr_vif_seq_stop, .show = ip6mr_vif_seq_show, }; static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) { struct net *net = seq_file_net(seq); struct mr_table *mrt; mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); if (!mrt) return ERR_PTR(-ENOENT); return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); } static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) { int n; if (v == SEQ_START_TOKEN) { seq_puts(seq, "Group " "Origin " "Iif Pkts Bytes Wrong Oifs\n"); } else { const struct mfc6_cache *mfc = v; const struct mr_mfc_iter *it = seq->private; struct mr_table *mrt = it->mrt; seq_printf(seq, "%pI6 %pI6 %-3hd", &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, mfc->_c.mfc_parent); if (it->cache != &mrt->mfc_unres_queue) { seq_printf(seq, " %8lu %8lu %8lu", atomic_long_read(&mfc->_c.mfc_un.res.pkt), atomic_long_read(&mfc->_c.mfc_un.res.bytes), atomic_long_read(&mfc->_c.mfc_un.res.wrong_if)); for (n = mfc->_c.mfc_un.res.minvif; n < mfc->_c.mfc_un.res.maxvif; n++) { if (VIF_EXISTS(mrt, n) && mfc->_c.mfc_un.res.ttls[n] < 255) seq_printf(seq, " %2d:%-3d", n, mfc->_c.mfc_un.res.ttls[n]); } } else { /* unresolved mfc_caches don't contain * pkt, bytes and wrong_if values */ seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); } seq_putc(seq, '\n'); } return 0; } static const struct seq_operations ipmr_mfc_seq_ops = { .start = ipmr_mfc_seq_start, .next = mr_mfc_seq_next, .stop = mr_mfc_seq_stop, .show = ipmr_mfc_seq_show, }; #endif #ifdef CONFIG_IPV6_PIMSM_V2 static int pim6_rcv(struct sk_buff *skb) { struct pimreghdr *pim; struct ipv6hdr *encap; struct net_device *reg_dev = NULL; struct net *net = dev_net(skb->dev); struct mr_table *mrt; struct flowi6 fl6 = { .flowi6_iif = skb->dev->ifindex, .flowi6_mark = skb->mark, }; int reg_vif_num; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) goto drop; pim = (struct pimreghdr *)skb_transport_header(skb); if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || (pim->flags & PIM_NULL_REGISTER) || (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, sizeof(*pim), IPPROTO_PIM, csum_partial((void *)pim, sizeof(*pim), 0)) && csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; /* check if the inner packet is destined to mcast group */ encap = (struct ipv6hdr *)(skb_transport_header(skb) + sizeof(*pim)); if (!ipv6_addr_is_multicast(&encap->daddr) || encap->payload_len == 0 || ntohs(encap->payload_len) + sizeof(*pim) > skb->len) goto drop; if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) goto drop; /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */ reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num); if (reg_vif_num >= 0) reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]); if (!reg_dev) goto drop; skb->mac_header = skb->network_header; skb_pull(skb, (u8 *)encap - skb->data); skb_reset_network_header(skb); skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = CHECKSUM_NONE; skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); netif_rx(skb); return 0; drop: kfree_skb(skb); return 0; } static const struct inet6_protocol pim6_protocol = { .handler = pim6_rcv, }; /* Service routines creating virtual interfaces: PIMREG */ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); struct mr_table *mrt; struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_mark = skb->mark, }; if (!pskb_inet_may_pull(skb)) goto tx_err; if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) goto tx_err; DEV_STATS_ADD(dev, tx_bytes, skb->len); DEV_STATS_INC(dev, tx_packets); rcu_read_lock(); ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), MRT6MSG_WHOLEPKT); rcu_read_unlock(); kfree_skb(skb); return NETDEV_TX_OK; tx_err: DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); return NETDEV_TX_OK; } static int reg_vif_get_iflink(const struct net_device *dev) { return 0; } static const struct net_device_ops reg_vif_netdev_ops = { .ndo_start_xmit = reg_vif_xmit, .ndo_get_iflink = reg_vif_get_iflink, }; static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; dev->flags = IFF_NOARP; dev->netdev_ops = &reg_vif_netdev_ops; dev->needs_free_netdev = true; dev->netns_immutable = true; } static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) { struct net_device *dev; char name[IFNAMSIZ]; if (mrt->id == RT6_TABLE_DFLT) sprintf(name, "pim6reg"); else sprintf(name, "pim6reg%u", mrt->id); dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); if (!dev) return NULL; dev_net_set(dev, net); if (register_netdevice(dev)) { free_netdev(dev); return NULL; } if (dev_open(dev, NULL)) goto failure; dev_hold(dev); return dev; failure: unregister_netdevice(dev); return NULL; } #endif static int call_ip6mr_vif_entry_notifiers(struct net *net, enum fib_event_type event_type, struct vif_device *vif, struct net_device *vif_dev, mifi_t vif_index, u32 tb_id) { return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type, vif, vif_dev, vif_index, tb_id, &net->ipv6.ipmr_seq); } static int call_ip6mr_mfc_entry_notifiers(struct net *net, enum fib_event_type event_type, struct mfc6_cache *mfc, u32 tb_id) { return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type, &mfc->_c, tb_id, &net->ipv6.ipmr_seq); } /* Delete a VIF entry */ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { struct vif_device *v; struct net_device *dev; struct inet6_dev *in6_dev; if (vifi < 0 || vifi >= mrt->maxvif) return -EADDRNOTAVAIL; v = &mrt->vif_table[vifi]; dev = rtnl_dereference(v->dev); if (!dev) return -EADDRNOTAVAIL; call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), FIB_EVENT_VIF_DEL, v, dev, vifi, mrt->id); spin_lock(&mrt_lock); RCU_INIT_POINTER(v->dev, NULL); #ifdef CONFIG_IPV6_PIMSM_V2 if (vifi == mrt->mroute_reg_vif_num) { /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */ WRITE_ONCE(mrt->mroute_reg_vif_num, -1); } #endif if (vifi + 1 == mrt->maxvif) { int tmp; for (tmp = vifi - 1; tmp >= 0; tmp--) { if (VIF_EXISTS(mrt, tmp)) break; } WRITE_ONCE(mrt->maxvif, tmp + 1); } spin_unlock(&mrt_lock); dev_set_allmulti(dev, -1); in6_dev = __in6_dev_get(dev); if (in6_dev) { atomic_dec(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); } if ((v->flags & MIFF_REGISTER) && !notify) unregister_netdevice_queue(dev, head); netdev_put(dev, &v->dev_tracker); return 0; } static inline void ip6mr_cache_free_rcu(struct rcu_head *head) { struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c); } static inline void ip6mr_cache_free(struct mfc6_cache *c) { call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu); } /* Destroy an unresolved cache entry, killing queued skbs and reporting error to netlink readers. */ static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c) { struct net *net = read_pnet(&mrt->net); struct sk_buff *skb; atomic_dec(&mrt->cache_resolve_queue_len); while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) { if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct ipv6hdr)); nlh->nlmsg_type = NLMSG_ERROR; nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT; rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else kfree_skb(skb); } ip6mr_cache_free(c); } /* Timer process for all the unresolved queue. */ static void ipmr_do_expire_process(struct mr_table *mrt) { unsigned long now = jiffies; unsigned long expires = 10 * HZ; struct mr_mfc *c, *next; list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { if (time_after(c->mfc_un.unres.expires, now)) { /* not yet... */ unsigned long interval = c->mfc_un.unres.expires - now; if (interval < expires) expires = interval; continue; } list_del(&c->list); mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); } if (!list_empty(&mrt->mfc_unres_queue)) mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); } static void ipmr_expire_process(struct timer_list *t) { struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer); if (!spin_trylock(&mfc_unres_lock)) { mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); return; } if (!list_empty(&mrt->mfc_unres_queue)) ipmr_do_expire_process(mrt); spin_unlock(&mfc_unres_lock); } /* Fill oifs list. It is called under locked mrt_lock. */ static void ip6mr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, unsigned char *ttls) { int vifi; cache->mfc_un.res.minvif = MAXMIFS; cache->mfc_un.res.maxvif = 0; memset(cache->mfc_un.res.ttls, 255, MAXMIFS); for (vifi = 0; vifi < mrt->maxvif; vifi++) { if (VIF_EXISTS(mrt, vifi) && ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) cache->mfc_un.res.minvif = vifi; if (cache->mfc_un.res.maxvif <= vifi) cache->mfc_un.res.maxvif = vifi + 1; } } WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies); } static int mif6_add(struct net *net, struct mr_table *mrt, struct mif6ctl *vifc, int mrtsock) { int vifi = vifc->mif6c_mifi; struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct inet6_dev *in6_dev; int err; /* Is vif busy ? */ if (VIF_EXISTS(mrt, vifi)) return -EADDRINUSE; switch (vifc->mif6c_flags) { #ifdef CONFIG_IPV6_PIMSM_V2 case MIFF_REGISTER: /* * Special Purpose VIF in PIM * All the packets will be sent to the daemon */ if (mrt->mroute_reg_vif_num >= 0) return -EADDRINUSE; dev = ip6mr_reg_vif(net, mrt); if (!dev) return -ENOBUFS; err = dev_set_allmulti(dev, 1); if (err) { unregister_netdevice(dev); dev_put(dev); return err; } break; #endif case 0: dev = dev_get_by_index(net, vifc->mif6c_pifi); if (!dev) return -EADDRNOTAVAIL; err = dev_set_allmulti(dev, 1); if (err) { dev_put(dev); return err; } break; default: return -EINVAL; } in6_dev = __in6_dev_get(dev); if (in6_dev) { atomic_inc(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); } /* Fill in the VIF structures */ vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold, vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0), MIFF_REGISTER); /* And finish update writing critical data */ spin_lock(&mrt_lock); rcu_assign_pointer(v->dev, dev); netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); #ifdef CONFIG_IPV6_PIMSM_V2 if (v->flags & MIFF_REGISTER) WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); #endif if (vifi + 1 > mrt->maxvif) WRITE_ONCE(mrt->maxvif, vifi + 1); spin_unlock(&mrt_lock); call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev, vifi, mrt->id); return 0; } static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt, const struct in6_addr *origin, const struct in6_addr *mcastgrp) { struct mfc6_cache_cmp_arg arg = { .mf6c_origin = *origin, .mf6c_mcastgrp = *mcastgrp, }; return mr_mfc_find(mrt, &arg); } /* Look for a (*,G) entry */ static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt, struct in6_addr *mcastgrp, mifi_t mifi) { struct mfc6_cache_cmp_arg arg = { .mf6c_origin = in6addr_any, .mf6c_mcastgrp = *mcastgrp, }; if (ipv6_addr_any(mcastgrp)) return mr_mfc_find_any_parent(mrt, mifi); return mr_mfc_find_any(mrt, mifi, &arg); } /* Look for a (S,G,iif) entry if parent != -1 */ static struct mfc6_cache * ip6mr_cache_find_parent(struct mr_table *mrt, const struct in6_addr *origin, const struct in6_addr *mcastgrp, int parent) { struct mfc6_cache_cmp_arg arg = { .mf6c_origin = *origin, .mf6c_mcastgrp = *mcastgrp, }; return mr_mfc_find_parent(mrt, &arg, parent); } /* Allocate a multicast cache entry */ static struct mfc6_cache *ip6mr_cache_alloc(void) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (!c) return NULL; c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->_c.mfc_un.res.minvif = MAXMIFS; c->_c.free = ip6mr_cache_free_rcu; refcount_set(&c->_c.mfc_un.res.refcount, 1); return c; } static struct mfc6_cache *ip6mr_cache_alloc_unres(void) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (!c) return NULL; skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; return c; } /* * A cache entry has gone into a resolved state from queued */ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, struct mfc6_cache *uc, struct mfc6_cache *c) { struct sk_buff *skb; /* * Play the pending entries through our router */ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct ipv6hdr)); if (mr_fill_mroute(mrt, skb, &c->_c, nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; } rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { rcu_read_lock(); ip6_mr_forward(net, mrt, skb->dev, skb, c); rcu_read_unlock(); } } } /* * Bounce a cache query up to pim6sd and netlink. * * Called under rcu_read_lock() */ static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert) { struct sock *mroute6_sk; struct sk_buff *skb; struct mrt6msg *msg; int ret; #ifdef CONFIG_IPV6_PIMSM_V2 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) +sizeof(*msg)); else #endif skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); if (!skb) return -ENOBUFS; /* I suppose that internal messages * do not require checksums */ skb->ip_summed = CHECKSUM_UNNECESSARY; #ifdef CONFIG_IPV6_PIMSM_V2 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) { /* Ugly, but we have no choice with this interface. Duplicate old header, fix length etc. And all this only to mangle msg->im6_msgtype and to set msg->im6_mbz to "mbz" :-) */ __skb_pull(skb, skb_network_offset(pkt)); skb_push(skb, sizeof(*msg)); skb_reset_transport_header(skb); msg = (struct mrt6msg *)skb_transport_header(skb); msg->im6_mbz = 0; msg->im6_msgtype = assert; if (assert == MRT6MSG_WRMIFWHOLE) msg->im6_mif = mifi; else msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num); msg->im6_pad = 0; msg->im6_src = ipv6_hdr(pkt)->saddr; msg->im6_dst = ipv6_hdr(pkt)->daddr; skb->ip_summed = CHECKSUM_UNNECESSARY; } else #endif { /* * Copy the IP header */ skb_put(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr)); /* * Add our header */ skb_put(skb, sizeof(*msg)); skb_reset_transport_header(skb); msg = (struct mrt6msg *)skb_transport_header(skb); msg->im6_mbz = 0; msg->im6_msgtype = assert; msg->im6_mif = mifi; msg->im6_pad = 0; msg->im6_src = ipv6_hdr(pkt)->saddr; msg->im6_dst = ipv6_hdr(pkt)->daddr; skb_dst_set(skb, dst_clone(skb_dst(pkt))); skb->ip_summed = CHECKSUM_UNNECESSARY; } mroute6_sk = rcu_dereference(mrt->mroute_sk); if (!mroute6_sk) { kfree_skb(skb); return -EINVAL; } mrt6msg_netlink_event(mrt, skb); /* Deliver to user space multicast routing algorithms */ ret = sock_queue_rcv_skb(mroute6_sk, skb); if (ret < 0) { net_warn_ratelimited("mroute6: pending queue full, dropping entries\n"); kfree_skb(skb); } return ret; } /* Queue a packet for resolution. It gets locked cache entry! */ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, struct sk_buff *skb, struct net_device *dev) { struct mfc6_cache *c; bool found = false; int err; spin_lock_bh(&mfc_unres_lock); list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) { found = true; break; } } if (!found) { /* * Create a new entry if allowable */ c = ip6mr_cache_alloc_unres(); if (!c) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); return -ENOBUFS; } /* Fill in the new cache entry */ c->_c.mfc_parent = -1; c->mf6c_origin = ipv6_hdr(skb)->saddr; c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr; /* * Reflect first query at pim6sd */ err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE); if (err < 0) { /* If the report failed throw the cache entry out - Brad Parker */ spin_unlock_bh(&mfc_unres_lock); ip6mr_cache_free(c); kfree_skb(skb); return err; } atomic_inc(&mrt->cache_resolve_queue_len); list_add(&c->_c.list, &mrt->mfc_unres_queue); mr6_netlink_event(mrt, c, RTM_NEWROUTE); ipmr_do_expire_process(mrt); } /* See if we can append the packet */ if (c->_c.mfc_un.unres.unresolved.qlen > 3) { kfree_skb(skb); err = -ENOBUFS; } else { if (dev) { skb->dev = dev; skb->skb_iif = dev->ifindex; } skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); err = 0; } spin_unlock_bh(&mfc_unres_lock); return err; } /* * MFC6 cache manipulation by user space */ static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc, int parent) { struct mfc6_cache *c; /* The entries are added/deleted only under RTNL */ rcu_read_lock(); c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, &mfc->mf6cc_mcastgrp.sin6_addr, parent); rcu_read_unlock(); if (!c) return -ENOENT; rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params); list_del_rcu(&c->_c.list); call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), FIB_EVENT_ENTRY_DEL, c, mrt->id); mr6_netlink_event(mrt, c, RTM_DELROUTE); mr_cache_put(&c->_c); return 0; } static int ip6mr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct mr_table *mrt; struct vif_device *v; int ct; if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; ip6mr_for_each_table(mrt, net) { v = &mrt->vif_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { if (rcu_access_pointer(v->dev) == dev) mif6_delete(mrt, ct, 1, NULL); } } return NOTIFY_DONE; } static unsigned int ip6mr_seq_read(const struct net *net) { return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net); } static int ip6mr_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump, ip6mr_mr_table_iter, extack); } static struct notifier_block ip6_mr_notifier = { .notifier_call = ip6mr_device_event }; static const struct fib_notifier_ops ip6mr_notifier_ops_template = { .family = RTNL_FAMILY_IP6MR, .fib_seq_read = ip6mr_seq_read, .fib_dump = ip6mr_dump, .owner = THIS_MODULE, }; static int __net_init ip6mr_notifier_init(struct net *net) { struct fib_notifier_ops *ops; net->ipv6.ipmr_seq = 0; ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net); if (IS_ERR(ops)) return PTR_ERR(ops); net->ipv6.ip6mr_notifier_ops = ops; return 0; } static void __net_exit ip6mr_notifier_exit(struct net *net) { fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops); net->ipv6.ip6mr_notifier_ops = NULL; } /* Setup for IP multicast routing */ static int __net_init ip6mr_net_init(struct net *net) { int err; err = ip6mr_notifier_init(net); if (err) return err; err = ip6mr_rules_init(net); if (err < 0) goto ip6mr_rules_fail; #ifdef CONFIG_PROC_FS err = -ENOMEM; if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops, sizeof(struct mr_vif_iter))) goto proc_vif_fail; if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops, sizeof(struct mr_mfc_iter))) goto proc_cache_fail; #endif return 0; #ifdef CONFIG_PROC_FS proc_cache_fail: remove_proc_entry("ip6_mr_vif", net->proc_net); proc_vif_fail: rtnl_lock(); ip6mr_rules_exit(net); rtnl_unlock(); #endif ip6mr_rules_fail: ip6mr_notifier_exit(net); return err; } static void __net_exit ip6mr_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS remove_proc_entry("ip6_mr_cache", net->proc_net); remove_proc_entry("ip6_mr_vif", net->proc_net); #endif ip6mr_notifier_exit(net); } static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list) { struct net *net; rtnl_lock(); list_for_each_entry(net, net_list, exit_list) ip6mr_rules_exit(net); rtnl_unlock(); } static struct pernet_operations ip6mr_net_ops = { .init = ip6mr_net_init, .exit = ip6mr_net_exit, .exit_batch = ip6mr_net_exit_batch, }; static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = { {.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR, .msgtype = RTM_GETROUTE, .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute}, }; int __init ip6_mr_init(void) { int err; mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN); if (!mrt_cachep) return -ENOMEM; err = register_pernet_subsys(&ip6mr_net_ops); if (err) goto reg_pernet_fail; err = register_netdevice_notifier(&ip6_mr_notifier); if (err) goto reg_notif_fail; #ifdef CONFIG_IPV6_PIMSM_V2 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { pr_err("%s: can't add PIM protocol\n", __func__); err = -EAGAIN; goto add_proto_fail; } #endif err = rtnl_register_many(ip6mr_rtnl_msg_handlers); if (!err) return 0; #ifdef CONFIG_IPV6_PIMSM_V2 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); add_proto_fail: unregister_netdevice_notifier(&ip6_mr_notifier); #endif reg_notif_fail: unregister_pernet_subsys(&ip6mr_net_ops); reg_pernet_fail: kmem_cache_destroy(mrt_cachep); return err; } void __init ip6_mr_cleanup(void) { rtnl_unregister_many(ip6mr_rtnl_msg_handlers); #ifdef CONFIG_IPV6_PIMSM_V2 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); #endif unregister_netdevice_notifier(&ip6_mr_notifier); unregister_pernet_subsys(&ip6mr_net_ops); kmem_cache_destroy(mrt_cachep); } static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, struct mf6cctl *mfc, int mrtsock, int parent) { unsigned char ttls[MAXMIFS]; struct mfc6_cache *uc, *c; struct mr_mfc *_uc; bool found; int i, err; if (mfc->mf6cc_parent >= MAXMIFS) return -ENFILE; memset(ttls, 255, MAXMIFS); for (i = 0; i < MAXMIFS; i++) { if (IF_ISSET(i, &mfc->mf6cc_ifset)) ttls[i] = 1; } /* The entries are added/deleted only under RTNL */ rcu_read_lock(); c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, &mfc->mf6cc_mcastgrp.sin6_addr, parent); rcu_read_unlock(); if (c) { spin_lock(&mrt_lock); c->_c.mfc_parent = mfc->mf6cc_parent; ip6mr_update_thresholds(mrt, &c->_c, ttls); if (!mrtsock) c->_c.mfc_flags |= MFC_STATIC; spin_unlock(&mrt_lock); call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, mrt->id); mr6_netlink_event(mrt, c, RTM_NEWROUTE); return 0; } if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; c = ip6mr_cache_alloc(); if (!c) return -ENOMEM; c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; c->_c.mfc_parent = mfc->mf6cc_parent; ip6mr_update_thresholds(mrt, &c->_c, ttls); if (!mrtsock) c->_c.mfc_flags |= MFC_STATIC; err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, ip6mr_rht_params); if (err) { pr_err("ip6mr: rhtable insert error %d\n", err); ip6mr_cache_free(c); return err; } list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); /* Check to see if we resolved a queued list. If so we * need to send on the frames and tidy up. */ found = false; spin_lock_bh(&mfc_unres_lock); list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { uc = (struct mfc6_cache *)_uc; if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { list_del(&_uc->list); atomic_dec(&mrt->cache_resolve_queue_len); found = true; break; } } if (list_empty(&mrt->mfc_unres_queue)) del_timer(&mrt->ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); if (found) { ip6mr_cache_resolve(net, mrt, uc, c); ip6mr_cache_free(uc); } call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id); mr6_netlink_event(mrt, c, RTM_NEWROUTE); return 0; } /* * Close the multicast socket, and clear the vif tables etc */ static void mroute_clean_tables(struct mr_table *mrt, int flags) { struct mr_mfc *c, *tmp; LIST_HEAD(list); int i; /* Shut down all active vif entries */ if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) { for (i = 0; i < mrt->maxvif; i++) { if (((mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS_STATIC)) || (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS))) continue; mif6_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); } /* Wipe the cache */ if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) { list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) || (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC))) continue; rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); list_del_rcu(&c->list); call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), FIB_EVENT_ENTRY_DEL, (struct mfc6_cache *)c, mrt->id); mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); mr_cache_put(c); } } if (flags & MRT6_FLUSH_MFC) { if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { spin_lock_bh(&mfc_unres_lock); list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { list_del(&c->list); mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); } spin_unlock_bh(&mfc_unres_lock); } } } static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) { int err = 0; struct net *net = sock_net(sk); rtnl_lock(); spin_lock(&mrt_lock); if (rtnl_dereference(mrt->mroute_sk)) { err = -EADDRINUSE; } else { rcu_assign_pointer(mrt->mroute_sk, sk); sock_set_flag(sk, SOCK_RCU_FREE); atomic_inc(&net->ipv6.devconf_all->mc_forwarding); } spin_unlock(&mrt_lock); if (!err) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); rtnl_unlock(); return err; } int ip6mr_sk_done(struct sock *sk) { struct net *net = sock_net(sk); struct ipv6_devconf *devconf; struct mr_table *mrt; int err = -EACCES; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return err; devconf = net->ipv6.devconf_all; if (!devconf || !atomic_read(&devconf->mc_forwarding)) return err; rtnl_lock(); ip6mr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { spin_lock(&mrt_lock); RCU_INIT_POINTER(mrt->mroute_sk, NULL); /* Note that mroute_sk had SOCK_RCU_FREE set, * so the RCU grace period before sk freeing * is guaranteed by sk_destruct() */ atomic_dec(&devconf->mc_forwarding); spin_unlock(&mrt_lock); inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC); err = 0; break; } } rtnl_unlock(); return err; } bool mroute6_is_socket(struct net *net, struct sk_buff *skb) { struct mr_table *mrt; struct flowi6 fl6 = { .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_oif = skb->dev->ifindex, .flowi6_mark = skb->mark, }; if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) return NULL; return rcu_access_pointer(mrt->mroute_sk); } EXPORT_SYMBOL(mroute6_is_socket); /* * Socket options and virtual interface manipulation. The whole * virtual interface system is a complete heap, but unfortunately * that's how BSD mrouted happens to think. Maybe one day with a proper * MOSPF/PIM router set up we can clean this up. */ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen) { int ret, parent = 0; struct mif6ctl vif; struct mf6cctl mfc; mifi_t mifi; struct net *net = sock_net(sk); struct mr_table *mrt; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) return -ENOENT; if (optname != MRT6_INIT) { if (sk != rcu_access_pointer(mrt->mroute_sk) && !ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EACCES; } switch (optname) { case MRT6_INIT: if (optlen < sizeof(int)) return -EINVAL; return ip6mr_sk_init(mrt, sk); case MRT6_DONE: return ip6mr_sk_done(sk); case MRT6_ADD_MIF: if (optlen < sizeof(vif)) return -EINVAL; if (copy_from_sockptr(&vif, optval, sizeof(vif))) return -EFAULT; if (vif.mif6c_mifi >= MAXMIFS) return -ENFILE; rtnl_lock(); ret = mif6_add(net, mrt, &vif, sk == rtnl_dereference(mrt->mroute_sk)); rtnl_unlock(); return ret; case MRT6_DEL_MIF: if (optlen < sizeof(mifi_t)) return -EINVAL; if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t))) return -EFAULT; rtnl_lock(); ret = mif6_delete(mrt, mifi, 0, NULL); rtnl_unlock(); return ret; /* * Manipulate the forwarding caches. These live * in a sort of kernel/user symbiosis. */ case MRT6_ADD_MFC: case MRT6_DEL_MFC: parent = -1; fallthrough; case MRT6_ADD_MFC_PROXY: case MRT6_DEL_MFC_PROXY: if (optlen < sizeof(mfc)) return -EINVAL; if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) return -EFAULT; if (parent == 0) parent = mfc.mf6cc_parent; rtnl_lock(); if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) ret = ip6mr_mfc_delete(mrt, &mfc, parent); else ret = ip6mr_mfc_add(net, mrt, &mfc, sk == rtnl_dereference(mrt->mroute_sk), parent); rtnl_unlock(); return ret; case MRT6_FLUSH: { int flags; if (optlen != sizeof(flags)) return -EINVAL; if (copy_from_sockptr(&flags, optval, sizeof(flags))) return -EFAULT; rtnl_lock(); mroute_clean_tables(mrt, flags); rtnl_unlock(); return 0; } /* * Control PIM assert (to activate pim will activate assert) */ case MRT6_ASSERT: { int v; if (optlen != sizeof(v)) return -EINVAL; if (copy_from_sockptr(&v, optval, sizeof(v))) return -EFAULT; mrt->mroute_do_assert = v; return 0; } #ifdef CONFIG_IPV6_PIMSM_V2 case MRT6_PIM: { bool do_wrmifwhole; int v; if (optlen != sizeof(v)) return -EINVAL; if (copy_from_sockptr(&v, optval, sizeof(v))) return -EFAULT; do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE); v = !!v; rtnl_lock(); ret = 0; if (v != mrt->mroute_do_pim) { mrt->mroute_do_pim = v; mrt->mroute_do_assert = v; mrt->mroute_do_wrvifwhole = do_wrmifwhole; } rtnl_unlock(); return ret; } #endif #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES case MRT6_TABLE: { u32 v; if (optlen != sizeof(u32)) return -EINVAL; if (copy_from_sockptr(&v, optval, sizeof(v))) return -EFAULT; /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */ if (v != RT_TABLE_DEFAULT && v >= 100000000) return -EINVAL; if (sk == rcu_access_pointer(mrt->mroute_sk)) return -EBUSY; rtnl_lock(); ret = 0; mrt = ip6mr_new_table(net, v); if (IS_ERR(mrt)) ret = PTR_ERR(mrt); else raw6_sk(sk)->ip6mr_table = v; rtnl_unlock(); return ret; } #endif /* * Spurious command, or MRT6_VERSION which you cannot * set. */ default: return -ENOPROTOOPT; } } /* * Getsock opt support for the multicast routing system. */ int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, sockptr_t optlen) { int olr; int val; struct net *net = sock_net(sk); struct mr_table *mrt; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) return -ENOENT; switch (optname) { case MRT6_VERSION: val = 0x0305; break; #ifdef CONFIG_IPV6_PIMSM_V2 case MRT6_PIM: val = mrt->mroute_do_pim; break; #endif case MRT6_ASSERT: val = mrt->mroute_do_assert; break; default: return -ENOPROTOOPT; } if (copy_from_sockptr(&olr, optlen, sizeof(int))) return -EFAULT; olr = min_t(int, olr, sizeof(int)); if (olr < 0) return -EINVAL; if (copy_to_sockptr(optlen, &olr, sizeof(int))) return -EFAULT; if (copy_to_sockptr(optval, &val, olr)) return -EFAULT; return 0; } /* * The IP multicast ioctl support routines. */ int ip6mr_ioctl(struct sock *sk, int cmd, void *arg) { struct sioc_sg_req6 *sr; struct sioc_mif_req6 *vr; struct vif_device *vif; struct mfc6_cache *c; struct net *net = sock_net(sk); struct mr_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) return -ENOENT; switch (cmd) { case SIOCGETMIFCNT_IN6: vr = (struct sioc_mif_req6 *)arg; if (vr->mifi >= mrt->maxvif) return -EINVAL; vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif); rcu_read_lock(); vif = &mrt->vif_table[vr->mifi]; if (VIF_EXISTS(mrt, vr->mifi)) { vr->icount = READ_ONCE(vif->pkt_in); vr->ocount = READ_ONCE(vif->pkt_out); vr->ibytes = READ_ONCE(vif->bytes_in); vr->obytes = READ_ONCE(vif->bytes_out); rcu_read_unlock(); return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT_IN6: sr = (struct sioc_sg_req6 *)arg; rcu_read_lock(); c = ip6mr_cache_find(mrt, &sr->src.sin6_addr, &sr->grp.sin6_addr); if (c) { sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); rcu_read_unlock(); return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; default: return -ENOIOCTLCMD; } } #ifdef CONFIG_COMPAT struct compat_sioc_sg_req6 { struct sockaddr_in6 src; struct sockaddr_in6 grp; compat_ulong_t pktcnt; compat_ulong_t bytecnt; compat_ulong_t wrong_if; }; struct compat_sioc_mif_req6 { mifi_t mifi; compat_ulong_t icount; compat_ulong_t ocount; compat_ulong_t ibytes; compat_ulong_t obytes; }; int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) { struct compat_sioc_sg_req6 sr; struct compat_sioc_mif_req6 vr; struct vif_device *vif; struct mfc6_cache *c; struct net *net = sock_net(sk); struct mr_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) return -ENOENT; switch (cmd) { case SIOCGETMIFCNT_IN6: if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; if (vr.mifi >= mrt->maxvif) return -EINVAL; vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); rcu_read_lock(); vif = &mrt->vif_table[vr.mifi]; if (VIF_EXISTS(mrt, vr.mifi)) { vr.icount = READ_ONCE(vif->pkt_in); vr.ocount = READ_ONCE(vif->pkt_out); vr.ibytes = READ_ONCE(vif->bytes_in); vr.obytes = READ_ONCE(vif->bytes_out); rcu_read_unlock(); if (copy_to_user(arg, &vr, sizeof(vr))) return -EFAULT; return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT_IN6: if (copy_from_user(&sr, arg, sizeof(sr))) return -EFAULT; rcu_read_lock(); c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); if (c) { sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt); sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes); sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if); rcu_read_unlock(); if (copy_to_user(arg, &sr, sizeof(sr))) return -EFAULT; return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; default: return -ENOIOCTLCMD; } } #endif static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTFORWDATAGRAMS); return dst_output(net, sk, skb); } /* * Processing handlers for ip6mr_forward */ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, struct sk_buff *skb, int vifi) { struct vif_device *vif = &mrt->vif_table[vifi]; struct net_device *vif_dev; struct ipv6hdr *ipv6h; struct dst_entry *dst; struct flowi6 fl6; vif_dev = vif_dev_read(vif); if (!vif_dev) goto out_free; #ifdef CONFIG_IPV6_PIMSM_V2 if (vif->flags & MIFF_REGISTER) { WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); DEV_STATS_INC(vif_dev, tx_packets); ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); goto out_free; } #endif ipv6h = ipv6_hdr(skb); fl6 = (struct flowi6) { .flowi6_oif = vif->link, .daddr = ipv6h->daddr, }; dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); goto out_free; } skb_dst_drop(skb); skb_dst_set(skb, dst); /* * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output * interfaces. It is clear, if mrouter runs a multicasting * program, it should receive packets not depending to what interface * program is joined. * If we will not make it, the program will have to join on all * interfaces. On the other hand, multihoming host (or router, but * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets. */ skb->dev = vif_dev; WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); /* We are about to write */ /* XXX: extension headers? */ if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev))) goto out_free; ipv6h = ipv6_hdr(skb); ipv6h->hop_limit--; IP6CB(skb)->flags |= IP6SKB_FORWARDED; return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, net, NULL, skb, skb->dev, vif_dev, ip6mr_forward2_finish); out_free: kfree_skb(skb); return 0; } /* Called with rcu_read_lock() */ static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) { int ct; /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */ for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) break; } return ct; } /* Called under rcu_read_lock() */ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc6_cache *c) { int psend = -1; int vif, ct; int true_vifi = ip6mr_find_vif(mrt, dev); vif = c->_c.mfc_parent; atomic_long_inc(&c->_c.mfc_un.res.pkt); atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) { struct mfc6_cache *cache_proxy; /* For an (*,G) entry, we only check that the incoming * interface is part of the static tree. */ cache_proxy = mr_mfc_find_any_parent(mrt, vif); if (cache_proxy && cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) goto forward; } /* * Wrong interface: drop packet and (maybe) send PIM assert. */ if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { atomic_long_inc(&c->_c.mfc_un.res.wrong_if); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, so that we cannot check that packet arrived on an oif. It is bad, but otherwise we would need to move pretty large chunk of pimd to kernel. Ough... --ANK */ (mrt->mroute_do_pim || c->_c.mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, c->_c.mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { c->_c.mfc_un.res.last_assert = jiffies; ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); if (mrt->mroute_do_wrvifwhole) ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRMIFWHOLE); } goto dont_forward; } forward: WRITE_ONCE(mrt->vif_table[vif].pkt_in, mrt->vif_table[vif].pkt_in + 1); WRITE_ONCE(mrt->vif_table[vif].bytes_in, mrt->vif_table[vif].bytes_in + skb->len); /* * Forward the frame */ if (ipv6_addr_any(&c->mf6c_origin) && ipv6_addr_any(&c->mf6c_mcastgrp)) { if (true_vifi >= 0 && true_vifi != c->_c.mfc_parent && ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { /* It's an (*,*) entry and the packet is not coming from * the upstream: forward the packet to the upstream * only. */ psend = c->_c.mfc_parent; goto last_forward; } goto dont_forward; } for (ct = c->_c.mfc_un.res.maxvif - 1; ct >= c->_c.mfc_un.res.minvif; ct--) { /* For (*,G) entry, don't forward to the incoming interface */ if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) && ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) ip6mr_forward2(net, mrt, skb2, psend); } psend = ct; } } last_forward: if (psend != -1) { ip6mr_forward2(net, mrt, skb, psend); return; } dont_forward: kfree_skb(skb); } /* * Multicast packets for forwarding arrive here */ int ip6_mr_input(struct sk_buff *skb) { struct mfc6_cache *cache; struct net *net = dev_net(skb->dev); struct mr_table *mrt; struct flowi6 fl6 = { .flowi6_iif = skb->dev->ifindex, .flowi6_mark = skb->mark, }; int err; struct net_device *dev; /* skb->dev passed in is the master dev for vrfs. * Get the proper interface that does have a vif associated with it. */ dev = skb->dev; if (netif_is_l3_master(skb->dev)) { dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); if (!dev) { kfree_skb(skb); return -ENODEV; } } err = ip6mr_fib_lookup(net, &fl6, &mrt); if (err < 0) { kfree_skb(skb); return err; } cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); if (!cache) { int vif = ip6mr_find_vif(mrt, dev); if (vif >= 0) cache = ip6mr_cache_find_any(mrt, &ipv6_hdr(skb)->daddr, vif); } /* * No usable cache entry */ if (!cache) { int vif; vif = ip6mr_find_vif(mrt, dev); if (vif >= 0) { int err = ip6mr_cache_unresolved(mrt, vif, skb, dev); return err; } kfree_skb(skb); return -ENODEV; } ip6_mr_forward(net, mrt, dev, skb, cache); return 0; } int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, u32 portid) { int err; struct mr_table *mrt; struct mfc6_cache *cache; struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); rcu_read_lock(); mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT); if (!mrt) { rcu_read_unlock(); return -ENOENT; } cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); if (!cache && skb->dev) { int vif = ip6mr_find_vif(mrt, skb->dev); if (vif >= 0) cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, vif); } if (!cache) { struct sk_buff *skb2; struct ipv6hdr *iph; struct net_device *dev; int vif; dev = skb->dev; if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { rcu_read_unlock(); return -ENODEV; } /* really correct? */ skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); if (!skb2) { rcu_read_unlock(); return -ENOMEM; } NETLINK_CB(skb2).portid = portid; skb_reset_transport_header(skb2); skb_put(skb2, sizeof(struct ipv6hdr)); skb_reset_network_header(skb2); iph = ipv6_hdr(skb2); iph->version = 0; iph->priority = 0; iph->flow_lbl[0] = 0; iph->flow_lbl[1] = 0; iph->flow_lbl[2] = 0; iph->payload_len = 0; iph->nexthdr = IPPROTO_NONE; iph->hop_limit = 0; iph->saddr = rt->rt6i_src.addr; iph->daddr = rt->rt6i_dst.addr; err = ip6mr_cache_unresolved(mrt, vif, skb2, dev); rcu_read_unlock(); return err; } err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); rcu_read_unlock(); return err; } static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, u32 portid, u32 seq, struct mfc6_cache *c, int cmd, int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; rtm = nlmsg_data(nlh); rtm->rtm_family = RTNL_FAMILY_IP6MR; rtm->rtm_dst_len = 128; rtm->rtm_src_len = 128; rtm->rtm_tos = 0; rtm->rtm_table = mrt->id; if (nla_put_u32(skb, RTA_TABLE, mrt->id)) goto nla_put_failure; rtm->rtm_type = RTN_MULTICAST; rtm->rtm_scope = RT_SCOPE_UNIVERSE; if (c->_c.mfc_flags & MFC_STATIC) rtm->rtm_protocol = RTPROT_STATIC; else rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) goto nla_put_failure; err = mr_fill_mroute(mrt, skb, &c->_c, rtm); /* do not break the dump if cache is unresolved */ if (err < 0 && err != -ENOENT) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags) { return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c, cmd, flags); } static int mr6_msgsize(bool unresolved, int maxvif) { size_t len = NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */ + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */ ; if (!unresolved) len = len + nla_total_size(4) /* RTA_IIF */ + nla_total_size(0) /* RTA_MULTIPATH */ + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) /* RTA_MFC_STATS */ + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) ; return len; } static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, int cmd) { struct net *net = read_pnet(&mrt->net); struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif), GFP_ATOMIC); if (!skb) goto errout; err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); if (err < 0) goto errout; rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC); return; errout: kfree_skb(skb); rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err); } static size_t mrt6msg_netlink_msgsize(size_t payloadlen) { size_t len = NLMSG_ALIGN(sizeof(struct rtgenmsg)) + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */ + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */ /* IP6MRA_CREPORT_SRC_ADDR */ + nla_total_size(sizeof(struct in6_addr)) /* IP6MRA_CREPORT_DST_ADDR */ + nla_total_size(sizeof(struct in6_addr)) /* IP6MRA_CREPORT_PKT */ + nla_total_size(payloadlen) ; return len; } static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) { struct net *net = read_pnet(&mrt->net); struct nlmsghdr *nlh; struct rtgenmsg *rtgenm; struct mrt6msg *msg; struct sk_buff *skb; struct nlattr *nla; int payloadlen; payloadlen = pkt->len - sizeof(struct mrt6msg); msg = (struct mrt6msg *)skb_transport_header(pkt); skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC); if (!skb) goto errout; nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, sizeof(struct rtgenmsg), 0); if (!nlh) goto errout; rtgenm = nlmsg_data(nlh); rtgenm->rtgen_family = RTNL_FAMILY_IP6MR; if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) || nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) || nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR, &msg->im6_src) || nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR, &msg->im6_dst)) goto nla_put_failure; nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen); if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg), nla_data(nla), payloadlen)) goto nla_put_failure; nlmsg_end(skb, nlh); rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC); return; nla_put_failure: nlmsg_cancel(skb, nlh); errout: kfree_skb(skb); rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS); } static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = { [RTA_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), [RTA_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), [RTA_TABLE] = { .type = NLA_U32 }, }; static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { struct rtmsg *rtm; int err; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy, extack); if (err) return err; rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for multicast route get request"); return -EINVAL; } if ((tb[RTA_SRC] && !rtm->rtm_src_len) || (tb[RTA_DST] && !rtm->rtm_dst_len)) { NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); return -EINVAL; } return 0; } static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct in6_addr src = {}, grp = {}; struct nlattr *tb[RTA_MAX + 1]; struct mfc6_cache *cache; struct mr_table *mrt; struct sk_buff *skb; u32 tableid; int err; err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) return err; if (tb[RTA_SRC]) src = nla_get_in6_addr(tb[RTA_SRC]); if (tb[RTA_DST]) grp = nla_get_in6_addr(tb[RTA_DST]); tableid = nla_get_u32_default(tb[RTA_TABLE], 0); mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT); if (!mrt) { NL_SET_ERR_MSG_MOD(extack, "MR table does not exist"); return -ENOENT; } /* entries are added/deleted only under RTNL */ rcu_read_lock(); cache = ip6mr_cache_find(mrt, &src, &grp); rcu_read_unlock(); if (!cache) { NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found"); return -ENOENT; } skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL); if (!skb) return -ENOBUFS; err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0); if (err < 0) { kfree_skb(skb); return err; } return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); } static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct fib_dump_filter filter = { .rtnl_held = true, }; int err; if (cb->strict_check) { err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh, &filter, cb); if (err < 0) return err; } if (filter.table_id) { struct mr_table *mrt; mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) return skb->len; NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); return -ENOENT; } err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute, &mfc_unres_lock, &filter); return skb->len ? : err; } return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, _ip6mr_fill_mroute, &mfc_unres_lock, &filter); }
211 83 30 42 353 36 176 21 21 8 39 1 53 6 3 1 10 4 50 2 18 2 2 2 4 2 113 1 7 2 3 11 27 131 8 131 23 62 1 6 2 28 1 24 2 1 129 129 3 60 1 2 2 168 4 1 158 1 1 2 41 125 1 1 34 6 1 4 60 120 116 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 /* SPDX-License-Identifier: GPL-2.0 */ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH * Copyright (C) 2018, 2020-2024 Intel Corporation */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cfg80211 #if !defined(__RDEV_OPS_TRACE) || defined(TRACE_HEADER_MULTI_READ) #define __RDEV_OPS_TRACE #include <linux/tracepoint.h> #include <linux/rtnetlink.h> #include <linux/etherdevice.h> #include <net/cfg80211.h> #include "core.h" #define MAC_ENTRY(entry_mac) __array(u8, entry_mac, ETH_ALEN) #define MAC_ASSIGN(entry_mac, given_mac) do { \ if (given_mac) \ memcpy(__entry->entry_mac, given_mac, ETH_ALEN); \ else \ eth_zero_addr(__entry->entry_mac); \ } while (0) #define MAXNAME 32 #define WIPHY_ENTRY __array(char, wiphy_name, 32) #define WIPHY_ASSIGN strscpy(__entry->wiphy_name, wiphy_name(wiphy), MAXNAME) #define WIPHY_PR_FMT "%s" #define WIPHY_PR_ARG __entry->wiphy_name #define WDEV_ENTRY __field(u32, id) #define WDEV_ASSIGN (__entry->id) = (!IS_ERR_OR_NULL(wdev) \ ? wdev->identifier : 0) #define WDEV_PR_FMT "wdev(%u)" #define WDEV_PR_ARG (__entry->id) #define NETDEV_ENTRY __array(char, name, IFNAMSIZ) \ __field(int, ifindex) #define NETDEV_ASSIGN \ do { \ memcpy(__entry->name, netdev->name, IFNAMSIZ); \ (__entry->ifindex) = (netdev->ifindex); \ } while (0) #define NETDEV_PR_FMT "netdev:%s(%d)" #define NETDEV_PR_ARG __entry->name, __entry->ifindex #define MESH_CFG_ENTRY __field(u16, dot11MeshRetryTimeout) \ __field(u16, dot11MeshConfirmTimeout) \ __field(u16, dot11MeshHoldingTimeout) \ __field(u16, dot11MeshMaxPeerLinks) \ __field(u8, dot11MeshMaxRetries) \ __field(u8, dot11MeshTTL) \ __field(u8, element_ttl) \ __field(bool, auto_open_plinks) \ __field(u32, dot11MeshNbrOffsetMaxNeighbor) \ __field(u8, dot11MeshHWMPmaxPREQretries) \ __field(u32, path_refresh_time) \ __field(u32, dot11MeshHWMPactivePathTimeout) \ __field(u16, min_discovery_timeout) \ __field(u16, dot11MeshHWMPpreqMinInterval) \ __field(u16, dot11MeshHWMPperrMinInterval) \ __field(u16, dot11MeshHWMPnetDiameterTraversalTime) \ __field(u8, dot11MeshHWMPRootMode) \ __field(u16, dot11MeshHWMPRannInterval) \ __field(bool, dot11MeshGateAnnouncementProtocol) \ __field(bool, dot11MeshForwarding) \ __field(s32, rssi_threshold) \ __field(u16, ht_opmode) \ __field(u32, dot11MeshHWMPactivePathToRootTimeout) \ __field(u16, dot11MeshHWMProotInterval) \ __field(u16, dot11MeshHWMPconfirmationInterval) \ __field(bool, dot11MeshNolearn) #define MESH_CFG_ASSIGN \ do { \ __entry->dot11MeshRetryTimeout = conf->dot11MeshRetryTimeout; \ __entry->dot11MeshConfirmTimeout = \ conf->dot11MeshConfirmTimeout; \ __entry->dot11MeshHoldingTimeout = \ conf->dot11MeshHoldingTimeout; \ __entry->dot11MeshMaxPeerLinks = conf->dot11MeshMaxPeerLinks; \ __entry->dot11MeshMaxRetries = conf->dot11MeshMaxRetries; \ __entry->dot11MeshTTL = conf->dot11MeshTTL; \ __entry->element_ttl = conf->element_ttl; \ __entry->auto_open_plinks = conf->auto_open_plinks; \ __entry->dot11MeshNbrOffsetMaxNeighbor = \ conf->dot11MeshNbrOffsetMaxNeighbor; \ __entry->dot11MeshHWMPmaxPREQretries = \ conf->dot11MeshHWMPmaxPREQretries; \ __entry->path_refresh_time = conf->path_refresh_time; \ __entry->dot11MeshHWMPactivePathTimeout = \ conf->dot11MeshHWMPactivePathTimeout; \ __entry->min_discovery_timeout = conf->min_discovery_timeout; \ __entry->dot11MeshHWMPpreqMinInterval = \ conf->dot11MeshHWMPpreqMinInterval; \ __entry->dot11MeshHWMPperrMinInterval = \ conf->dot11MeshHWMPperrMinInterval; \ __entry->dot11MeshHWMPnetDiameterTraversalTime = \ conf->dot11MeshHWMPnetDiameterTraversalTime; \ __entry->dot11MeshHWMPRootMode = conf->dot11MeshHWMPRootMode; \ __entry->dot11MeshHWMPRannInterval = \ conf->dot11MeshHWMPRannInterval; \ __entry->dot11MeshGateAnnouncementProtocol = \ conf->dot11MeshGateAnnouncementProtocol; \ __entry->dot11MeshForwarding = conf->dot11MeshForwarding; \ __entry->rssi_threshold = conf->rssi_threshold; \ __entry->ht_opmode = conf->ht_opmode; \ __entry->dot11MeshHWMPactivePathToRootTimeout = \ conf->dot11MeshHWMPactivePathToRootTimeout; \ __entry->dot11MeshHWMProotInterval = \ conf->dot11MeshHWMProotInterval; \ __entry->dot11MeshHWMPconfirmationInterval = \ conf->dot11MeshHWMPconfirmationInterval; \ __entry->dot11MeshNolearn = conf->dot11MeshNolearn; \ } while (0) #define CHAN_ENTRY __field(enum nl80211_band, band) \ __field(u32, center_freq) \ __field(u16, freq_offset) #define CHAN_ASSIGN(chan) \ do { \ if (chan) { \ __entry->band = chan->band; \ __entry->center_freq = chan->center_freq; \ __entry->freq_offset = chan->freq_offset; \ } else { \ __entry->band = 0; \ __entry->center_freq = 0; \ __entry->freq_offset = 0; \ } \ } while (0) #define CHAN_PR_FMT "band: %d, freq: %u.%03u" #define CHAN_PR_ARG __entry->band, __entry->center_freq, __entry->freq_offset #define CHAN_DEF_ENTRY __field(enum nl80211_band, band) \ __field(u32, control_freq) \ __field(u32, freq_offset) \ __field(u32, width) \ __field(u32, center_freq1) \ __field(u32, freq1_offset) \ __field(u32, center_freq2) \ __field(u16, punctured) #define CHAN_DEF_ASSIGN(chandef) \ do { \ if ((chandef) && (chandef)->chan) { \ __entry->band = (chandef)->chan->band; \ __entry->control_freq = \ (chandef)->chan->center_freq; \ __entry->freq_offset = \ (chandef)->chan->freq_offset; \ __entry->width = (chandef)->width; \ __entry->center_freq1 = (chandef)->center_freq1;\ __entry->freq1_offset = (chandef)->freq1_offset;\ __entry->center_freq2 = (chandef)->center_freq2;\ __entry->punctured = (chandef)->punctured; \ } else { \ __entry->band = 0; \ __entry->control_freq = 0; \ __entry->freq_offset = 0; \ __entry->width = 0; \ __entry->center_freq1 = 0; \ __entry->freq1_offset = 0; \ __entry->center_freq2 = 0; \ __entry->punctured = 0; \ } \ } while (0) #define CHAN_DEF_PR_FMT \ "band: %d, control freq: %u.%03u, width: %d, cf1: %u.%03u, cf2: %u, punct: 0x%x" #define CHAN_DEF_PR_ARG __entry->band, __entry->control_freq, \ __entry->freq_offset, __entry->width, \ __entry->center_freq1, __entry->freq1_offset, \ __entry->center_freq2, __entry->punctured #define FILS_AAD_ASSIGN(fa) \ do { \ if (fa) { \ ether_addr_copy(__entry->macaddr, fa->macaddr); \ __entry->kek_len = fa->kek_len; \ } else { \ eth_zero_addr(__entry->macaddr); \ __entry->kek_len = 0; \ } \ } while (0) #define FILS_AAD_PR_FMT \ "macaddr: %pM, kek_len: %d" #define SINFO_ENTRY __field(int, generation) \ __field(u32, connected_time) \ __field(u32, inactive_time) \ __field(u32, rx_bytes) \ __field(u32, tx_bytes) \ __field(u32, rx_packets) \ __field(u32, tx_packets) \ __field(u32, tx_retries) \ __field(u32, tx_failed) \ __field(u32, rx_dropped_misc) \ __field(u32, beacon_loss_count) \ __field(u16, llid) \ __field(u16, plid) \ __field(u8, plink_state) #define SINFO_ASSIGN \ do { \ __entry->generation = sinfo->generation; \ __entry->connected_time = sinfo->connected_time; \ __entry->inactive_time = sinfo->inactive_time; \ __entry->rx_bytes = sinfo->rx_bytes; \ __entry->tx_bytes = sinfo->tx_bytes; \ __entry->rx_packets = sinfo->rx_packets; \ __entry->tx_packets = sinfo->tx_packets; \ __entry->tx_retries = sinfo->tx_retries; \ __entry->tx_failed = sinfo->tx_failed; \ __entry->rx_dropped_misc = sinfo->rx_dropped_misc; \ __entry->beacon_loss_count = sinfo->beacon_loss_count; \ __entry->llid = sinfo->llid; \ __entry->plid = sinfo->plid; \ __entry->plink_state = sinfo->plink_state; \ } while (0) #define BOOL_TO_STR(bo) (bo) ? "true" : "false" #define QOS_MAP_ENTRY __field(u8, num_des) \ __array(u8, dscp_exception, \ 2 * IEEE80211_QOS_MAP_MAX_EX) \ __array(u8, up, IEEE80211_QOS_MAP_LEN_MIN) #define QOS_MAP_ASSIGN(qos_map) \ do { \ if ((qos_map)) { \ __entry->num_des = (qos_map)->num_des; \ memcpy(__entry->dscp_exception, \ &(qos_map)->dscp_exception, \ 2 * IEEE80211_QOS_MAP_MAX_EX); \ memcpy(__entry->up, &(qos_map)->up, \ IEEE80211_QOS_MAP_LEN_MIN); \ } else { \ __entry->num_des = 0; \ memset(__entry->dscp_exception, 0, \ 2 * IEEE80211_QOS_MAP_MAX_EX); \ memset(__entry->up, 0, \ IEEE80211_QOS_MAP_LEN_MIN); \ } \ } while (0) /************************************************************* * wiphy work traces * *************************************************************/ DECLARE_EVENT_CLASS(wiphy_work_event, TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work), TP_ARGS(wiphy, work), TP_STRUCT__entry( WIPHY_ENTRY __field(void *, instance) __field(void *, func) ), TP_fast_assign( WIPHY_ASSIGN; __entry->instance = work; __entry->func = work ? work->func : NULL; ), TP_printk(WIPHY_PR_FMT " instance=%p func=%pS", WIPHY_PR_ARG, __entry->instance, __entry->func) ); DEFINE_EVENT(wiphy_work_event, wiphy_work_queue, TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work), TP_ARGS(wiphy, work) ); DEFINE_EVENT(wiphy_work_event, wiphy_work_run, TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work), TP_ARGS(wiphy, work) ); DEFINE_EVENT(wiphy_work_event, wiphy_work_cancel, TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work), TP_ARGS(wiphy, work) ); DEFINE_EVENT(wiphy_work_event, wiphy_work_flush, TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work), TP_ARGS(wiphy, work) ); TRACE_EVENT(wiphy_delayed_work_queue, TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work, unsigned long delay), TP_ARGS(wiphy, work, delay), TP_STRUCT__entry( WIPHY_ENTRY __field(void *, instance) __field(void *, func) __field(unsigned long, delay) ), TP_fast_assign( WIPHY_ASSIGN; __entry->instance = work; __entry->func = work->func; __entry->delay = delay; ), TP_printk(WIPHY_PR_FMT " instance=%p func=%pS delay=%ld", WIPHY_PR_ARG, __entry->instance, __entry->func, __entry->delay) ); TRACE_EVENT(wiphy_work_worker_start, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy), TP_STRUCT__entry( WIPHY_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; ), TP_printk(WIPHY_PR_FMT, WIPHY_PR_ARG) ); /************************************************************* * rdev->ops traces * *************************************************************/ TRACE_EVENT(rdev_suspend, TP_PROTO(struct wiphy *wiphy, struct cfg80211_wowlan *wow), TP_ARGS(wiphy, wow), TP_STRUCT__entry( WIPHY_ENTRY __field(bool, any) __field(bool, disconnect) __field(bool, magic_pkt) __field(bool, gtk_rekey_failure) __field(bool, eap_identity_req) __field(bool, four_way_handshake) __field(bool, rfkill_release) __field(bool, valid_wow) ), TP_fast_assign( WIPHY_ASSIGN; if (wow) { __entry->any = wow->any; __entry->disconnect = wow->disconnect; __entry->magic_pkt = wow->magic_pkt; __entry->gtk_rekey_failure = wow->gtk_rekey_failure; __entry->eap_identity_req = wow->eap_identity_req; __entry->four_way_handshake = wow->four_way_handshake; __entry->rfkill_release = wow->rfkill_release; __entry->valid_wow = true; } else { __entry->valid_wow = false; } ), TP_printk(WIPHY_PR_FMT ", wow%s - any: %d, disconnect: %d, " "magic pkt: %d, gtk rekey failure: %d, eap identify req: %d, " "four way handshake: %d, rfkill release: %d.", WIPHY_PR_ARG, __entry->valid_wow ? "" : "(Not configured!)", __entry->any, __entry->disconnect, __entry->magic_pkt, __entry->gtk_rekey_failure, __entry->eap_identity_req, __entry->four_way_handshake, __entry->rfkill_release) ); TRACE_EVENT(rdev_return_int, TP_PROTO(struct wiphy *wiphy, int ret), TP_ARGS(wiphy, ret), TP_STRUCT__entry( WIPHY_ENTRY __field(int, ret) ), TP_fast_assign( WIPHY_ASSIGN; __entry->ret = ret; ), TP_printk(WIPHY_PR_FMT ", returned: %d", WIPHY_PR_ARG, __entry->ret) ); TRACE_EVENT(rdev_scan, TP_PROTO(struct wiphy *wiphy, struct cfg80211_scan_request *request), TP_ARGS(wiphy, request), TP_STRUCT__entry( WIPHY_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; ), TP_printk(WIPHY_PR_FMT, WIPHY_PR_ARG) ); DECLARE_EVENT_CLASS(wiphy_only_evt, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy), TP_STRUCT__entry( WIPHY_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; ), TP_printk(WIPHY_PR_FMT, WIPHY_PR_ARG) ); DEFINE_EVENT(wiphy_only_evt, rdev_resume, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy) ); DEFINE_EVENT(wiphy_only_evt, rdev_return_void, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy) ); DEFINE_EVENT(wiphy_only_evt, rdev_get_antenna, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy) ); DEFINE_EVENT(wiphy_only_evt, rdev_rfkill_poll, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy) ); DECLARE_EVENT_CLASS(wiphy_enabled_evt, TP_PROTO(struct wiphy *wiphy, bool enabled), TP_ARGS(wiphy, enabled), TP_STRUCT__entry( WIPHY_ENTRY __field(bool, enabled) ), TP_fast_assign( WIPHY_ASSIGN; __entry->enabled = enabled; ), TP_printk(WIPHY_PR_FMT ", %senabled ", WIPHY_PR_ARG, __entry->enabled ? "" : "not ") ); DEFINE_EVENT(wiphy_enabled_evt, rdev_set_wakeup, TP_PROTO(struct wiphy *wiphy, bool enabled), TP_ARGS(wiphy, enabled) ); TRACE_EVENT(rdev_add_virtual_intf, TP_PROTO(struct wiphy *wiphy, char *name, enum nl80211_iftype type), TP_ARGS(wiphy, name, type), TP_STRUCT__entry( WIPHY_ENTRY __string(vir_intf_name, name ? name : "<noname>") __field(enum nl80211_iftype, type) ), TP_fast_assign( WIPHY_ASSIGN; __assign_str(vir_intf_name); __entry->type = type; ), TP_printk(WIPHY_PR_FMT ", virtual intf name: %s, type: %d", WIPHY_PR_ARG, __get_str(vir_intf_name), __entry->type) ); DECLARE_EVENT_CLASS(wiphy_wdev_evt, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) ); DECLARE_EVENT_CLASS(wiphy_wdev_cookie_evt, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->cookie = cookie; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie: %lld", WIPHY_PR_ARG, WDEV_PR_ARG, (unsigned long long)__entry->cookie) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_return_wdev, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_del_virtual_intf, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); TRACE_EVENT(rdev_change_virtual_intf, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, enum nl80211_iftype type), TP_ARGS(wiphy, netdev, type), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(enum nl80211_iftype, type) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->type = type; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", type: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->type) ); DECLARE_EVENT_CLASS(key_handle, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr), TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(mac_addr) __field(int, link_id) __field(u8, key_index) __field(bool, pairwise) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(mac_addr, mac_addr); __entry->link_id = link_id; __entry->key_index = key_index; __entry->pairwise = pairwise; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " "key_index: %u, pairwise: %s, mac addr: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, __entry->key_index, BOOL_TO_STR(__entry->pairwise), __entry->mac_addr) ); DEFINE_EVENT(key_handle, rdev_get_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr), TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr) ); DEFINE_EVENT(key_handle, rdev_del_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr), TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr) ); TRACE_EVENT(rdev_add_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, u8 mode), TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr, mode), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(mac_addr) __field(int, link_id) __field(u8, key_index) __field(bool, pairwise) __field(u8, mode) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(mac_addr, mac_addr); __entry->link_id = link_id; __entry->key_index = key_index; __entry->pairwise = pairwise; __entry->mode = mode; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " "key_index: %u, mode: %u, pairwise: %s, " "mac addr: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, __entry->key_index, __entry->mode, BOOL_TO_STR(__entry->pairwise), __entry->mac_addr) ); TRACE_EVENT(rdev_set_default_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool unicast, bool multicast), TP_ARGS(wiphy, netdev, link_id, key_index, unicast, multicast), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, link_id) __field(u8, key_index) __field(bool, unicast) __field(bool, multicast) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->link_id = link_id; __entry->key_index = key_index; __entry->unicast = unicast; __entry->multicast = multicast; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " "key index: %u, unicast: %s, multicast: %s", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, __entry->key_index, BOOL_TO_STR(__entry->unicast), BOOL_TO_STR(__entry->multicast)) ); TRACE_EVENT(rdev_set_default_mgmt_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index), TP_ARGS(wiphy, netdev, link_id, key_index), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, link_id) __field(u8, key_index) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->link_id = link_id; __entry->key_index = key_index; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " "key index: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, __entry->key_index) ); TRACE_EVENT(rdev_set_default_beacon_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index), TP_ARGS(wiphy, netdev, link_id, key_index), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, link_id) __field(u8, key_index) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->link_id = link_id; __entry->key_index = key_index; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " "key index: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, __entry->key_index) ); TRACE_EVENT(rdev_start_ap, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ap_settings *settings), TP_ARGS(wiphy, netdev, settings), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY __field(int, beacon_interval) __field(int, dtim_period) __array(char, ssid, IEEE80211_MAX_SSID_LEN + 1) __field(enum nl80211_hidden_ssid, hidden_ssid) __field(u32, wpa_ver) __field(bool, privacy) __field(enum nl80211_auth_type, auth_type) __field(int, inactivity_timeout) __field(unsigned int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(&settings->chandef); __entry->beacon_interval = settings->beacon_interval; __entry->dtim_period = settings->dtim_period; __entry->hidden_ssid = settings->hidden_ssid; __entry->wpa_ver = settings->crypto.wpa_versions; __entry->privacy = settings->privacy; __entry->auth_type = settings->auth_type; __entry->inactivity_timeout = settings->inactivity_timeout; memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1); memcpy(__entry->ssid, settings->ssid, settings->ssid_len); __entry->link_id = settings->beacon.link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", AP settings - ssid: %s, " CHAN_DEF_PR_FMT ", beacon interval: %d, dtim period: %d, " "hidden ssid: %d, wpa versions: %u, privacy: %s, " "auth type: %d, inactivity timeout: %d, link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->ssid, CHAN_DEF_PR_ARG, __entry->beacon_interval, __entry->dtim_period, __entry->hidden_ssid, __entry->wpa_ver, BOOL_TO_STR(__entry->privacy), __entry->auth_type, __entry->inactivity_timeout, __entry->link_id) ); TRACE_EVENT(rdev_change_beacon, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ap_update *info), TP_ARGS(wiphy, netdev, info), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, link_id) __dynamic_array(u8, head, info->beacon.head_len) __dynamic_array(u8, tail, info->beacon.tail_len) __dynamic_array(u8, beacon_ies, info->beacon.beacon_ies_len) __dynamic_array(u8, proberesp_ies, info->beacon.proberesp_ies_len) __dynamic_array(u8, assocresp_ies, info->beacon.assocresp_ies_len) __dynamic_array(u8, probe_resp, info->beacon.probe_resp_len) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->link_id = info->beacon.link_id; if (info->beacon.head) memcpy(__get_dynamic_array(head), info->beacon.head, info->beacon.head_len); if (info->beacon.tail) memcpy(__get_dynamic_array(tail), info->beacon.tail, info->beacon.tail_len); if (info->beacon.beacon_ies) memcpy(__get_dynamic_array(beacon_ies), info->beacon.beacon_ies, info->beacon.beacon_ies_len); if (info->beacon.proberesp_ies) memcpy(__get_dynamic_array(proberesp_ies), info->beacon.proberesp_ies, info->beacon.proberesp_ies_len); if (info->beacon.assocresp_ies) memcpy(__get_dynamic_array(assocresp_ies), info->beacon.assocresp_ies, info->beacon.assocresp_ies_len); if (info->beacon.probe_resp) memcpy(__get_dynamic_array(probe_resp), info->beacon.probe_resp, info->beacon.probe_resp_len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id:%d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id) ); TRACE_EVENT(rdev_stop_ap, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, unsigned int link_id), TP_ARGS(wiphy, netdev, link_id), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(unsigned int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->link_id = link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id) ); DECLARE_EVENT_CLASS(wiphy_netdev_evt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG) ); DEFINE_EVENT(wiphy_netdev_evt, rdev_set_rekey_data, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) ); DEFINE_EVENT(wiphy_netdev_evt, rdev_get_mesh_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) ); DEFINE_EVENT(wiphy_netdev_evt, rdev_leave_mesh, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) ); DEFINE_EVENT(wiphy_netdev_evt, rdev_leave_ibss, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) ); DEFINE_EVENT(wiphy_netdev_evt, rdev_leave_ocb, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) ); DEFINE_EVENT(wiphy_netdev_evt, rdev_flush_pmksa, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) ); TRACE_EVENT(rdev_end_cac, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, unsigned int link_id), TP_ARGS(wiphy, netdev, link_id), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(unsigned int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->link_id = link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id) ); DECLARE_EVENT_CLASS(station_add_change, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, struct station_parameters *params), TP_ARGS(wiphy, netdev, mac, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(sta_mac) __field(u32, sta_flags_mask) __field(u32, sta_flags_set) __field(u32, sta_modify_mask) __field(int, listen_interval) __field(u16, capability) __field(u16, aid) __field(u8, plink_action) __field(u8, plink_state) __field(u8, uapsd_queues) __field(u8, max_sp) __field(u8, opmode_notif) __field(bool, opmode_notif_used) __array(u8, ht_capa, (int)sizeof(struct ieee80211_ht_cap)) __array(u8, vht_capa, (int)sizeof(struct ieee80211_vht_cap)) __array(char, vlan, IFNAMSIZ) __dynamic_array(u8, supported_rates, params->link_sta_params.supported_rates_len) __dynamic_array(u8, ext_capab, params->ext_capab_len) __dynamic_array(u8, supported_channels, params->supported_channels_len) __dynamic_array(u8, supported_oper_classes, params->supported_oper_classes_len) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(sta_mac, mac); __entry->sta_flags_mask = params->sta_flags_mask; __entry->sta_flags_set = params->sta_flags_set; __entry->sta_modify_mask = params->sta_modify_mask; __entry->listen_interval = params->listen_interval; __entry->aid = params->aid; __entry->plink_action = params->plink_action; __entry->plink_state = params->plink_state; __entry->uapsd_queues = params->uapsd_queues; memset(__entry->ht_capa, 0, sizeof(struct ieee80211_ht_cap)); if (params->link_sta_params.ht_capa) memcpy(__entry->ht_capa, params->link_sta_params.ht_capa, sizeof(struct ieee80211_ht_cap)); memset(__entry->vht_capa, 0, sizeof(struct ieee80211_vht_cap)); if (params->link_sta_params.vht_capa) memcpy(__entry->vht_capa, params->link_sta_params.vht_capa, sizeof(struct ieee80211_vht_cap)); memset(__entry->vlan, 0, sizeof(__entry->vlan)); if (params->vlan) memcpy(__entry->vlan, params->vlan->name, IFNAMSIZ); if (params->link_sta_params.supported_rates && params->link_sta_params.supported_rates_len) memcpy(__get_dynamic_array(supported_rates), params->link_sta_params.supported_rates, params->link_sta_params.supported_rates_len); if (params->ext_capab && params->ext_capab_len) memcpy(__get_dynamic_array(ext_capab), params->ext_capab, params->ext_capab_len); if (params->supported_channels && params->supported_channels_len) memcpy(__get_dynamic_array(supported_channels), params->supported_channels, params->supported_channels_len); if (params->supported_oper_classes && params->supported_oper_classes_len) memcpy(__get_dynamic_array(supported_oper_classes), params->supported_oper_classes, params->supported_oper_classes_len); __entry->max_sp = params->max_sp; __entry->capability = params->capability; __entry->opmode_notif = params->link_sta_params.opmode_notif; __entry->opmode_notif_used = params->link_sta_params.opmode_notif_used; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" ", station flags mask: 0x%x, station flags set: 0x%x, " "station modify mask: 0x%x, listen interval: %d, aid: %u, " "plink action: %u, plink state: %u, uapsd queues: %u, vlan:%s", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, __entry->sta_flags_mask, __entry->sta_flags_set, __entry->sta_modify_mask, __entry->listen_interval, __entry->aid, __entry->plink_action, __entry->plink_state, __entry->uapsd_queues, __entry->vlan) ); DEFINE_EVENT(station_add_change, rdev_add_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, struct station_parameters *params), TP_ARGS(wiphy, netdev, mac, params) ); DEFINE_EVENT(station_add_change, rdev_change_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, struct station_parameters *params), TP_ARGS(wiphy, netdev, mac, params) ); DECLARE_EVENT_CLASS(wiphy_netdev_mac_evt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *mac), TP_ARGS(wiphy, netdev, mac), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(sta_mac) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(sta_mac, mac); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac) ); DECLARE_EVENT_CLASS(station_del, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct station_del_parameters *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(sta_mac) __field(u8, subtype) __field(u16, reason_code) __field(int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(sta_mac, params->mac); __entry->subtype = params->subtype; __entry->reason_code = params->reason_code; __entry->link_id = params->link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" ", subtype: %u, reason_code: %u, link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, __entry->subtype, __entry->reason_code, __entry->link_id) ); DEFINE_EVENT(station_del, rdev_del_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct station_del_parameters *params), TP_ARGS(wiphy, netdev, params) ); DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_get_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *mac), TP_ARGS(wiphy, netdev, mac) ); DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_del_mpath, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *mac), TP_ARGS(wiphy, netdev, mac) ); TRACE_EVENT(rdev_dump_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, u8 *mac), TP_ARGS(wiphy, netdev, _idx, mac), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(sta_mac) __field(int, idx) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(sta_mac, mac); __entry->idx = _idx; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM, idx: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, __entry->idx) ); TRACE_EVENT(rdev_return_int_station_info, TP_PROTO(struct wiphy *wiphy, int ret, struct station_info *sinfo), TP_ARGS(wiphy, ret, sinfo), TP_STRUCT__entry( WIPHY_ENTRY __field(int, ret) SINFO_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; __entry->ret = ret; SINFO_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", returned %d" , WIPHY_PR_ARG, __entry->ret) ); DECLARE_EVENT_CLASS(mpath_evt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *dst, u8 *next_hop), TP_ARGS(wiphy, netdev, dst, next_hop), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dst) MAC_ENTRY(next_hop) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dst, dst); MAC_ASSIGN(next_hop, next_hop); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", destination: %pM, next hop: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->dst, __entry->next_hop) ); DEFINE_EVENT(mpath_evt, rdev_add_mpath, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *dst, u8 *next_hop), TP_ARGS(wiphy, netdev, dst, next_hop) ); DEFINE_EVENT(mpath_evt, rdev_change_mpath, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *dst, u8 *next_hop), TP_ARGS(wiphy, netdev, dst, next_hop) ); DEFINE_EVENT(mpath_evt, rdev_get_mpath, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *dst, u8 *next_hop), TP_ARGS(wiphy, netdev, dst, next_hop) ); TRACE_EVENT(rdev_dump_mpath, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, u8 *dst, u8 *next_hop), TP_ARGS(wiphy, netdev, _idx, dst, next_hop), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dst) MAC_ENTRY(next_hop) __field(int, idx) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dst, dst); MAC_ASSIGN(next_hop, next_hop); __entry->idx = _idx; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", index: %d, destination: %pM, next hop: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->idx, __entry->dst, __entry->next_hop) ); TRACE_EVENT(rdev_get_mpp, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *dst, u8 *mpp), TP_ARGS(wiphy, netdev, dst, mpp), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dst) MAC_ENTRY(mpp) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dst, dst); MAC_ASSIGN(mpp, mpp); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", destination: %pM" ", mpp: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->dst, __entry->mpp) ); TRACE_EVENT(rdev_dump_mpp, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, u8 *dst, u8 *mpp), TP_ARGS(wiphy, netdev, _idx, dst, mpp), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dst) MAC_ENTRY(mpp) __field(int, idx) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dst, dst); MAC_ASSIGN(mpp, mpp); __entry->idx = _idx; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", index: %d, destination: %pM, mpp: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->idx, __entry->dst, __entry->mpp) ); TRACE_EVENT(rdev_return_int_mpath_info, TP_PROTO(struct wiphy *wiphy, int ret, struct mpath_info *pinfo), TP_ARGS(wiphy, ret, pinfo), TP_STRUCT__entry( WIPHY_ENTRY __field(int, ret) __field(int, generation) __field(u32, filled) __field(u32, frame_qlen) __field(u32, sn) __field(u32, metric) __field(u32, exptime) __field(u32, discovery_timeout) __field(u8, discovery_retries) __field(u8, flags) ), TP_fast_assign( WIPHY_ASSIGN; __entry->ret = ret; __entry->generation = pinfo->generation; __entry->filled = pinfo->filled; __entry->frame_qlen = pinfo->frame_qlen; __entry->sn = pinfo->sn; __entry->metric = pinfo->metric; __entry->exptime = pinfo->exptime; __entry->discovery_timeout = pinfo->discovery_timeout; __entry->discovery_retries = pinfo->discovery_retries; __entry->flags = pinfo->flags; ), TP_printk(WIPHY_PR_FMT ", returned %d. mpath info - generation: %d, " "filled: %u, frame qlen: %u, sn: %u, metric: %u, exptime: %u," " discovery timeout: %u, discovery retries: %u, flags: 0x%x", WIPHY_PR_ARG, __entry->ret, __entry->generation, __entry->filled, __entry->frame_qlen, __entry->sn, __entry->metric, __entry->exptime, __entry->discovery_timeout, __entry->discovery_retries, __entry->flags) ); TRACE_EVENT(rdev_return_int_mesh_config, TP_PROTO(struct wiphy *wiphy, int ret, struct mesh_config *conf), TP_ARGS(wiphy, ret, conf), TP_STRUCT__entry( WIPHY_ENTRY MESH_CFG_ENTRY __field(int, ret) ), TP_fast_assign( WIPHY_ASSIGN; MESH_CFG_ASSIGN; __entry->ret = ret; ), TP_printk(WIPHY_PR_FMT ", returned: %d", WIPHY_PR_ARG, __entry->ret) ); TRACE_EVENT(rdev_update_mesh_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u32 mask, const struct mesh_config *conf), TP_ARGS(wiphy, netdev, mask, conf), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MESH_CFG_ENTRY __field(u32, mask) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MESH_CFG_ASSIGN; __entry->mask = mask; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mask: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mask) ); TRACE_EVENT(rdev_join_mesh, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const struct mesh_config *conf, const struct mesh_setup *setup), TP_ARGS(wiphy, netdev, conf, setup), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MESH_CFG_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MESH_CFG_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG) ); TRACE_EVENT(rdev_change_bss, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct bss_parameters *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, use_cts_prot) __field(int, use_short_preamble) __field(int, use_short_slot_time) __field(int, ap_isolate) __field(int, ht_opmode) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->use_cts_prot = params->use_cts_prot; __entry->use_short_preamble = params->use_short_preamble; __entry->use_short_slot_time = params->use_short_slot_time; __entry->ap_isolate = params->ap_isolate; __entry->ht_opmode = params->ht_opmode; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", use cts prot: %d, " "use short preamble: %d, use short slot time: %d, " "ap isolate: %d, ht opmode: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->use_cts_prot, __entry->use_short_preamble, __entry->use_short_slot_time, __entry->ap_isolate, __entry->ht_opmode) ); TRACE_EVENT(rdev_inform_bss, TP_PROTO(struct wiphy *wiphy, struct cfg80211_bss *bss), TP_ARGS(wiphy, bss), TP_STRUCT__entry( WIPHY_ENTRY MAC_ENTRY(bssid) CHAN_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; MAC_ASSIGN(bssid, bss->bssid); CHAN_ASSIGN(bss->channel); ), TP_printk(WIPHY_PR_FMT ", %pM, " CHAN_PR_FMT, WIPHY_PR_ARG, __entry->bssid, CHAN_PR_ARG) ); TRACE_EVENT(rdev_set_txq_params, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct ieee80211_txq_params *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(enum nl80211_ac, ac) __field(u16, txop) __field(u16, cwmin) __field(u16, cwmax) __field(u8, aifs) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->ac = params->ac; __entry->txop = params->txop; __entry->cwmin = params->cwmin; __entry->cwmax = params->cwmax; __entry->aifs = params->aifs; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", ac: %d, txop: %u, cwmin: %u, cwmax: %u, aifs: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->ac, __entry->txop, __entry->cwmin, __entry->cwmax, __entry->aifs) ); TRACE_EVENT(rdev_libertas_set_mesh_channel, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct ieee80211_channel *chan), TP_ARGS(wiphy, netdev, chan), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY CHAN_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_ASSIGN(chan); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_PR_ARG) ); TRACE_EVENT(rdev_set_monitor_channel, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_chan_def *chandef), TP_ARGS(wiphy, netdev, chandef), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) ); TRACE_EVENT(rdev_auth, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_auth_request *req), TP_ARGS(wiphy, netdev, req), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) __field(enum nl80211_auth_type, auth_type) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; if (req->bss) MAC_ASSIGN(bssid, req->bss->bssid); else eth_zero_addr(__entry->bssid); __entry->auth_type = req->auth_type; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", auth type: %d, bssid: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->auth_type, __entry->bssid) ); TRACE_EVENT(rdev_assoc, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_assoc_request *req), TP_ARGS(wiphy, netdev, req), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) MAC_ENTRY(prev_bssid) __field(bool, use_mfp) __field(u32, flags) __dynamic_array(u8, elements, req->ie_len) __array(u8, ht_capa, sizeof(struct ieee80211_ht_cap)) __array(u8, ht_capa_mask, sizeof(struct ieee80211_ht_cap)) __array(u8, vht_capa, sizeof(struct ieee80211_vht_cap)) __array(u8, vht_capa_mask, sizeof(struct ieee80211_vht_cap)) __dynamic_array(u8, fils_kek, req->fils_kek_len) __dynamic_array(u8, fils_nonces, req->fils_nonces ? 2 * FILS_NONCE_LEN : 0) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; if (req->bss) MAC_ASSIGN(bssid, req->bss->bssid); else eth_zero_addr(__entry->bssid); MAC_ASSIGN(prev_bssid, req->prev_bssid); __entry->use_mfp = req->use_mfp; __entry->flags = req->flags; if (req->ie) memcpy(__get_dynamic_array(elements), req->ie, req->ie_len); memcpy(__entry->ht_capa, &req->ht_capa, sizeof(req->ht_capa)); memcpy(__entry->ht_capa_mask, &req->ht_capa_mask, sizeof(req->ht_capa_mask)); memcpy(__entry->vht_capa, &req->vht_capa, sizeof(req->vht_capa)); memcpy(__entry->vht_capa_mask, &req->vht_capa_mask, sizeof(req->vht_capa_mask)); if (req->fils_kek) memcpy(__get_dynamic_array(fils_kek), req->fils_kek, req->fils_kek_len); if (req->fils_nonces) memcpy(__get_dynamic_array(fils_nonces), req->fils_nonces, 2 * FILS_NONCE_LEN); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM" ", previous bssid: %pM, use mfp: %s, flags: 0x%x", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->prev_bssid, BOOL_TO_STR(__entry->use_mfp), __entry->flags) ); TRACE_EVENT(rdev_deauth, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_deauth_request *req), TP_ARGS(wiphy, netdev, req), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) __field(u16, reason_code) __field(bool, local_state_change) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, req->bssid); __entry->reason_code = req->reason_code; __entry->local_state_change = req->local_state_change; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM, reason: %u, local_state_change:%d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->reason_code, __entry->local_state_change) ); TRACE_EVENT(rdev_disassoc, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_disassoc_request *req), TP_ARGS(wiphy, netdev, req), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) __field(u16, reason_code) __field(bool, local_state_change) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, req->ap_addr); __entry->reason_code = req->reason_code; __entry->local_state_change = req->local_state_change; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM" ", reason: %u, local state change: %s", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->reason_code, BOOL_TO_STR(__entry->local_state_change)) ); TRACE_EVENT(rdev_mgmt_tx_cancel_wait, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->cookie = cookie; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie: %llu ", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie) ); TRACE_EVENT(rdev_set_power_mgmt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, bool enabled, int timeout), TP_ARGS(wiphy, netdev, enabled, timeout), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(bool, enabled) __field(int, timeout) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->enabled = enabled; __entry->timeout = timeout; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %senabled, timeout: %d ", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->enabled ? "" : "not ", __entry->timeout) ); TRACE_EVENT(rdev_connect, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_connect_params *sme), TP_ARGS(wiphy, netdev, sme), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) __array(char, ssid, IEEE80211_MAX_SSID_LEN + 1) __field(enum nl80211_auth_type, auth_type) __field(bool, privacy) __field(u32, wpa_versions) __field(u32, flags) MAC_ENTRY(prev_bssid) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, sme->bssid); memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1); memcpy(__entry->ssid, sme->ssid, sme->ssid_len); __entry->auth_type = sme->auth_type; __entry->privacy = sme->privacy; __entry->wpa_versions = sme->crypto.wpa_versions; __entry->flags = sme->flags; MAC_ASSIGN(prev_bssid, sme->prev_bssid); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM" ", ssid: %s, auth type: %d, privacy: %s, wpa versions: %u, " "flags: 0x%x, previous bssid: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->ssid, __entry->auth_type, BOOL_TO_STR(__entry->privacy), __entry->wpa_versions, __entry->flags, __entry->prev_bssid) ); TRACE_EVENT(rdev_update_connect_params, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_connect_params *sme, u32 changed), TP_ARGS(wiphy, netdev, sme, changed), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u32, changed) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->changed = changed; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", parameters changed: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->changed) ); TRACE_EVENT(rdev_set_cqm_rssi_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, s32 rssi_thold, u32 rssi_hyst), TP_ARGS(wiphy, netdev, rssi_thold, rssi_hyst), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(s32, rssi_thold) __field(u32, rssi_hyst) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->rssi_thold = rssi_thold; __entry->rssi_hyst = rssi_hyst; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", rssi_thold: %d, rssi_hyst: %u ", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->rssi_thold, __entry->rssi_hyst) ); TRACE_EVENT(rdev_set_cqm_rssi_range_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, s32 low, s32 high), TP_ARGS(wiphy, netdev, low, high), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(s32, rssi_low) __field(s32, rssi_high) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->rssi_low = low; __entry->rssi_high = high; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", range: %d - %d ", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->rssi_low, __entry->rssi_high) ); TRACE_EVENT(rdev_set_cqm_txe_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u32 rate, u32 pkts, u32 intvl), TP_ARGS(wiphy, netdev, rate, pkts, intvl), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u32, rate) __field(u32, pkts) __field(u32, intvl) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->rate = rate; __entry->pkts = pkts; __entry->intvl = intvl; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", rate: %u, packets: %u, interval: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->rate, __entry->pkts, __entry->intvl) ); TRACE_EVENT(rdev_disconnect, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u16 reason_code), TP_ARGS(wiphy, netdev, reason_code), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u16, reason_code) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->reason_code = reason_code; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", reason code: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->reason_code) ); TRACE_EVENT(rdev_join_ibss, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ibss_params *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) __array(char, ssid, IEEE80211_MAX_SSID_LEN + 1) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, params->bssid); memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1); memcpy(__entry->ssid, params->ssid, params->ssid_len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM, ssid: %s", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->ssid) ); TRACE_EVENT(rdev_join_ocb, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const struct ocb_setup *setup), TP_ARGS(wiphy, netdev, setup), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG) ); TRACE_EVENT(rdev_set_wiphy_params, TP_PROTO(struct wiphy *wiphy, u32 changed), TP_ARGS(wiphy, changed), TP_STRUCT__entry( WIPHY_ENTRY __field(u32, changed) ), TP_fast_assign( WIPHY_ASSIGN; __entry->changed = changed; ), TP_printk(WIPHY_PR_FMT ", changed: %u", WIPHY_PR_ARG, __entry->changed) ); DECLARE_EVENT_CLASS(wiphy_wdev_link_evt, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id), TP_ARGS(wiphy, wdev, link_id), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(unsigned int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->link_id = link_id; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %u", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id) ); DEFINE_EVENT(wiphy_wdev_link_evt, rdev_get_tx_power, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id), TP_ARGS(wiphy, wdev, link_id) ); TRACE_EVENT(rdev_set_tx_power, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm), TP_ARGS(wiphy, wdev, type, mbm), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(enum nl80211_tx_power_setting, type) __field(int, mbm) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->type = type; __entry->mbm = mbm; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", type: %u, mbm: %d", WIPHY_PR_ARG, WDEV_PR_ARG,__entry->type, __entry->mbm) ); TRACE_EVENT(rdev_return_int_int, TP_PROTO(struct wiphy *wiphy, int func_ret, int func_fill), TP_ARGS(wiphy, func_ret, func_fill), TP_STRUCT__entry( WIPHY_ENTRY __field(int, func_ret) __field(int, func_fill) ), TP_fast_assign( WIPHY_ASSIGN; __entry->func_ret = func_ret; __entry->func_fill = func_fill; ), TP_printk(WIPHY_PR_FMT ", function returns: %d, function filled: %d", WIPHY_PR_ARG, __entry->func_ret, __entry->func_fill) ); #ifdef CONFIG_NL80211_TESTMODE TRACE_EVENT(rdev_testmode_cmd, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; ), TP_printk(WIPHY_PR_FMT WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) ); TRACE_EVENT(rdev_testmode_dump, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy), TP_STRUCT__entry( WIPHY_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; ), TP_printk(WIPHY_PR_FMT, WIPHY_PR_ARG) ); #endif /* CONFIG_NL80211_TESTMODE */ TRACE_EVENT(rdev_set_bitrate_mask, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, unsigned int link_id, const u8 *peer, const struct cfg80211_bitrate_mask *mask), TP_ARGS(wiphy, netdev, link_id, peer, mask), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(unsigned int, link_id) MAC_ENTRY(peer) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->link_id = link_id; MAC_ASSIGN(peer, peer); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, peer: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, __entry->peer) ); TRACE_EVENT(rdev_update_mgmt_frame_registrations, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct mgmt_frame_regs *upd), TP_ARGS(wiphy, wdev, upd), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u16, global_stypes) __field(u16, interface_stypes) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->global_stypes = upd->global_stypes; __entry->interface_stypes = upd->interface_stypes; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", global: 0x%.2x, intf: 0x%.2x", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->global_stypes, __entry->interface_stypes) ); TRACE_EVENT(rdev_return_int_tx_rx, TP_PROTO(struct wiphy *wiphy, int ret, u32 tx, u32 rx), TP_ARGS(wiphy, ret, tx, rx), TP_STRUCT__entry( WIPHY_ENTRY __field(int, ret) __field(u32, tx) __field(u32, rx) ), TP_fast_assign( WIPHY_ASSIGN; __entry->ret = ret; __entry->tx = tx; __entry->rx = rx; ), TP_printk(WIPHY_PR_FMT ", returned %d, tx: %u, rx: %u", WIPHY_PR_ARG, __entry->ret, __entry->tx, __entry->rx) ); TRACE_EVENT(rdev_return_void_tx_rx, TP_PROTO(struct wiphy *wiphy, u32 tx, u32 tx_max, u32 rx, u32 rx_max), TP_ARGS(wiphy, tx, tx_max, rx, rx_max), TP_STRUCT__entry( WIPHY_ENTRY __field(u32, tx) __field(u32, tx_max) __field(u32, rx) __field(u32, rx_max) ), TP_fast_assign( WIPHY_ASSIGN; __entry->tx = tx; __entry->tx_max = tx_max; __entry->rx = rx; __entry->rx_max = rx_max; ), TP_printk(WIPHY_PR_FMT ", tx: %u, tx_max: %u, rx: %u, rx_max: %u ", WIPHY_PR_ARG, __entry->tx, __entry->tx_max, __entry->rx, __entry->rx_max) ); DECLARE_EVENT_CLASS(tx_rx_evt, TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), TP_ARGS(wiphy, tx, rx), TP_STRUCT__entry( WIPHY_ENTRY __field(u32, tx) __field(u32, rx) ), TP_fast_assign( WIPHY_ASSIGN; __entry->tx = tx; __entry->rx = rx; ), TP_printk(WIPHY_PR_FMT ", tx: %u, rx: %u ", WIPHY_PR_ARG, __entry->tx, __entry->rx) ); DEFINE_EVENT(tx_rx_evt, rdev_set_antenna, TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), TP_ARGS(wiphy, tx, rx) ); DECLARE_EVENT_CLASS(wiphy_netdev_id_evt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id), TP_ARGS(wiphy, netdev, id), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u64, id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->id = id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", id: %llu", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->id) ); DEFINE_EVENT(wiphy_netdev_id_evt, rdev_sched_scan_start, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id), TP_ARGS(wiphy, netdev, id) ); DEFINE_EVENT(wiphy_netdev_id_evt, rdev_sched_scan_stop, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id), TP_ARGS(wiphy, netdev, id) ); TRACE_EVENT(rdev_tdls_mgmt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *peer, int link_id, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, bool initiator, const u8 *buf, size_t len), TP_ARGS(wiphy, netdev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, buf, len), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(int, link_id) __field(u8, action_code) __field(u8, dialog_token) __field(u16, status_code) __field(u32, peer_capability) __field(bool, initiator) __dynamic_array(u8, buf, len) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); __entry->link_id = link_id; __entry->action_code = action_code; __entry->dialog_token = dialog_token; __entry->status_code = status_code; __entry->peer_capability = peer_capability; __entry->initiator = initiator; memcpy(__get_dynamic_array(buf), buf, len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM" ", link_id: %d, action_code: %u " "dialog_token: %u, status_code: %u, peer_capability: %u " "initiator: %s buf: %#.2x ", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->link_id, __entry->action_code, __entry->dialog_token, __entry->status_code, __entry->peer_capability, BOOL_TO_STR(__entry->initiator), ((u8 *)__get_dynamic_array(buf))[0]) ); TRACE_EVENT(rdev_dump_survey, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx), TP_ARGS(wiphy, netdev, _idx), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, idx) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->idx = _idx; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", index: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->idx) ); TRACE_EVENT(rdev_return_int_survey_info, TP_PROTO(struct wiphy *wiphy, int ret, struct survey_info *info), TP_ARGS(wiphy, ret, info), TP_STRUCT__entry( WIPHY_ENTRY CHAN_ENTRY __field(int, ret) __field(u64, time) __field(u64, time_busy) __field(u64, time_ext_busy) __field(u64, time_rx) __field(u64, time_tx) __field(u64, time_scan) __field(u32, filled) __field(s8, noise) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_ASSIGN(info->channel); __entry->ret = ret; __entry->time = info->time; __entry->time_busy = info->time_busy; __entry->time_ext_busy = info->time_ext_busy; __entry->time_rx = info->time_rx; __entry->time_tx = info->time_tx; __entry->time_scan = info->time_scan; __entry->filled = info->filled; __entry->noise = info->noise; ), TP_printk(WIPHY_PR_FMT ", returned: %d, " CHAN_PR_FMT ", channel time: %llu, channel time busy: %llu, " "channel time extension busy: %llu, channel time rx: %llu, " "channel time tx: %llu, scan time: %llu, filled: %u, noise: %d", WIPHY_PR_ARG, __entry->ret, CHAN_PR_ARG, __entry->time, __entry->time_busy, __entry->time_ext_busy, __entry->time_rx, __entry->time_tx, __entry->time_scan, __entry->filled, __entry->noise) ); TRACE_EVENT(rdev_tdls_oper, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *peer, enum nl80211_tdls_operation oper), TP_ARGS(wiphy, netdev, peer, oper), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(enum nl80211_tdls_operation, oper) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); __entry->oper = oper; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM, oper: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->oper) ); DECLARE_EVENT_CLASS(rdev_pmksa, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmksa *pmksa), TP_ARGS(wiphy, netdev, pmksa), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, pmksa->bssid); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid) ); TRACE_EVENT(rdev_probe_client, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *peer), TP_ARGS(wiphy, netdev, peer), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer) ); DEFINE_EVENT(rdev_pmksa, rdev_set_pmksa, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmksa *pmksa), TP_ARGS(wiphy, netdev, pmksa) ); DEFINE_EVENT(rdev_pmksa, rdev_del_pmksa, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmksa *pmksa), TP_ARGS(wiphy, netdev, pmksa) ); TRACE_EVENT(rdev_remain_on_channel, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, unsigned int duration), TP_ARGS(wiphy, wdev, chan, duration), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY CHAN_ENTRY __field(unsigned int, duration) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; CHAN_ASSIGN(chan); __entry->duration = duration; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", " CHAN_PR_FMT ", duration: %u", WIPHY_PR_ARG, WDEV_PR_ARG, CHAN_PR_ARG, __entry->duration) ); TRACE_EVENT(rdev_return_int_cookie, TP_PROTO(struct wiphy *wiphy, int ret, u64 cookie), TP_ARGS(wiphy, ret, cookie), TP_STRUCT__entry( WIPHY_ENTRY __field(int, ret) __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; __entry->ret = ret; __entry->cookie = cookie; ), TP_printk(WIPHY_PR_FMT ", returned %d, cookie: %llu", WIPHY_PR_ARG, __entry->ret, __entry->cookie) ); TRACE_EVENT(rdev_cancel_remain_on_channel, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->cookie = cookie; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie: %llu", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie) ); TRACE_EVENT(rdev_mgmt_tx, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params), TP_ARGS(wiphy, wdev, params), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY CHAN_ENTRY __field(bool, offchan) __field(unsigned int, wait) __field(bool, no_cck) __field(bool, dont_wait_for_ack) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; CHAN_ASSIGN(params->chan); __entry->offchan = params->offchan; __entry->wait = params->wait; __entry->no_cck = params->no_cck; __entry->dont_wait_for_ack = params->dont_wait_for_ack; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", " CHAN_PR_FMT ", offchan: %s," " wait: %u, no cck: %s, dont wait for ack: %s", WIPHY_PR_ARG, WDEV_PR_ARG, CHAN_PR_ARG, BOOL_TO_STR(__entry->offchan), __entry->wait, BOOL_TO_STR(__entry->no_cck), BOOL_TO_STR(__entry->dont_wait_for_ack)) ); TRACE_EVENT(rdev_tx_control_port, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *buf, size_t len, const u8 *dest, __be16 proto, bool unencrypted, int link_id), TP_ARGS(wiphy, netdev, buf, len, dest, proto, unencrypted, link_id), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dest) __field(__be16, proto) __field(bool, unencrypted) __field(int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dest, dest); __entry->proto = proto; __entry->unencrypted = unencrypted; __entry->link_id = link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM," " proto: 0x%x, unencrypted: %s, link: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->dest, be16_to_cpu(__entry->proto), BOOL_TO_STR(__entry->unencrypted), __entry->link_id) ); TRACE_EVENT(rdev_set_noack_map, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u16 noack_map), TP_ARGS(wiphy, netdev, noack_map), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u16, noack_map) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->noack_map = noack_map; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", noack_map: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->noack_map) ); DEFINE_EVENT(wiphy_wdev_link_evt, rdev_get_channel, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id), TP_ARGS(wiphy, wdev, link_id) ); TRACE_EVENT(rdev_return_chandef, TP_PROTO(struct wiphy *wiphy, int ret, struct cfg80211_chan_def *chandef), TP_ARGS(wiphy, ret, chandef), TP_STRUCT__entry( WIPHY_ENTRY __field(int, ret) CHAN_DEF_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; if (ret == 0) CHAN_DEF_ASSIGN(chandef); else CHAN_DEF_ASSIGN((struct cfg80211_chan_def *)NULL); __entry->ret = ret; ), TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", ret: %d", WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->ret) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_start_p2p_device, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); TRACE_EVENT(rdev_start_nan, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf), TP_ARGS(wiphy, wdev, conf), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u8, master_pref) __field(u8, bands) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->master_pref = conf->master_pref; __entry->bands = conf->bands; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", master preference: %u, bands: 0x%0x", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, __entry->bands) ); TRACE_EVENT(rdev_nan_change_conf, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes), TP_ARGS(wiphy, wdev, conf, changes), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u8, master_pref) __field(u8, bands) __field(u32, changes) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->master_pref = conf->master_pref; __entry->bands = conf->bands; __entry->changes = changes; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", master preference: %u, bands: 0x%0x, changes: %x", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, __entry->bands, __entry->changes) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); TRACE_EVENT(rdev_add_nan_func, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, const struct cfg80211_nan_func *func), TP_ARGS(wiphy, wdev, func), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u8, func_type) __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->func_type = func->type; __entry->cookie = func->cookie ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", type=%u, cookie=%llu", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->func_type, __entry->cookie) ); TRACE_EVENT(rdev_del_nan_func, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->cookie = cookie; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie=%llu", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie) ); TRACE_EVENT(rdev_set_mac_acl, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_acl_data *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u32, acl_policy) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->acl_policy = params->acl_policy; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", acl policy: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy) ); TRACE_EVENT(rdev_update_ft_ies, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_update_ft_ies_params *ftie), TP_ARGS(wiphy, netdev, ftie), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u16, md) __dynamic_array(u8, ie, ftie->ie_len) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->md = ftie->md; memcpy(__get_dynamic_array(ie), ftie->ie, ftie->ie_len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", md: 0x%x", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md) ); TRACE_EVENT(rdev_crit_proto_start, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_crit_proto_id protocol, u16 duration), TP_ARGS(wiphy, wdev, protocol, duration), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u16, proto) __field(u16, duration) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->proto = protocol; __entry->duration = duration; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", proto=%x, duration=%u", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->proto, __entry->duration) ); TRACE_EVENT(rdev_crit_proto_stop, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) ); TRACE_EVENT(rdev_channel_switch, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_csa_settings *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY __field(bool, radar_required) __field(bool, block_tx) __field(u8, count) __dynamic_array(u16, bcn_ofs, params->n_counter_offsets_beacon) __dynamic_array(u16, pres_ofs, params->n_counter_offsets_presp) __field(u8, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(&params->chandef); __entry->radar_required = params->radar_required; __entry->block_tx = params->block_tx; __entry->count = params->count; memcpy(__get_dynamic_array(bcn_ofs), params->counter_offsets_beacon, params->n_counter_offsets_beacon * sizeof(u16)); /* probe response offsets are optional */ if (params->n_counter_offsets_presp) memcpy(__get_dynamic_array(pres_ofs), params->counter_offsets_presp, params->n_counter_offsets_presp * sizeof(u16)); __entry->link_id = params->link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", block_tx: %d, count: %u, radar_required: %d, link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->block_tx, __entry->count, __entry->radar_required, __entry->link_id) ); TRACE_EVENT(rdev_set_qos_map, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_qos_map *qos_map), TP_ARGS(wiphy, netdev, qos_map), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY QOS_MAP_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; QOS_MAP_ASSIGN(qos_map); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", num_des: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->num_des) ); TRACE_EVENT(rdev_set_ap_chanwidth, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, unsigned int link_id, struct cfg80211_chan_def *chandef), TP_ARGS(wiphy, netdev, link_id, chandef), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY __field(unsigned int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->link_id = link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d", WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id) ); TRACE_EVENT(rdev_add_tx_ts, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 tsid, const u8 *peer, u8 user_prio, u16 admitted_time), TP_ARGS(wiphy, netdev, tsid, peer, user_prio, admitted_time), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(u8, tsid) __field(u8, user_prio) __field(u16, admitted_time) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); __entry->tsid = tsid; __entry->user_prio = user_prio; __entry->admitted_time = admitted_time; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM, TSID %d, UP %d, time %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->tsid, __entry->user_prio, __entry->admitted_time) ); TRACE_EVENT(rdev_del_tx_ts, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 tsid, const u8 *peer), TP_ARGS(wiphy, netdev, tsid, peer), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(u8, tsid) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); __entry->tsid = tsid; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM, TSID %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->tsid) ); TRACE_EVENT(rdev_tdls_channel_switch, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *addr, u8 oper_class, struct cfg80211_chan_def *chandef), TP_ARGS(wiphy, netdev, addr, oper_class, chandef), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(addr) __field(u8, oper_class) CHAN_DEF_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(addr, addr); CHAN_DEF_ASSIGN(chandef); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM" " oper class %d, " CHAN_DEF_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->addr, __entry->oper_class, CHAN_DEF_PR_ARG) ); TRACE_EVENT(rdev_tdls_cancel_channel_switch, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *addr), TP_ARGS(wiphy, netdev, addr), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(addr) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(addr, addr); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->addr) ); TRACE_EVENT(rdev_set_pmk, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmk_conf *pmk_conf), TP_ARGS(wiphy, netdev, pmk_conf), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(aa) __field(u8, pmk_len) __field(u8, pmk_r0_name_len) __dynamic_array(u8, pmk, pmk_conf->pmk_len) __dynamic_array(u8, pmk_r0_name, WLAN_PMK_NAME_LEN) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(aa, pmk_conf->aa); __entry->pmk_len = pmk_conf->pmk_len; __entry->pmk_r0_name_len = pmk_conf->pmk_r0_name ? WLAN_PMK_NAME_LEN : 0; memcpy(__get_dynamic_array(pmk), pmk_conf->pmk, pmk_conf->pmk_len); memcpy(__get_dynamic_array(pmk_r0_name), pmk_conf->pmk_r0_name, pmk_conf->pmk_r0_name ? WLAN_PMK_NAME_LEN : 0); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM" "pmk_len=%u, pmk: %s pmk_r0_name: %s", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->aa, __entry->pmk_len, __print_array(__get_dynamic_array(pmk), __get_dynamic_array_len(pmk), 1), __entry->pmk_r0_name_len ? __print_array(__get_dynamic_array(pmk_r0_name), __get_dynamic_array_len(pmk_r0_name), 1) : "") ); TRACE_EVENT(rdev_del_pmk, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *aa), TP_ARGS(wiphy, netdev, aa), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(aa) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(aa, aa); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->aa) ); TRACE_EVENT(rdev_external_auth, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_external_auth_params *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry(WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) __array(u8, ssid, IEEE80211_MAX_SSID_LEN + 1) __field(u16, status) MAC_ENTRY(mld_addr) ), TP_fast_assign(WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, params->bssid); memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1); memcpy(__entry->ssid, params->ssid.ssid, params->ssid.ssid_len); __entry->status = params->status; MAC_ASSIGN(mld_addr, params->mld_addr); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM" ", ssid: %s, status: %u, mld_addr: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->ssid, __entry->status, __entry->mld_addr) ); TRACE_EVENT(rdev_start_radar_detection, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_chan_def *chandef, u32 cac_time_ms, int link_id), TP_ARGS(wiphy, netdev, chandef, cac_time_ms, link_id), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY __field(u32, cac_time_ms) __field(int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->cac_time_ms = cac_time_ms; __entry->link_id = link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", cac_time_ms=%u, link_id=%d", WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->cac_time_ms, __entry->link_id) ); TRACE_EVENT(rdev_set_mcast_rate, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int *mcast_rate), TP_ARGS(wiphy, netdev, mcast_rate), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __array(int, mcast_rate, NUM_NL80211_BANDS) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; memcpy(__entry->mcast_rate, mcast_rate, sizeof(int) * NUM_NL80211_BANDS); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 6GHz=0x%x, 60GHz=0x%x]", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mcast_rate[NL80211_BAND_2GHZ], __entry->mcast_rate[NL80211_BAND_5GHZ], __entry->mcast_rate[NL80211_BAND_6GHZ], __entry->mcast_rate[NL80211_BAND_60GHZ]) ); TRACE_EVENT(rdev_set_coalesce, TP_PROTO(struct wiphy *wiphy, struct cfg80211_coalesce *coalesce), TP_ARGS(wiphy, coalesce), TP_STRUCT__entry( WIPHY_ENTRY __field(int, n_rules) ), TP_fast_assign( WIPHY_ASSIGN; __entry->n_rules = coalesce ? coalesce->n_rules : 0; ), TP_printk(WIPHY_PR_FMT ", n_rules=%d", WIPHY_PR_ARG, __entry->n_rules) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); TRACE_EVENT(rdev_set_multicast_to_unicast, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const bool enabled), TP_ARGS(wiphy, netdev, enabled), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(bool, enabled) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->enabled = enabled; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", unicast: %s", WIPHY_PR_ARG, NETDEV_PR_ARG, BOOL_TO_STR(__entry->enabled)) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_get_txq_stats, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); TRACE_EVENT(rdev_get_ftm_responder_stats, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ftm_responder_stats *ftm_stats), TP_ARGS(wiphy, netdev, ftm_stats), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u64, timestamp) __field(u32, success_num) __field(u32, partial_num) __field(u32, failed_num) __field(u32, asap_num) __field(u32, non_asap_num) __field(u64, duration) __field(u32, unknown_triggers) __field(u32, reschedule) __field(u32, out_of_window) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->success_num = ftm_stats->success_num; __entry->partial_num = ftm_stats->partial_num; __entry->failed_num = ftm_stats->failed_num; __entry->asap_num = ftm_stats->asap_num; __entry->non_asap_num = ftm_stats->non_asap_num; __entry->duration = ftm_stats->total_duration_ms; __entry->unknown_triggers = ftm_stats->unknown_triggers_num; __entry->reschedule = ftm_stats->reschedule_requests_num; __entry->out_of_window = ftm_stats->out_of_window_triggers_num; ), TP_printk(WIPHY_PR_FMT "Ftm responder stats: success %u, partial %u, " "failed %u, asap %u, non asap %u, total duration %llu, unknown " "triggers %u, rescheduled %u, out of window %u", WIPHY_PR_ARG, __entry->success_num, __entry->partial_num, __entry->failed_num, __entry->asap_num, __entry->non_asap_num, __entry->duration, __entry->unknown_triggers, __entry->reschedule, __entry->out_of_window) ); DEFINE_EVENT(wiphy_wdev_cookie_evt, rdev_start_pmsr, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie) ); DEFINE_EVENT(wiphy_wdev_cookie_evt, rdev_abort_pmsr, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie) ); TRACE_EVENT(rdev_set_fils_aad, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_fils_aad *fils_aad), TP_ARGS(wiphy, netdev, fils_aad), TP_STRUCT__entry(WIPHY_ENTRY NETDEV_ENTRY __array(u8, macaddr, ETH_ALEN) __field(u8, kek_len) ), TP_fast_assign(WIPHY_ASSIGN; NETDEV_ASSIGN; FILS_AAD_ASSIGN(fils_aad); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " FILS_AAD_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr, __entry->kek_len) ); TRACE_EVENT(rdev_update_owe_info, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_update_owe_info *owe_info), TP_ARGS(wiphy, netdev, owe_info), TP_STRUCT__entry(WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(u16, status) __dynamic_array(u8, ie, owe_info->ie_len)), TP_fast_assign(WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, owe_info->peer); __entry->status = owe_info->status; memcpy(__get_dynamic_array(ie), owe_info->ie, owe_info->ie_len);), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM" " status %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->status) ); TRACE_EVENT(rdev_probe_mesh_link, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *dest, const u8 *buf, size_t len), TP_ARGS(wiphy, netdev, dest, buf, len), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dest) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dest, dest); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->dest) ); TRACE_EVENT(rdev_set_tid_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_tid_config *tid_conf), TP_ARGS(wiphy, netdev, tid_conf), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, tid_conf->peer); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer) ); TRACE_EVENT(rdev_reset_tid_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *peer, u8 tids), TP_ARGS(wiphy, netdev, peer, tids), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(u8, tids) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); __entry->tids = tids; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM, tids: 0x%x", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->tids) ); TRACE_EVENT(rdev_set_sar_specs, TP_PROTO(struct wiphy *wiphy, struct cfg80211_sar_specs *sar), TP_ARGS(wiphy, sar), TP_STRUCT__entry( WIPHY_ENTRY __field(u16, type) __field(u16, num) ), TP_fast_assign( WIPHY_ASSIGN; __entry->type = sar->type; __entry->num = sar->num_sub_specs; ), TP_printk(WIPHY_PR_FMT ", Set type:%d, num_specs:%d", WIPHY_PR_ARG, __entry->type, __entry->num) ); TRACE_EVENT(rdev_color_change, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_color_change_settings *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u8, count) __field(u16, bcn_ofs) __field(u16, pres_ofs) __field(u8, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->count = params->count; __entry->bcn_ofs = params->counter_offset_beacon; __entry->pres_ofs = params->counter_offset_presp; __entry->link_id = params->link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", count: %u, link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->count, __entry->link_id) ); TRACE_EVENT(rdev_set_radar_background, TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), TP_ARGS(wiphy, chandef), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef) ), TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, WIPHY_PR_ARG, CHAN_DEF_PR_ARG) ); DEFINE_EVENT(wiphy_wdev_link_evt, rdev_add_intf_link, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id), TP_ARGS(wiphy, wdev, link_id) ); DEFINE_EVENT(wiphy_wdev_link_evt, rdev_del_intf_link, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id), TP_ARGS(wiphy, wdev, link_id) ); TRACE_EVENT(rdev_del_link_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct link_station_del_parameters *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __array(u8, mld_mac, 6) __field(u32, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; memset(__entry->mld_mac, 0, 6); if (params->mld_mac) memcpy(__entry->mld_mac, params->mld_mac, 6); __entry->link_id = params->link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" ", link id: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mld_mac, __entry->link_id) ); TRACE_EVENT(rdev_set_hw_timestamp, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_set_hw_timestamp *hwts), TP_ARGS(wiphy, netdev, hwts), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(macaddr) __field(bool, enable) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(macaddr, hwts->macaddr); __entry->enable = hwts->enable; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac %pM, enable: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr, __entry->enable) ); TRACE_EVENT(rdev_set_ttlm, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ttlm_params *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __array(u8, dlink, sizeof(u16) * 8) __array(u8, ulink, sizeof(u16) * 8) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; memcpy(__entry->dlink, params->dlink, sizeof(params->dlink)); memcpy(__entry->ulink, params->ulink, sizeof(params->ulink)); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG) ); TRACE_EVENT(rdev_set_epcs, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, bool val), TP_ARGS(wiphy, netdev, val), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(bool, val) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->val = val; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", config=%u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->val) ); /************************************************************* * cfg80211 exported functions traces * *************************************************************/ TRACE_EVENT(cfg80211_return_bool, TP_PROTO(bool ret), TP_ARGS(ret), TP_STRUCT__entry( __field(bool, ret) ), TP_fast_assign( __entry->ret = ret; ), TP_printk("returned %s", BOOL_TO_STR(__entry->ret)) ); DECLARE_EVENT_CLASS(cfg80211_netdev_mac_evt, TP_PROTO(struct net_device *netdev, const u8 *macaddr), TP_ARGS(netdev, macaddr), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(macaddr) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(macaddr, macaddr); ), TP_printk(NETDEV_PR_FMT ", mac: %pM", NETDEV_PR_ARG, __entry->macaddr) ); DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_notify_new_peer_candidate, TP_PROTO(struct net_device *netdev, const u8 *macaddr), TP_ARGS(netdev, macaddr) ); DECLARE_EVENT_CLASS(netdev_evt_only, TP_PROTO(struct net_device *netdev), TP_ARGS(netdev), TP_STRUCT__entry( NETDEV_ENTRY ), TP_fast_assign( NETDEV_ASSIGN; ), TP_printk(NETDEV_PR_FMT , NETDEV_PR_ARG) ); DEFINE_EVENT(netdev_evt_only, cfg80211_send_rx_auth, TP_PROTO(struct net_device *netdev), TP_ARGS(netdev) ); TRACE_EVENT(cfg80211_send_rx_assoc, TP_PROTO(struct net_device *netdev, const struct cfg80211_rx_assoc_resp_data *data), TP_ARGS(netdev, data), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(ap_addr) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(ap_addr, data->ap_mld_addr ?: data->links[0].bss->bssid); ), TP_printk(NETDEV_PR_FMT ", %pM", NETDEV_PR_ARG, __entry->ap_addr) ); DECLARE_EVENT_CLASS(netdev_frame_event, TP_PROTO(struct net_device *netdev, const u8 *buf, int len), TP_ARGS(netdev, buf, len), TP_STRUCT__entry( NETDEV_ENTRY __dynamic_array(u8, frame, len) ), TP_fast_assign( NETDEV_ASSIGN; memcpy(__get_dynamic_array(frame), buf, len); ), TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x", NETDEV_PR_ARG, le16_to_cpup((__le16 *)__get_dynamic_array(frame))) ); DEFINE_EVENT(netdev_frame_event, cfg80211_rx_unprot_mlme_mgmt, TP_PROTO(struct net_device *netdev, const u8 *buf, int len), TP_ARGS(netdev, buf, len) ); DEFINE_EVENT(netdev_frame_event, cfg80211_rx_mlme_mgmt, TP_PROTO(struct net_device *netdev, const u8 *buf, int len), TP_ARGS(netdev, buf, len) ); TRACE_EVENT(cfg80211_tx_mlme_mgmt, TP_PROTO(struct net_device *netdev, const u8 *buf, int len, bool reconnect), TP_ARGS(netdev, buf, len, reconnect), TP_STRUCT__entry( NETDEV_ENTRY __dynamic_array(u8, frame, len) __field(int, reconnect) ), TP_fast_assign( NETDEV_ASSIGN; memcpy(__get_dynamic_array(frame), buf, len); __entry->reconnect = reconnect; ), TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x reconnect:%d", NETDEV_PR_ARG, le16_to_cpup((__le16 *)__get_dynamic_array(frame)), __entry->reconnect) ); DECLARE_EVENT_CLASS(netdev_mac_evt, TP_PROTO(struct net_device *netdev, const u8 *mac), TP_ARGS(netdev, mac), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(mac) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(mac, mac) ), TP_printk(NETDEV_PR_FMT ", mac: %pM", NETDEV_PR_ARG, __entry->mac) ); DEFINE_EVENT(netdev_mac_evt, cfg80211_send_auth_timeout, TP_PROTO(struct net_device *netdev, const u8 *mac), TP_ARGS(netdev, mac) ); TRACE_EVENT(cfg80211_send_assoc_failure, TP_PROTO(struct net_device *netdev, struct cfg80211_assoc_failure *data), TP_ARGS(netdev, data), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(ap_addr) __field(bool, timeout) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(ap_addr, data->ap_mld_addr ?: data->bss[0]->bssid); __entry->timeout = data->timeout; ), TP_printk(NETDEV_PR_FMT ", mac: %pM, timeout: %d", NETDEV_PR_ARG, __entry->ap_addr, __entry->timeout) ); TRACE_EVENT(cfg80211_michael_mic_failure, TP_PROTO(struct net_device *netdev, const u8 *addr, enum nl80211_key_type key_type, int key_id, const u8 *tsc), TP_ARGS(netdev, addr, key_type, key_id, tsc), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(addr) __field(enum nl80211_key_type, key_type) __field(int, key_id) __array(u8, tsc, 6) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(addr, addr); __entry->key_type = key_type; __entry->key_id = key_id; if (tsc) memcpy(__entry->tsc, tsc, 6); ), TP_printk(NETDEV_PR_FMT ", %pM, key type: %d, key id: %d, tsc: %pm", NETDEV_PR_ARG, __entry->addr, __entry->key_type, __entry->key_id, __entry->tsc) ); TRACE_EVENT(cfg80211_ready_on_channel, TP_PROTO(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, unsigned int duration), TP_ARGS(wdev, cookie, chan, duration), TP_STRUCT__entry( WDEV_ENTRY __field(u64, cookie) CHAN_ENTRY __field(unsigned int, duration) ), TP_fast_assign( WDEV_ASSIGN; __entry->cookie = cookie; CHAN_ASSIGN(chan); __entry->duration = duration; ), TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT ", duration: %u", WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG, __entry->duration) ); TRACE_EVENT(cfg80211_ready_on_channel_expired, TP_PROTO(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan), TP_ARGS(wdev, cookie, chan), TP_STRUCT__entry( WDEV_ENTRY __field(u64, cookie) CHAN_ENTRY ), TP_fast_assign( WDEV_ASSIGN; __entry->cookie = cookie; CHAN_ASSIGN(chan); ), TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT, WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG) ); TRACE_EVENT(cfg80211_tx_mgmt_expired, TP_PROTO(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan), TP_ARGS(wdev, cookie, chan), TP_STRUCT__entry( WDEV_ENTRY __field(u64, cookie) CHAN_ENTRY ), TP_fast_assign( WDEV_ASSIGN; __entry->cookie = cookie; CHAN_ASSIGN(chan); ), TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT, WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG) ); TRACE_EVENT(cfg80211_new_sta, TP_PROTO(struct net_device *netdev, const u8 *mac_addr, struct station_info *sinfo), TP_ARGS(netdev, mac_addr, sinfo), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(mac_addr) SINFO_ENTRY ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(mac_addr, mac_addr); SINFO_ASSIGN; ), TP_printk(NETDEV_PR_FMT ", %pM", NETDEV_PR_ARG, __entry->mac_addr) ); DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_del_sta, TP_PROTO(struct net_device *netdev, const u8 *macaddr), TP_ARGS(netdev, macaddr) ); TRACE_EVENT(cfg80211_rx_mgmt, TP_PROTO(struct wireless_dev *wdev, struct cfg80211_rx_info *info), TP_ARGS(wdev, info), TP_STRUCT__entry( WDEV_ENTRY __field(int, freq) __field(int, sig_dbm) ), TP_fast_assign( WDEV_ASSIGN; __entry->freq = info->freq; __entry->sig_dbm = info->sig_dbm; ), TP_printk(WDEV_PR_FMT ", freq: "KHZ_F", sig dbm: %d", WDEV_PR_ARG, PR_KHZ(__entry->freq), __entry->sig_dbm) ); TRACE_EVENT(cfg80211_mgmt_tx_status, TP_PROTO(struct wireless_dev *wdev, u64 cookie, bool ack), TP_ARGS(wdev, cookie, ack), TP_STRUCT__entry( WDEV_ENTRY __field(u64, cookie) __field(bool, ack) ), TP_fast_assign( WDEV_ASSIGN; __entry->cookie = cookie; __entry->ack = ack; ), TP_printk(WDEV_PR_FMT", cookie: %llu, ack: %s", WDEV_PR_ARG, __entry->cookie, BOOL_TO_STR(__entry->ack)) ); TRACE_EVENT(cfg80211_control_port_tx_status, TP_PROTO(struct wireless_dev *wdev, u64 cookie, bool ack), TP_ARGS(wdev, cookie, ack), TP_STRUCT__entry( WDEV_ENTRY __field(u64, cookie) __field(bool, ack) ), TP_fast_assign( WDEV_ASSIGN; __entry->cookie = cookie; __entry->ack = ack; ), TP_printk(WDEV_PR_FMT", cookie: %llu, ack: %s", WDEV_PR_ARG, __entry->cookie, BOOL_TO_STR(__entry->ack)) ); TRACE_EVENT(cfg80211_rx_control_port, TP_PROTO(struct net_device *netdev, struct sk_buff *skb, bool unencrypted, int link_id), TP_ARGS(netdev, skb, unencrypted, link_id), TP_STRUCT__entry( NETDEV_ENTRY __field(int, len) MAC_ENTRY(from) __field(u16, proto) __field(bool, unencrypted) __field(int, link_id) ), TP_fast_assign( NETDEV_ASSIGN; __entry->len = skb->len; MAC_ASSIGN(from, eth_hdr(skb)->h_source); __entry->proto = be16_to_cpu(skb->protocol); __entry->unencrypted = unencrypted; __entry->link_id = link_id; ), TP_printk(NETDEV_PR_FMT ", len=%d, %pM, proto: 0x%x, unencrypted: %s, link: %d", NETDEV_PR_ARG, __entry->len, __entry->from, __entry->proto, BOOL_TO_STR(__entry->unencrypted), __entry->link_id) ); TRACE_EVENT(cfg80211_cqm_rssi_notify, TP_PROTO(struct net_device *netdev, enum nl80211_cqm_rssi_threshold_event rssi_event, s32 rssi_level), TP_ARGS(netdev, rssi_event, rssi_level), TP_STRUCT__entry( NETDEV_ENTRY __field(enum nl80211_cqm_rssi_threshold_event, rssi_event) __field(s32, rssi_level) ), TP_fast_assign( NETDEV_ASSIGN; __entry->rssi_event = rssi_event; __entry->rssi_level = rssi_level; ), TP_printk(NETDEV_PR_FMT ", rssi event: %d, level: %d", NETDEV_PR_ARG, __entry->rssi_event, __entry->rssi_level) ); TRACE_EVENT(cfg80211_reg_can_beacon, TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype, u32 prohibited_flags, u32 permitting_flags), TP_ARGS(wiphy, chandef, iftype, prohibited_flags, permitting_flags), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY __field(enum nl80211_iftype, iftype) __field(u32, prohibited_flags) __field(u32, permitting_flags) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->iftype = iftype; __entry->prohibited_flags = prohibited_flags; __entry->permitting_flags = permitting_flags; ), TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d prohibited_flags=0x%x permitting_flags=0x%x", WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype, __entry->prohibited_flags, __entry->permitting_flags) ); TRACE_EVENT(cfg80211_chandef_dfs_required, TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), TP_ARGS(wiphy, chandef), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); ), TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, WIPHY_PR_ARG, CHAN_DEF_PR_ARG) ); TRACE_EVENT(cfg80211_ch_switch_notify, TP_PROTO(struct net_device *netdev, struct cfg80211_chan_def *chandef, unsigned int link_id), TP_ARGS(netdev, chandef, link_id), TP_STRUCT__entry( NETDEV_ENTRY CHAN_DEF_ENTRY __field(unsigned int, link_id) ), TP_fast_assign( NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->link_id = link_id; ), TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d", NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id) ); TRACE_EVENT(cfg80211_ch_switch_started_notify, TP_PROTO(struct net_device *netdev, struct cfg80211_chan_def *chandef, unsigned int link_id), TP_ARGS(netdev, chandef, link_id), TP_STRUCT__entry( NETDEV_ENTRY CHAN_DEF_ENTRY __field(unsigned int, link_id) ), TP_fast_assign( NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->link_id = link_id; ), TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d", NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id) ); TRACE_EVENT(cfg80211_radar_event, TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, bool offchan), TP_ARGS(wiphy, chandef, offchan), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY __field(bool, offchan) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->offchan = offchan; ), TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", offchan %d", WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->offchan) ); TRACE_EVENT(cfg80211_cac_event, TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt, unsigned int link_id), TP_ARGS(netdev, evt, link_id), TP_STRUCT__entry( NETDEV_ENTRY __field(enum nl80211_radar_event, evt) __field(unsigned int, link_id) ), TP_fast_assign( NETDEV_ASSIGN; __entry->evt = evt; __entry->link_id = link_id; ), TP_printk(NETDEV_PR_FMT ", event: %d, link_id=%u", NETDEV_PR_ARG, __entry->evt, __entry->link_id) ); DECLARE_EVENT_CLASS(cfg80211_rx_evt, TP_PROTO(struct net_device *netdev, const u8 *addr), TP_ARGS(netdev, addr), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(addr) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(addr, addr); ), TP_printk(NETDEV_PR_FMT ", %pM", NETDEV_PR_ARG, __entry->addr) ); DEFINE_EVENT(cfg80211_rx_evt, cfg80211_rx_spurious_frame, TP_PROTO(struct net_device *netdev, const u8 *addr), TP_ARGS(netdev, addr) ); DEFINE_EVENT(cfg80211_rx_evt, cfg80211_rx_unexpected_4addr_frame, TP_PROTO(struct net_device *netdev, const u8 *addr), TP_ARGS(netdev, addr) ); TRACE_EVENT(cfg80211_ibss_joined, TP_PROTO(struct net_device *netdev, const u8 *bssid, struct ieee80211_channel *channel), TP_ARGS(netdev, bssid, channel), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(bssid) CHAN_ENTRY ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(bssid, bssid); CHAN_ASSIGN(channel); ), TP_printk(NETDEV_PR_FMT ", bssid: %pM, " CHAN_PR_FMT, NETDEV_PR_ARG, __entry->bssid, CHAN_PR_ARG) ); TRACE_EVENT(cfg80211_probe_status, TP_PROTO(struct net_device *netdev, const u8 *addr, u64 cookie, bool acked), TP_ARGS(netdev, addr, cookie, acked), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(addr) __field(u64, cookie) __field(bool, acked) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(addr, addr); __entry->cookie = cookie; __entry->acked = acked; ), TP_printk(NETDEV_PR_FMT " addr:%pM, cookie: %llu, acked: %s", NETDEV_PR_ARG, __entry->addr, __entry->cookie, BOOL_TO_STR(__entry->acked)) ); TRACE_EVENT(cfg80211_cqm_pktloss_notify, TP_PROTO(struct net_device *netdev, const u8 *peer, u32 num_packets), TP_ARGS(netdev, peer, num_packets), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(peer) __field(u32, num_packets) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); __entry->num_packets = num_packets; ), TP_printk(NETDEV_PR_FMT ", peer: %pM, num of lost packets: %u", NETDEV_PR_ARG, __entry->peer, __entry->num_packets) ); DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_gtk_rekey_notify, TP_PROTO(struct net_device *netdev, const u8 *macaddr), TP_ARGS(netdev, macaddr) ); TRACE_EVENT(cfg80211_pmksa_candidate_notify, TP_PROTO(struct net_device *netdev, int index, const u8 *bssid, bool preauth), TP_ARGS(netdev, index, bssid, preauth), TP_STRUCT__entry( NETDEV_ENTRY __field(int, index) MAC_ENTRY(bssid) __field(bool, preauth) ), TP_fast_assign( NETDEV_ASSIGN; __entry->index = index; MAC_ASSIGN(bssid, bssid); __entry->preauth = preauth; ), TP_printk(NETDEV_PR_FMT ", index:%d, bssid: %pM, pre auth: %s", NETDEV_PR_ARG, __entry->index, __entry->bssid, BOOL_TO_STR(__entry->preauth)) ); TRACE_EVENT(cfg80211_report_obss_beacon, TP_PROTO(struct wiphy *wiphy, const u8 *frame, size_t len, int freq, int sig_dbm), TP_ARGS(wiphy, frame, len, freq, sig_dbm), TP_STRUCT__entry( WIPHY_ENTRY __field(int, freq) __field(int, sig_dbm) ), TP_fast_assign( WIPHY_ASSIGN; __entry->freq = freq; __entry->sig_dbm = sig_dbm; ), TP_printk(WIPHY_PR_FMT ", freq: "KHZ_F", sig_dbm: %d", WIPHY_PR_ARG, PR_KHZ(__entry->freq), __entry->sig_dbm) ); TRACE_EVENT(cfg80211_tdls_oper_request, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *peer, enum nl80211_tdls_operation oper, u16 reason_code), TP_ARGS(wiphy, netdev, peer, oper, reason_code), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(enum nl80211_tdls_operation, oper) __field(u16, reason_code) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); __entry->oper = oper; __entry->reason_code = reason_code; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM, oper: %d, reason_code %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->oper, __entry->reason_code) ); TRACE_EVENT(cfg80211_scan_done, TP_PROTO(struct cfg80211_scan_request *request, struct cfg80211_scan_info *info), TP_ARGS(request, info), TP_STRUCT__entry( __field(u32, n_channels) __dynamic_array(u8, ie, request ? request->ie_len : 0) __array(u32, rates, NUM_NL80211_BANDS) __field(u32, wdev_id) MAC_ENTRY(wiphy_mac) __field(bool, no_cck) __field(bool, aborted) __field(u64, scan_start_tsf) MAC_ENTRY(tsf_bssid) ), TP_fast_assign( if (request) { memcpy(__get_dynamic_array(ie), request->ie, request->ie_len); memcpy(__entry->rates, request->rates, NUM_NL80211_BANDS); __entry->wdev_id = request->wdev ? request->wdev->identifier : 0; if (request->wiphy) MAC_ASSIGN(wiphy_mac, request->wiphy->perm_addr); __entry->no_cck = request->no_cck; } if (info) { __entry->aborted = info->aborted; __entry->scan_start_tsf = info->scan_start_tsf; MAC_ASSIGN(tsf_bssid, info->tsf_bssid); } ), TP_printk("aborted: %s, scan start (TSF): %llu, tsf_bssid: %pM", BOOL_TO_STR(__entry->aborted), (unsigned long long)__entry->scan_start_tsf, __entry->tsf_bssid) ); DECLARE_EVENT_CLASS(wiphy_id_evt, TP_PROTO(struct wiphy *wiphy, u64 id), TP_ARGS(wiphy, id), TP_STRUCT__entry( WIPHY_ENTRY __field(u64, id) ), TP_fast_assign( WIPHY_ASSIGN; __entry->id = id; ), TP_printk(WIPHY_PR_FMT ", id: %llu", WIPHY_PR_ARG, __entry->id) ); DEFINE_EVENT(wiphy_id_evt, cfg80211_sched_scan_stopped, TP_PROTO(struct wiphy *wiphy, u64 id), TP_ARGS(wiphy, id) ); DEFINE_EVENT(wiphy_id_evt, cfg80211_sched_scan_results, TP_PROTO(struct wiphy *wiphy, u64 id), TP_ARGS(wiphy, id) ); TRACE_EVENT(cfg80211_get_bss, TP_PROTO(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *bssid, const u8 *ssid, size_t ssid_len, enum ieee80211_bss_type bss_type, enum ieee80211_privacy privacy), TP_ARGS(wiphy, channel, bssid, ssid, ssid_len, bss_type, privacy), TP_STRUCT__entry( WIPHY_ENTRY CHAN_ENTRY MAC_ENTRY(bssid) __dynamic_array(u8, ssid, ssid_len) __field(enum ieee80211_bss_type, bss_type) __field(enum ieee80211_privacy, privacy) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_ASSIGN(channel); MAC_ASSIGN(bssid, bssid); memcpy(__get_dynamic_array(ssid), ssid, ssid_len); __entry->bss_type = bss_type; __entry->privacy = privacy; ), TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT ", %pM" ", buf: %#.2x, bss_type: %d, privacy: %d", WIPHY_PR_ARG, CHAN_PR_ARG, __entry->bssid, ((u8 *)__get_dynamic_array(ssid))[0], __entry->bss_type, __entry->privacy) ); TRACE_EVENT(cfg80211_inform_bss_frame, TP_PROTO(struct wiphy *wiphy, struct cfg80211_inform_bss *data, struct ieee80211_mgmt *mgmt, size_t len), TP_ARGS(wiphy, data, mgmt, len), TP_STRUCT__entry( WIPHY_ENTRY CHAN_ENTRY __dynamic_array(u8, mgmt, len) __field(s32, signal) __field(u64, ts_boottime) __field(u64, parent_tsf) MAC_ENTRY(parent_bssid) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_ASSIGN(data->chan); if (mgmt) memcpy(__get_dynamic_array(mgmt), mgmt, len); __entry->signal = data->signal; __entry->ts_boottime = data->boottime_ns; __entry->parent_tsf = data->parent_tsf; MAC_ASSIGN(parent_bssid, data->parent_bssid); ), TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "signal: %d, tsb:%llu, detect_tsf:%llu, tsf_bssid: %pM", WIPHY_PR_ARG, CHAN_PR_ARG, __entry->signal, (unsigned long long)__entry->ts_boottime, (unsigned long long)__entry->parent_tsf, __entry->parent_bssid) ); DECLARE_EVENT_CLASS(cfg80211_bss_evt, TP_PROTO(struct cfg80211_bss *pub), TP_ARGS(pub), TP_STRUCT__entry( MAC_ENTRY(bssid) CHAN_ENTRY ), TP_fast_assign( MAC_ASSIGN(bssid, pub->bssid); CHAN_ASSIGN(pub->channel); ), TP_printk("%pM, " CHAN_PR_FMT, __entry->bssid, CHAN_PR_ARG) ); DEFINE_EVENT(cfg80211_bss_evt, cfg80211_return_bss, TP_PROTO(struct cfg80211_bss *pub), TP_ARGS(pub) ); TRACE_EVENT(cfg80211_return_uint, TP_PROTO(unsigned int ret), TP_ARGS(ret), TP_STRUCT__entry( __field(unsigned int, ret) ), TP_fast_assign( __entry->ret = ret; ), TP_printk("ret: %d", __entry->ret) ); TRACE_EVENT(cfg80211_return_u32, TP_PROTO(u32 ret), TP_ARGS(ret), TP_STRUCT__entry( __field(u32, ret) ), TP_fast_assign( __entry->ret = ret; ), TP_printk("ret: %u", __entry->ret) ); TRACE_EVENT(cfg80211_report_wowlan_wakeup, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_wowlan_wakeup *wakeup), TP_ARGS(wiphy, wdev, wakeup), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(bool, non_wireless) __field(bool, disconnect) __field(bool, magic_pkt) __field(bool, gtk_rekey_failure) __field(bool, eap_identity_req) __field(bool, four_way_handshake) __field(bool, rfkill_release) __field(s32, pattern_idx) __field(u32, packet_len) __dynamic_array(u8, packet, wakeup ? wakeup->packet_present_len : 0) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->non_wireless = !wakeup; __entry->disconnect = wakeup ? wakeup->disconnect : false; __entry->magic_pkt = wakeup ? wakeup->magic_pkt : false; __entry->gtk_rekey_failure = wakeup ? wakeup->gtk_rekey_failure : false; __entry->eap_identity_req = wakeup ? wakeup->eap_identity_req : false; __entry->four_way_handshake = wakeup ? wakeup->four_way_handshake : false; __entry->rfkill_release = wakeup ? wakeup->rfkill_release : false; __entry->pattern_idx = wakeup ? wakeup->pattern_idx : false; __entry->packet_len = wakeup ? wakeup->packet_len : false; if (wakeup && wakeup->packet && wakeup->packet_present_len) memcpy(__get_dynamic_array(packet), wakeup->packet, wakeup->packet_present_len); ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) ); TRACE_EVENT(cfg80211_ft_event, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ft_event_params *ft_event), TP_ARGS(wiphy, netdev, ft_event), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __dynamic_array(u8, ies, ft_event->ies_len) MAC_ENTRY(target_ap) __dynamic_array(u8, ric_ies, ft_event->ric_ies_len) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; if (ft_event->ies) memcpy(__get_dynamic_array(ies), ft_event->ies, ft_event->ies_len); MAC_ASSIGN(target_ap, ft_event->target_ap); if (ft_event->ric_ies) memcpy(__get_dynamic_array(ric_ies), ft_event->ric_ies, ft_event->ric_ies_len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", target_ap: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->target_ap) ); TRACE_EVENT(cfg80211_stop_iface, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) ); TRACE_EVENT(cfg80211_pmsr_report, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie, const u8 *addr), TP_ARGS(wiphy, wdev, cookie, addr), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u64, cookie) MAC_ENTRY(addr) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->cookie = cookie; MAC_ASSIGN(addr, addr); ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie:%lld, %pM", WIPHY_PR_ARG, WDEV_PR_ARG, (unsigned long long)__entry->cookie, __entry->addr) ); TRACE_EVENT(cfg80211_pmsr_complete, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->cookie = cookie; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie:%lld", WIPHY_PR_ARG, WDEV_PR_ARG, (unsigned long long)__entry->cookie) ); TRACE_EVENT(cfg80211_update_owe_info_event, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_update_owe_info *owe_info), TP_ARGS(wiphy, netdev, owe_info), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __dynamic_array(u8, ie, owe_info->ie_len) __field(int, assoc_link_id) MAC_ENTRY(peer_mld_addr) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, owe_info->peer); memcpy(__get_dynamic_array(ie), owe_info->ie, owe_info->ie_len); __entry->assoc_link_id = owe_info->assoc_link_id; MAC_ASSIGN(peer_mld_addr, owe_info->peer_mld_addr); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM," " assoc_link_id: %d, peer_mld_addr: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->assoc_link_id, __entry->peer_mld_addr) ); TRACE_EVENT(cfg80211_bss_color_notify, TP_PROTO(struct net_device *netdev, enum nl80211_commands cmd, u8 count, u64 color_bitmap), TP_ARGS(netdev, cmd, count, color_bitmap), TP_STRUCT__entry( NETDEV_ENTRY __field(u32, cmd) __field(u8, count) __field(u64, color_bitmap) ), TP_fast_assign( NETDEV_ASSIGN; __entry->cmd = cmd; __entry->count = count; __entry->color_bitmap = color_bitmap; ), TP_printk(NETDEV_PR_FMT ", cmd: %x, count: %u, bitmap: %llx", NETDEV_PR_ARG, __entry->cmd, __entry->count, __entry->color_bitmap) ); TRACE_EVENT(cfg80211_assoc_comeback, TP_PROTO(struct wireless_dev *wdev, const u8 *ap_addr, u32 timeout), TP_ARGS(wdev, ap_addr, timeout), TP_STRUCT__entry( WDEV_ENTRY MAC_ENTRY(ap_addr) __field(u32, timeout) ), TP_fast_assign( WDEV_ASSIGN; MAC_ASSIGN(ap_addr, ap_addr); __entry->timeout = timeout; ), TP_printk(WDEV_PR_FMT ", %pM, timeout: %u TUs", WDEV_PR_ARG, __entry->ap_addr, __entry->timeout) ); DECLARE_EVENT_CLASS(link_station_add_mod, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct link_station_parameters *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __array(u8, mld_mac, 6) __array(u8, link_mac, 6) __field(u32, link_id) __dynamic_array(u8, supported_rates, params->supported_rates_len) __array(u8, ht_capa, (int)sizeof(struct ieee80211_ht_cap)) __array(u8, vht_capa, (int)sizeof(struct ieee80211_vht_cap)) __field(u8, opmode_notif) __field(bool, opmode_notif_used) __dynamic_array(u8, he_capa, params->he_capa_len) __array(u8, he_6ghz_capa, (int)sizeof(struct ieee80211_he_6ghz_capa)) __dynamic_array(u8, eht_capa, params->eht_capa_len) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; memset(__entry->mld_mac, 0, 6); memset(__entry->link_mac, 0, 6); if (params->mld_mac) memcpy(__entry->mld_mac, params->mld_mac, 6); if (params->link_mac) memcpy(__entry->link_mac, params->link_mac, 6); __entry->link_id = params->link_id; if (params->supported_rates && params->supported_rates_len) memcpy(__get_dynamic_array(supported_rates), params->supported_rates, params->supported_rates_len); memset(__entry->ht_capa, 0, sizeof(struct ieee80211_ht_cap)); if (params->ht_capa) memcpy(__entry->ht_capa, params->ht_capa, sizeof(struct ieee80211_ht_cap)); memset(__entry->vht_capa, 0, sizeof(struct ieee80211_vht_cap)); if (params->vht_capa) memcpy(__entry->vht_capa, params->vht_capa, sizeof(struct ieee80211_vht_cap)); __entry->opmode_notif = params->opmode_notif; __entry->opmode_notif_used = params->opmode_notif_used; if (params->he_capa && params->he_capa_len) memcpy(__get_dynamic_array(he_capa), params->he_capa, params->he_capa_len); memset(__entry->he_6ghz_capa, 0, sizeof(struct ieee80211_he_6ghz_capa)); if (params->he_6ghz_capa) memcpy(__entry->he_6ghz_capa, params->he_6ghz_capa, sizeof(struct ieee80211_he_6ghz_capa)); if (params->eht_capa && params->eht_capa_len) memcpy(__get_dynamic_array(eht_capa), params->eht_capa, params->eht_capa_len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" ", link mac: %pM, link id: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mld_mac, __entry->link_mac, __entry->link_id) ); DEFINE_EVENT(link_station_add_mod, rdev_add_link_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct link_station_parameters *params), TP_ARGS(wiphy, netdev, params) ); DEFINE_EVENT(link_station_add_mod, rdev_mod_link_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct link_station_parameters *params), TP_ARGS(wiphy, netdev, params) ); TRACE_EVENT(cfg80211_links_removed, TP_PROTO(struct net_device *netdev, u16 link_mask), TP_ARGS(netdev, link_mask), TP_STRUCT__entry( NETDEV_ENTRY __field(u16, link_mask) ), TP_fast_assign( NETDEV_ASSIGN; __entry->link_mask = link_mask; ), TP_printk(NETDEV_PR_FMT ", link_mask:0x%x", NETDEV_PR_ARG, __entry->link_mask) ); TRACE_EVENT(cfg80211_mlo_reconf_add_done, TP_PROTO(struct net_device *netdev, u16 link_mask, const u8 *buf, size_t len), TP_ARGS(netdev, link_mask, buf, len), TP_STRUCT__entry( NETDEV_ENTRY __field(u16, link_mask) __dynamic_array(u8, buf, len) ), TP_fast_assign( NETDEV_ASSIGN; __entry->link_mask = link_mask; memcpy(__get_dynamic_array(buf), buf, len); ), TP_printk(NETDEV_PR_FMT ", link_mask:0x%x", NETDEV_PR_ARG, __entry->link_mask) ); TRACE_EVENT(rdev_assoc_ml_reconf, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_assoc_link *add_links, u16 rem_links), TP_ARGS(wiphy, netdev, add_links, rem_links), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u16, add_links) __field(u16, rem_links) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; u32 i; __entry->add_links = 0; __entry->rem_links = rem_links; for (i = 0; add_links && i < IEEE80211_MLD_MAX_NUM_LINKS; i++) if (add_links[i].bss) __entry->add_links |= BIT(i); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", add_links=0x%x, rem_links=0x%x", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->add_links, __entry->rem_links) ); TRACE_EVENT(cfg80211_epcs_changed, TP_PROTO(struct wireless_dev *wdev, bool enabled), TP_ARGS(wdev, enabled), TP_STRUCT__entry( WDEV_ENTRY __field(u32, enabled) ), TP_fast_assign( WDEV_ASSIGN; __entry->enabled = enabled; ), TP_printk(WDEV_PR_FMT ", enabled=%u", WDEV_PR_ARG, __entry->enabled) ); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h>
1277 60 1224 1204 1196 663 164 64 50 5 14 38 237 4 17 219 69 56 13 152 53 98 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 // SPDX-License-Identifier: GPL-2.0-only /* * Access kernel or user memory without faulting. */ #include <linux/export.h> #include <linux/mm.h> #include <linux/uaccess.h> #include <asm/tlb.h> bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return true; } /* * The below only uses kmsan_check_memory() to ensure uninitialized kernel * memory isn't leaked. */ #define copy_from_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __get_kernel_nofault(dst, src, type, err_label); \ kmsan_check_memory(src, sizeof(type)); \ dst += sizeof(type); \ src += sizeof(type); \ len -= sizeof(type); \ } long copy_from_kernel_nofault(void *dst, const void *src, size_t size) { unsigned long align = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) align = (unsigned long)dst | (unsigned long)src; if (!copy_from_kernel_nofault_allowed(src, size)) return -ERANGE; pagefault_disable(); if (!(align & 7)) copy_from_kernel_nofault_loop(dst, src, size, u64, Efault); if (!(align & 3)) copy_from_kernel_nofault_loop(dst, src, size, u32, Efault); if (!(align & 1)) copy_from_kernel_nofault_loop(dst, src, size, u16, Efault); copy_from_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } EXPORT_SYMBOL_GPL(copy_from_kernel_nofault); #define copy_to_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __put_kernel_nofault(dst, src, type, err_label); \ instrument_write(dst, sizeof(type)); \ dst += sizeof(type); \ src += sizeof(type); \ len -= sizeof(type); \ } long copy_to_kernel_nofault(void *dst, const void *src, size_t size) { unsigned long align = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) align = (unsigned long)dst | (unsigned long)src; pagefault_disable(); if (!(align & 7)) copy_to_kernel_nofault_loop(dst, src, size, u64, Efault); if (!(align & 3)) copy_to_kernel_nofault_loop(dst, src, size, u32, Efault); if (!(align & 1)) copy_to_kernel_nofault_loop(dst, src, size, u16, Efault); copy_to_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } EXPORT_SYMBOL_GPL(copy_to_kernel_nofault); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) { const void *src = unsafe_addr; if (unlikely(count <= 0)) return 0; if (!copy_from_kernel_nofault_allowed(unsafe_addr, count)) return -ERANGE; pagefault_disable(); do { __get_kernel_nofault(dst, src, u8, Efault); dst++; src++; } while (dst[-1] && src - unsafe_addr < count); pagefault_enable(); dst[-1] = '\0'; return src - unsafe_addr; Efault: pagefault_enable(); dst[0] = '\0'; return -EFAULT; } /** * copy_from_user_nofault(): safely attempt to read from a user-space location * @dst: pointer to the buffer that shall take the data * @src: address to read from. This must be a user address. * @size: size of the data chunk * * Safely read from user address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ long copy_from_user_nofault(void *dst, const void __user *src, size_t size) { long ret = -EFAULT; if (!__access_ok(src, size)) return ret; if (!nmi_uaccess_okay()) return ret; pagefault_disable(); ret = __copy_from_user_inatomic(dst, src, size); pagefault_enable(); if (ret) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(copy_from_user_nofault); /** * copy_to_user_nofault(): safely attempt to write to a user-space location * @dst: address to write to * @src: pointer to the data that shall be written * @size: size of the data chunk * * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ long copy_to_user_nofault(void __user *dst, const void *src, size_t size) { long ret = -EFAULT; if (access_ok(dst, size)) { pagefault_disable(); ret = __copy_to_user_inatomic(dst, src, size); pagefault_enable(); } if (ret) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(copy_to_user_nofault); /** * strncpy_from_user_nofault: - Copy a NUL terminated string from unsafe user * address. * @dst: Destination address, in kernel space. This buffer must be at * least @count bytes long. * @unsafe_addr: Unsafe user address. * @count: Maximum number of bytes to copy, including the trailing NUL. * * Copies a NUL-terminated string from unsafe user address to kernel buffer. * * On success, returns the length of the string INCLUDING the trailing NUL. * * If access fails, returns -EFAULT (some data may have been copied * and the trailing NUL added). * * If @count is smaller than the length of the string, copies @count-1 bytes, * sets the last byte of @dst buffer to NUL and returns @count. */ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count) { long ret; if (unlikely(count <= 0)) return 0; pagefault_disable(); ret = strncpy_from_user(dst, unsafe_addr, count); pagefault_enable(); if (ret >= count) { ret = count; dst[ret - 1] = '\0'; } else if (ret > 0) { ret++; } return ret; } /** * strnlen_user_nofault: - Get the size of a user string INCLUDING final NUL. * @unsafe_addr: The string to measure. * @count: Maximum count (including NUL) * * Get the size of a NUL-terminated string in user space without pagefault. * * Returns the size of the string INCLUDING the terminating NUL. * * If the string is too long, returns a number larger than @count. User * has to check the return value against "> count". * On exception (or invalid count), returns 0. * * Unlike strnlen_user, this can be used from IRQ handler etc. because * it disables pagefaults. */ long strnlen_user_nofault(const void __user *unsafe_addr, long count) { int ret; pagefault_disable(); ret = strnlen_user(unsafe_addr, count); pagefault_enable(); return ret; } void __copy_overflow(int size, unsigned long count) { WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); } EXPORT_SYMBOL(__copy_overflow);
31 191 47 47 47 422 44 7 43 37 15 14 2 31 30 30 42 67 68 28 26 3 2 7 23 70 68 2 2 68 68 68 22 42 17 16 92 129 129 27 102 1 102 17 92 21 64 13 92 81 13 70 11 11 6 76 6 11 1 4 68 83 8 75 7 68 74 82 1 22 61 7 15 61 182 29 29 29 29 29 29 29 6 20 3 29 29 29 29 29 29 29 29 10 19 19 2 2 2 13 15 26 27 1 26 12 1 14 2 15 15 15 15 15 1 27 27 17 8 1 24 24 16 3 33 34 34 34 34 28 21 19 2 19 8 3 5 61 60 59 2 12 1 39 5 6 51 34 9 8 3 1 3 3 3 3 6 12 2 51 52 52 50 36 14 48 90 1 9 49 34 29 37 38 91 91 91 12 2 1 9 2 7 65 14 1 33 38 47 1 29 1 29 2 27 262 263 262 263 1 50 1 146 1 36 45 42 152 44 2 92 155 6 1 98 2 21 118 5 2 87 31 2 31 133 25 126 98 6 46 41 10 2 107 4 112 80 33 111 112 112 55 1 99 76 11 69 21 5 8 8 25 16 2 33 130 130 105 70 16 8 24 91 90 46 43 4 43 48 348 49 297 41 3 38 131 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 // SPDX-License-Identifier: GPL-2.0-or-later /* * UDP over IPv6 * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on linux/ipv4/udp.c * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind * a single port at the same time. * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data * YOSHIFUJI Hideaki @USAGI: convert /proc/net/udp6 to seq_file. */ #include <linux/bpf-cgroup.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/init.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/indirect_call_wrapper.h> #include <trace/events/udp.h> #include <net/addrconf.h> #include <net/ndisc.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/raw.h> #include <net/seg6.h> #include <net/tcp_states.h> #include <net/ip6_checksum.h> #include <net/ip6_tunnel.h> #include <net/udp_tunnel.h> #include <net/xfrm.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> #include <net/busy_poll.h> #include <net/sock_reuseport.h> #include <net/gro.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <trace/events/skb.h> #include "udp_impl.h" static void udpv6_destruct_sock(struct sock *sk) { udp_destruct_common(sk); inet6_sock_destruct(sk); } int udpv6_init_sock(struct sock *sk) { udp_lib_init_sock(sk); sk->sk_destruct = udpv6_destruct_sock; set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); return 0; } INDIRECT_CALLABLE_SCOPE u32 udp6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) { u32 lhash, fhash; net_get_random_once(&udp6_ehash_secret, sizeof(udp6_ehash_secret)); net_get_random_once(&udp_ipv6_hash_secret, sizeof(udp_ipv6_hash_secret)); lhash = (__force u32)laddr->s6_addr32[3]; fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret); return __inet6_ehashfn(lhash, lport, fhash, fport, udp6_ehash_secret + net_hash_mix(net)); } int udp_v6_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum); unsigned int hash2_partial = ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0); /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; return udp_lib_get_port(sk, snum, hash2_nulladdr); } void udp_v6_rehash(struct sock *sk) { u16 new_hash = ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); u16 new_hash4; if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) { new_hash4 = udp_ehashfn(sock_net(sk), sk->sk_rcv_saddr, sk->sk_num, sk->sk_daddr, sk->sk_dport); } else { new_hash4 = udp6_ehashfn(sock_net(sk), &sk->sk_v6_rcv_saddr, sk->sk_num, &sk->sk_v6_daddr, sk->sk_dport); } udp_lib_rehash(sk, new_hash, new_hash4); } static int compute_score(struct sock *sk, const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned short hnum, int dif, int sdif) { int bound_dev_if, score; struct inet_sock *inet; bool dev_match; if (!net_eq(sock_net(sk), net) || udp_sk(sk)->udp_port_hash != hnum || sk->sk_family != PF_INET6) return -1; if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score = 0; inet = inet_sk(sk); if (inet->inet_dport) { if (inet->inet_dport != sport) return -1; score++; } if (!ipv6_addr_any(&sk->sk_v6_daddr)) { if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; score++; } bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); dev_match = udp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif); if (!dev_match) return -1; if (bound_dev_if) score++; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; return score; } /** * udp6_lib_lookup1() - Simplified lookup using primary hash (destination port) * @net: Network namespace * @saddr: Source address, network order * @sport: Source port, network order * @daddr: Destination address, network order * @hnum: Destination port, host order * @dif: Destination interface index * @sdif: Destination bridge port index, if relevant * @udptable: Set of UDP hash tables * * Simplified lookup to be used as fallback if no sockets are found due to a * potential race between (receive) address change, and lookup happening before * the rehash operation. This function ignores SO_REUSEPORT groups while scoring * result sockets, because if we have one, we don't need the fallback at all. * * Called under rcu_read_lock(). * * Return: socket with highest matching score if any, NULL if none */ static struct sock *udp6_lib_lookup1(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, int sdif, const struct udp_table *udptable) { unsigned int slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot = &udptable->hash[slot]; struct sock *sk, *result = NULL; int score, badness = 0; sk_for_each_rcu(sk, &hslot->head) { score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { result = sk; badness = score; } } return result; } /* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, int sdif, struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; int score, badness; bool need_rescore; result = NULL; badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { need_rescore = false; rescore: score = compute_score(need_rescore ? result : sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { badness = score; if (need_rescore) continue; if (sk->sk_state == TCP_ESTABLISHED) { result = sk; continue; } result = inet6_lookup_reuseport(net, sk, skb, sizeof(struct udphdr), saddr, sport, daddr, hnum, udp6_ehashfn); if (!result) { result = sk; continue; } /* Fall back to scoring if group has connections */ if (!reuseport_has_conns(sk)) return result; /* Reuseport logic returned an error, keep original score. */ if (IS_ERR(result)) continue; /* compute_score is too long of a function to be * inlined, and calling it again here yields * measureable overhead for some * workloads. Work around it by jumping * backwards to rescore 'result'. */ need_rescore = true; goto rescore; } } return result; } #if IS_ENABLED(CONFIG_BASE_SMALL) static struct sock *udp6_lib_lookup4(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, int sdif, struct udp_table *udptable) { return NULL; } static void udp6_hash4(struct sock *sk) { } #else /* !CONFIG_BASE_SMALL */ static struct sock *udp6_lib_lookup4(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, int sdif, struct udp_table *udptable) { const __portpair ports = INET_COMBINED_PORTS(sport, hnum); const struct hlist_nulls_node *node; struct udp_hslot *hslot4; unsigned int hash4, slot; struct udp_sock *up; struct sock *sk; hash4 = udp6_ehashfn(net, daddr, hnum, saddr, sport); slot = hash4 & udptable->mask; hslot4 = &udptable->hash4[slot]; begin: udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) { sk = (struct sock *)up; if (inet6_match(net, sk, saddr, daddr, ports, dif, sdif)) return sk; } /* if the nulls value we got at the end of this lookup is not the * expected one, we must restart lookup. We probably met an item that * was moved to another chain due to rehash. */ if (get_nulls_value(node) != slot) goto begin; return NULL; } static void udp6_hash4(struct sock *sk) { struct net *net = sock_net(sk); unsigned int hash; if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) { udp4_hash4(sk); return; } if (sk_unhashed(sk) || ipv6_addr_any(&sk->sk_v6_rcv_saddr)) return; hash = udp6_ehashfn(net, &sk->sk_v6_rcv_saddr, sk->sk_num, &sk->sk_v6_daddr, sk->sk_dport); udp_lib_hash4(sk, hash); } #endif /* CONFIG_BASE_SMALL */ /* rcu_read_lock() must be held */ struct sock *__udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif, int sdif, struct udp_table *udptable, struct sk_buff *skb) { unsigned short hnum = ntohs(dport); struct udp_hslot *hslot2; struct sock *result, *sk; unsigned int hash2; hash2 = ipv6_portaddr_hash(net, daddr, hnum); hslot2 = udp_hashslot2(udptable, hash2); if (udp_has_hash4(hslot2)) { result = udp6_lib_lookup4(net, saddr, sport, daddr, hnum, dif, sdif, udptable); if (result) /* udp6_lib_lookup4 return sk or NULL */ return result; } /* Lookup connected or non-wildcard sockets */ result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, sdif, hslot2, skb); if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) goto done; /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled) && udptable == net->ipv4.udp_table) { sk = inet6_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr), saddr, sport, daddr, hnum, dif, udp6_ehashfn); if (sk) { result = sk; goto done; } } /* Got non-wildcard socket or error on first lookup */ if (result) goto done; /* Lookup wildcard sockets */ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); hslot2 = udp_hashslot2(udptable, hash2); result = udp6_lib_lookup2(net, saddr, sport, &in6addr_any, hnum, dif, sdif, hslot2, skb); if (!IS_ERR_OR_NULL(result)) goto done; /* Cover address change/lookup/rehash race: see __udp4_lib_lookup() */ result = udp6_lib_lookup1(net, saddr, sport, daddr, hnum, dif, sdif, udptable); done: if (IS_ERR(result)) return NULL; return result; } EXPORT_SYMBOL_GPL(__udp6_lib_lookup); static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct udp_table *udptable) { const struct ipv6hdr *iph = ipv6_hdr(skb); return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), inet6_sdif(skb), udptable, skb); } struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset); struct net *net = dev_net(skb->dev); int iif, sdif; inet6_get_iif_sdif(skb, &iif, &sdif); return __udp6_lib_lookup(net, &iph->saddr, sport, &iph->daddr, dport, iif, sdif, net->ipv4.udp_table, NULL); } /* Must be called under rcu_read_lock(). * Does increment socket refcount. */ #if IS_ENABLED(CONFIG_NF_TPROXY_IPV6) || IS_ENABLED(CONFIG_NF_SOCKET_IPV6) struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif) { struct sock *sk; sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, 0, net->ipv4.udp_table, NULL); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } EXPORT_SYMBOL_GPL(udp6_lib_lookup); #endif /* do not use the scratch area len for jumbogram: their length execeeds the * scratch area space; note that the IP6CB flags is still in the first * cacheline, so checking for jumbograms is cheap */ static int udp6_skb_len(struct sk_buff *skb) { return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb); } /* * This should be easy, if there is something there we * return it, otherwise we block. */ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; unsigned int ulen, copied; int off, err, peeking = flags & MSG_PEEK; int is_udplite = IS_UDPLITE(sk); struct udp_mib __percpu *mib; bool checksum_valid = false; int is_udp4; if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: off = sk_peek_offset(sk, flags); skb = __skb_recv_udp(sk, flags, &off, &err); if (!skb) return err; ulen = udp6_skb_len(skb); copied = len; if (copied > ulen - off) copied = ulen - off; else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; is_udp4 = (skb->protocol == htons(ETH_P_IP)); mib = __UDPX_MIB(sk, is_udp4); /* * If checksum is needed at all, try to do it while copying the * data. If the data is truncated, or if we only want a partial * coverage checksum (UDP-Lite), do it before the copy. */ if (copied < ulen || peeking || (is_udplite && UDP_SKB_CB(skb)->partial_cov)) { checksum_valid = udp_skb_csum_unnecessary(skb) || !__udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; } if (checksum_valid || udp_skb_csum_unnecessary(skb)) { if (udp_skb_is_linear(skb)) err = copy_linear_skb(skb, copied, off, &msg->msg_iter); else err = skb_copy_datagram_msg(skb, off, msg, copied); } else { err = skb_copy_and_csum_datagram_msg(skb, off, msg); if (err == -EINVAL) goto csum_copy_err; } if (unlikely(err)) { if (!peeking) { atomic_inc(&sk->sk_drops); SNMP_INC_STATS(mib, UDP_MIB_INERRORS); } kfree_skb(skb); return err; } if (!peeking) SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS); sock_recv_cmsgs(msg, sk, skb); /* Copy the address. */ if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); sin6->sin6_family = AF_INET6; sin6->sin6_port = udp_hdr(skb)->source; sin6->sin6_flowinfo = 0; if (is_udp4) { ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin6->sin6_addr); sin6->sin6_scope_id = 0; } else { sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, inet6_iif(skb)); } *addr_len = sizeof(*sin6); BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, (struct sockaddr *)sin6, addr_len); } if (udp_test_bit(GRO_ENABLED, sk)) udp_cmsg_recv(msg, sk, skb); if (np->rxopt.all) ip6_datagram_recv_common_ctl(sk, msg, skb); if (is_udp4) { if (inet_cmsg_flags(inet)) ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); } else { if (np->rxopt.all) ip6_datagram_recv_specific_ctl(sk, msg, skb); } err = copied; if (flags & MSG_TRUNC) err = ulen; skb_consume_udp(sk, skb, peeking ? -err : err); return err; csum_copy_err: if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags, udp_skb_destructor)) { SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS); SNMP_INC_STATS(mib, UDP_MIB_INERRORS); } kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); /* starting over for a new packet, but check if we need to yield */ cond_resched(); msg->msg_flags &= ~MSG_TRUNC; goto try_again; } DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void) { static_branch_inc(&udpv6_encap_needed_key); } EXPORT_SYMBOL(udpv6_encap_enable); /* Handler for tunnels with arbitrary destination ports: no socket lookup, go * through error handlers in encapsulations looking for a match. */ static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { int i; for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) { int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); const struct ip6_tnl_encap_ops *encap; encap = rcu_dereference(ip6tun_encaps[i]); if (!encap) continue; handler = encap->err_handler; if (handler && !handler(skb, opt, type, code, offset, info)) return 0; } return -ENOENT; } /* Try to match ICMP errors to UDP tunnels by looking up a socket without * reversing source and destination port: this will match tunnels that force the * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that * lwtunnels might actually break this assumption by being configured with * different destination ports on endpoints, in this case we won't be able to * trace ICMP messages back to them. * * If this doesn't match any socket, probe tunnels with arbitrary destination * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port * we've sent packets to won't necessarily match the local destination port. * * Then ask the tunnel implementation to match the error against a valid * association. * * Return an error if we can't find a match, the socket if we need further * processing, zero otherwise. */ static struct sock *__udp6_lib_err_encap(struct net *net, const struct ipv6hdr *hdr, int offset, struct udphdr *uh, struct udp_table *udptable, struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, __be32 info) { int (*lookup)(struct sock *sk, struct sk_buff *skb); int network_offset, transport_offset; struct udp_sock *up; network_offset = skb_network_offset(skb); transport_offset = skb_transport_offset(skb); /* Network header needs to point to the outer IPv6 header inside ICMP */ skb_reset_network_header(skb); /* Transport header needs to point to the UDP header */ skb_set_transport_header(skb, offset); if (sk) { up = udp_sk(sk); lookup = READ_ONCE(up->encap_err_lookup); if (lookup && lookup(sk, skb)) sk = NULL; goto out; } sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, &hdr->saddr, uh->dest, inet6_iif(skb), 0, udptable, skb); if (sk) { up = udp_sk(sk); lookup = READ_ONCE(up->encap_err_lookup); if (!lookup || lookup(sk, skb)) sk = NULL; } out: if (!sk) { sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code, offset, info)); } skb_set_transport_header(skb, transport_offset); skb_set_network_header(skb, network_offset); return sk; } int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info, struct udp_table *udptable) { struct ipv6_pinfo *np; const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct in6_addr *saddr = &hdr->saddr; const struct in6_addr *daddr = seg6_get_daddr(skb, opt) ? : &hdr->daddr; struct udphdr *uh = (struct udphdr *)(skb->data+offset); bool tunnel = false; struct sock *sk; int harderr; int err; struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, NULL); if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) { /* No socket for error: try tunnels before discarding */ if (static_branch_unlikely(&udpv6_encap_needed_key)) { sk = __udp6_lib_err_encap(net, hdr, offset, uh, udptable, sk, skb, opt, type, code, info); if (!sk) return 0; } else sk = ERR_PTR(-ENOENT); if (IS_ERR(sk)) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return PTR_ERR(sk); } tunnel = true; } harderr = icmpv6_err_convert(type, code, &err); np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { if (!ip6_sk_accept_pmtu(sk)) goto out; ip6_sk_update_pmtu(skb, sk, info); if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT) harderr = 1; } if (type == NDISC_REDIRECT) { if (tunnel) { ip6_redirect(skb, sock_net(sk), inet6_iif(skb), READ_ONCE(sk->sk_mark), sk->sk_uid); } else { ip6_sk_redirect(skb, sk); } goto out; } /* Tunnels don't have an application socket: don't pass errors back */ if (tunnel) { if (udp_sk(sk)->encap_err_rcv) udp_sk(sk)->encap_err_rcv(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); goto out; } if (!inet6_test_bit(RECVERR6, sk)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); } sk->sk_err = err; sk_error_report(sk); out: return 0; } static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; if (!ipv6_addr_any(&sk->sk_v6_daddr)) { sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); sk_incoming_cpu_update(sk); } else { sk_mark_napi_id_once(sk, skb); } rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); enum skb_drop_reason drop_reason; /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) { UDP6_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; } else { UDP6_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS, is_udplite); drop_reason = SKB_DROP_REASON_PROTO_MEM; } UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); trace_udp_fail_queue_rcv_skb(rc, sk, skb); sk_skb_reason_drop(sk, skb, drop_reason); return -1; } return 0; } static __inline__ int udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { return __udp6_lib_err(skb, opt, type, code, offset, info, dev_net(skb->dev)->ipv4.udp_table); } static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; } nf_reset_ct(skb); if (static_branch_unlikely(&udpv6_encap_needed_key) && READ_ONCE(up->encap_type)) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); /* * This is an encapsulation socket so pass the skb to * the socket's udp_encap_rcv() hook. Otherwise, just * fall through and pass this up the UDP socket. * up->encap_rcv() returns the following value: * =0 if skb was successfully passed to the encap * handler or was discarded by it. * >0 if skb should be passed on to UDP. * <0 if skb should be resubmitted as proto -N */ /* if we're overly short, let UDP handle it */ encap_rcv = READ_ONCE(up->encap_rcv); if (encap_rcv) { int ret; /* Verify checksum before giving to encap */ if (udp_lib_checksum_complete(skb)) goto csum_error; ret = encap_rcv(sk, skb); if (ret <= 0) { __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); return -ret; } } /* FALLTHROUGH -- it's a UDP Packet */ } /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). */ if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) { u16 pcrlen = READ_ONCE(up->pcrlen); if (pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } if (UDP_SKB_CB(skb)->cscov < pcrlen) { net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n", UDP_SKB_CB(skb)->cscov, pcrlen); goto drop; } } prefetch(&sk->sk_rmem_alloc); if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) goto csum_error; if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) { drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto drop; } udp_csum_pull_header(skb); skb_dst_drop(skb); return __udpv6_queue_rcv_skb(sk, skb); csum_error: drop_reason = SKB_DROP_REASON_UDP_CSUM; __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); sk_skb_reason_drop(sk, skb, drop_reason); return -1; } static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct sk_buff *next, *segs; int ret; if (likely(!udp_unexpected_gso(sk, skb))) return udpv6_queue_rcv_one_skb(sk, skb); __skb_push(skb, -skb_mac_offset(skb)); segs = udp_rcv_segment(sk, skb, false); skb_list_walk_safe(segs, skb, next) { __skb_pull(skb, skb_transport_offset(skb)); udp_post_segment_fix_csum(skb); ret = udpv6_queue_rcv_one_skb(sk, skb); if (ret > 0) ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret, true); } return 0; } static bool __udp_v6_is_mcast_sock(struct net *net, const struct sock *sk, __be16 loc_port, const struct in6_addr *loc_addr, __be16 rmt_port, const struct in6_addr *rmt_addr, int dif, int sdif, unsigned short hnum) { const struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) return false; if (udp_sk(sk)->udp_port_hash != hnum || sk->sk_family != PF_INET6 || (inet->inet_dport && inet->inet_dport != rmt_port) || (!ipv6_addr_any(&sk->sk_v6_daddr) && !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || !udp_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif) || (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))) return false; if (!inet6_mc_check(sk, loc_addr, rmt_addr)) return false; return true; } static void udp6_csum_zero_error(struct sk_buff *skb) { /* RFC 2460 section 8.1 says that we SHOULD log * this error. Well, it is reasonable. */ net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); } /* * Note: called only from the BH handler context, * so we don't need to lock the hashes. */ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, struct udp_table *udptable, int proto) { struct sock *sk, *first = NULL; const struct udphdr *uh = udp_hdr(skb); unsigned short hnum = ntohs(uh->dest); struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); unsigned int offset = offsetof(typeof(*sk), sk_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); int dif = inet6_iif(skb); int sdif = inet6_sdif(skb); struct hlist_node *node; struct sk_buff *nskb; if (use_hash2) { hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) & udptable->mask; hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: hslot = &udptable->hash2[hash2].hslot; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr, uh->source, saddr, dif, sdif, hnum)) continue; /* If zero checksum and no_check is not on for * the socket then skip it. */ if (!uh->check && !udp_get_no_check6_rx(sk)) continue; if (!first) { first = sk; continue; } nskb = skb_clone(skb, GFP_ATOMIC); if (unlikely(!nskb)) { atomic_inc(&sk->sk_drops); __UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); __UDP6_INC_STATS(net, UDP_MIB_INERRORS, IS_UDPLITE(sk)); continue; } if (udpv6_queue_rcv_skb(sk, nskb) > 0) consume_skb(nskb); } /* Also lookup *:port if we are using hash2 and haven't done so yet. */ if (use_hash2 && hash2 != hash2_any) { hash2 = hash2_any; goto start_lookup; } if (first) { if (udpv6_queue_rcv_skb(first, skb) > 0) consume_skb(skb); } else { kfree_skb(skb); __UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI, proto == IPPROTO_UDPLITE); } return 0; } static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { if (udp_sk_rx_dst_set(sk, dst)) sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); } /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and * return code conversion for ip layer consumption */ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, struct udphdr *uh) { int ret; if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, ip6_compute_pseudo); ret = udpv6_queue_rcv_skb(sk, skb); /* a return value > 0 means to resubmit the input */ if (ret > 0) return ret; return 0; } int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; const struct in6_addr *saddr, *daddr; struct net *net = dev_net(skb->dev); struct sock *sk = NULL; struct udphdr *uh; bool refcounted; u32 ulen = 0; if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto discard; saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; uh = udp_hdr(skb); ulen = ntohs(uh->len); if (ulen > skb->len) goto short_packet; if (proto == IPPROTO_UDP) { /* UDP validates ulen. */ /* Check for jumbo payload */ if (ulen == 0) ulen = skb->len; if (ulen < sizeof(*uh)) goto short_packet; if (ulen < skb->len) { if (pskb_trim_rcsum(skb, ulen)) goto short_packet; saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; uh = udp_hdr(skb); } } if (udp6_csum_init(skb, uh, proto)) goto csum_error; /* Check if the socket is already available, e.g. due to early demux */ sk = inet6_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest, &refcounted, udp6_ehashfn); if (IS_ERR(sk)) goto no_sk; if (sk) { struct dst_entry *dst = skb_dst(skb); int ret; if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp6_sk_rx_dst_set(sk, dst); if (!uh->check && !udp_get_no_check6_rx(sk)) { if (refcounted) sock_put(sk); goto report_csum_error; } ret = udp6_unicast_rcv_skb(sk, skb, uh); if (refcounted) sock_put(sk); return ret; } /* * Multicast receive code */ if (ipv6_addr_is_multicast(daddr)) return __udp6_lib_mcast_deliver(net, skb, saddr, daddr, udptable, proto); /* Unicast */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk) { if (!uh->check && !udp_get_no_check6_rx(sk)) goto report_csum_error; return udp6_unicast_rcv_skb(sk, skb, uh); } no_sk: reason = SKB_DROP_REASON_NO_SOCKET; if (!uh->check) goto report_csum_error; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; nf_reset_ct(skb); if (udp_lib_checksum_complete(skb)) goto csum_error; __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); sk_skb_reason_drop(sk, skb, reason); return 0; short_packet: if (reason == SKB_DROP_REASON_NOT_SPECIFIED) reason = SKB_DROP_REASON_PKT_TOO_SMALL; net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", proto == IPPROTO_UDPLITE ? "-Lite" : "", saddr, ntohs(uh->source), ulen, skb->len, daddr, ntohs(uh->dest)); goto discard; report_csum_error: udp6_csum_zero_error(skb); csum_error: if (reason == SKB_DROP_REASON_NOT_SPECIFIED) reason = SKB_DROP_REASON_UDP_CSUM; __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); discard: __UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); sk_skb_reason_drop(sk, skb, reason); return 0; } static struct sock *__udp6_lib_demux_lookup(struct net *net, __be16 loc_port, const struct in6_addr *loc_addr, __be16 rmt_port, const struct in6_addr *rmt_addr, int dif, int sdif) { struct udp_table *udptable = net->ipv4.udp_table; unsigned short hnum = ntohs(loc_port); struct udp_hslot *hslot2; unsigned int hash2; __portpair ports; struct sock *sk; hash2 = ipv6_portaddr_hash(net, loc_addr, hnum); hslot2 = udp_hashslot2(udptable, hash2); ports = INET_COMBINED_PORTS(rmt_port, hnum); udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { if (sk->sk_state == TCP_ESTABLISHED && inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif)) return sk; /* Only check first socket in chain */ break; } return NULL; } void udp_v6_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct udphdr *uh; struct sock *sk; struct dst_entry *dst; int dif = skb->dev->ifindex; int sdif = inet6_sdif(skb); if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) return; uh = udp_hdr(skb); if (skb->pkt_type == PACKET_HOST) sk = __udp6_lib_demux_lookup(net, uh->dest, &ipv6_hdr(skb)->daddr, uh->source, &ipv6_hdr(skb)->saddr, dif, sdif); else return; if (!sk) return; skb->sk = sk; DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk)); skb->destructor = sock_pfree; dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, sk->sk_rx_dst_cookie); if (dst) { /* set noref for now. * any place which wants to hold dst has to call * dst_hold_safe() */ skb_dst_set_noref(skb, dst); } } INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP); } /* * Throw away all pending data and cancel the corking. Socket is locked. */ static void udp_v6_flush_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); if (up->pending == AF_INET) udp_flush_pending_frames(sk); else if (up->pending) { up->len = 0; WRITE_ONCE(up->pending, 0); ip6_flush_pending_frames(sk); } } static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { if (addr_len < offsetofend(struct sockaddr, sa_family)) return -EINVAL; /* The following checks are replicated from __ip6_datagram_connect() * and intended to prevent BPF program called below from accessing * bytes that are out of the bound specified by user in addr_len. */ if (uaddr->sa_family == AF_INET) { if (ipv6_only_sock(sk)) return -EAFNOSUPPORT; return udp_pre_connect(sk, uaddr, addr_len); } if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } static int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { int res; lock_sock(sk); res = __ip6_datagram_connect(sk, uaddr, addr_len); if (!res) udp6_hash4(sk); release_sock(sk); return res; } /** * udp6_hwcsum_outgoing - handle outgoing HW checksumming * @sk: socket we are sending on * @skb: sk_buff containing the filled-in UDP header * (checksum field must be zeroed out) * @saddr: source address * @daddr: destination address * @len: length of packet */ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, int len) { unsigned int offset; struct udphdr *uh = udp_hdr(skb); struct sk_buff *frags = skb_shinfo(skb)->frag_list; __wsum csum = 0; if (!frags) { /* Only one fragment on the socket. */ skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0); } else { /* * HW-checksum won't work as there are two or more * fragments on the socket so that all csums of sk_buffs * should be together */ offset = skb_transport_offset(skb); skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); csum = skb->csum; skb->ip_summed = CHECKSUM_NONE; do { csum = csum_add(csum, frags->csum); } while ((frags = frags->next)); uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; } } /* * Sending */ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, struct inet_cork *cork) { struct sock *sk = skb->sk; struct udphdr *uh; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; int offset = skb_transport_offset(skb); int len = skb->len - offset; int datalen = len - sizeof(*uh); /* * Create a UDP header */ uh = udp_hdr(skb); uh->source = fl6->fl6_sport; uh->dest = fl6->fl6_dport; uh->len = htons(len); uh->check = 0; if (cork->gso_size) { const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } if (udp_get_no_check6_tx(sk)) { kfree_skb(skb); return -EINVAL; } if (is_udplite || dst_xfrm(skb_dst(skb))) { kfree_skb(skb); return -EIO; } if (datalen > cork->gso_size) { skb_shinfo(skb)->gso_size = cork->gso_size; skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen, cork->gso_size); /* Don't checksum the payload, skb will get segmented */ goto csum_partial; } } if (is_udplite) csum = udplite_csum(skb); else if (udp_get_no_check6_tx(sk)) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ csum_partial: udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len); goto send; } else csum = udp_csum(skb); /* add protocol-dependent pseudo-header */ uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, len, fl6->flowi6_proto, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; send: err = ip6_send_skb(skb); if (err) { if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) { UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS, is_udplite); err = 0; } } else { UDP6_INC_STATS(sock_net(sk), UDP_MIB_OUTDATAGRAMS, is_udplite); } return err; } static int udp_v6_push_pending_frames(struct sock *sk) { struct sk_buff *skb; struct udp_sock *up = udp_sk(sk); int err = 0; if (up->pending == AF_INET) return udp_push_pending_frames(sk); skb = ip6_finish_skb(sk); if (!skb) goto out; err = udp_v6_send_skb(skb, &inet_sk(sk)->cork.fl.u.ip6, &inet_sk(sk)->cork.base); out: up->len = 0; WRITE_ONCE(up->pending, 0); return err; } int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; struct inet_cork_full cork; struct flowi6 *fl6 = &cork.fl.u.ip6; struct dst_entry *dst; struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; bool connected = false; int ulen = len; int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; int err; int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); ipcm6_init_sk(&ipc6, sk); ipc6.gso_size = READ_ONCE(up->gso_size); /* destination address check */ if (sin6) { if (addr_len < offsetof(struct sockaddr, sa_data)) return -EINVAL; switch (sin6->sin6_family) { case AF_INET6: if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; daddr = &sin6->sin6_addr; if (ipv6_addr_any(daddr) && ipv6_addr_v4mapped(&np->saddr)) ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), daddr); break; case AF_INET: goto do_udp_sendmsg; case AF_UNSPEC: msg->msg_name = sin6 = NULL; msg->msg_namelen = addr_len = 0; daddr = NULL; break; default: return -EINVAL; } } else if (!READ_ONCE(up->pending)) { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; daddr = &sk->sk_v6_daddr; } else daddr = NULL; if (daddr) { if (ipv6_addr_v4mapped(daddr)) { struct sockaddr_in sin; sin.sin_family = AF_INET; sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport; sin.sin_addr.s_addr = daddr->s6_addr32[3]; msg->msg_name = &sin; msg->msg_namelen = sizeof(sin); do_udp_sendmsg: err = ipv6_only_sock(sk) ? -ENETUNREACH : udp_sendmsg(sk, msg, len); msg->msg_name = sin6; msg->msg_namelen = addr_len; return err; } } /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). */ if (len > INT_MAX - sizeof(struct udphdr)) return -EMSGSIZE; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; if (READ_ONCE(up->pending)) { if (READ_ONCE(up->pending) == AF_INET) return udp_sendmsg(sk, msg, len); /* * There are pending frames. * The socket lock must be held while it's corked. */ lock_sock(sk); if (likely(up->pending)) { if (unlikely(up->pending != AF_INET6)) { release_sock(sk); return -EAFNOSUPPORT; } dst = NULL; goto do_append_data; } release_sock(sk); } ulen += sizeof(struct udphdr); memset(fl6, 0, sizeof(*fl6)); if (sin6) { if (sin6->sin6_port == 0) return -EINVAL; fl6->fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (inet6_test_bit(SNDFLOW, sk)) { fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } } /* * Otherwise it will be difficult to maintain * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) fl6->flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; fl6->fl6_dport = inet->inet_dport; daddr = &sk->sk_v6_daddr; fl6->flowlabel = np->flow_label; connected = true; } if (!fl6->flowi6_oif) fl6->flowi6_oif = READ_ONCE(sk->sk_bound_dev_if); if (!fl6->flowi6_oif) fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6->flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); ipc6.opt = opt; err = udp_cmsg_send(sk, msg, &ipc6.gso_size); if (err > 0) { err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6, &ipc6); connected = false; } if (err < 0) { fl6_sock_release(flowlabel); return err; } if ((fl6->flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } if (!opt) { opt = txopt_get(np); opt_to_free = opt; } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); ipc6.opt = opt; fl6->flowi6_proto = sk->sk_protocol; fl6->flowi6_mark = ipc6.sockc.mark; fl6->daddr = *daddr; if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr)) fl6->saddr = np->saddr; fl6->fl6_sport = inet->inet_sport; if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, (struct sockaddr *)sin6, &addr_len, &fl6->saddr); if (err) goto out_no_dst; if (sin6) { if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { /* BPF program rewrote IPv6-only by IPv4-mapped * IPv6. It's currently unsupported. */ err = -ENOTSUPP; goto out_no_dst; } if (sin6->sin6_port == 0) { /* BPF program set invalid port. Reject it. */ err = -EINVAL; goto out_no_dst; } fl6->fl6_dport = sin6->sin6_port; fl6->daddr = sin6->sin6_addr; } } if (ipv6_addr_any(&fl6->daddr)) fl6->daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ final_p = fl6_update_dst(fl6, opt, &final); if (final_p) connected = false; if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) { fl6->flowi6_oif = READ_ONCE(np->mcast_oif); connected = false; } else if (!fl6->flowi6_oif) fl6->flowi6_oif = READ_ONCE(np->ucast_oif); security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel); dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; goto out; } if (ipc6.hlimit < 0) ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6, dst); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: /* Lockless fast path for the non-corking case */ if (!corkreq) { struct sk_buff *skb; skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, dst_rt6_info(dst), msg->msg_flags, &cork); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) err = udp_v6_send_skb(skb, fl6, &cork.base); /* ip6_make_skb steals dst reference */ goto out_no_dst; } lock_sock(sk); if (unlikely(up->pending)) { /* The socket is already corked while preparing it. */ /* ... which is an evident application bug. --ANK */ release_sock(sk); net_dbg_ratelimited("udp cork app bug 2\n"); err = -EINVAL; goto out; } WRITE_ONCE(up->pending, AF_INET6); do_append_data: up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, fl6, dst_rt6_info(dst), corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) err = udp_v6_push_pending_frames(sk); else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) WRITE_ONCE(up->pending, 0); if (err > 0) err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0; release_sock(sk); out: dst_release(dst); out_no_dst: fl6_sock_release(flowlabel); txopt_put(opt_to_free); if (!err) return len; /* * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting * ENOBUFS might not be good (it's not tunable per se), but otherwise * we don't have a good statistic (IpOutDiscards but it can be too many * things). We could add another new stat but at least for now that * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS, is_udplite); } return err; do_confirm: if (msg->msg_flags & MSG_PROBE) dst_confirm_neigh(dst, &fl6->daddr); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; goto out; } EXPORT_SYMBOL(udpv6_sendmsg); static void udpv6_splice_eof(struct socket *sock) { struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); if (!READ_ONCE(up->pending) || udp_test_bit(CORK, sk)) return; lock_sock(sk); if (up->pending && !udp_test_bit(CORK, sk)) udp_v6_push_pending_frames(sk); release_sock(sk); } void udpv6_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); lock_sock(sk); /* protects from races with udp_abort() */ sock_set_flag(sk, SOCK_DEAD); udp_v6_flush_pending_frames(sk); release_sock(sk); if (static_branch_unlikely(&udpv6_encap_needed_key)) { if (up->encap_type) { void (*encap_destroy)(struct sock *sk); encap_destroy = READ_ONCE(up->encap_destroy); if (encap_destroy) encap_destroy(sk); } if (udp_test_bit(ENCAP_ENABLED, sk)) { static_branch_dec(&udpv6_encap_needed_key); udp_encap_disable(); udp_tunnel_cleanup_gro(sk); } } } /* * Socket option code for UDP */ int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); return ipv6_setsockopt(sk, level, optname, optval, optlen); } int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ipv6_getsockopt(sk, level, optname, optval, optlen); } /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS int udp6_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); } else { int bucket = ((struct udp_iter_state *)seq->private)->bucket; const struct inet_sock *inet = inet_sk((const struct sock *)v); __u16 srcp = ntohs(inet->inet_sport); __u16 destp = ntohs(inet->inet_dport); __ip6_dgram_sock_seq_show(seq, v, srcp, destp, udp_rqueue_get(v), bucket); } return 0; } const struct seq_operations udp6_seq_ops = { .start = udp_seq_start, .next = udp_seq_next, .stop = udp_seq_stop, .show = udp6_seq_show, }; EXPORT_SYMBOL(udp6_seq_ops); static struct udp_seq_afinfo udp6_seq_afinfo = { .family = AF_INET6, .udp_table = NULL, }; int __net_init udp6_proc_init(struct net *net) { if (!proc_create_net_data("udp6", 0444, net->proc_net, &udp6_seq_ops, sizeof(struct udp_iter_state), &udp6_seq_afinfo)) return -ENOMEM; return 0; } void udp6_proc_exit(struct net *net) { remove_proc_entry("udp6", net->proc_net); } #endif /* CONFIG_PROC_FS */ /* ------------------------------------------------------------------------ */ struct proto udpv6_prot = { .name = "UDPv6", .owner = THIS_MODULE, .close = udp_lib_close, .pre_connect = udpv6_pre_connect, .connect = udpv6_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, .init = udpv6_init_sock, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, .splice_eof = udpv6_splice_eof, .release_cb = ip6_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, .put_port = udp_lib_unhash, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = udp_bpf_update_proto, #endif .memory_allocated = &udp_memory_allocated, .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, .sysctl_mem = sysctl_udp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp6_sock), .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6), .h.udp_table = NULL, .diag_destroy = udp_abort, }; static struct inet_protosw udpv6_protosw = { .type = SOCK_DGRAM, .protocol = IPPROTO_UDP, .prot = &udpv6_prot, .ops = &inet6_dgram_ops, .flags = INET_PROTOSW_PERMANENT, }; int __init udpv6_init(void) { int ret; net_hotdata.udpv6_protocol = (struct inet6_protocol) { .handler = udpv6_rcv, .err_handler = udpv6_err, .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, }; ret = inet6_add_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP); if (ret) goto out; ret = inet6_register_protosw(&udpv6_protosw); if (ret) goto out_udpv6_protocol; out: return ret; out_udpv6_protocol: inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP); goto out; } void udpv6_exit(void) { inet6_unregister_protosw(&udpv6_protosw); inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP); }
58 57 58 58 20 1 19 7 5 6 2 52 52 52 1 1 1 1 6 6 5 1 1 1 1 6 52 52 52 5 5 5 1 1 1 1 13 13 13 13 13 13 3 3 52 3 1 2 52 52 52 4 1 52 209 209 52 52 52 52 1 52 22 31 51 51 51 38 13 6 1 6 3 3 3 3 3 2 3 2 3 2 2 2 2 1 1 1 71 71 70 75 76 75 5 5 31 50 45 45 45 61 61 67 82 83 25 65 65 65 65 5 82 71 71 3 51 71 70 70 70 2 2 2 1 1 1 1 5 5 2 3 2 76 76 273 312 52 303 1 303 303 1 73 1 2 1 303 73 1 76 294 2 1 4 5 5 1 4 2 3 2 2 2 1 1 1 1 2 2 3 3 3 3 3 3 3 3 8 8 11 2 3 4 3 3 1 1 2 15 6 4 2 6 4 5 2 3 28 26 1 2 11 15 142 54 130 49 49 49 51 52 51 7 7 4 48 48 50 50 11 11 7 7 44 213 84 4 138 41 41 6 12 2 2 1 1 2 9 1 6 2 6 6 6 1 6 47 47 7 7 4 44 38 38 13 38 38 7 5 2 70 11 11 11 11 3 3 3 3 11 10 3 7 7 14 5 11 11 4 2 2 5 1 4 4 2 2 6 3 3 29 29 126 126 38 12 12 12 125 126 38 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 // SPDX-License-Identifier: GPL-2.0-or-later /* * Bridge multicast support. * * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au> */ #include <linux/err.h> #include <linux/export.h> #include <linux/if_ether.h> #include <linux/igmp.h> #include <linux/in.h> #include <linux/jhash.h> #include <linux/kernel.h> #include <linux/log2.h> #include <linux/netdevice.h> #include <linux/netfilter_bridge.h> #include <linux/random.h> #include <linux/rculist.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/timer.h> #include <linux/inetdevice.h> #include <linux/mroute.h> #include <net/ip.h> #include <net/switchdev.h> #if IS_ENABLED(CONFIG_IPV6) #include <linux/icmpv6.h> #include <net/ipv6.h> #include <net/mld.h> #include <net/ip6_checksum.h> #include <net/addrconf.h> #endif #include <trace/events/bridge.h> #include "br_private.h" #include "br_private_mcast_eht.h" static const struct rhashtable_params br_mdb_rht_params = { .head_offset = offsetof(struct net_bridge_mdb_entry, rhnode), .key_offset = offsetof(struct net_bridge_mdb_entry, addr), .key_len = sizeof(struct br_ip), .automatic_shrinking = true, }; static const struct rhashtable_params br_sg_port_rht_params = { .head_offset = offsetof(struct net_bridge_port_group, rhnode), .key_offset = offsetof(struct net_bridge_port_group, key), .key_len = sizeof(struct net_bridge_port_group_sg_key), .automatic_shrinking = true, }; static void br_multicast_start_querier(struct net_bridge_mcast *brmctx, struct bridge_mcast_own_query *query); static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx); static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, __be32 group, __u16 vid, const unsigned char *src); static void br_multicast_port_group_rexmit(struct timer_list *t); static void br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted); static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx); #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, const struct in6_addr *group, __u16 vid, const unsigned char *src); #endif static struct net_bridge_port_group * __br_multicast_add_group(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct br_ip *group, const unsigned char *src, u8 filter_mode, bool igmpv2_mldv1, bool blocked); static void br_multicast_find_del_pg(struct net_bridge *br, struct net_bridge_port_group *pg); static void __br_multicast_stop(struct net_bridge_mcast *brmctx); static int br_mc_disabled_update(struct net_device *dev, bool value, struct netlink_ext_ack *extack); static struct net_bridge_port_group * br_sg_port_find(struct net_bridge *br, struct net_bridge_port_group_sg_key *sg_p) { lockdep_assert_held_once(&br->multicast_lock); return rhashtable_lookup_fast(&br->sg_port_tbl, sg_p, br_sg_port_rht_params); } static struct net_bridge_mdb_entry *br_mdb_ip_get_rcu(struct net_bridge *br, struct br_ip *dst) { return rhashtable_lookup(&br->mdb_hash_tbl, dst, br_mdb_rht_params); } struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge *br, struct br_ip *dst) { struct net_bridge_mdb_entry *ent; lockdep_assert_held_once(&br->multicast_lock); rcu_read_lock(); ent = rhashtable_lookup(&br->mdb_hash_tbl, dst, br_mdb_rht_params); rcu_read_unlock(); return ent; } static struct net_bridge_mdb_entry *br_mdb_ip4_get(struct net_bridge *br, __be32 dst, __u16 vid) { struct br_ip br_dst; memset(&br_dst, 0, sizeof(br_dst)); br_dst.dst.ip4 = dst; br_dst.proto = htons(ETH_P_IP); br_dst.vid = vid; return br_mdb_ip_get(br, &br_dst); } #if IS_ENABLED(CONFIG_IPV6) static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br, const struct in6_addr *dst, __u16 vid) { struct br_ip br_dst; memset(&br_dst, 0, sizeof(br_dst)); br_dst.dst.ip6 = *dst; br_dst.proto = htons(ETH_P_IPV6); br_dst.vid = vid; return br_mdb_ip_get(br, &br_dst); } #endif struct net_bridge_mdb_entry * br_mdb_entry_skb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb, u16 vid) { struct net_bridge *br = brmctx->br; struct br_ip ip; if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || br_multicast_ctx_vlan_global_disabled(brmctx)) return NULL; if (BR_INPUT_SKB_CB(skb)->igmp) return NULL; memset(&ip, 0, sizeof(ip)); ip.proto = skb->protocol; ip.vid = vid; switch (skb->protocol) { case htons(ETH_P_IP): ip.dst.ip4 = ip_hdr(skb)->daddr; if (brmctx->multicast_igmp_version == 3) { struct net_bridge_mdb_entry *mdb; ip.src.ip4 = ip_hdr(skb)->saddr; mdb = br_mdb_ip_get_rcu(br, &ip); if (mdb) return mdb; ip.src.ip4 = 0; } break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): ip.dst.ip6 = ipv6_hdr(skb)->daddr; if (brmctx->multicast_mld_version == 2) { struct net_bridge_mdb_entry *mdb; ip.src.ip6 = ipv6_hdr(skb)->saddr; mdb = br_mdb_ip_get_rcu(br, &ip); if (mdb) return mdb; memset(&ip.src.ip6, 0, sizeof(ip.src.ip6)); } break; #endif default: ip.proto = 0; ether_addr_copy(ip.dst.mac_addr, eth_hdr(skb)->h_dest); } return br_mdb_ip_get_rcu(br, &ip); } /* IMPORTANT: this function must be used only when the contexts cannot be * passed down (e.g. timer) and must be used for read-only purposes because * the vlan snooping option can change, so it can return any context * (non-vlan or vlan). Its initial intended purpose is to read timer values * from the *current* context based on the option. At worst that could lead * to inconsistent timers when the contexts are changed, i.e. src timer * which needs to re-arm with a specific delay taken from the old context */ static struct net_bridge_mcast_port * br_multicast_pg_to_port_ctx(const struct net_bridge_port_group *pg) { struct net_bridge_mcast_port *pmctx = &pg->key.port->multicast_ctx; struct net_bridge_vlan *vlan; lockdep_assert_held_once(&pg->key.port->br->multicast_lock); /* if vlan snooping is disabled use the port's multicast context */ if (!pg->key.addr.vid || !br_opt_get(pg->key.port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) goto out; /* locking is tricky here, due to different rules for multicast and * vlans we need to take rcu to find the vlan and make sure it has * the BR_VLFLAG_MCAST_ENABLED flag set, it can only change under * multicast_lock which must be already held here, so the vlan's pmctx * can safely be used on return */ rcu_read_lock(); vlan = br_vlan_find(nbp_vlan_group_rcu(pg->key.port), pg->key.addr.vid); if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx)) pmctx = &vlan->port_mcast_ctx; else pmctx = NULL; rcu_read_unlock(); out: return pmctx; } static struct net_bridge_mcast_port * br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid) { struct net_bridge_mcast_port *pmctx = NULL; struct net_bridge_vlan *vlan; lockdep_assert_held_once(&port->br->multicast_lock); if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) return NULL; /* Take RCU to access the vlan. */ rcu_read_lock(); vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid); if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx)) pmctx = &vlan->port_mcast_ctx; rcu_read_unlock(); return pmctx; } /* when snooping we need to check if the contexts should be used * in the following order: * - if pmctx is non-NULL (port), check if it should be used * - if pmctx is NULL (bridge), check if brmctx should be used */ static bool br_multicast_ctx_should_use(const struct net_bridge_mcast *brmctx, const struct net_bridge_mcast_port *pmctx) { if (!netif_running(brmctx->br->dev)) return false; if (pmctx) return !br_multicast_port_ctx_state_disabled(pmctx); else return !br_multicast_ctx_vlan_disabled(brmctx); } static bool br_port_group_equal(struct net_bridge_port_group *p, struct net_bridge_port *port, const unsigned char *src) { if (p->key.port != port) return false; if (!(port->flags & BR_MULTICAST_TO_UNICAST)) return true; return ether_addr_equal(src, p->eth_addr); } static void __fwd_add_star_excl(struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, struct br_ip *sg_ip) { struct net_bridge_port_group_sg_key sg_key; struct net_bridge_port_group *src_pg; struct net_bridge_mcast *brmctx; memset(&sg_key, 0, sizeof(sg_key)); brmctx = br_multicast_port_ctx_get_global(pmctx); sg_key.port = pg->key.port; sg_key.addr = *sg_ip; if (br_sg_port_find(brmctx->br, &sg_key)) return; src_pg = __br_multicast_add_group(brmctx, pmctx, sg_ip, pg->eth_addr, MCAST_INCLUDE, false, false); if (IS_ERR_OR_NULL(src_pg) || src_pg->rt_protocol != RTPROT_KERNEL) return; src_pg->flags |= MDB_PG_FLAGS_STAR_EXCL; } static void __fwd_del_star_excl(struct net_bridge_port_group *pg, struct br_ip *sg_ip) { struct net_bridge_port_group_sg_key sg_key; struct net_bridge *br = pg->key.port->br; struct net_bridge_port_group *src_pg; memset(&sg_key, 0, sizeof(sg_key)); sg_key.port = pg->key.port; sg_key.addr = *sg_ip; src_pg = br_sg_port_find(br, &sg_key); if (!src_pg || !(src_pg->flags & MDB_PG_FLAGS_STAR_EXCL) || src_pg->rt_protocol != RTPROT_KERNEL) return; br_multicast_find_del_pg(br, src_pg); } /* When a port group transitions to (or is added as) EXCLUDE we need to add it * to all other ports' S,G entries which are not blocked by the current group * for proper replication, the assumption is that any S,G blocked entries * are already added so the S,G,port lookup should skip them. * When a port group transitions from EXCLUDE -> INCLUDE mode or is being * deleted we need to remove it from all ports' S,G entries where it was * automatically installed before (i.e. where it's MDB_PG_FLAGS_STAR_EXCL). */ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, u8 filter_mode) { struct net_bridge *br = pg->key.port->br; struct net_bridge_port_group *pg_lst; struct net_bridge_mcast_port *pmctx; struct net_bridge_mdb_entry *mp; struct br_ip sg_ip; if (WARN_ON(!br_multicast_is_star_g(&pg->key.addr))) return; mp = br_mdb_ip_get(br, &pg->key.addr); if (!mp) return; pmctx = br_multicast_pg_to_port_ctx(pg); if (!pmctx) return; memset(&sg_ip, 0, sizeof(sg_ip)); sg_ip = pg->key.addr; for (pg_lst = mlock_dereference(mp->ports, br); pg_lst; pg_lst = mlock_dereference(pg_lst->next, br)) { struct net_bridge_group_src *src_ent; if (pg_lst == pg) continue; hlist_for_each_entry(src_ent, &pg_lst->src_list, node) { if (!(src_ent->flags & BR_SGRP_F_INSTALLED)) continue; sg_ip.src = src_ent->addr.src; switch (filter_mode) { case MCAST_INCLUDE: __fwd_del_star_excl(pg, &sg_ip); break; case MCAST_EXCLUDE: __fwd_add_star_excl(pmctx, pg, &sg_ip); break; } } } } /* called when adding a new S,G with host_joined == false by default */ static void br_multicast_sg_host_state(struct net_bridge_mdb_entry *star_mp, struct net_bridge_port_group *sg) { struct net_bridge_mdb_entry *sg_mp; if (WARN_ON(!br_multicast_is_star_g(&star_mp->addr))) return; if (!star_mp->host_joined) return; sg_mp = br_mdb_ip_get(star_mp->br, &sg->key.addr); if (!sg_mp) return; sg_mp->host_joined = true; } /* set the host_joined state of all of *,G's S,G entries */ static void br_multicast_star_g_host_state(struct net_bridge_mdb_entry *star_mp) { struct net_bridge *br = star_mp->br; struct net_bridge_mdb_entry *sg_mp; struct net_bridge_port_group *pg; struct br_ip sg_ip; if (WARN_ON(!br_multicast_is_star_g(&star_mp->addr))) return; memset(&sg_ip, 0, sizeof(sg_ip)); sg_ip = star_mp->addr; for (pg = mlock_dereference(star_mp->ports, br); pg; pg = mlock_dereference(pg->next, br)) { struct net_bridge_group_src *src_ent; hlist_for_each_entry(src_ent, &pg->src_list, node) { if (!(src_ent->flags & BR_SGRP_F_INSTALLED)) continue; sg_ip.src = src_ent->addr.src; sg_mp = br_mdb_ip_get(br, &sg_ip); if (!sg_mp) continue; sg_mp->host_joined = star_mp->host_joined; } } } static void br_multicast_sg_del_exclude_ports(struct net_bridge_mdb_entry *sgmp) { struct net_bridge_port_group __rcu **pp; struct net_bridge_port_group *p; /* *,G exclude ports are only added to S,G entries */ if (WARN_ON(br_multicast_is_star_g(&sgmp->addr))) return; /* we need the STAR_EXCLUDE ports if there are non-STAR_EXCLUDE ports * we should ignore perm entries since they're managed by user-space */ for (pp = &sgmp->ports; (p = mlock_dereference(*pp, sgmp->br)) != NULL; pp = &p->next) if (!(p->flags & (MDB_PG_FLAGS_STAR_EXCL | MDB_PG_FLAGS_PERMANENT))) return; /* currently the host can only have joined the *,G which means * we treat it as EXCLUDE {}, so for an S,G it's considered a * STAR_EXCLUDE entry and we can safely leave it */ sgmp->host_joined = false; for (pp = &sgmp->ports; (p = mlock_dereference(*pp, sgmp->br)) != NULL;) { if (!(p->flags & MDB_PG_FLAGS_PERMANENT)) br_multicast_del_pg(sgmp, p, pp); else pp = &p->next; } } void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, struct net_bridge_port_group *sg) { struct net_bridge_port_group_sg_key sg_key; struct net_bridge *br = star_mp->br; struct net_bridge_mcast_port *pmctx; struct net_bridge_port_group *pg; struct net_bridge_mcast *brmctx; if (WARN_ON(br_multicast_is_star_g(&sg->key.addr))) return; if (WARN_ON(!br_multicast_is_star_g(&star_mp->addr))) return; br_multicast_sg_host_state(star_mp, sg); memset(&sg_key, 0, sizeof(sg_key)); sg_key.addr = sg->key.addr; /* we need to add all exclude ports to the S,G */ for (pg = mlock_dereference(star_mp->ports, br); pg; pg = mlock_dereference(pg->next, br)) { struct net_bridge_port_group *src_pg; if (pg == sg || pg->filter_mode == MCAST_INCLUDE) continue; sg_key.port = pg->key.port; if (br_sg_port_find(br, &sg_key)) continue; pmctx = br_multicast_pg_to_port_ctx(pg); if (!pmctx) continue; brmctx = br_multicast_port_ctx_get_global(pmctx); src_pg = __br_multicast_add_group(brmctx, pmctx, &sg->key.addr, sg->eth_addr, MCAST_INCLUDE, false, false); if (IS_ERR_OR_NULL(src_pg) || src_pg->rt_protocol != RTPROT_KERNEL) continue; src_pg->flags |= MDB_PG_FLAGS_STAR_EXCL; } } static void br_multicast_fwd_src_add(struct net_bridge_group_src *src) { struct net_bridge_mdb_entry *star_mp; struct net_bridge_mcast_port *pmctx; struct net_bridge_port_group *sg; struct net_bridge_mcast *brmctx; struct br_ip sg_ip; if (src->flags & BR_SGRP_F_INSTALLED) return; memset(&sg_ip, 0, sizeof(sg_ip)); pmctx = br_multicast_pg_to_port_ctx(src->pg); if (!pmctx) return; brmctx = br_multicast_port_ctx_get_global(pmctx); sg_ip = src->pg->key.addr; sg_ip.src = src->addr.src; sg = __br_multicast_add_group(brmctx, pmctx, &sg_ip, src->pg->eth_addr, MCAST_INCLUDE, false, !timer_pending(&src->timer)); if (IS_ERR_OR_NULL(sg)) return; src->flags |= BR_SGRP_F_INSTALLED; sg->flags &= ~MDB_PG_FLAGS_STAR_EXCL; /* if it was added by user-space as perm we can skip next steps */ if (sg->rt_protocol != RTPROT_KERNEL && (sg->flags & MDB_PG_FLAGS_PERMANENT)) return; /* the kernel is now responsible for removing this S,G */ del_timer(&sg->timer); star_mp = br_mdb_ip_get(src->br, &src->pg->key.addr); if (!star_mp) return; br_multicast_sg_add_exclude_ports(star_mp, sg); } static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src, bool fastleave) { struct net_bridge_port_group *p, *pg = src->pg; struct net_bridge_port_group __rcu **pp; struct net_bridge_mdb_entry *mp; struct br_ip sg_ip; memset(&sg_ip, 0, sizeof(sg_ip)); sg_ip = pg->key.addr; sg_ip.src = src->addr.src; mp = br_mdb_ip_get(src->br, &sg_ip); if (!mp) return; for (pp = &mp->ports; (p = mlock_dereference(*pp, src->br)) != NULL; pp = &p->next) { if (!br_port_group_equal(p, pg->key.port, pg->eth_addr)) continue; if (p->rt_protocol != RTPROT_KERNEL && (p->flags & MDB_PG_FLAGS_PERMANENT) && !(src->flags & BR_SGRP_F_USER_ADDED)) break; if (fastleave) p->flags |= MDB_PG_FLAGS_FAST_LEAVE; br_multicast_del_pg(mp, p, pp); break; } src->flags &= ~BR_SGRP_F_INSTALLED; } /* install S,G and based on src's timer enable or disable forwarding */ static void br_multicast_fwd_src_handle(struct net_bridge_group_src *src) { struct net_bridge_port_group_sg_key sg_key; struct net_bridge_port_group *sg; u8 old_flags; br_multicast_fwd_src_add(src); memset(&sg_key, 0, sizeof(sg_key)); sg_key.addr = src->pg->key.addr; sg_key.addr.src = src->addr.src; sg_key.port = src->pg->key.port; sg = br_sg_port_find(src->br, &sg_key); if (!sg || (sg->flags & MDB_PG_FLAGS_PERMANENT)) return; old_flags = sg->flags; if (timer_pending(&src->timer)) sg->flags &= ~MDB_PG_FLAGS_BLOCKED; else sg->flags |= MDB_PG_FLAGS_BLOCKED; if (old_flags != sg->flags) { struct net_bridge_mdb_entry *sg_mp; sg_mp = br_mdb_ip_get(src->br, &sg_key.addr); if (!sg_mp) return; br_mdb_notify(src->br->dev, sg_mp, sg, RTM_NEWMDB); } } static void br_multicast_destroy_mdb_entry(struct net_bridge_mcast_gc *gc) { struct net_bridge_mdb_entry *mp; mp = container_of(gc, struct net_bridge_mdb_entry, mcast_gc); WARN_ON(!hlist_unhashed(&mp->mdb_node)); WARN_ON(mp->ports); timer_shutdown_sync(&mp->timer); kfree_rcu(mp, rcu); } static void br_multicast_del_mdb_entry(struct net_bridge_mdb_entry *mp) { struct net_bridge *br = mp->br; rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode, br_mdb_rht_params); hlist_del_init_rcu(&mp->mdb_node); hlist_add_head(&mp->mcast_gc.gc_node, &br->mcast_gc_list); queue_work(system_long_wq, &br->mcast_gc_work); } static void br_multicast_group_expired(struct timer_list *t) { struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer); struct net_bridge *br = mp->br; spin_lock(&br->multicast_lock); if (hlist_unhashed(&mp->mdb_node) || !netif_running(br->dev) || timer_pending(&mp->timer)) goto out; br_multicast_host_leave(mp, true); if (mp->ports) goto out; br_multicast_del_mdb_entry(mp); out: spin_unlock(&br->multicast_lock); } static void br_multicast_destroy_group_src(struct net_bridge_mcast_gc *gc) { struct net_bridge_group_src *src; src = container_of(gc, struct net_bridge_group_src, mcast_gc); WARN_ON(!hlist_unhashed(&src->node)); timer_shutdown_sync(&src->timer); kfree_rcu(src, rcu); } void __br_multicast_del_group_src(struct net_bridge_group_src *src) { struct net_bridge *br = src->pg->key.port->br; hlist_del_init_rcu(&src->node); src->pg->src_ents--; hlist_add_head(&src->mcast_gc.gc_node, &br->mcast_gc_list); queue_work(system_long_wq, &br->mcast_gc_work); } void br_multicast_del_group_src(struct net_bridge_group_src *src, bool fastleave) { br_multicast_fwd_src_remove(src, fastleave); __br_multicast_del_group_src(src); } static int br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx, struct netlink_ext_ack *extack, const char *what) { u32 max = READ_ONCE(pmctx->mdb_max_entries); u32 n = READ_ONCE(pmctx->mdb_n_entries); if (max && n >= max) { NL_SET_ERR_MSG_FMT_MOD(extack, "%s is already in %u groups, and mcast_max_groups=%u", what, n, max); return -E2BIG; } WRITE_ONCE(pmctx->mdb_n_entries, n + 1); return 0; } static void br_multicast_port_ngroups_dec_one(struct net_bridge_mcast_port *pmctx) { u32 n = READ_ONCE(pmctx->mdb_n_entries); WARN_ON_ONCE(n == 0); WRITE_ONCE(pmctx->mdb_n_entries, n - 1); } static int br_multicast_port_ngroups_inc(struct net_bridge_port *port, const struct br_ip *group, struct netlink_ext_ack *extack) { struct net_bridge_mcast_port *pmctx; int err; lockdep_assert_held_once(&port->br->multicast_lock); /* Always count on the port context. */ err = br_multicast_port_ngroups_inc_one(&port->multicast_ctx, extack, "Port"); if (err) { trace_br_mdb_full(port->dev, group); return err; } /* Only count on the VLAN context if VID is given, and if snooping on * that VLAN is enabled. */ if (!group->vid) return 0; pmctx = br_multicast_port_vid_to_port_ctx(port, group->vid); if (!pmctx) return 0; err = br_multicast_port_ngroups_inc_one(pmctx, extack, "Port-VLAN"); if (err) { trace_br_mdb_full(port->dev, group); goto dec_one_out; } return 0; dec_one_out: br_multicast_port_ngroups_dec_one(&port->multicast_ctx); return err; } static void br_multicast_port_ngroups_dec(struct net_bridge_port *port, u16 vid) { struct net_bridge_mcast_port *pmctx; lockdep_assert_held_once(&port->br->multicast_lock); if (vid) { pmctx = br_multicast_port_vid_to_port_ctx(port, vid); if (pmctx) br_multicast_port_ngroups_dec_one(pmctx); } br_multicast_port_ngroups_dec_one(&port->multicast_ctx); } u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx) { return READ_ONCE(pmctx->mdb_n_entries); } void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max) { WRITE_ONCE(pmctx->mdb_max_entries, max); } u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx) { return READ_ONCE(pmctx->mdb_max_entries); } static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc) { struct net_bridge_port_group *pg; pg = container_of(gc, struct net_bridge_port_group, mcast_gc); WARN_ON(!hlist_unhashed(&pg->mglist)); WARN_ON(!hlist_empty(&pg->src_list)); timer_shutdown_sync(&pg->rexmit_timer); timer_shutdown_sync(&pg->timer); kfree_rcu(pg, rcu); } void br_multicast_del_pg(struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, struct net_bridge_port_group __rcu **pp) { struct net_bridge *br = pg->key.port->br; struct net_bridge_group_src *ent; struct hlist_node *tmp; rcu_assign_pointer(*pp, pg->next); hlist_del_init(&pg->mglist); br_multicast_eht_clean_sets(pg); hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) br_multicast_del_group_src(ent, false); br_mdb_notify(br->dev, mp, pg, RTM_DELMDB); if (!br_multicast_is_star_g(&mp->addr)) { rhashtable_remove_fast(&br->sg_port_tbl, &pg->rhnode, br_sg_port_rht_params); br_multicast_sg_del_exclude_ports(mp); } else { br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE); } br_multicast_port_ngroups_dec(pg->key.port, pg->key.addr.vid); hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list); queue_work(system_long_wq, &br->mcast_gc_work); if (!mp->ports && !mp->host_joined && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); } static void br_multicast_find_del_pg(struct net_bridge *br, struct net_bridge_port_group *pg) { struct net_bridge_port_group __rcu **pp; struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; mp = br_mdb_ip_get(br, &pg->key.addr); if (WARN_ON(!mp)) return; for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p != pg) continue; br_multicast_del_pg(mp, pg, pp); return; } WARN_ON(1); } static void br_multicast_port_group_expired(struct timer_list *t) { struct net_bridge_port_group *pg = from_timer(pg, t, timer); struct net_bridge_group_src *src_ent; struct net_bridge *br = pg->key.port->br; struct hlist_node *tmp; bool changed; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || timer_pending(&pg->timer) || hlist_unhashed(&pg->mglist) || pg->flags & MDB_PG_FLAGS_PERMANENT) goto out; changed = !!(pg->filter_mode == MCAST_EXCLUDE); pg->filter_mode = MCAST_INCLUDE; hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) { if (!timer_pending(&src_ent->timer)) { br_multicast_del_group_src(src_ent, false); changed = true; } } if (hlist_empty(&pg->src_list)) { br_multicast_find_del_pg(br, pg); } else if (changed) { struct net_bridge_mdb_entry *mp = br_mdb_ip_get(br, &pg->key.addr); if (changed && br_multicast_is_star_g(&pg->key.addr)) br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE); if (WARN_ON(!mp)) goto out; br_mdb_notify(br->dev, mp, pg, RTM_NEWMDB); } out: spin_unlock(&br->multicast_lock); } static void br_multicast_gc(struct hlist_head *head) { struct net_bridge_mcast_gc *gcent; struct hlist_node *tmp; hlist_for_each_entry_safe(gcent, tmp, head, gc_node) { hlist_del_init(&gcent->gc_node); gcent->destroy(gcent); } } static void __br_multicast_query_handle_vlan(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct sk_buff *skb) { struct net_bridge_vlan *vlan = NULL; if (pmctx && br_multicast_port_ctx_is_vlan(pmctx)) vlan = pmctx->vlan; else if (br_multicast_ctx_is_vlan(brmctx)) vlan = brmctx->vlan; if (vlan && !(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED)) { u16 vlan_proto; if (br_vlan_get_proto(brmctx->br->dev, &vlan_proto) != 0) return; __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vlan->vid); } } static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, __be32 ip_dst, __be32 group, bool with_srcs, bool over_lmqt, u8 sflag, u8 *igmp_type, bool *need_rexmit) { struct net_bridge_port *p = pg ? pg->key.port : NULL; struct net_bridge_group_src *ent; size_t pkt_size, igmp_hdr_size; unsigned long now = jiffies; struct igmpv3_query *ihv3; void *csum_start = NULL; __sum16 *csum = NULL; struct sk_buff *skb; struct igmphdr *ih; struct ethhdr *eth; unsigned long lmqt; struct iphdr *iph; u16 lmqt_srcs = 0; igmp_hdr_size = sizeof(*ih); if (brmctx->multicast_igmp_version == 3) { igmp_hdr_size = sizeof(*ihv3); if (pg && with_srcs) { lmqt = now + (brmctx->multicast_last_member_interval * brmctx->multicast_last_member_count); hlist_for_each_entry(ent, &pg->src_list, node) { if (over_lmqt == time_after(ent->timer.expires, lmqt) && ent->src_query_rexmit_cnt > 0) lmqt_srcs++; } if (!lmqt_srcs) return NULL; igmp_hdr_size += lmqt_srcs * sizeof(__be32); } } pkt_size = sizeof(*eth) + sizeof(*iph) + 4 + igmp_hdr_size; if ((p && pkt_size > p->dev->mtu) || pkt_size > brmctx->br->dev->mtu) return NULL; skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size); if (!skb) goto out; __br_multicast_query_handle_vlan(brmctx, pmctx, skb); skb->protocol = htons(ETH_P_IP); skb_reset_mac_header(skb); eth = eth_hdr(skb); ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr); ip_eth_mc_map(ip_dst, eth->h_dest); eth->h_proto = htons(ETH_P_IP); skb_put(skb, sizeof(*eth)); skb_set_network_header(skb, skb->len); iph = ip_hdr(skb); iph->tot_len = htons(pkt_size - sizeof(*eth)); iph->version = 4; iph->ihl = 6; iph->tos = 0xc0; iph->id = 0; iph->frag_off = htons(IP_DF); iph->ttl = 1; iph->protocol = IPPROTO_IGMP; iph->saddr = br_opt_get(brmctx->br, BROPT_MULTICAST_QUERY_USE_IFADDR) ? inet_select_addr(brmctx->br->dev, 0, RT_SCOPE_LINK) : 0; iph->daddr = ip_dst; ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; ((u8 *)&iph[1])[3] = 0; ip_send_check(iph); skb_put(skb, 24); skb_set_transport_header(skb, skb->len); *igmp_type = IGMP_HOST_MEMBERSHIP_QUERY; switch (brmctx->multicast_igmp_version) { case 2: ih = igmp_hdr(skb); ih->type = IGMP_HOST_MEMBERSHIP_QUERY; ih->code = (group ? brmctx->multicast_last_member_interval : brmctx->multicast_query_response_interval) / (HZ / IGMP_TIMER_SCALE); ih->group = group; ih->csum = 0; csum = &ih->csum; csum_start = (void *)ih; break; case 3: ihv3 = igmpv3_query_hdr(skb); ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY; ihv3->code = (group ? brmctx->multicast_last_member_interval : brmctx->multicast_query_response_interval) / (HZ / IGMP_TIMER_SCALE); ihv3->group = group; ihv3->qqic = brmctx->multicast_query_interval / HZ; ihv3->nsrcs = htons(lmqt_srcs); ihv3->resv = 0; ihv3->suppress = sflag; ihv3->qrv = 2; ihv3->csum = 0; csum = &ihv3->csum; csum_start = (void *)ihv3; if (!pg || !with_srcs) break; lmqt_srcs = 0; hlist_for_each_entry(ent, &pg->src_list, node) { if (over_lmqt == time_after(ent->timer.expires, lmqt) && ent->src_query_rexmit_cnt > 0) { ihv3->srcs[lmqt_srcs++] = ent->addr.src.ip4; ent->src_query_rexmit_cnt--; if (need_rexmit && ent->src_query_rexmit_cnt) *need_rexmit = true; } } if (WARN_ON(lmqt_srcs != ntohs(ihv3->nsrcs))) { kfree_skb(skb); return NULL; } break; } if (WARN_ON(!csum || !csum_start)) { kfree_skb(skb); return NULL; } *csum = ip_compute_csum(csum_start, igmp_hdr_size); skb_put(skb, igmp_hdr_size); __skb_pull(skb, sizeof(*eth)); out: return skb; } #if IS_ENABLED(CONFIG_IPV6) static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, const struct in6_addr *ip6_dst, const struct in6_addr *group, bool with_srcs, bool over_llqt, u8 sflag, u8 *igmp_type, bool *need_rexmit) { struct net_bridge_port *p = pg ? pg->key.port : NULL; struct net_bridge_group_src *ent; size_t pkt_size, mld_hdr_size; unsigned long now = jiffies; struct mld2_query *mld2q; void *csum_start = NULL; unsigned long interval; __sum16 *csum = NULL; struct ipv6hdr *ip6h; struct mld_msg *mldq; struct sk_buff *skb; unsigned long llqt; struct ethhdr *eth; u16 llqt_srcs = 0; u8 *hopopt; mld_hdr_size = sizeof(*mldq); if (brmctx->multicast_mld_version == 2) { mld_hdr_size = sizeof(*mld2q); if (pg && with_srcs) { llqt = now + (brmctx->multicast_last_member_interval * brmctx->multicast_last_member_count); hlist_for_each_entry(ent, &pg->src_list, node) { if (over_llqt == time_after(ent->timer.expires, llqt) && ent->src_query_rexmit_cnt > 0) llqt_srcs++; } if (!llqt_srcs) return NULL; mld_hdr_size += llqt_srcs * sizeof(struct in6_addr); } } pkt_size = sizeof(*eth) + sizeof(*ip6h) + 8 + mld_hdr_size; if ((p && pkt_size > p->dev->mtu) || pkt_size > brmctx->br->dev->mtu) return NULL; skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size); if (!skb) goto out; __br_multicast_query_handle_vlan(brmctx, pmctx, skb); skb->protocol = htons(ETH_P_IPV6); /* Ethernet header */ skb_reset_mac_header(skb); eth = eth_hdr(skb); ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr); eth->h_proto = htons(ETH_P_IPV6); skb_put(skb, sizeof(*eth)); /* IPv6 header + HbH option */ skb_set_network_header(skb, skb->len); ip6h = ipv6_hdr(skb); *(__force __be32 *)ip6h = htonl(0x60000000); ip6h->payload_len = htons(8 + mld_hdr_size); ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; ip6h->daddr = *ip6_dst; if (ipv6_dev_get_saddr(dev_net(brmctx->br->dev), brmctx->br->dev, &ip6h->daddr, 0, &ip6h->saddr)) { kfree_skb(skb); br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, false); return NULL; } br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, true); ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); hopopt[0] = IPPROTO_ICMPV6; /* next hdr */ hopopt[1] = 0; /* length of HbH */ hopopt[2] = IPV6_TLV_ROUTERALERT; /* Router Alert */ hopopt[3] = 2; /* Length of RA Option */ hopopt[4] = 0; /* Type = 0x0000 (MLD) */ hopopt[5] = 0; hopopt[6] = IPV6_TLV_PAD1; /* Pad1 */ hopopt[7] = IPV6_TLV_PAD1; /* Pad1 */ skb_put(skb, sizeof(*ip6h) + 8); /* ICMPv6 */ skb_set_transport_header(skb, skb->len); interval = ipv6_addr_any(group) ? brmctx->multicast_query_response_interval : brmctx->multicast_last_member_interval; *igmp_type = ICMPV6_MGM_QUERY; switch (brmctx->multicast_mld_version) { case 1: mldq = (struct mld_msg *)icmp6_hdr(skb); mldq->mld_type = ICMPV6_MGM_QUERY; mldq->mld_code = 0; mldq->mld_cksum = 0; mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); mldq->mld_reserved = 0; mldq->mld_mca = *group; csum = &mldq->mld_cksum; csum_start = (void *)mldq; break; case 2: mld2q = (struct mld2_query *)icmp6_hdr(skb); mld2q->mld2q_mrc = htons((u16)jiffies_to_msecs(interval)); mld2q->mld2q_type = ICMPV6_MGM_QUERY; mld2q->mld2q_code = 0; mld2q->mld2q_cksum = 0; mld2q->mld2q_resv1 = 0; mld2q->mld2q_resv2 = 0; mld2q->mld2q_suppress = sflag; mld2q->mld2q_qrv = 2; mld2q->mld2q_nsrcs = htons(llqt_srcs); mld2q->mld2q_qqic = brmctx->multicast_query_interval / HZ; mld2q->mld2q_mca = *group; csum = &mld2q->mld2q_cksum; csum_start = (void *)mld2q; if (!pg || !with_srcs) break; llqt_srcs = 0; hlist_for_each_entry(ent, &pg->src_list, node) { if (over_llqt == time_after(ent->timer.expires, llqt) && ent->src_query_rexmit_cnt > 0) { mld2q->mld2q_srcs[llqt_srcs++] = ent->addr.src.ip6; ent->src_query_rexmit_cnt--; if (need_rexmit && ent->src_query_rexmit_cnt) *need_rexmit = true; } } if (WARN_ON(llqt_srcs != ntohs(mld2q->mld2q_nsrcs))) { kfree_skb(skb); return NULL; } break; } if (WARN_ON(!csum || !csum_start)) { kfree_skb(skb); return NULL; } *csum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, mld_hdr_size, IPPROTO_ICMPV6, csum_partial(csum_start, mld_hdr_size, 0)); skb_put(skb, mld_hdr_size); __skb_pull(skb, sizeof(*eth)); out: return skb; } #endif static struct sk_buff *br_multicast_alloc_query(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, struct br_ip *ip_dst, struct br_ip *group, bool with_srcs, bool over_lmqt, u8 sflag, u8 *igmp_type, bool *need_rexmit) { __be32 ip4_dst; switch (group->proto) { case htons(ETH_P_IP): ip4_dst = ip_dst ? ip_dst->dst.ip4 : htonl(INADDR_ALLHOSTS_GROUP); return br_ip4_multicast_alloc_query(brmctx, pmctx, pg, ip4_dst, group->dst.ip4, with_srcs, over_lmqt, sflag, igmp_type, need_rexmit); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): { struct in6_addr ip6_dst; if (ip_dst) ip6_dst = ip_dst->dst.ip6; else ipv6_addr_set(&ip6_dst, htonl(0xff020000), 0, 0, htonl(1)); return br_ip6_multicast_alloc_query(brmctx, pmctx, pg, &ip6_dst, &group->dst.ip6, with_srcs, over_lmqt, sflag, igmp_type, need_rexmit); } #endif } return NULL; } struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, struct br_ip *group) { struct net_bridge_mdb_entry *mp; int err; mp = br_mdb_ip_get(br, group); if (mp) return mp; if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) { trace_br_mdb_full(br->dev, group); br_mc_disabled_update(br->dev, false, NULL); br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false); return ERR_PTR(-E2BIG); } mp = kzalloc(sizeof(*mp), GFP_ATOMIC); if (unlikely(!mp)) return ERR_PTR(-ENOMEM); mp->br = br; mp->addr = *group; mp->mcast_gc.destroy = br_multicast_destroy_mdb_entry; timer_setup(&mp->timer, br_multicast_group_expired, 0); err = rhashtable_lookup_insert_fast(&br->mdb_hash_tbl, &mp->rhnode, br_mdb_rht_params); if (err) { kfree(mp); mp = ERR_PTR(err); } else { hlist_add_head_rcu(&mp->mdb_node, &br->mdb_list); } return mp; } static void br_multicast_group_src_expired(struct timer_list *t) { struct net_bridge_group_src *src = from_timer(src, t, timer); struct net_bridge_port_group *pg; struct net_bridge *br = src->br; spin_lock(&br->multicast_lock); if (hlist_unhashed(&src->node) || !netif_running(br->dev) || timer_pending(&src->timer)) goto out; pg = src->pg; if (pg->filter_mode == MCAST_INCLUDE) { br_multicast_del_group_src(src, false); if (!hlist_empty(&pg->src_list)) goto out; br_multicast_find_del_pg(br, pg); } else { br_multicast_fwd_src_handle(src); } out: spin_unlock(&br->multicast_lock); } struct net_bridge_group_src * br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip) { struct net_bridge_group_src *ent; switch (ip->proto) { case htons(ETH_P_IP): hlist_for_each_entry(ent, &pg->src_list, node) if (ip->src.ip4 == ent->addr.src.ip4) return ent; break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): hlist_for_each_entry(ent, &pg->src_list, node) if (!ipv6_addr_cmp(&ent->addr.src.ip6, &ip->src.ip6)) return ent; break; #endif } return NULL; } struct net_bridge_group_src * br_multicast_new_group_src(struct net_bridge_port_group *pg, struct br_ip *src_ip) { struct net_bridge_group_src *grp_src; if (unlikely(pg->src_ents >= PG_SRC_ENT_LIMIT)) return NULL; switch (src_ip->proto) { case htons(ETH_P_IP): if (ipv4_is_zeronet(src_ip->src.ip4) || ipv4_is_multicast(src_ip->src.ip4)) return NULL; break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): if (ipv6_addr_any(&src_ip->src.ip6) || ipv6_addr_is_multicast(&src_ip->src.ip6)) return NULL; break; #endif } grp_src = kzalloc(sizeof(*grp_src), GFP_ATOMIC); if (unlikely(!grp_src)) return NULL; grp_src->pg = pg; grp_src->br = pg->key.port->br; grp_src->addr = *src_ip; grp_src->mcast_gc.destroy = br_multicast_destroy_group_src; timer_setup(&grp_src->timer, br_multicast_group_src_expired, 0); hlist_add_head_rcu(&grp_src->node, &pg->src_list); pg->src_ents++; return grp_src; } struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, const struct br_ip *group, struct net_bridge_port_group __rcu *next, unsigned char flags, const unsigned char *src, u8 filter_mode, u8 rt_protocol, struct netlink_ext_ack *extack) { struct net_bridge_port_group *p; int err; err = br_multicast_port_ngroups_inc(port, group, extack); if (err) return NULL; p = kzalloc(sizeof(*p), GFP_ATOMIC); if (unlikely(!p)) { NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group"); goto dec_out; } p->key.addr = *group; p->key.port = port; p->flags = flags; p->filter_mode = filter_mode; p->rt_protocol = rt_protocol; p->eht_host_tree = RB_ROOT; p->eht_set_tree = RB_ROOT; p->mcast_gc.destroy = br_multicast_destroy_port_group; INIT_HLIST_HEAD(&p->src_list); if (!br_multicast_is_star_g(group) && rhashtable_lookup_insert_fast(&port->br->sg_port_tbl, &p->rhnode, br_sg_port_rht_params)) { NL_SET_ERR_MSG_MOD(extack, "Couldn't insert new port group"); goto free_out; } rcu_assign_pointer(p->next, next); timer_setup(&p->timer, br_multicast_port_group_expired, 0); timer_setup(&p->rexmit_timer, br_multicast_port_group_rexmit, 0); hlist_add_head(&p->mglist, &port->mglist); if (src) memcpy(p->eth_addr, src, ETH_ALEN); else eth_broadcast_addr(p->eth_addr); return p; free_out: kfree(p); dec_out: br_multicast_port_ngroups_dec(port, group->vid); return NULL; } void br_multicast_del_port_group(struct net_bridge_port_group *p) { struct net_bridge_port *port = p->key.port; __u16 vid = p->key.addr.vid; hlist_del_init(&p->mglist); if (!br_multicast_is_star_g(&p->key.addr)) rhashtable_remove_fast(&port->br->sg_port_tbl, &p->rhnode, br_sg_port_rht_params); kfree(p); br_multicast_port_ngroups_dec(port, vid); } void br_multicast_host_join(const struct net_bridge_mcast *brmctx, struct net_bridge_mdb_entry *mp, bool notify) { if (!mp->host_joined) { mp->host_joined = true; if (br_multicast_is_star_g(&mp->addr)) br_multicast_star_g_host_state(mp); if (notify) br_mdb_notify(mp->br->dev, mp, NULL, RTM_NEWMDB); } if (br_group_is_l2(&mp->addr)) return; mod_timer(&mp->timer, jiffies + brmctx->multicast_membership_interval); } void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify) { if (!mp->host_joined) return; mp->host_joined = false; if (br_multicast_is_star_g(&mp->addr)) br_multicast_star_g_host_state(mp); if (notify) br_mdb_notify(mp->br->dev, mp, NULL, RTM_DELMDB); } static struct net_bridge_port_group * __br_multicast_add_group(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct br_ip *group, const unsigned char *src, u8 filter_mode, bool igmpv2_mldv1, bool blocked) { struct net_bridge_port_group __rcu **pp; struct net_bridge_port_group *p = NULL; struct net_bridge_mdb_entry *mp; unsigned long now = jiffies; if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto out; mp = br_multicast_new_group(brmctx->br, group); if (IS_ERR(mp)) return ERR_CAST(mp); if (!pmctx) { br_multicast_host_join(brmctx, mp, true); goto out; } for (pp = &mp->ports; (p = mlock_dereference(*pp, brmctx->br)) != NULL; pp = &p->next) { if (br_port_group_equal(p, pmctx->port, src)) goto found; if ((unsigned long)p->key.port < (unsigned long)pmctx->port) break; } p = br_multicast_new_port_group(pmctx->port, group, *pp, 0, src, filter_mode, RTPROT_KERNEL, NULL); if (unlikely(!p)) { p = ERR_PTR(-ENOMEM); goto out; } rcu_assign_pointer(*pp, p); if (blocked) p->flags |= MDB_PG_FLAGS_BLOCKED; br_mdb_notify(brmctx->br->dev, mp, p, RTM_NEWMDB); found: if (igmpv2_mldv1) mod_timer(&p->timer, now + brmctx->multicast_membership_interval); out: return p; } static int br_multicast_add_group(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct br_ip *group, const unsigned char *src, u8 filter_mode, bool igmpv2_mldv1) { struct net_bridge_port_group *pg; int err; spin_lock(&brmctx->br->multicast_lock); pg = __br_multicast_add_group(brmctx, pmctx, group, src, filter_mode, igmpv2_mldv1, false); /* NULL is considered valid for host joined groups */ err = PTR_ERR_OR_ZERO(pg); spin_unlock(&brmctx->br->multicast_lock); return err; } static int br_ip4_multicast_add_group(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, __be32 group, __u16 vid, const unsigned char *src, bool igmpv2) { struct br_ip br_group; u8 filter_mode; if (ipv4_is_local_multicast(group)) return 0; memset(&br_group, 0, sizeof(br_group)); br_group.dst.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; filter_mode = igmpv2 ? MCAST_EXCLUDE : MCAST_INCLUDE; return br_multicast_add_group(brmctx, pmctx, &br_group, src, filter_mode, igmpv2); } #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_add_group(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, const struct in6_addr *group, __u16 vid, const unsigned char *src, bool mldv1) { struct br_ip br_group; u8 filter_mode; if (ipv6_addr_is_ll_all_nodes(group)) return 0; memset(&br_group, 0, sizeof(br_group)); br_group.dst.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; filter_mode = mldv1 ? MCAST_EXCLUDE : MCAST_INCLUDE; return br_multicast_add_group(brmctx, pmctx, &br_group, src, filter_mode, mldv1); } #endif static bool br_multicast_rport_del(struct hlist_node *rlist) { if (hlist_unhashed(rlist)) return false; hlist_del_init_rcu(rlist); return true; } static bool br_ip4_multicast_rport_del(struct net_bridge_mcast_port *pmctx) { return br_multicast_rport_del(&pmctx->ip4_rlist); } static bool br_ip6_multicast_rport_del(struct net_bridge_mcast_port *pmctx) { #if IS_ENABLED(CONFIG_IPV6) return br_multicast_rport_del(&pmctx->ip6_rlist); #else return false; #endif } static void br_multicast_router_expired(struct net_bridge_mcast_port *pmctx, struct timer_list *t, struct hlist_node *rlist) { struct net_bridge *br = pmctx->port->br; bool del; spin_lock(&br->multicast_lock); if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED || pmctx->multicast_router == MDB_RTR_TYPE_PERM || timer_pending(t)) goto out; del = br_multicast_rport_del(rlist); br_multicast_rport_del_notify(pmctx, del); out: spin_unlock(&br->multicast_lock); } static void br_ip4_multicast_router_expired(struct timer_list *t) { struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t, ip4_mc_router_timer); br_multicast_router_expired(pmctx, t, &pmctx->ip4_rlist); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_router_expired(struct timer_list *t) { struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t, ip6_mc_router_timer); br_multicast_router_expired(pmctx, t, &pmctx->ip6_rlist); } #endif static void br_mc_router_state_change(struct net_bridge *p, bool is_mc_router) { struct switchdev_attr attr = { .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, .flags = SWITCHDEV_F_DEFER, .u.mrouter = is_mc_router, }; switchdev_port_attr_set(p->dev, &attr, NULL); } static void br_multicast_local_router_expired(struct net_bridge_mcast *brmctx, struct timer_list *timer) { spin_lock(&brmctx->br->multicast_lock); if (brmctx->multicast_router == MDB_RTR_TYPE_DISABLED || brmctx->multicast_router == MDB_RTR_TYPE_PERM || br_ip4_multicast_is_router(brmctx) || br_ip6_multicast_is_router(brmctx)) goto out; br_mc_router_state_change(brmctx->br, false); out: spin_unlock(&brmctx->br->multicast_lock); } static void br_ip4_multicast_local_router_expired(struct timer_list *t) { struct net_bridge_mcast *brmctx = from_timer(brmctx, t, ip4_mc_router_timer); br_multicast_local_router_expired(brmctx, t); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_local_router_expired(struct timer_list *t) { struct net_bridge_mcast *brmctx = from_timer(brmctx, t, ip6_mc_router_timer); br_multicast_local_router_expired(brmctx, t); } #endif static void br_multicast_querier_expired(struct net_bridge_mcast *brmctx, struct bridge_mcast_own_query *query) { spin_lock(&brmctx->br->multicast_lock); if (!netif_running(brmctx->br->dev) || br_multicast_ctx_vlan_global_disabled(brmctx) || !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED)) goto out; br_multicast_start_querier(brmctx, query); out: spin_unlock(&brmctx->br->multicast_lock); } static void br_ip4_multicast_querier_expired(struct timer_list *t) { struct net_bridge_mcast *brmctx = from_timer(brmctx, t, ip4_other_query.timer); br_multicast_querier_expired(brmctx, &brmctx->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_querier_expired(struct timer_list *t) { struct net_bridge_mcast *brmctx = from_timer(brmctx, t, ip6_other_query.timer); br_multicast_querier_expired(brmctx, &brmctx->ip6_own_query); } #endif static void br_multicast_query_delay_expired(struct timer_list *t) { } static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx, struct br_ip *ip, struct sk_buff *skb) { if (ip->proto == htons(ETH_P_IP)) brmctx->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr; #if IS_ENABLED(CONFIG_IPV6) else brmctx->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr; #endif } static void __br_multicast_send_query(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, struct br_ip *ip_dst, struct br_ip *group, bool with_srcs, u8 sflag, bool *need_rexmit) { bool over_lmqt = !!sflag; struct sk_buff *skb; u8 igmp_type; if (!br_multicast_ctx_should_use(brmctx, pmctx) || !br_multicast_ctx_matches_vlan_snooping(brmctx)) return; again_under_lmqt: skb = br_multicast_alloc_query(brmctx, pmctx, pg, ip_dst, group, with_srcs, over_lmqt, sflag, &igmp_type, need_rexmit); if (!skb) return; if (pmctx) { skb->dev = pmctx->port->dev; br_multicast_count(brmctx->br, pmctx->port, skb, igmp_type, BR_MCAST_DIR_TX); NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, dev_net(pmctx->port->dev), NULL, skb, NULL, skb->dev, br_dev_queue_push_xmit); if (over_lmqt && with_srcs && sflag) { over_lmqt = false; goto again_under_lmqt; } } else { br_multicast_select_own_querier(brmctx, group, skb); br_multicast_count(brmctx->br, NULL, skb, igmp_type, BR_MCAST_DIR_RX); netif_rx(skb); } } static void br_multicast_read_querier(const struct bridge_mcast_querier *querier, struct bridge_mcast_querier *dest) { unsigned int seq; memset(dest, 0, sizeof(*dest)); do { seq = read_seqcount_begin(&querier->seq); dest->port_ifidx = querier->port_ifidx; memcpy(&dest->addr, &querier->addr, sizeof(struct br_ip)); } while (read_seqcount_retry(&querier->seq, seq)); } static void br_multicast_update_querier(struct net_bridge_mcast *brmctx, struct bridge_mcast_querier *querier, int ifindex, struct br_ip *saddr) { write_seqcount_begin(&querier->seq); querier->port_ifidx = ifindex; memcpy(&querier->addr, saddr, sizeof(*saddr)); write_seqcount_end(&querier->seq); } static void br_multicast_send_query(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct bridge_mcast_own_query *own_query) { struct bridge_mcast_other_query *other_query = NULL; struct bridge_mcast_querier *querier; struct br_ip br_group; unsigned long time; if (!br_multicast_ctx_should_use(brmctx, pmctx) || !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) || !brmctx->multicast_querier) return; memset(&br_group.dst, 0, sizeof(br_group.dst)); if (pmctx ? (own_query == &pmctx->ip4_own_query) : (own_query == &brmctx->ip4_own_query)) { querier = &brmctx->ip4_querier; other_query = &brmctx->ip4_other_query; br_group.proto = htons(ETH_P_IP); #if IS_ENABLED(CONFIG_IPV6) } else { querier = &brmctx->ip6_querier; other_query = &brmctx->ip6_other_query; br_group.proto = htons(ETH_P_IPV6); #endif } if (!other_query || timer_pending(&other_query->timer)) return; /* we're about to select ourselves as querier */ if (!pmctx && querier->port_ifidx) { struct br_ip zeroip = {}; br_multicast_update_querier(brmctx, querier, 0, &zeroip); } __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &br_group, false, 0, NULL); time = jiffies; time += own_query->startup_sent < brmctx->multicast_startup_query_count ? brmctx->multicast_startup_query_interval : brmctx->multicast_query_interval; mod_timer(&own_query->timer, time); } static void br_multicast_port_query_expired(struct net_bridge_mcast_port *pmctx, struct bridge_mcast_own_query *query) { struct net_bridge *br = pmctx->port->br; struct net_bridge_mcast *brmctx; spin_lock(&br->multicast_lock); if (br_multicast_port_ctx_state_stopped(pmctx)) goto out; brmctx = br_multicast_port_ctx_get_global(pmctx); if (query->startup_sent < brmctx->multicast_startup_query_count) query->startup_sent++; br_multicast_send_query(brmctx, pmctx, query); out: spin_unlock(&br->multicast_lock); } static void br_ip4_multicast_port_query_expired(struct timer_list *t) { struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t, ip4_own_query.timer); br_multicast_port_query_expired(pmctx, &pmctx->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_port_query_expired(struct timer_list *t) { struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t, ip6_own_query.timer); br_multicast_port_query_expired(pmctx, &pmctx->ip6_own_query); } #endif static void br_multicast_port_group_rexmit(struct timer_list *t) { struct net_bridge_port_group *pg = from_timer(pg, t, rexmit_timer); struct bridge_mcast_other_query *other_query = NULL; struct net_bridge *br = pg->key.port->br; struct net_bridge_mcast_port *pmctx; struct net_bridge_mcast *brmctx; bool need_rexmit = false; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) || !br_opt_get(br, BROPT_MULTICAST_ENABLED)) goto out; pmctx = br_multicast_pg_to_port_ctx(pg); if (!pmctx) goto out; brmctx = br_multicast_port_ctx_get_global(pmctx); if (!brmctx->multicast_querier) goto out; if (pg->key.addr.proto == htons(ETH_P_IP)) other_query = &brmctx->ip4_other_query; #if IS_ENABLED(CONFIG_IPV6) else other_query = &brmctx->ip6_other_query; #endif if (!other_query || timer_pending(&other_query->timer)) goto out; if (pg->grp_query_rexmit_cnt) { pg->grp_query_rexmit_cnt--; __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr, &pg->key.addr, false, 1, NULL); } __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr, &pg->key.addr, true, 0, &need_rexmit); if (pg->grp_query_rexmit_cnt || need_rexmit) mod_timer(&pg->rexmit_timer, jiffies + brmctx->multicast_last_member_interval); out: spin_unlock(&br->multicast_lock); } static int br_mc_disabled_update(struct net_device *dev, bool value, struct netlink_ext_ack *extack) { struct switchdev_attr attr = { .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, .flags = SWITCHDEV_F_DEFER, .u.mc_disabled = !value, }; return switchdev_port_attr_set(dev, &attr, extack); } void br_multicast_port_ctx_init(struct net_bridge_port *port, struct net_bridge_vlan *vlan, struct net_bridge_mcast_port *pmctx) { pmctx->port = port; pmctx->vlan = vlan; pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; timer_setup(&pmctx->ip4_mc_router_timer, br_ip4_multicast_router_expired, 0); timer_setup(&pmctx->ip4_own_query.timer, br_ip4_multicast_port_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) timer_setup(&pmctx->ip6_mc_router_timer, br_ip6_multicast_router_expired, 0); timer_setup(&pmctx->ip6_own_query.timer, br_ip6_multicast_port_query_expired, 0); #endif } void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx) { #if IS_ENABLED(CONFIG_IPV6) del_timer_sync(&pmctx->ip6_mc_router_timer); #endif del_timer_sync(&pmctx->ip4_mc_router_timer); } int br_multicast_add_port(struct net_bridge_port *port) { int err; port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT; br_multicast_port_ctx_init(port, NULL, &port->multicast_ctx); err = br_mc_disabled_update(port->dev, br_opt_get(port->br, BROPT_MULTICAST_ENABLED), NULL); if (err && err != -EOPNOTSUPP) return err; port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats); if (!port->mcast_stats) return -ENOMEM; return 0; } void br_multicast_del_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; struct net_bridge_port_group *pg; struct hlist_node *n; /* Take care of the remaining groups, only perm ones should be left */ spin_lock_bh(&br->multicast_lock); hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) br_multicast_find_del_pg(br, pg); spin_unlock_bh(&br->multicast_lock); flush_work(&br->mcast_gc_work); br_multicast_port_ctx_deinit(&port->multicast_ctx); free_percpu(port->mcast_stats); } static void br_multicast_enable(struct bridge_mcast_own_query *query) { query->startup_sent = 0; if (try_to_del_timer_sync(&query->timer) >= 0 || del_timer(&query->timer)) mod_timer(&query->timer, jiffies); } static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx) { struct net_bridge *br = pmctx->port->br; struct net_bridge_mcast *brmctx; brmctx = br_multicast_port_ctx_get_global(pmctx); if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || !netif_running(br->dev)) return; br_multicast_enable(&pmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) br_multicast_enable(&pmctx->ip6_own_query); #endif if (pmctx->multicast_router == MDB_RTR_TYPE_PERM) { br_ip4_multicast_add_router(brmctx, pmctx); br_ip6_multicast_add_router(brmctx, pmctx); } if (br_multicast_port_ctx_is_vlan(pmctx)) { struct net_bridge_port_group *pg; u32 n = 0; /* The mcast_n_groups counter might be wrong. First, * BR_VLFLAG_MCAST_ENABLED is toggled before temporary entries * are flushed, thus mcast_n_groups after the toggle does not * reflect the true values. And second, permanent entries added * while BR_VLFLAG_MCAST_ENABLED was disabled, are not reflected * either. Thus we have to refresh the counter. */ hlist_for_each_entry(pg, &pmctx->port->mglist, mglist) { if (pg->key.addr.vid == pmctx->vlan->vid) n++; } WRITE_ONCE(pmctx->mdb_n_entries, n); } } void br_multicast_enable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; spin_lock_bh(&br->multicast_lock); __br_multicast_enable_port_ctx(&port->multicast_ctx); spin_unlock_bh(&br->multicast_lock); } static void __br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx) { struct net_bridge_port_group *pg; struct hlist_node *n; bool del = false; hlist_for_each_entry_safe(pg, n, &pmctx->port->mglist, mglist) if (!(pg->flags & MDB_PG_FLAGS_PERMANENT) && (!br_multicast_port_ctx_is_vlan(pmctx) || pg->key.addr.vid == pmctx->vlan->vid)) br_multicast_find_del_pg(pmctx->port->br, pg); del |= br_ip4_multicast_rport_del(pmctx); del_timer(&pmctx->ip4_mc_router_timer); del_timer(&pmctx->ip4_own_query.timer); del |= br_ip6_multicast_rport_del(pmctx); #if IS_ENABLED(CONFIG_IPV6) del_timer(&pmctx->ip6_mc_router_timer); del_timer(&pmctx->ip6_own_query.timer); #endif br_multicast_rport_del_notify(pmctx, del); } void br_multicast_disable_port(struct net_bridge_port *port) { spin_lock_bh(&port->br->multicast_lock); __br_multicast_disable_port_ctx(&port->multicast_ctx); spin_unlock_bh(&port->br->multicast_lock); } static int __grp_src_delete_marked(struct net_bridge_port_group *pg) { struct net_bridge_group_src *ent; struct hlist_node *tmp; int deleted = 0; hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) if (ent->flags & BR_SGRP_F_DELETE) { br_multicast_del_group_src(ent, false); deleted++; } return deleted; } static void __grp_src_mod_timer(struct net_bridge_group_src *src, unsigned long expires) { mod_timer(&src->timer, expires); br_multicast_fwd_src_handle(src); } static void __grp_src_query_marked_and_rexmit(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg) { struct bridge_mcast_other_query *other_query = NULL; u32 lmqc = brmctx->multicast_last_member_count; unsigned long lmqt, lmi, now = jiffies; struct net_bridge_group_src *ent; if (!netif_running(brmctx->br->dev) || !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED)) return; if (pg->key.addr.proto == htons(ETH_P_IP)) other_query = &brmctx->ip4_other_query; #if IS_ENABLED(CONFIG_IPV6) else other_query = &brmctx->ip6_other_query; #endif lmqt = now + br_multicast_lmqt(brmctx); hlist_for_each_entry(ent, &pg->src_list, node) { if (ent->flags & BR_SGRP_F_SEND) { ent->flags &= ~BR_SGRP_F_SEND; if (ent->timer.expires > lmqt) { if (brmctx->multicast_querier && other_query && !timer_pending(&other_query->timer)) ent->src_query_rexmit_cnt = lmqc; __grp_src_mod_timer(ent, lmqt); } } } if (!brmctx->multicast_querier || !other_query || timer_pending(&other_query->timer)) return; __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr, &pg->key.addr, true, 1, NULL); lmi = now + brmctx->multicast_last_member_interval; if (!timer_pending(&pg->rexmit_timer) || time_after(pg->rexmit_timer.expires, lmi)) mod_timer(&pg->rexmit_timer, lmi); } static void __grp_send_query_and_rexmit(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg) { struct bridge_mcast_other_query *other_query = NULL; unsigned long now = jiffies, lmi; if (!netif_running(brmctx->br->dev) || !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED)) return; if (pg->key.addr.proto == htons(ETH_P_IP)) other_query = &brmctx->ip4_other_query; #if IS_ENABLED(CONFIG_IPV6) else other_query = &brmctx->ip6_other_query; #endif if (brmctx->multicast_querier && other_query && !timer_pending(&other_query->timer)) { lmi = now + brmctx->multicast_last_member_interval; pg->grp_query_rexmit_cnt = brmctx->multicast_last_member_count - 1; __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr, &pg->key.addr, false, 0, NULL); if (!timer_pending(&pg->rexmit_timer) || time_after(pg->rexmit_timer.expires, lmi)) mod_timer(&pg->rexmit_timer, lmi); } if (pg->filter_mode == MCAST_EXCLUDE && (!timer_pending(&pg->timer) || time_after(pg->timer.expires, now + br_multicast_lmqt(brmctx)))) mod_timer(&pg->timer, now + br_multicast_lmqt(brmctx)); } /* State Msg type New state Actions * INCLUDE (A) IS_IN (B) INCLUDE (A+B) (B)=GMI * INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI * EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI */ static bool br_multicast_isinc_allow(const struct net_bridge_mcast *brmctx, struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; unsigned long now = jiffies; bool changed = false; struct br_ip src_ip; u32 src_idx; memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (!ent) { ent = br_multicast_new_group_src(pg, &src_ip); if (ent) changed = true; } if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx)); } if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type)) changed = true; return changed; } /* State Msg type New state Actions * INCLUDE (A) IS_EX (B) EXCLUDE (A*B,B-A) (B-A)=0 * Delete (A-B) * Group Timer=GMI */ static void __grp_src_isexc_incl(const struct net_bridge_mcast *brmctx, struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; struct br_ip src_ip; u32 src_idx; hlist_for_each_entry(ent, &pg->src_list, node) ent->flags |= BR_SGRP_F_DELETE; memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) ent->flags &= ~BR_SGRP_F_DELETE; else ent = br_multicast_new_group_src(pg, &src_ip); if (ent) br_multicast_fwd_src_handle(ent); } br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type); __grp_src_delete_marked(pg); } /* State Msg type New state Actions * EXCLUDE (X,Y) IS_EX (A) EXCLUDE (A-Y,Y*A) (A-X-Y)=GMI * Delete (X-A) * Delete (Y-A) * Group Timer=GMI */ static bool __grp_src_isexc_excl(const struct net_bridge_mcast *brmctx, struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; unsigned long now = jiffies; bool changed = false; struct br_ip src_ip; u32 src_idx; hlist_for_each_entry(ent, &pg->src_list, node) ent->flags |= BR_SGRP_F_DELETE; memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= ~BR_SGRP_F_DELETE; } else { ent = br_multicast_new_group_src(pg, &src_ip); if (ent) { __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx)); changed = true; } } } if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type)) changed = true; if (__grp_src_delete_marked(pg)) changed = true; return changed; } static bool br_multicast_isexc(const struct net_bridge_mcast *brmctx, struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: __grp_src_isexc_incl(brmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: changed = __grp_src_isexc_excl(brmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type); break; } pg->filter_mode = MCAST_EXCLUDE; mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx)); return changed; } /* State Msg type New state Actions * INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI * Send Q(G,A-B) */ static bool __grp_src_toin_incl(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { u32 src_idx, to_send = pg->src_ents; struct net_bridge_group_src *ent; unsigned long now = jiffies; bool changed = false; struct br_ip src_ip; hlist_for_each_entry(ent, &pg->src_list, node) ent->flags |= BR_SGRP_F_SEND; memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= ~BR_SGRP_F_SEND; to_send--; } else { ent = br_multicast_new_group_src(pg, &src_ip); if (ent) changed = true; } if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx)); } if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type)) changed = true; if (to_send) __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); return changed; } /* State Msg type New state Actions * EXCLUDE (X,Y) TO_IN (A) EXCLUDE (X+A,Y-A) (A)=GMI * Send Q(G,X-A) * Send Q(G) */ static bool __grp_src_toin_excl(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { u32 src_idx, to_send = pg->src_ents; struct net_bridge_group_src *ent; unsigned long now = jiffies; bool changed = false; struct br_ip src_ip; hlist_for_each_entry(ent, &pg->src_list, node) if (timer_pending(&ent->timer)) ent->flags |= BR_SGRP_F_SEND; memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { if (timer_pending(&ent->timer)) { ent->flags &= ~BR_SGRP_F_SEND; to_send--; } } else { ent = br_multicast_new_group_src(pg, &src_ip); if (ent) changed = true; } if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx)); } if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type)) changed = true; if (to_send) __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); __grp_send_query_and_rexmit(brmctx, pmctx, pg); return changed; } static bool br_multicast_toin(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: changed = __grp_src_toin_incl(brmctx, pmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type); break; case MCAST_EXCLUDE: changed = __grp_src_toin_excl(brmctx, pmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type); break; } if (br_multicast_eht_should_del_pg(pg)) { pg->flags |= MDB_PG_FLAGS_FAST_LEAVE; br_multicast_find_del_pg(pg->key.port->br, pg); /* a notification has already been sent and we shouldn't * access pg after the delete so we have to return false */ changed = false; } return changed; } /* State Msg type New state Actions * INCLUDE (A) TO_EX (B) EXCLUDE (A*B,B-A) (B-A)=0 * Delete (A-B) * Send Q(G,A*B) * Group Timer=GMI */ static void __grp_src_toex_incl(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; struct br_ip src_ip; hlist_for_each_entry(ent, &pg->src_list, node) ent->flags = (ent->flags & ~BR_SGRP_F_SEND) | BR_SGRP_F_DELETE; memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags = (ent->flags & ~BR_SGRP_F_DELETE) | BR_SGRP_F_SEND; to_send++; } else { ent = br_multicast_new_group_src(pg, &src_ip); } if (ent) br_multicast_fwd_src_handle(ent); } br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type); __grp_src_delete_marked(pg); if (to_send) __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); } /* State Msg type New state Actions * EXCLUDE (X,Y) TO_EX (A) EXCLUDE (A-Y,Y*A) (A-X-Y)=Group Timer * Delete (X-A) * Delete (Y-A) * Send Q(G,A-Y) * Group Timer=GMI */ static bool __grp_src_toex_excl(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; bool changed = false; struct br_ip src_ip; hlist_for_each_entry(ent, &pg->src_list, node) ent->flags = (ent->flags & ~BR_SGRP_F_SEND) | BR_SGRP_F_DELETE; memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= ~BR_SGRP_F_DELETE; } else { ent = br_multicast_new_group_src(pg, &src_ip); if (ent) { __grp_src_mod_timer(ent, pg->timer.expires); changed = true; } } if (ent && timer_pending(&ent->timer)) { ent->flags |= BR_SGRP_F_SEND; to_send++; } } if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type)) changed = true; if (__grp_src_delete_marked(pg)) changed = true; if (to_send) __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); return changed; } static bool br_multicast_toex(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: __grp_src_toex_incl(brmctx, pmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: changed = __grp_src_toex_excl(brmctx, pmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type); break; } pg->filter_mode = MCAST_EXCLUDE; mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx)); return changed; } /* State Msg type New state Actions * INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B) */ static bool __grp_src_block_incl(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; bool changed = false; struct br_ip src_ip; hlist_for_each_entry(ent, &pg->src_list, node) ent->flags &= ~BR_SGRP_F_SEND; memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags |= BR_SGRP_F_SEND; to_send++; } } if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type)) changed = true; if (to_send) __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); return changed; } /* State Msg type New state Actions * EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y)=Group Timer * Send Q(G,A-Y) */ static bool __grp_src_block_excl(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; bool changed = false; struct br_ip src_ip; hlist_for_each_entry(ent, &pg->src_list, node) ent->flags &= ~BR_SGRP_F_SEND; memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (!ent) { ent = br_multicast_new_group_src(pg, &src_ip); if (ent) { __grp_src_mod_timer(ent, pg->timer.expires); changed = true; } } if (ent && timer_pending(&ent->timer)) { ent->flags |= BR_SGRP_F_SEND; to_send++; } } if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type)) changed = true; if (to_send) __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); return changed; } static bool br_multicast_block(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: changed = __grp_src_block_incl(brmctx, pmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type); break; case MCAST_EXCLUDE: changed = __grp_src_block_excl(brmctx, pmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type); break; } if ((pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) || br_multicast_eht_should_del_pg(pg)) { if (br_multicast_eht_should_del_pg(pg)) pg->flags |= MDB_PG_FLAGS_FAST_LEAVE; br_multicast_find_del_pg(pg->key.port->br, pg); /* a notification has already been sent and we shouldn't * access pg after the delete so we have to return false */ changed = false; } return changed; } static struct net_bridge_port_group * br_multicast_find_port(struct net_bridge_mdb_entry *mp, struct net_bridge_port *p, const unsigned char *src) { struct net_bridge *br __maybe_unused = mp->br; struct net_bridge_port_group *pg; for (pg = mlock_dereference(mp->ports, br); pg; pg = mlock_dereference(pg->next, br)) if (br_port_group_equal(pg, p, src)) return pg; return NULL; } static int br_ip4_multicast_igmp3_report(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { bool igmpv2 = brmctx->multicast_igmp_version == 2; struct net_bridge_mdb_entry *mdst; struct net_bridge_port_group *pg; const unsigned char *src; struct igmpv3_report *ih; struct igmpv3_grec *grec; int i, len, num, type; __be32 group, *h_addr; bool changed = false; int err = 0; u16 nsrcs; ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = skb_transport_offset(skb) + sizeof(*ih); for (i = 0; i < num; i++) { len += sizeof(*grec); if (!ip_mc_may_pull(skb, len)) return -EINVAL; grec = (void *)(skb->data + len - sizeof(*grec)); group = grec->grec_mca; type = grec->grec_type; nsrcs = ntohs(grec->grec_nsrcs); len += nsrcs * 4; if (!ip_mc_may_pull(skb, len)) return -EINVAL; switch (type) { case IGMPV3_MODE_IS_INCLUDE: case IGMPV3_MODE_IS_EXCLUDE: case IGMPV3_CHANGE_TO_INCLUDE: case IGMPV3_CHANGE_TO_EXCLUDE: case IGMPV3_ALLOW_NEW_SOURCES: case IGMPV3_BLOCK_OLD_SOURCES: break; default: continue; } src = eth_hdr(skb)->h_source; if (nsrcs == 0 && (type == IGMPV3_CHANGE_TO_INCLUDE || type == IGMPV3_MODE_IS_INCLUDE)) { if (!pmctx || igmpv2) { br_ip4_multicast_leave_group(brmctx, pmctx, group, vid, src); continue; } } else { err = br_ip4_multicast_add_group(brmctx, pmctx, group, vid, src, igmpv2); if (err) break; } if (!pmctx || igmpv2) continue; spin_lock(&brmctx->br->multicast_lock); if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto unlock_continue; mdst = br_mdb_ip4_get(brmctx->br, group, vid); if (!mdst) goto unlock_continue; pg = br_multicast_find_port(mdst, pmctx->port, src); if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) goto unlock_continue; /* reload grec and host addr */ grec = (void *)(skb->data + len - sizeof(*grec) - (nsrcs * 4)); h_addr = &ip_hdr(skb)->saddr; switch (type) { case IGMPV3_ALLOW_NEW_SOURCES: changed = br_multicast_isinc_allow(brmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_MODE_IS_INCLUDE: changed = br_multicast_isinc_allow(brmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_MODE_IS_EXCLUDE: changed = br_multicast_isexc(brmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_CHANGE_TO_INCLUDE: changed = br_multicast_toin(brmctx, pmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_CHANGE_TO_EXCLUDE: changed = br_multicast_toex(brmctx, pmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_BLOCK_OLD_SOURCES: changed = br_multicast_block(brmctx, pmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(__be32), type); break; } if (changed) br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB); unlock_continue: spin_unlock(&brmctx->br->multicast_lock); } return err; } #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_mld2_report(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { bool mldv1 = brmctx->multicast_mld_version == 1; struct net_bridge_mdb_entry *mdst; struct net_bridge_port_group *pg; unsigned int nsrcs_offset; struct mld2_report *mld2r; const unsigned char *src; struct in6_addr *h_addr; struct mld2_grec *grec; unsigned int grec_len; bool changed = false; int i, len, num; int err = 0; if (!ipv6_mc_may_pull(skb, sizeof(*mld2r))) return -EINVAL; mld2r = (struct mld2_report *)icmp6_hdr(skb); num = ntohs(mld2r->mld2r_ngrec); len = skb_transport_offset(skb) + sizeof(*mld2r); for (i = 0; i < num; i++) { __be16 *_nsrcs, __nsrcs; u16 nsrcs; nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs); if (skb_transport_offset(skb) + ipv6_transport_len(skb) < nsrcs_offset + sizeof(__nsrcs)) return -EINVAL; _nsrcs = skb_header_pointer(skb, nsrcs_offset, sizeof(__nsrcs), &__nsrcs); if (!_nsrcs) return -EINVAL; nsrcs = ntohs(*_nsrcs); grec_len = struct_size(grec, grec_src, nsrcs); if (!ipv6_mc_may_pull(skb, len + grec_len)) return -EINVAL; grec = (struct mld2_grec *)(skb->data + len); len += grec_len; switch (grec->grec_type) { case MLD2_MODE_IS_INCLUDE: case MLD2_MODE_IS_EXCLUDE: case MLD2_CHANGE_TO_INCLUDE: case MLD2_CHANGE_TO_EXCLUDE: case MLD2_ALLOW_NEW_SOURCES: case MLD2_BLOCK_OLD_SOURCES: break; default: continue; } src = eth_hdr(skb)->h_source; if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE || grec->grec_type == MLD2_MODE_IS_INCLUDE) && nsrcs == 0) { if (!pmctx || mldv1) { br_ip6_multicast_leave_group(brmctx, pmctx, &grec->grec_mca, vid, src); continue; } } else { err = br_ip6_multicast_add_group(brmctx, pmctx, &grec->grec_mca, vid, src, mldv1); if (err) break; } if (!pmctx || mldv1) continue; spin_lock(&brmctx->br->multicast_lock); if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto unlock_continue; mdst = br_mdb_ip6_get(brmctx->br, &grec->grec_mca, vid); if (!mdst) goto unlock_continue; pg = br_multicast_find_port(mdst, pmctx->port, src); if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) goto unlock_continue; h_addr = &ipv6_hdr(skb)->saddr; switch (grec->grec_type) { case MLD2_ALLOW_NEW_SOURCES: changed = br_multicast_isinc_allow(brmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_MODE_IS_INCLUDE: changed = br_multicast_isinc_allow(brmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_MODE_IS_EXCLUDE: changed = br_multicast_isexc(brmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_CHANGE_TO_INCLUDE: changed = br_multicast_toin(brmctx, pmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_CHANGE_TO_EXCLUDE: changed = br_multicast_toex(brmctx, pmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_BLOCK_OLD_SOURCES: changed = br_multicast_block(brmctx, pmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; } if (changed) br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB); unlock_continue: spin_unlock(&brmctx->br->multicast_lock); } return err; } #endif static bool br_multicast_select_querier(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct br_ip *saddr) { int port_ifidx = pmctx ? pmctx->port->dev->ifindex : 0; struct timer_list *own_timer, *other_timer; struct bridge_mcast_querier *querier; switch (saddr->proto) { case htons(ETH_P_IP): querier = &brmctx->ip4_querier; own_timer = &brmctx->ip4_own_query.timer; other_timer = &brmctx->ip4_other_query.timer; if (!querier->addr.src.ip4 || ntohl(saddr->src.ip4) <= ntohl(querier->addr.src.ip4)) goto update; break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): querier = &brmctx->ip6_querier; own_timer = &brmctx->ip6_own_query.timer; other_timer = &brmctx->ip6_other_query.timer; if (ipv6_addr_cmp(&saddr->src.ip6, &querier->addr.src.ip6) <= 0) goto update; break; #endif default: return false; } if (!timer_pending(own_timer) && !timer_pending(other_timer)) goto update; return false; update: br_multicast_update_querier(brmctx, querier, port_ifidx, saddr); return true; } static struct net_bridge_port * __br_multicast_get_querier_port(struct net_bridge *br, const struct bridge_mcast_querier *querier) { int port_ifidx = READ_ONCE(querier->port_ifidx); struct net_bridge_port *p; struct net_device *dev; if (port_ifidx == 0) return NULL; dev = dev_get_by_index_rcu(dev_net(br->dev), port_ifidx); if (!dev) return NULL; p = br_port_get_rtnl_rcu(dev); if (!p || p->br != br) return NULL; return p; } size_t br_multicast_querier_state_size(void) { return nla_total_size(0) + /* nest attribute */ nla_total_size(sizeof(__be32)) + /* BRIDGE_QUERIER_IP_ADDRESS */ nla_total_size(sizeof(int)) + /* BRIDGE_QUERIER_IP_PORT */ nla_total_size_64bit(sizeof(u64)) + /* BRIDGE_QUERIER_IP_OTHER_TIMER */ #if IS_ENABLED(CONFIG_IPV6) nla_total_size(sizeof(struct in6_addr)) + /* BRIDGE_QUERIER_IPV6_ADDRESS */ nla_total_size(sizeof(int)) + /* BRIDGE_QUERIER_IPV6_PORT */ nla_total_size_64bit(sizeof(u64)) + /* BRIDGE_QUERIER_IPV6_OTHER_TIMER */ #endif 0; } /* protected by rtnl or rcu */ int br_multicast_dump_querier_state(struct sk_buff *skb, const struct net_bridge_mcast *brmctx, int nest_attr) { struct bridge_mcast_querier querier = {}; struct net_bridge_port *p; struct nlattr *nest; if (!br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) || br_multicast_ctx_vlan_global_disabled(brmctx)) return 0; nest = nla_nest_start(skb, nest_attr); if (!nest) return -EMSGSIZE; rcu_read_lock(); if (!brmctx->multicast_querier && !timer_pending(&brmctx->ip4_other_query.timer)) goto out_v6; br_multicast_read_querier(&brmctx->ip4_querier, &querier); if (nla_put_in_addr(skb, BRIDGE_QUERIER_IP_ADDRESS, querier.addr.src.ip4)) { rcu_read_unlock(); goto out_err; } p = __br_multicast_get_querier_port(brmctx->br, &querier); if (timer_pending(&brmctx->ip4_other_query.timer) && (nla_put_u64_64bit(skb, BRIDGE_QUERIER_IP_OTHER_TIMER, br_timer_value(&brmctx->ip4_other_query.timer), BRIDGE_QUERIER_PAD) || (p && nla_put_u32(skb, BRIDGE_QUERIER_IP_PORT, p->dev->ifindex)))) { rcu_read_unlock(); goto out_err; } out_v6: #if IS_ENABLED(CONFIG_IPV6) if (!brmctx->multicast_querier && !timer_pending(&brmctx->ip6_other_query.timer)) goto out; br_multicast_read_querier(&brmctx->ip6_querier, &querier); if (nla_put_in6_addr(skb, BRIDGE_QUERIER_IPV6_ADDRESS, &querier.addr.src.ip6)) { rcu_read_unlock(); goto out_err; } p = __br_multicast_get_querier_port(brmctx->br, &querier); if (timer_pending(&brmctx->ip6_other_query.timer) && (nla_put_u64_64bit(skb, BRIDGE_QUERIER_IPV6_OTHER_TIMER, br_timer_value(&brmctx->ip6_other_query.timer), BRIDGE_QUERIER_PAD) || (p && nla_put_u32(skb, BRIDGE_QUERIER_IPV6_PORT, p->dev->ifindex)))) { rcu_read_unlock(); goto out_err; } out: #endif rcu_read_unlock(); nla_nest_end(skb, nest); if (!nla_len(nest)) nla_nest_cancel(skb, nest); return 0; out_err: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static void br_multicast_update_query_timer(struct net_bridge_mcast *brmctx, struct bridge_mcast_other_query *query, unsigned long max_delay) { if (!timer_pending(&query->timer)) mod_timer(&query->delay_timer, jiffies + max_delay); mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval); } static void br_port_mc_router_state_change(struct net_bridge_port *p, bool is_mc_router) { struct switchdev_attr attr = { .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_PORT_MROUTER, .flags = SWITCHDEV_F_DEFER, .u.mrouter = is_mc_router, }; switchdev_port_attr_set(p->dev, &attr, NULL); } static struct net_bridge_port * br_multicast_rport_from_node(struct net_bridge_mcast *brmctx, struct hlist_head *mc_router_list, struct hlist_node *rlist) { struct net_bridge_mcast_port *pmctx; #if IS_ENABLED(CONFIG_IPV6) if (mc_router_list == &brmctx->ip6_mc_router_list) pmctx = hlist_entry(rlist, struct net_bridge_mcast_port, ip6_rlist); else #endif pmctx = hlist_entry(rlist, struct net_bridge_mcast_port, ip4_rlist); return pmctx->port; } static struct hlist_node * br_multicast_get_rport_slot(struct net_bridge_mcast *brmctx, struct net_bridge_port *port, struct hlist_head *mc_router_list) { struct hlist_node *slot = NULL; struct net_bridge_port *p; struct hlist_node *rlist; hlist_for_each(rlist, mc_router_list) { p = br_multicast_rport_from_node(brmctx, mc_router_list, rlist); if ((unsigned long)port >= (unsigned long)p) break; slot = rlist; } return slot; } static bool br_multicast_no_router_otherpf(struct net_bridge_mcast_port *pmctx, struct hlist_node *rnode) { #if IS_ENABLED(CONFIG_IPV6) if (rnode != &pmctx->ip6_rlist) return hlist_unhashed(&pmctx->ip6_rlist); else return hlist_unhashed(&pmctx->ip4_rlist); #else return true; #endif } /* Add port to router_list * list is maintained ordered by pointer value * and locked by br->multicast_lock and RCU */ static void br_multicast_add_router(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct hlist_node *rlist, struct hlist_head *mc_router_list) { struct hlist_node *slot; if (!hlist_unhashed(rlist)) return; slot = br_multicast_get_rport_slot(brmctx, pmctx->port, mc_router_list); if (slot) hlist_add_behind_rcu(rlist, slot); else hlist_add_head_rcu(rlist, mc_router_list); /* For backwards compatibility for now, only notify if we * switched from no IPv4/IPv6 multicast router to a new * IPv4 or IPv6 multicast router. */ if (br_multicast_no_router_otherpf(pmctx, rlist)) { br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_NEWMDB); br_port_mc_router_state_change(pmctx->port, true); } } /* Add port to router_list * list is maintained ordered by pointer value * and locked by br->multicast_lock and RCU */ static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx) { br_multicast_add_router(brmctx, pmctx, &pmctx->ip4_rlist, &brmctx->ip4_mc_router_list); } /* Add port to router_list * list is maintained ordered by pointer value * and locked by br->multicast_lock and RCU */ static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx) { #if IS_ENABLED(CONFIG_IPV6) br_multicast_add_router(brmctx, pmctx, &pmctx->ip6_rlist, &brmctx->ip6_mc_router_list); #endif } static void br_multicast_mark_router(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct timer_list *timer, struct hlist_node *rlist, struct hlist_head *mc_router_list) { unsigned long now = jiffies; if (!br_multicast_ctx_should_use(brmctx, pmctx)) return; if (!pmctx) { if (brmctx->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) { if (!br_ip4_multicast_is_router(brmctx) && !br_ip6_multicast_is_router(brmctx)) br_mc_router_state_change(brmctx->br, true); mod_timer(timer, now + brmctx->multicast_querier_interval); } return; } if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED || pmctx->multicast_router == MDB_RTR_TYPE_PERM) return; br_multicast_add_router(brmctx, pmctx, rlist, mc_router_list); mod_timer(timer, now + brmctx->multicast_querier_interval); } static void br_ip4_multicast_mark_router(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx) { struct timer_list *timer = &brmctx->ip4_mc_router_timer; struct hlist_node *rlist = NULL; if (pmctx) { timer = &pmctx->ip4_mc_router_timer; rlist = &pmctx->ip4_rlist; } br_multicast_mark_router(brmctx, pmctx, timer, rlist, &brmctx->ip4_mc_router_list); } static void br_ip6_multicast_mark_router(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx) { #if IS_ENABLED(CONFIG_IPV6) struct timer_list *timer = &brmctx->ip6_mc_router_timer; struct hlist_node *rlist = NULL; if (pmctx) { timer = &pmctx->ip6_mc_router_timer; rlist = &pmctx->ip6_rlist; } br_multicast_mark_router(brmctx, pmctx, timer, rlist, &brmctx->ip6_mc_router_list); #endif } static void br_ip4_multicast_query_received(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct bridge_mcast_other_query *query, struct br_ip *saddr, unsigned long max_delay) { if (!br_multicast_select_querier(brmctx, pmctx, saddr)) return; br_multicast_update_query_timer(brmctx, query, max_delay); br_ip4_multicast_mark_router(brmctx, pmctx); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_query_received(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct bridge_mcast_other_query *query, struct br_ip *saddr, unsigned long max_delay) { if (!br_multicast_select_querier(brmctx, pmctx, saddr)) return; br_multicast_update_query_timer(brmctx, query, max_delay); br_ip6_multicast_mark_router(brmctx, pmctx); } #endif static void br_ip4_multicast_query(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { unsigned int transport_len = ip_transport_len(skb); const struct iphdr *iph = ip_hdr(skb); struct igmphdr *ih = igmp_hdr(skb); struct net_bridge_mdb_entry *mp; struct igmpv3_query *ih3; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; struct br_ip saddr = {}; unsigned long max_delay; unsigned long now = jiffies; __be32 group; spin_lock(&brmctx->br->multicast_lock); if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto out; group = ih->group; if (transport_len == sizeof(*ih)) { max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); if (!max_delay) { max_delay = 10 * HZ; group = 0; } } else if (transport_len >= sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs || (brmctx->multicast_igmp_version == 3 && group && ih3->suppress)) goto out; max_delay = ih3->code ? IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } else { goto out; } if (!group) { saddr.proto = htons(ETH_P_IP); saddr.src.ip4 = iph->saddr; br_ip4_multicast_query_received(brmctx, pmctx, &brmctx->ip4_other_query, &saddr, max_delay); goto out; } mp = br_mdb_ip4_get(brmctx->br, group, vid); if (!mp) goto out; max_delay *= brmctx->multicast_last_member_count; if (mp->host_joined && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) mod_timer(&mp->timer, now + max_delay); for (pp = &mp->ports; (p = mlock_dereference(*pp, brmctx->br)) != NULL; pp = &p->next) { if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0 && (brmctx->multicast_igmp_version == 2 || p->filter_mode == MCAST_EXCLUDE)) mod_timer(&p->timer, now + max_delay); } out: spin_unlock(&brmctx->br->multicast_lock); } #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_query(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { unsigned int transport_len = ipv6_transport_len(skb); struct mld_msg *mld; struct net_bridge_mdb_entry *mp; struct mld2_query *mld2q; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; struct br_ip saddr = {}; unsigned long max_delay; unsigned long now = jiffies; unsigned int offset = skb_transport_offset(skb); const struct in6_addr *group = NULL; bool is_general_query; int err = 0; spin_lock(&brmctx->br->multicast_lock); if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto out; if (transport_len == sizeof(*mld)) { if (!pskb_may_pull(skb, offset + sizeof(*mld))) { err = -EINVAL; goto out; } mld = (struct mld_msg *) icmp6_hdr(skb); max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay)); if (max_delay) group = &mld->mld_mca; } else { if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) { err = -EINVAL; goto out; } mld2q = (struct mld2_query *)icmp6_hdr(skb); if (!mld2q->mld2q_nsrcs) group = &mld2q->mld2q_mca; if (brmctx->multicast_mld_version == 2 && !ipv6_addr_any(&mld2q->mld2q_mca) && mld2q->mld2q_suppress) goto out; max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL); } is_general_query = group && ipv6_addr_any(group); if (is_general_query) { saddr.proto = htons(ETH_P_IPV6); saddr.src.ip6 = ipv6_hdr(skb)->saddr; br_ip6_multicast_query_received(brmctx, pmctx, &brmctx->ip6_other_query, &saddr, max_delay); goto out; } else if (!group) { goto out; } mp = br_mdb_ip6_get(brmctx->br, group, vid); if (!mp) goto out; max_delay *= brmctx->multicast_last_member_count; if (mp->host_joined && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) mod_timer(&mp->timer, now + max_delay); for (pp = &mp->ports; (p = mlock_dereference(*pp, brmctx->br)) != NULL; pp = &p->next) { if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0 && (brmctx->multicast_mld_version == 1 || p->filter_mode == MCAST_EXCLUDE)) mod_timer(&p->timer, now + max_delay); } out: spin_unlock(&brmctx->br->multicast_lock); return err; } #endif static void br_multicast_leave_group(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct br_ip *group, struct bridge_mcast_other_query *other_query, struct bridge_mcast_own_query *own_query, const unsigned char *src) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; unsigned long now; unsigned long time; spin_lock(&brmctx->br->multicast_lock); if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto out; mp = br_mdb_ip_get(brmctx->br, group); if (!mp) goto out; if (pmctx && (pmctx->port->flags & BR_MULTICAST_FAST_LEAVE)) { struct net_bridge_port_group __rcu **pp; for (pp = &mp->ports; (p = mlock_dereference(*pp, brmctx->br)) != NULL; pp = &p->next) { if (!br_port_group_equal(p, pmctx->port, src)) continue; if (p->flags & MDB_PG_FLAGS_PERMANENT) break; p->flags |= MDB_PG_FLAGS_FAST_LEAVE; br_multicast_del_pg(mp, p, pp); } goto out; } if (timer_pending(&other_query->timer)) goto out; if (brmctx->multicast_querier) { __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &mp->addr, false, 0, NULL); time = jiffies + brmctx->multicast_last_member_count * brmctx->multicast_last_member_interval; mod_timer(&own_query->timer, time); for (p = mlock_dereference(mp->ports, brmctx->br); p != NULL && pmctx != NULL; p = mlock_dereference(p->next, brmctx->br)) { if (!br_port_group_equal(p, pmctx->port, src)) continue; if (!hlist_unhashed(&p->mglist) && (timer_pending(&p->timer) ? time_after(p->timer.expires, time) : try_to_del_timer_sync(&p->timer) >= 0)) { mod_timer(&p->timer, time); } break; } } now = jiffies; time = now + brmctx->multicast_last_member_count * brmctx->multicast_last_member_interval; if (!pmctx) { if (mp->host_joined && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { mod_timer(&mp->timer, time); } goto out; } for (p = mlock_dereference(mp->ports, brmctx->br); p != NULL; p = mlock_dereference(p->next, brmctx->br)) { if (p->key.port != pmctx->port) continue; if (!hlist_unhashed(&p->mglist) && (timer_pending(&p->timer) ? time_after(p->timer.expires, time) : try_to_del_timer_sync(&p->timer) >= 0)) { mod_timer(&p->timer, time); } break; } out: spin_unlock(&brmctx->br->multicast_lock); } static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, __be32 group, __u16 vid, const unsigned char *src) { struct br_ip br_group; struct bridge_mcast_own_query *own_query; if (ipv4_is_local_multicast(group)) return; own_query = pmctx ? &pmctx->ip4_own_query : &brmctx->ip4_own_query; memset(&br_group, 0, sizeof(br_group)); br_group.dst.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; br_multicast_leave_group(brmctx, pmctx, &br_group, &brmctx->ip4_other_query, own_query, src); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, const struct in6_addr *group, __u16 vid, const unsigned char *src) { struct br_ip br_group; struct bridge_mcast_own_query *own_query; if (ipv6_addr_is_ll_all_nodes(group)) return; own_query = pmctx ? &pmctx->ip6_own_query : &brmctx->ip6_own_query; memset(&br_group, 0, sizeof(br_group)); br_group.dst.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; br_multicast_leave_group(brmctx, pmctx, &br_group, &brmctx->ip6_other_query, own_query, src); } #endif static void br_multicast_err_count(const struct net_bridge *br, const struct net_bridge_port *p, __be16 proto) { struct bridge_mcast_stats __percpu *stats; struct bridge_mcast_stats *pstats; if (!br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)) return; if (p) stats = p->mcast_stats; else stats = br->mcast_stats; if (WARN_ON(!stats)) return; pstats = this_cpu_ptr(stats); u64_stats_update_begin(&pstats->syncp); switch (proto) { case htons(ETH_P_IP): pstats->mstats.igmp_parse_errors++; break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): pstats->mstats.mld_parse_errors++; break; #endif } u64_stats_update_end(&pstats->syncp); } static void br_multicast_pim(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, const struct sk_buff *skb) { unsigned int offset = skb_transport_offset(skb); struct pimhdr *pimhdr, _pimhdr; pimhdr = skb_header_pointer(skb, offset, sizeof(_pimhdr), &_pimhdr); if (!pimhdr || pim_hdr_version(pimhdr) != PIM_VERSION || pim_hdr_type(pimhdr) != PIM_TYPE_HELLO) return; spin_lock(&brmctx->br->multicast_lock); br_ip4_multicast_mark_router(brmctx, pmctx); spin_unlock(&brmctx->br->multicast_lock); } static int br_ip4_multicast_mrd_rcv(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct sk_buff *skb) { if (ip_hdr(skb)->protocol != IPPROTO_IGMP || igmp_hdr(skb)->type != IGMP_MRDISC_ADV) return -ENOMSG; spin_lock(&brmctx->br->multicast_lock); br_ip4_multicast_mark_router(brmctx, pmctx); spin_unlock(&brmctx->br->multicast_lock); return 0; } static int br_multicast_ipv4_rcv(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { struct net_bridge_port *p = pmctx ? pmctx->port : NULL; const unsigned char *src; struct igmphdr *ih; int err; err = ip_mc_check_igmp(skb); if (err == -ENOMSG) { if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) { BR_INPUT_SKB_CB(skb)->mrouters_only = 1; } else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) { if (ip_hdr(skb)->protocol == IPPROTO_PIM) br_multicast_pim(brmctx, pmctx, skb); } else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) { br_ip4_multicast_mrd_rcv(brmctx, pmctx, skb); } return 0; } else if (err < 0) { br_multicast_err_count(brmctx->br, p, skb->protocol); return err; } ih = igmp_hdr(skb); src = eth_hdr(skb)->h_source; BR_INPUT_SKB_CB(skb)->igmp = ih->type; switch (ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip4_multicast_add_group(brmctx, pmctx, ih->group, vid, src, true); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: err = br_ip4_multicast_igmp3_report(brmctx, pmctx, skb, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: br_ip4_multicast_query(brmctx, pmctx, skb, vid); break; case IGMP_HOST_LEAVE_MESSAGE: br_ip4_multicast_leave_group(brmctx, pmctx, ih->group, vid, src); break; } br_multicast_count(brmctx->br, p, skb, BR_INPUT_SKB_CB(skb)->igmp, BR_MCAST_DIR_RX); return err; } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_mrd_rcv(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct sk_buff *skb) { if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV) return; spin_lock(&brmctx->br->multicast_lock); br_ip6_multicast_mark_router(brmctx, pmctx); spin_unlock(&brmctx->br->multicast_lock); } static int br_multicast_ipv6_rcv(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { struct net_bridge_port *p = pmctx ? pmctx->port : NULL; const unsigned char *src; struct mld_msg *mld; int err; err = ipv6_mc_check_mld(skb); if (err == -ENOMSG || err == -ENODATA) { if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) BR_INPUT_SKB_CB(skb)->mrouters_only = 1; if (err == -ENODATA && ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr)) br_ip6_multicast_mrd_rcv(brmctx, pmctx, skb); return 0; } else if (err < 0) { br_multicast_err_count(brmctx->br, p, skb->protocol); return err; } mld = (struct mld_msg *)skb_transport_header(skb); BR_INPUT_SKB_CB(skb)->igmp = mld->mld_type; switch (mld->mld_type) { case ICMPV6_MGM_REPORT: src = eth_hdr(skb)->h_source; BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip6_multicast_add_group(brmctx, pmctx, &mld->mld_mca, vid, src, true); break; case ICMPV6_MLD2_REPORT: err = br_ip6_multicast_mld2_report(brmctx, pmctx, skb, vid); break; case ICMPV6_MGM_QUERY: err = br_ip6_multicast_query(brmctx, pmctx, skb, vid); break; case ICMPV6_MGM_REDUCTION: src = eth_hdr(skb)->h_source; br_ip6_multicast_leave_group(brmctx, pmctx, &mld->mld_mca, vid, src); break; } br_multicast_count(brmctx->br, p, skb, BR_INPUT_SKB_CB(skb)->igmp, BR_MCAST_DIR_RX); return err; } #endif int br_multicast_rcv(struct net_bridge_mcast **brmctx, struct net_bridge_mcast_port **pmctx, struct net_bridge_vlan *vlan, struct sk_buff *skb, u16 vid) { int ret = 0; BR_INPUT_SKB_CB(skb)->igmp = 0; BR_INPUT_SKB_CB(skb)->mrouters_only = 0; if (!br_opt_get((*brmctx)->br, BROPT_MULTICAST_ENABLED)) return 0; if (br_opt_get((*brmctx)->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && vlan) { const struct net_bridge_vlan *masterv; /* the vlan has the master flag set only when transmitting * through the bridge device */ if (br_vlan_is_master(vlan)) { masterv = vlan; *brmctx = &vlan->br_mcast_ctx; *pmctx = NULL; } else { masterv = vlan->brvlan; *brmctx = &vlan->brvlan->br_mcast_ctx; *pmctx = &vlan->port_mcast_ctx; } if (!(masterv->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)) return 0; } switch (skb->protocol) { case htons(ETH_P_IP): ret = br_multicast_ipv4_rcv(*brmctx, *pmctx, skb, vid); break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): ret = br_multicast_ipv6_rcv(*brmctx, *pmctx, skb, vid); break; #endif } return ret; } static void br_multicast_query_expired(struct net_bridge_mcast *brmctx, struct bridge_mcast_own_query *query, struct bridge_mcast_querier *querier) { spin_lock(&brmctx->br->multicast_lock); if (br_multicast_ctx_vlan_disabled(brmctx)) goto out; if (query->startup_sent < brmctx->multicast_startup_query_count) query->startup_sent++; br_multicast_send_query(brmctx, NULL, query); out: spin_unlock(&brmctx->br->multicast_lock); } static void br_ip4_multicast_query_expired(struct timer_list *t) { struct net_bridge_mcast *brmctx = from_timer(brmctx, t, ip4_own_query.timer); br_multicast_query_expired(brmctx, &brmctx->ip4_own_query, &brmctx->ip4_querier); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_query_expired(struct timer_list *t) { struct net_bridge_mcast *brmctx = from_timer(brmctx, t, ip6_own_query.timer); br_multicast_query_expired(brmctx, &brmctx->ip6_own_query, &brmctx->ip6_querier); } #endif static void br_multicast_gc_work(struct work_struct *work) { struct net_bridge *br = container_of(work, struct net_bridge, mcast_gc_work); HLIST_HEAD(deleted_head); spin_lock_bh(&br->multicast_lock); hlist_move_list(&br->mcast_gc_list, &deleted_head); spin_unlock_bh(&br->multicast_lock); br_multicast_gc(&deleted_head); } void br_multicast_ctx_init(struct net_bridge *br, struct net_bridge_vlan *vlan, struct net_bridge_mcast *brmctx) { brmctx->br = br; brmctx->vlan = vlan; brmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; brmctx->multicast_last_member_count = 2; brmctx->multicast_startup_query_count = 2; brmctx->multicast_last_member_interval = HZ; brmctx->multicast_query_response_interval = 10 * HZ; brmctx->multicast_startup_query_interval = 125 * HZ / 4; brmctx->multicast_query_interval = 125 * HZ; brmctx->multicast_querier_interval = 255 * HZ; brmctx->multicast_membership_interval = 260 * HZ; brmctx->ip4_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock); brmctx->multicast_igmp_version = 2; #if IS_ENABLED(CONFIG_IPV6) brmctx->multicast_mld_version = 1; brmctx->ip6_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock); #endif timer_setup(&brmctx->ip4_mc_router_timer, br_ip4_multicast_local_router_expired, 0); timer_setup(&brmctx->ip4_other_query.timer, br_ip4_multicast_querier_expired, 0); timer_setup(&brmctx->ip4_other_query.delay_timer, br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip4_own_query.timer, br_ip4_multicast_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) timer_setup(&brmctx->ip6_mc_router_timer, br_ip6_multicast_local_router_expired, 0); timer_setup(&brmctx->ip6_other_query.timer, br_ip6_multicast_querier_expired, 0); timer_setup(&brmctx->ip6_other_query.delay_timer, br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip6_own_query.timer, br_ip6_multicast_query_expired, 0); #endif } void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx) { __br_multicast_stop(brmctx); } void br_multicast_init(struct net_bridge *br) { br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX; br_multicast_ctx_init(br, NULL, &br->multicast_ctx); br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true); br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true); spin_lock_init(&br->multicast_lock); INIT_HLIST_HEAD(&br->mdb_list); INIT_HLIST_HEAD(&br->mcast_gc_list); INIT_WORK(&br->mcast_gc_work, br_multicast_gc_work); } static void br_ip4_multicast_join_snoopers(struct net_bridge *br) { struct in_device *in_dev = in_dev_get(br->dev); if (!in_dev) return; __ip_mc_inc_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC); in_dev_put(in_dev); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_join_snoopers(struct net_bridge *br) { struct in6_addr addr; ipv6_addr_set(&addr, htonl(0xff020000), 0, 0, htonl(0x6a)); ipv6_dev_mc_inc(br->dev, &addr); } #else static inline void br_ip6_multicast_join_snoopers(struct net_bridge *br) { } #endif void br_multicast_join_snoopers(struct net_bridge *br) { br_ip4_multicast_join_snoopers(br); br_ip6_multicast_join_snoopers(br); } static void br_ip4_multicast_leave_snoopers(struct net_bridge *br) { struct in_device *in_dev = in_dev_get(br->dev); if (WARN_ON(!in_dev)) return; __ip_mc_dec_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC); in_dev_put(in_dev); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_leave_snoopers(struct net_bridge *br) { struct in6_addr addr; ipv6_addr_set(&addr, htonl(0xff020000), 0, 0, htonl(0x6a)); ipv6_dev_mc_dec(br->dev, &addr); } #else static inline void br_ip6_multicast_leave_snoopers(struct net_bridge *br) { } #endif void br_multicast_leave_snoopers(struct net_bridge *br) { br_ip4_multicast_leave_snoopers(br); br_ip6_multicast_leave_snoopers(br); } static void __br_multicast_open_query(struct net_bridge *br, struct bridge_mcast_own_query *query) { query->startup_sent = 0; if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) return; mod_timer(&query->timer, jiffies); } static void __br_multicast_open(struct net_bridge_mcast *brmctx) { __br_multicast_open_query(brmctx->br, &brmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) __br_multicast_open_query(brmctx->br, &brmctx->ip6_own_query); #endif } void br_multicast_open(struct net_bridge *br) { ASSERT_RTNL(); if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *vlan; vg = br_vlan_group(br); if (vg) { list_for_each_entry(vlan, &vg->vlan_list, vlist) { struct net_bridge_mcast *brmctx; brmctx = &vlan->br_mcast_ctx; if (br_vlan_is_brentry(vlan) && !br_multicast_ctx_vlan_disabled(brmctx)) __br_multicast_open(&vlan->br_mcast_ctx); } } } else { __br_multicast_open(&br->multicast_ctx); } } static void __br_multicast_stop(struct net_bridge_mcast *brmctx) { del_timer_sync(&brmctx->ip4_mc_router_timer); del_timer_sync(&brmctx->ip4_other_query.timer); del_timer_sync(&brmctx->ip4_other_query.delay_timer); del_timer_sync(&brmctx->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) del_timer_sync(&brmctx->ip6_mc_router_timer); del_timer_sync(&brmctx->ip6_other_query.timer); del_timer_sync(&brmctx->ip6_other_query.delay_timer); del_timer_sync(&brmctx->ip6_own_query.timer); #endif } void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on) { struct net_bridge *br; /* it's okay to check for the flag without the multicast lock because it * can only change under RTNL -> multicast_lock, we need the latter to * sync with timers and packets */ if (on == !!(vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED)) return; if (br_vlan_is_master(vlan)) { br = vlan->br; if (!br_vlan_is_brentry(vlan) || (on && br_multicast_ctx_vlan_global_disabled(&vlan->br_mcast_ctx))) return; spin_lock_bh(&br->multicast_lock); vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED; spin_unlock_bh(&br->multicast_lock); if (on) __br_multicast_open(&vlan->br_mcast_ctx); else __br_multicast_stop(&vlan->br_mcast_ctx); } else { struct net_bridge_mcast *brmctx; brmctx = br_multicast_port_ctx_get_global(&vlan->port_mcast_ctx); if (on && br_multicast_ctx_vlan_global_disabled(brmctx)) return; br = vlan->port->br; spin_lock_bh(&br->multicast_lock); vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED; if (on) __br_multicast_enable_port_ctx(&vlan->port_mcast_ctx); else __br_multicast_disable_port_ctx(&vlan->port_mcast_ctx); spin_unlock_bh(&br->multicast_lock); } } static void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on) { struct net_bridge_port *p; if (WARN_ON_ONCE(!br_vlan_is_master(vlan))) return; list_for_each_entry(p, &vlan->br->port_list, list) { struct net_bridge_vlan *vport; vport = br_vlan_find(nbp_vlan_group(p), vlan->vid); if (!vport) continue; br_multicast_toggle_one_vlan(vport, on); } if (br_vlan_is_brentry(vlan)) br_multicast_toggle_one_vlan(vlan, on); } int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, struct netlink_ext_ack *extack) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *vlan; struct net_bridge_port *p; if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) == on) return 0; if (on && !br_opt_get(br, BROPT_VLAN_ENABLED)) { NL_SET_ERR_MSG_MOD(extack, "Cannot enable multicast vlan snooping with vlan filtering disabled"); return -EINVAL; } vg = br_vlan_group(br); if (!vg) return 0; br_opt_toggle(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED, on); /* disable/enable non-vlan mcast contexts based on vlan snooping */ if (on) __br_multicast_stop(&br->multicast_ctx); else __br_multicast_open(&br->multicast_ctx); list_for_each_entry(p, &br->port_list, list) { if (on) br_multicast_disable_port(p); else br_multicast_enable_port(p); } list_for_each_entry(vlan, &vg->vlan_list, vlist) br_multicast_toggle_vlan(vlan, on); return 0; } bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on) { ASSERT_RTNL(); /* BR_VLFLAG_GLOBAL_MCAST_ENABLED relies on eventual consistency and * requires only RTNL to change */ if (on == !!(vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)) return false; vlan->priv_flags ^= BR_VLFLAG_GLOBAL_MCAST_ENABLED; br_multicast_toggle_vlan(vlan, on); return true; } void br_multicast_stop(struct net_bridge *br) { ASSERT_RTNL(); if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *vlan; vg = br_vlan_group(br); if (vg) { list_for_each_entry(vlan, &vg->vlan_list, vlist) { struct net_bridge_mcast *brmctx; brmctx = &vlan->br_mcast_ctx; if (br_vlan_is_brentry(vlan) && !br_multicast_ctx_vlan_disabled(brmctx)) __br_multicast_stop(&vlan->br_mcast_ctx); } } } else { __br_multicast_stop(&br->multicast_ctx); } } void br_multicast_dev_del(struct net_bridge *br) { struct net_bridge_mdb_entry *mp; HLIST_HEAD(deleted_head); struct hlist_node *tmp; spin_lock_bh(&br->multicast_lock); hlist_for_each_entry_safe(mp, tmp, &br->mdb_list, mdb_node) br_multicast_del_mdb_entry(mp); hlist_move_list(&br->mcast_gc_list, &deleted_head); spin_unlock_bh(&br->multicast_lock); br_multicast_ctx_deinit(&br->multicast_ctx); br_multicast_gc(&deleted_head); cancel_work_sync(&br->mcast_gc_work); rcu_barrier(); } int br_multicast_set_router(struct net_bridge_mcast *brmctx, unsigned long val) { int err = -EINVAL; spin_lock_bh(&brmctx->br->multicast_lock); switch (val) { case MDB_RTR_TYPE_DISABLED: case MDB_RTR_TYPE_PERM: br_mc_router_state_change(brmctx->br, val == MDB_RTR_TYPE_PERM); del_timer(&brmctx->ip4_mc_router_timer); #if IS_ENABLED(CONFIG_IPV6) del_timer(&brmctx->ip6_mc_router_timer); #endif brmctx->multicast_router = val; err = 0; break; case MDB_RTR_TYPE_TEMP_QUERY: if (brmctx->multicast_router != MDB_RTR_TYPE_TEMP_QUERY) br_mc_router_state_change(brmctx->br, false); brmctx->multicast_router = val; err = 0; break; } spin_unlock_bh(&brmctx->br->multicast_lock); return err; } static void br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted) { if (!deleted) return; /* For backwards compatibility for now, only notify if there is * no multicast router anymore for both IPv4 and IPv6. */ if (!hlist_unhashed(&pmctx->ip4_rlist)) return; #if IS_ENABLED(CONFIG_IPV6) if (!hlist_unhashed(&pmctx->ip6_rlist)) return; #endif br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_DELMDB); br_port_mc_router_state_change(pmctx->port, false); /* don't allow timer refresh */ if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; } int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx, unsigned long val) { struct net_bridge_mcast *brmctx; unsigned long now = jiffies; int err = -EINVAL; bool del = false; brmctx = br_multicast_port_ctx_get_global(pmctx); spin_lock_bh(&brmctx->br->multicast_lock); if (pmctx->multicast_router == val) { /* Refresh the temp router port timer */ if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) { mod_timer(&pmctx->ip4_mc_router_timer, now + brmctx->multicast_querier_interval); #if IS_ENABLED(CONFIG_IPV6) mod_timer(&pmctx->ip6_mc_router_timer, now + brmctx->multicast_querier_interval); #endif } err = 0; goto unlock; } switch (val) { case MDB_RTR_TYPE_DISABLED: pmctx->multicast_router = MDB_RTR_TYPE_DISABLED; del |= br_ip4_multicast_rport_del(pmctx); del_timer(&pmctx->ip4_mc_router_timer); del |= br_ip6_multicast_rport_del(pmctx); #if IS_ENABLED(CONFIG_IPV6) del_timer(&pmctx->ip6_mc_router_timer); #endif br_multicast_rport_del_notify(pmctx, del); break; case MDB_RTR_TYPE_TEMP_QUERY: pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; del |= br_ip4_multicast_rport_del(pmctx); del |= br_ip6_multicast_rport_del(pmctx); br_multicast_rport_del_notify(pmctx, del); break; case MDB_RTR_TYPE_PERM: pmctx->multicast_router = MDB_RTR_TYPE_PERM; del_timer(&pmctx->ip4_mc_router_timer); br_ip4_multicast_add_router(brmctx, pmctx); #if IS_ENABLED(CONFIG_IPV6) del_timer(&pmctx->ip6_mc_router_timer); #endif br_ip6_multicast_add_router(brmctx, pmctx); break; case MDB_RTR_TYPE_TEMP: pmctx->multicast_router = MDB_RTR_TYPE_TEMP; br_ip4_multicast_mark_router(brmctx, pmctx); br_ip6_multicast_mark_router(brmctx, pmctx); break; default: goto unlock; } err = 0; unlock: spin_unlock_bh(&brmctx->br->multicast_lock); return err; } int br_multicast_set_vlan_router(struct net_bridge_vlan *v, u8 mcast_router) { int err; if (br_vlan_is_master(v)) err = br_multicast_set_router(&v->br_mcast_ctx, mcast_router); else err = br_multicast_set_port_router(&v->port_mcast_ctx, mcast_router); return err; } static void br_multicast_start_querier(struct net_bridge_mcast *brmctx, struct bridge_mcast_own_query *query) { struct net_bridge_port *port; if (!br_multicast_ctx_matches_vlan_snooping(brmctx)) return; __br_multicast_open_query(brmctx->br, query); rcu_read_lock(); list_for_each_entry_rcu(port, &brmctx->br->port_list, list) { struct bridge_mcast_own_query *ip4_own_query; #if IS_ENABLED(CONFIG_IPV6) struct bridge_mcast_own_query *ip6_own_query; #endif if (br_multicast_port_ctx_state_stopped(&port->multicast_ctx)) continue; if (br_multicast_ctx_is_vlan(brmctx)) { struct net_bridge_vlan *vlan; vlan = br_vlan_find(nbp_vlan_group_rcu(port), brmctx->vlan->vid); if (!vlan || br_multicast_port_ctx_state_stopped(&vlan->port_mcast_ctx)) continue; ip4_own_query = &vlan->port_mcast_ctx.ip4_own_query; #if IS_ENABLED(CONFIG_IPV6) ip6_own_query = &vlan->port_mcast_ctx.ip6_own_query; #endif } else { ip4_own_query = &port->multicast_ctx.ip4_own_query; #if IS_ENABLED(CONFIG_IPV6) ip6_own_query = &port->multicast_ctx.ip6_own_query; #endif } if (query == &brmctx->ip4_own_query) br_multicast_enable(ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) else br_multicast_enable(ip6_own_query); #endif } rcu_read_unlock(); } int br_multicast_toggle(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { struct net_bridge_port *port; bool change_snoopers = false; int err = 0; spin_lock_bh(&br->multicast_lock); if (!!br_opt_get(br, BROPT_MULTICAST_ENABLED) == !!val) goto unlock; err = br_mc_disabled_update(br->dev, val, extack); if (err == -EOPNOTSUPP) err = 0; if (err) goto unlock; br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val); if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { change_snoopers = true; goto unlock; } if (!netif_running(br->dev)) goto unlock; br_multicast_open(br); list_for_each_entry(port, &br->port_list, list) __br_multicast_enable_port_ctx(&port->multicast_ctx); change_snoopers = true; unlock: spin_unlock_bh(&br->multicast_lock); /* br_multicast_join_snoopers has the potential to cause * an MLD Report/Leave to be delivered to br_multicast_rcv, * which would in turn call br_multicast_add_group, which would * attempt to acquire multicast_lock. This function should be * called after the lock has been released to avoid deadlocks on * multicast_lock. * * br_multicast_leave_snoopers does not have the problem since * br_multicast_rcv first checks BROPT_MULTICAST_ENABLED, and * returns without calling br_multicast_ipv4/6_rcv if it's not * enabled. Moved both functions out just for symmetry. */ if (change_snoopers) { if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) br_multicast_join_snoopers(br); else br_multicast_leave_snoopers(br); } return err; } bool br_multicast_enabled(const struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); return !!br_opt_get(br, BROPT_MULTICAST_ENABLED); } EXPORT_SYMBOL_GPL(br_multicast_enabled); bool br_multicast_router(const struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); bool is_router; spin_lock_bh(&br->multicast_lock); is_router = br_multicast_is_router(&br->multicast_ctx, NULL); spin_unlock_bh(&br->multicast_lock); return is_router; } EXPORT_SYMBOL_GPL(br_multicast_router); int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val) { unsigned long max_delay; val = !!val; spin_lock_bh(&brmctx->br->multicast_lock); if (brmctx->multicast_querier == val) goto unlock; WRITE_ONCE(brmctx->multicast_querier, val); if (!val) goto unlock; max_delay = brmctx->multicast_query_response_interval; if (!timer_pending(&brmctx->ip4_other_query.timer)) mod_timer(&brmctx->ip4_other_query.delay_timer, jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) if (!timer_pending(&brmctx->ip6_other_query.timer)) mod_timer(&brmctx->ip6_other_query.delay_timer, jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip6_own_query); #endif unlock: spin_unlock_bh(&brmctx->br->multicast_lock); return 0; } int br_multicast_set_igmp_version(struct net_bridge_mcast *brmctx, unsigned long val) { /* Currently we support only version 2 and 3 */ switch (val) { case 2: case 3: break; default: return -EINVAL; } spin_lock_bh(&brmctx->br->multicast_lock); brmctx->multicast_igmp_version = val; spin_unlock_bh(&brmctx->br->multicast_lock); return 0; } #if IS_ENABLED(CONFIG_IPV6) int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx, unsigned long val) { /* Currently we support version 1 and 2 */ switch (val) { case 1: case 2: break; default: return -EINVAL; } spin_lock_bh(&brmctx->br->multicast_lock); brmctx->multicast_mld_version = val; spin_unlock_bh(&brmctx->br->multicast_lock); return 0; } #endif void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, unsigned long val) { unsigned long intvl_jiffies = clock_t_to_jiffies(val); if (intvl_jiffies < BR_MULTICAST_QUERY_INTVL_MIN) { br_info(brmctx->br, "trying to set multicast query interval below minimum, setting to %lu (%ums)\n", jiffies_to_clock_t(BR_MULTICAST_QUERY_INTVL_MIN), jiffies_to_msecs(BR_MULTICAST_QUERY_INTVL_MIN)); intvl_jiffies = BR_MULTICAST_QUERY_INTVL_MIN; } brmctx->multicast_query_interval = intvl_jiffies; } void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, unsigned long val) { unsigned long intvl_jiffies = clock_t_to_jiffies(val); if (intvl_jiffies < BR_MULTICAST_STARTUP_QUERY_INTVL_MIN) { br_info(brmctx->br, "trying to set multicast startup query interval below minimum, setting to %lu (%ums)\n", jiffies_to_clock_t(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN), jiffies_to_msecs(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN)); intvl_jiffies = BR_MULTICAST_STARTUP_QUERY_INTVL_MIN; } brmctx->multicast_startup_query_interval = intvl_jiffies; } /** * br_multicast_list_adjacent - Returns snooped multicast addresses * @dev: The bridge port adjacent to which to retrieve addresses * @br_ip_list: The list to store found, snooped multicast IP addresses in * * Creates a list of IP addresses (struct br_ip_list) sensed by the multicast * snooping feature on all bridge ports of dev's bridge device, excluding * the addresses from dev itself. * * Returns the number of items added to br_ip_list. * * Notes: * - br_ip_list needs to be initialized by caller * - br_ip_list might contain duplicates in the end * (needs to be taken care of by caller) * - br_ip_list needs to be freed by caller */ int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list) { struct net_bridge *br; struct net_bridge_port *port; struct net_bridge_port_group *group; struct br_ip_list *entry; int count = 0; rcu_read_lock(); if (!br_ip_list || !netif_is_bridge_port(dev)) goto unlock; port = br_port_get_rcu(dev); if (!port || !port->br) goto unlock; br = port->br; list_for_each_entry_rcu(port, &br->port_list, list) { if (!port->dev || port->dev == dev) continue; hlist_for_each_entry_rcu(group, &port->mglist, mglist) { entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) goto unlock; entry->addr = group->key.addr; list_add(&entry->list, br_ip_list); count++; } } unlock: rcu_read_unlock(); return count; } EXPORT_SYMBOL_GPL(br_multicast_list_adjacent); /** * br_multicast_has_querier_anywhere - Checks for a querier on a bridge * @dev: The bridge port providing the bridge on which to check for a querier * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6 * * Checks whether the given interface has a bridge on top and if so returns * true if a valid querier exists anywhere on the bridged link layer. * Otherwise returns false. */ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto) { struct net_bridge *br; struct net_bridge_port *port; struct ethhdr eth; bool ret = false; rcu_read_lock(); if (!netif_is_bridge_port(dev)) goto unlock; port = br_port_get_rcu(dev); if (!port || !port->br) goto unlock; br = port->br; memset(&eth, 0, sizeof(eth)); eth.h_proto = htons(proto); ret = br_multicast_querier_exists(&br->multicast_ctx, &eth, NULL); unlock: rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(br_multicast_has_querier_anywhere); /** * br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port * @dev: The bridge port adjacent to which to check for a querier * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6 * * Checks whether the given interface has a bridge on top and if so returns * true if a selected querier is behind one of the other ports of this * bridge. Otherwise returns false. */ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) { struct net_bridge_mcast *brmctx; struct net_bridge *br; struct net_bridge_port *port; bool ret = false; int port_ifidx; rcu_read_lock(); if (!netif_is_bridge_port(dev)) goto unlock; port = br_port_get_rcu(dev); if (!port || !port->br) goto unlock; br = port->br; brmctx = &br->multicast_ctx; switch (proto) { case ETH_P_IP: port_ifidx = brmctx->ip4_querier.port_ifidx; if (!timer_pending(&brmctx->ip4_other_query.timer) || port_ifidx == port->dev->ifindex) goto unlock; break; #if IS_ENABLED(CONFIG_IPV6) case ETH_P_IPV6: port_ifidx = brmctx->ip6_querier.port_ifidx; if (!timer_pending(&brmctx->ip6_other_query.timer) || port_ifidx == port->dev->ifindex) goto unlock; break; #endif default: goto unlock; } ret = true; unlock: rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent); /** * br_multicast_has_router_adjacent - Checks for a router behind a bridge port * @dev: The bridge port adjacent to which to check for a multicast router * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6 * * Checks whether the given interface has a bridge on top and if so returns * true if a multicast router is behind one of the other ports of this * bridge. Otherwise returns false. */ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto) { struct net_bridge_mcast_port *pmctx; struct net_bridge_mcast *brmctx; struct net_bridge_port *port; bool ret = false; rcu_read_lock(); port = br_port_get_check_rcu(dev); if (!port) goto unlock; brmctx = &port->br->multicast_ctx; switch (proto) { case ETH_P_IP: hlist_for_each_entry_rcu(pmctx, &brmctx->ip4_mc_router_list, ip4_rlist) { if (pmctx->port == port) continue; ret = true; goto unlock; } break; #if IS_ENABLED(CONFIG_IPV6) case ETH_P_IPV6: hlist_for_each_entry_rcu(pmctx, &brmctx->ip6_mc_router_list, ip6_rlist) { if (pmctx->port == port) continue; ret = true; goto unlock; } break; #endif default: /* when compiled without IPv6 support, be conservative and * always assume presence of an IPv6 multicast router */ ret = true; } unlock: rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(br_multicast_has_router_adjacent); static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats, const struct sk_buff *skb, u8 type, u8 dir) { struct bridge_mcast_stats *pstats = this_cpu_ptr(stats); __be16 proto = skb->protocol; unsigned int t_len; u64_stats_update_begin(&pstats->syncp); switch (proto) { case htons(ETH_P_IP): t_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb); switch (type) { case IGMP_HOST_MEMBERSHIP_REPORT: pstats->mstats.igmp_v1reports[dir]++; break; case IGMPV2_HOST_MEMBERSHIP_REPORT: pstats->mstats.igmp_v2reports[dir]++; break; case IGMPV3_HOST_MEMBERSHIP_REPORT: pstats->mstats.igmp_v3reports[dir]++; break; case IGMP_HOST_MEMBERSHIP_QUERY: if (t_len != sizeof(struct igmphdr)) { pstats->mstats.igmp_v3queries[dir]++; } else { unsigned int offset = skb_transport_offset(skb); struct igmphdr *ih, _ihdr; ih = skb_header_pointer(skb, offset, sizeof(_ihdr), &_ihdr); if (!ih) break; if (!ih->code) pstats->mstats.igmp_v1queries[dir]++; else pstats->mstats.igmp_v2queries[dir]++; } break; case IGMP_HOST_LEAVE_MESSAGE: pstats->mstats.igmp_leaves[dir]++; break; } break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): t_len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr); t_len -= skb_network_header_len(skb); switch (type) { case ICMPV6_MGM_REPORT: pstats->mstats.mld_v1reports[dir]++; break; case ICMPV6_MLD2_REPORT: pstats->mstats.mld_v2reports[dir]++; break; case ICMPV6_MGM_QUERY: if (t_len != sizeof(struct mld_msg)) pstats->mstats.mld_v2queries[dir]++; else pstats->mstats.mld_v1queries[dir]++; break; case ICMPV6_MGM_REDUCTION: pstats->mstats.mld_leaves[dir]++; break; } break; #endif /* CONFIG_IPV6 */ } u64_stats_update_end(&pstats->syncp); } void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, const struct sk_buff *skb, u8 type, u8 dir) { struct bridge_mcast_stats __percpu *stats; /* if multicast_disabled is true then igmp type can't be set */ if (!type || !br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)) return; if (p) stats = p->mcast_stats; else stats = br->mcast_stats; if (WARN_ON(!stats)) return; br_mcast_stats_add(stats, skb, type, dir); } int br_multicast_init_stats(struct net_bridge *br) { br->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats); if (!br->mcast_stats) return -ENOMEM; return 0; } void br_multicast_uninit_stats(struct net_bridge *br) { free_percpu(br->mcast_stats); } /* noinline for https://llvm.org/pr45802#c9 */ static noinline_for_stack void mcast_stats_add_dir(u64 *dst, u64 *src) { dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX]; dst[BR_MCAST_DIR_TX] += src[BR_MCAST_DIR_TX]; } void br_multicast_get_stats(const struct net_bridge *br, const struct net_bridge_port *p, struct br_mcast_stats *dest) { struct bridge_mcast_stats __percpu *stats; struct br_mcast_stats tdst; int i; memset(dest, 0, sizeof(*dest)); if (p) stats = p->mcast_stats; else stats = br->mcast_stats; if (WARN_ON(!stats)) return; memset(&tdst, 0, sizeof(tdst)); for_each_possible_cpu(i) { struct bridge_mcast_stats *cpu_stats = per_cpu_ptr(stats, i); struct br_mcast_stats temp; unsigned int start; do { start = u64_stats_fetch_begin(&cpu_stats->syncp); memcpy(&temp, &cpu_stats->mstats, sizeof(temp)); } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); mcast_stats_add_dir(tdst.igmp_v1queries, temp.igmp_v1queries); mcast_stats_add_dir(tdst.igmp_v2queries, temp.igmp_v2queries); mcast_stats_add_dir(tdst.igmp_v3queries, temp.igmp_v3queries); mcast_stats_add_dir(tdst.igmp_leaves, temp.igmp_leaves); mcast_stats_add_dir(tdst.igmp_v1reports, temp.igmp_v1reports); mcast_stats_add_dir(tdst.igmp_v2reports, temp.igmp_v2reports); mcast_stats_add_dir(tdst.igmp_v3reports, temp.igmp_v3reports); tdst.igmp_parse_errors += temp.igmp_parse_errors; mcast_stats_add_dir(tdst.mld_v1queries, temp.mld_v1queries); mcast_stats_add_dir(tdst.mld_v2queries, temp.mld_v2queries); mcast_stats_add_dir(tdst.mld_leaves, temp.mld_leaves); mcast_stats_add_dir(tdst.mld_v1reports, temp.mld_v1reports); mcast_stats_add_dir(tdst.mld_v2reports, temp.mld_v2reports); tdst.mld_parse_errors += temp.mld_parse_errors; } memcpy(dest, &tdst, sizeof(*dest)); } int br_mdb_hash_init(struct net_bridge *br) { int err; err = rhashtable_init(&br->sg_port_tbl, &br_sg_port_rht_params); if (err) return err; err = rhashtable_init(&br->mdb_hash_tbl, &br_mdb_rht_params); if (err) { rhashtable_destroy(&br->sg_port_tbl); return err; } return 0; } void br_mdb_hash_fini(struct net_bridge *br) { rhashtable_destroy(&br->sg_port_tbl); rhashtable_destroy(&br->mdb_hash_tbl); }
4 48 93 46 5 2 3 72 2 4 19 4 4 18 2 12 90 15 4 155 1 156 156 156 1 1 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _NET_GRO_H #define _NET_GRO_H #include <linux/indirect_call_wrapper.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <net/ip6_checksum.h> #include <linux/skbuff.h> #include <net/udp.h> #include <net/hotdata.h> /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) struct napi_gro_cb { union { struct { /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ void *frag0; /* Length of frag0. */ unsigned int frag0_len; }; struct { /* used in skb_gro_receive() slow path */ struct sk_buff *last; /* jiffies when first packet was created/queued */ unsigned long age; }; }; /* This indicates where we are processing relative to skb->data. */ int data_offset; /* This is non-zero if the packet cannot be merged with the new skb. */ u16 flush; /* Number of segments aggregated. */ u16 count; /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */ u16 proto; u16 pad; /* Used in napi_gro_cb::free */ #define NAPI_GRO_FREE 1 #define NAPI_GRO_FREE_STOLEN_HEAD 2 /* portion of the cb set to zero at every gro iteration */ struct_group(zeroed, /* Start offset for remote checksum offload */ u16 gro_remcsum_start; /* This is non-zero if the packet may be of the same flow. */ u8 same_flow:1; /* Used in tunnel GRO receive */ u8 encap_mark:1; /* GRO checksum is valid */ u8 csum_valid:1; /* Number of checksums via CHECKSUM_UNNECESSARY */ u8 csum_cnt:3; /* Free the skb? */ u8 free:2; /* Used in foo-over-udp, set in udp[46]_gro_receive */ u8 is_ipv6:1; /* Used in GRE, set in fou/gue_gro_receive */ u8 is_fou:1; /* Used to determine if ipid_offset can be ignored */ u8 ip_fixedid:1; /* Number of gro_receive callbacks this packet already went through */ u8 recursion_counter:4; /* GRO is done by frag_list pointer chaining. */ u8 is_flist:1; ); /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; /* L3 offsets */ union { struct { u16 network_offset; u16 inner_network_offset; }; u16 network_offsets[2]; }; }; #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) #define GRO_RECURSION_LIMIT 15 static inline int gro_recursion_inc_test(struct sk_buff *skb) { return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; } typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *); static inline struct sk_buff *call_gro_receive(gro_receive_t cb, struct list_head *head, struct sk_buff *skb) { if (unlikely(gro_recursion_inc_test(skb))) { NAPI_GRO_CB(skb)->flush |= 1; return NULL; } return cb(head, skb); } typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *, struct sk_buff *); static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, struct sock *sk, struct list_head *head, struct sk_buff *skb) { if (unlikely(gro_recursion_inc_test(skb))) { NAPI_GRO_CB(skb)->flush |= 1; return NULL; } return cb(sk, head, skb); } static inline unsigned int skb_gro_offset(const struct sk_buff *skb) { return NAPI_GRO_CB(skb)->data_offset; } static inline unsigned int skb_gro_len(const struct sk_buff *skb) { return skb->len - NAPI_GRO_CB(skb)->data_offset; } static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) { NAPI_GRO_CB(skb)->data_offset += len; } static inline void *skb_gro_header_fast(const struct sk_buff *skb, unsigned int offset) { return NAPI_GRO_CB(skb)->frag0 + offset; } static inline bool skb_gro_may_pull(const struct sk_buff *skb, unsigned int hlen) { return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len); } static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, unsigned int offset) { if (!pskb_may_pull(skb, hlen)) return NULL; return skb->data + offset; } static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen, unsigned int offset) { void *ptr; ptr = skb_gro_header_fast(skb, offset); if (!skb_gro_may_pull(skb, hlen)) ptr = skb_gro_header_slow(skb, hlen, offset); return ptr; } static inline int skb_gro_receive_network_offset(const struct sk_buff *skb) { return NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark]; } static inline void *skb_gro_network_header(const struct sk_buff *skb) { if (skb_gro_may_pull(skb, skb_gro_offset(skb))) return skb_gro_header_fast(skb, skb_gro_receive_network_offset(skb)); return skb->data + skb_gro_receive_network_offset(skb); } static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb, int proto) { const struct iphdr *iph = skb_gro_network_header(skb); return csum_tcpudp_nofold(iph->saddr, iph->daddr, skb_gro_len(skb), proto, 0); } static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { if (NAPI_GRO_CB(skb)->csum_valid) NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len, wsum_negate(NAPI_GRO_CB(skb)->csum))); } /* GRO checksum functions. These are logical equivalents of the normal * checksum functions (in skbuff.h) except that they operate on the GRO * offsets and fields in sk_buff. */ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb); static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) { return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb)); } static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, bool zero_okay, __sum16 check) { return ((skb->ip_summed != CHECKSUM_PARTIAL || skb_checksum_start_offset(skb) < skb_gro_offset(skb)) && !skb_at_gro_remcsum_start(skb) && NAPI_GRO_CB(skb)->csum_cnt == 0 && (!zero_okay || check)); } static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, __wsum psum) { if (NAPI_GRO_CB(skb)->csum_valid && !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) return 0; NAPI_GRO_CB(skb)->csum = psum; return __skb_gro_checksum_complete(skb); } static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) { if (NAPI_GRO_CB(skb)->csum_cnt > 0) { /* Consume a checksum from CHECKSUM_UNNECESSARY */ NAPI_GRO_CB(skb)->csum_cnt--; } else { /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we * verified a new top level checksum or an encapsulated one * during GRO. This saves work if we fallback to normal path. */ __skb_incr_checksum_unnecessary(skb); } } #define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ compute_pseudo) \ ({ \ __sum16 __ret = 0; \ if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ __ret = __skb_gro_checksum_validate_complete(skb, \ compute_pseudo(skb, proto)); \ if (!__ret) \ skb_gro_incr_csum_unnecessary(skb); \ __ret; \ }) #define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) #define skb_gro_checksum_validate_zero_check(skb, proto, check, \ compute_pseudo) \ __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) #define skb_gro_checksum_simple_validate(skb) \ __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) { return (NAPI_GRO_CB(skb)->csum_cnt == 0 && !NAPI_GRO_CB(skb)->csum_valid); } static inline void __skb_gro_checksum_convert(struct sk_buff *skb, __wsum pseudo) { NAPI_GRO_CB(skb)->csum = ~pseudo; NAPI_GRO_CB(skb)->csum_valid = 1; } #define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \ do { \ if (__skb_gro_checksum_convert_check(skb)) \ __skb_gro_checksum_convert(skb, \ compute_pseudo(skb, proto)); \ } while (0) struct gro_remcsum { int offset; __wsum delta; }; static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) { grc->offset = 0; grc->delta = 0; } static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, unsigned int off, size_t hdrlen, int start, int offset, struct gro_remcsum *grc, bool nopartial) { __wsum delta; size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); if (!nopartial) { NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start; return ptr; } ptr = skb_gro_header(skb, off + plen, off); if (!ptr) return NULL; delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum, start, offset); /* Adjust skb->csum since we changed the packet */ NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); grc->offset = off + hdrlen + offset; grc->delta = delta; return ptr; } static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, struct gro_remcsum *grc) { void *ptr; size_t plen = grc->offset + sizeof(u16); if (!grc->delta) return; ptr = skb_gro_header(skb, plen, grc->offset); if (!ptr) return; remcsum_unadjust((__sum16 *)ptr, grc->delta); } #ifdef CONFIG_XFRM_OFFLOAD static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) { if (PTR_ERR(pp) != -EINPROGRESS) NAPI_GRO_CB(skb)->flush |= flush; } static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, struct sk_buff *pp, int flush, struct gro_remcsum *grc) { if (PTR_ERR(pp) != -EINPROGRESS) { NAPI_GRO_CB(skb)->flush |= flush; skb_gro_remcsum_cleanup(skb, grc); skb->remcsum_offload = 0; } } #else static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) { NAPI_GRO_CB(skb)->flush |= flush; } static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, struct sk_buff *pp, int flush, struct gro_remcsum *grc) { NAPI_GRO_CB(skb)->flush |= flush; skb_gro_remcsum_cleanup(skb, grc); skb->remcsum_offload = 0; } #endif INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ ({ \ unlikely(gro_recursion_inc_test(skb)) ? \ NAPI_GRO_CB(skb)->flush |= 1, NULL : \ INDIRECT_CALL_INET(cb, f2, f1, head, skb); \ }) struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, struct udphdr *uh, struct sock *sk); int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) { struct udphdr *uh; unsigned int hlen, off; off = skb_gro_offset(skb); hlen = off + sizeof(*uh); uh = skb_gro_header(skb, hlen, off); return uh; } static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb, int proto) { const struct ipv6hdr *iph = skb_gro_network_header(skb); return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, skb_gro_len(skb), proto, 0)); } static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *iph2, struct sk_buff *p, bool outer) { const u32 id = ntohl(*(__be32 *)&iph->id); const u32 id2 = ntohl(*(__be32 *)&iph2->id); const u16 ipid_offset = (id >> 16) - (id2 >> 16); const u16 count = NAPI_GRO_CB(p)->count; const u32 df = id & IP_DF; int flush; /* All fields must match except length and checksum. */ flush = (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | (df ^ (id2 & IP_DF)); if (flush | (outer && df)) return flush; /* When we receive our second frame we can make a decision on if we * continue this flow as an atomic flow with a fixed ID or if we use * an incrementing ID. */ if (count == 1 && df && !ipid_offset) NAPI_GRO_CB(p)->ip_fixedid = true; return ipid_offset ^ (count * !NAPI_GRO_CB(p)->ip_fixedid); } static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr *iph2) { /* <Version:4><Traffic_Class:8><Flow_Label:20> */ __be32 first_word = *(__be32 *)iph ^ *(__be32 *)iph2; /* Flush if Traffic Class fields are different. */ return !!((first_word & htonl(0x0FF00000)) | (__force __be32)(iph->hop_limit ^ iph2->hop_limit)); } static inline int __gro_receive_network_flush(const void *th, const void *th2, struct sk_buff *p, const u16 diff, bool outer) { const void *nh = th - diff; const void *nh2 = th2 - diff; if (((struct iphdr *)nh)->version == 6) return ipv6_gro_flush(nh, nh2); else return inet_gro_flush(nh, nh2, p, outer); } static inline int gro_receive_network_flush(const void *th, const void *th2, struct sk_buff *p) { const bool encap_mark = NAPI_GRO_CB(p)->encap_mark; int off = skb_transport_offset(p); int flush; flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, encap_mark); if (encap_mark) flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, false); return flush; } int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb); void __gro_flush(struct gro_node *gro, bool flush_old); static inline void gro_flush(struct gro_node *gro, bool flush_old) { if (!gro->bitmask) return; __gro_flush(gro, flush_old); } static inline void napi_gro_flush(struct napi_struct *napi, bool flush_old) { gro_flush(&napi->gro, flush_old); } /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ static inline void gro_normal_list(struct gro_node *gro) { if (!gro->rx_count) return; netif_receive_skb_list_internal(&gro->rx_list); INIT_LIST_HEAD(&gro->rx_list); gro->rx_count = 0; } /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, * pass the whole batch up to the stack. */ static inline void gro_normal_one(struct gro_node *gro, struct sk_buff *skb, int segs) { list_add_tail(&skb->list, &gro->rx_list); gro->rx_count += segs; if (gro->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch)) gro_normal_list(gro); } void gro_init(struct gro_node *gro); void gro_cleanup(struct gro_node *gro); /* This function is the alternative of 'inet_iif' and 'inet_sdif' * functions in case we can not rely on fields of IPCB. * * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. * The caller must hold the RCU read lock. */ static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) { *iif = inet_iif(skb) ?: skb->dev->ifindex; *sdif = 0; #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (netif_is_l3_slave(skb->dev)) { struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); *sdif = *iif; *iif = master ? master->ifindex : 0; } #endif } /* This function is the alternative of 'inet6_iif' and 'inet6_sdif' * functions in case we can not rely on fields of IP6CB. * * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. * The caller must hold the RCU read lock. */ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) { /* using skb->dev->ifindex because skb_dst(skb) is not initialized */ *iif = skb->dev->ifindex; *sdif = 0; #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (netif_is_l3_slave(skb->dev)) { struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); *sdif = *iif; *iif = master ? master->ifindex : 0; } #endif } struct packet_offload *gro_find_receive_by_type(__be16 type); struct packet_offload *gro_find_complete_by_type(__be16 type); #endif /* _NET_GRO_H */
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 // SPDX-License-Identifier: GPL-2.0-only /* * 'raw' table, which is the very first hooked in at PRE_ROUTING and LOCAL_OUT . * * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/slab.h> #include <net/ip.h> #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) static bool raw_before_defrag __read_mostly; MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag"); module_param(raw_before_defrag, bool, 0000); static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_RAW, }; static const struct xt_table packet_raw_before_defrag = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG, }; static struct nf_hook_ops *rawtable_ops __read_mostly; static int iptable_raw_table_init(struct net *net) { struct ipt_replace *repl; const struct xt_table *table = &packet_raw; int ret; if (raw_before_defrag) table = &packet_raw_before_defrag; repl = ipt_alloc_initial_table(table); if (repl == NULL) return -ENOMEM; ret = ipt_register_table(net, table, repl, rawtable_ops); kfree(repl); return ret; } static void __net_exit iptable_raw_net_pre_exit(struct net *net) { ipt_unregister_table_pre_exit(net, "raw"); } static void __net_exit iptable_raw_net_exit(struct net *net) { ipt_unregister_table_exit(net, "raw"); } static struct pernet_operations iptable_raw_net_ops = { .pre_exit = iptable_raw_net_pre_exit, .exit = iptable_raw_net_exit, }; static int __init iptable_raw_init(void) { int ret; const struct xt_table *table = &packet_raw; if (raw_before_defrag) { table = &packet_raw_before_defrag; pr_info("Enabling raw table before defrag\n"); } ret = xt_register_template(table, iptable_raw_table_init); if (ret < 0) return ret; rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table); if (IS_ERR(rawtable_ops)) { xt_unregister_template(table); return PTR_ERR(rawtable_ops); } ret = register_pernet_subsys(&iptable_raw_net_ops); if (ret < 0) { xt_unregister_template(table); kfree(rawtable_ops); return ret; } return ret; } static void __exit iptable_raw_fini(void) { unregister_pernet_subsys(&iptable_raw_net_ops); kfree(rawtable_ops); xt_unregister_template(&packet_raw); } module_init(iptable_raw_init); module_exit(iptable_raw_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("iptables legacy raw table");
2 2 2 2 2 1 4 4 4 4 4 1 1 3 2 2 3 1 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Bobby Eshleman <bobby.eshleman@bytedance.com> * * Based off of net/unix/unix_bpf.c */ #include <linux/bpf.h> #include <linux/module.h> #include <linux/skmsg.h> #include <linux/socket.h> #include <linux/wait.h> #include <net/af_vsock.h> #include <net/sock.h> #define vsock_sk_has_data(__sk, __psock) \ ({ !skb_queue_empty(&(__sk)->sk_receive_queue) || \ !skb_queue_empty(&(__psock)->ingress_skb) || \ !list_empty(&(__psock)->ingress_msg); \ }) static struct proto *vsock_prot_saved __read_mostly; static DEFINE_SPINLOCK(vsock_prot_lock); static struct proto vsock_bpf_prot; static bool vsock_has_data(struct sock *sk, struct sk_psock *psock) { struct vsock_sock *vsk = vsock_sk(sk); s64 ret; ret = vsock_connectible_has_data(vsk); if (ret > 0) return true; return vsock_sk_has_data(sk, psock); } static bool vsock_msg_wait_data(struct sock *sk, struct sk_psock *psock, long timeo) { bool ret; DEFINE_WAIT_FUNC(wait, woken_wake_function); if (sk->sk_shutdown & RCV_SHUTDOWN) return true; if (!timeo) return false; add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); ret = vsock_has_data(sk, psock); if (!ret) { wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); ret = vsock_has_data(sk, psock); } sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); remove_wait_queue(sk_sleep(sk), &wait); return ret; } static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) { struct socket *sock = sk->sk_socket; int err; if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) err = __vsock_connectible_recvmsg(sock, msg, len, flags); else if (sk->sk_type == SOCK_DGRAM) err = __vsock_dgram_recvmsg(sock, msg, len, flags); else err = -EPROTOTYPE; return err; } static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct sk_psock *psock; struct vsock_sock *vsk; int copied; psock = sk_psock_get(sk); if (unlikely(!psock)) return __vsock_recvmsg(sk, msg, len, flags); lock_sock(sk); vsk = vsock_sk(sk); if (WARN_ON_ONCE(!vsk->transport)) { copied = -ENODEV; goto out; } if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) { release_sock(sk); sk_psock_put(sk, psock); return __vsock_recvmsg(sk, msg, len, flags); } copied = sk_msg_recvmsg(sk, psock, msg, len, flags); while (copied == 0) { long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); if (!vsock_msg_wait_data(sk, psock, timeo)) { copied = -EAGAIN; break; } if (sk_psock_queue_empty(psock)) { release_sock(sk); sk_psock_put(sk, psock); return __vsock_recvmsg(sk, msg, len, flags); } copied = sk_msg_recvmsg(sk, psock, msg, len, flags); } out: release_sock(sk); sk_psock_put(sk, psock); return copied; } static void vsock_bpf_rebuild_protos(struct proto *prot, const struct proto *base) { *prot = *base; prot->close = sock_map_close; prot->recvmsg = vsock_bpf_recvmsg; prot->sock_is_readable = sk_msg_is_readable; } static void vsock_bpf_check_needs_rebuild(struct proto *ops) { /* Paired with the smp_store_release() below. */ if (unlikely(ops != smp_load_acquire(&vsock_prot_saved))) { spin_lock_bh(&vsock_prot_lock); if (likely(ops != vsock_prot_saved)) { vsock_bpf_rebuild_protos(&vsock_bpf_prot, ops); /* Make sure proto function pointers are updated before publishing the * pointer to the struct. */ smp_store_release(&vsock_prot_saved, ops); } spin_unlock_bh(&vsock_prot_lock); } } int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { struct vsock_sock *vsk; if (restore) { sk->sk_write_space = psock->saved_write_space; sock_replace_proto(sk, psock->sk_proto); return 0; } vsk = vsock_sk(sk); if (!vsk->transport) return -ENODEV; if (!vsk->transport->read_skb) return -EOPNOTSUPP; vsock_bpf_check_needs_rebuild(psock->sk_proto); sock_replace_proto(sk, &vsock_bpf_prot); return 0; } void __init vsock_bpf_build_proto(void) { vsock_bpf_rebuild_protos(&vsock_bpf_prot, &vsock_proto); }
4 4 4 1 1 1 1 1 1 1 1 1 26 2 24 14 1 4 7 7 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/module.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_socket.h> #include <net/inet_sock.h> #include <net/tcp.h> struct nft_socket { enum nft_socket_keys key:8; u8 level; /* cgroupv2 level to extract */ u8 level_user; /* cgroupv2 level provided by userspace */ u8 len; union { u8 dreg; }; }; static void nft_socket_wildcard(const struct nft_pktinfo *pkt, struct nft_regs *regs, struct sock *sk, u32 *dest) { switch (nft_pf(pkt)) { case NFPROTO_IPV4: nft_reg_store8(dest, inet_sk(sk)->inet_rcv_saddr == 0); break; #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: nft_reg_store8(dest, ipv6_addr_any(&sk->sk_v6_rcv_saddr)); break; #endif default: regs->verdict.code = NFT_BREAK; return; } } #ifdef CONFIG_SOCK_CGROUP_DATA static noinline bool nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level) { struct cgroup *cgrp; u64 cgid; if (!sk_fullsock(sk)) return false; cgrp = cgroup_ancestor(sock_cgroup_ptr(&sk->sk_cgrp_data), level); if (!cgrp) return false; cgid = cgroup_id(cgrp); memcpy(dest, &cgid, sizeof(u64)); return true; } /* process context only, uses current->nsproxy. */ static noinline int nft_socket_cgroup_subtree_level(void) { struct cgroup *cgrp = cgroup_get_from_path("/"); int level; if (IS_ERR(cgrp)) return PTR_ERR(cgrp); level = cgrp->level; cgroup_put(cgrp); if (level > 255) return -ERANGE; if (WARN_ON_ONCE(level < 0)) return -EINVAL; return level; } #endif static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) { const struct net_device *indev = nft_in(pkt); const struct sk_buff *skb = pkt->skb; struct sock *sk = NULL; if (!indev) return NULL; switch (nft_pf(pkt)) { case NFPROTO_IPV4: sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev); break; #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev); break; #endif default: WARN_ON_ONCE(1); break; } return sk; } static void nft_socket_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_socket *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; struct sock *sk = skb->sk; u32 *dest = &regs->data[priv->dreg]; if (sk && !net_eq(nft_net(pkt), sock_net(sk))) sk = NULL; if (!sk) sk = nft_socket_do_lookup(pkt); if (!sk) { regs->verdict.code = NFT_BREAK; return; } switch(priv->key) { case NFT_SOCKET_TRANSPARENT: nft_reg_store8(dest, inet_sk_transparent(sk)); break; case NFT_SOCKET_MARK: if (sk_fullsock(sk)) { *dest = READ_ONCE(sk->sk_mark); } else { regs->verdict.code = NFT_BREAK; goto out_put_sk; } break; case NFT_SOCKET_WILDCARD: if (!sk_fullsock(sk)) { regs->verdict.code = NFT_BREAK; goto out_put_sk; } nft_socket_wildcard(pkt, regs, sk, dest); break; #ifdef CONFIG_SOCK_CGROUP_DATA case NFT_SOCKET_CGROUPV2: if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) { regs->verdict.code = NFT_BREAK; goto out_put_sk; } break; #endif default: WARN_ON(1); regs->verdict.code = NFT_BREAK; } out_put_sk: if (sk != skb->sk) sock_gen_put(sk); } static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = { [NFTA_SOCKET_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_SOCKET_DREG] = { .type = NLA_U32 }, [NFTA_SOCKET_LEVEL] = NLA_POLICY_MAX(NLA_BE32, 255), }; static int nft_socket_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_socket *priv = nft_expr_priv(expr); unsigned int len; if (!tb[NFTA_SOCKET_DREG] || !tb[NFTA_SOCKET_KEY]) return -EINVAL; switch(ctx->family) { case NFPROTO_IPV4: #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: #endif case NFPROTO_INET: break; default: return -EOPNOTSUPP; } priv->key = ntohl(nla_get_be32(tb[NFTA_SOCKET_KEY])); switch(priv->key) { case NFT_SOCKET_TRANSPARENT: case NFT_SOCKET_WILDCARD: len = sizeof(u8); break; case NFT_SOCKET_MARK: len = sizeof(u32); break; #ifdef CONFIG_SOCK_CGROUP_DATA case NFT_SOCKET_CGROUPV2: { unsigned int level; int err; if (!tb[NFTA_SOCKET_LEVEL]) return -EINVAL; level = ntohl(nla_get_be32(tb[NFTA_SOCKET_LEVEL])); if (level > 255) return -EOPNOTSUPP; err = nft_socket_cgroup_subtree_level(); if (err < 0) return err; priv->level_user = level; level += err; /* Implies a giant cgroup tree */ if (WARN_ON_ONCE(level > 255)) return -EOPNOTSUPP; priv->level = level; len = sizeof(u64); break; } #endif default: return -EOPNOTSUPP; } priv->len = len; return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); } static int nft_socket_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_socket *priv = nft_expr_priv(expr); if (nla_put_be32(skb, NFTA_SOCKET_KEY, htonl(priv->key))) return -1; if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg)) return -1; if (priv->key == NFT_SOCKET_CGROUPV2 && nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user))) return -1; return 0; } static bool nft_socket_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { const struct nft_socket *priv = nft_expr_priv(expr); const struct nft_socket *socket; if (!nft_reg_track_cmp(track, expr, priv->dreg)) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } socket = nft_expr_priv(track->regs[priv->dreg].selector); if (priv->key != socket->key || priv->dreg != socket->dreg || priv->level != socket->level) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } if (!track->regs[priv->dreg].bitwise) return true; return nft_expr_reduce_bitwise(track, expr); } static int nft_socket_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && ctx->family != NFPROTO_INET) return -EOPNOTSUPP; return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT)); } static struct nft_expr_type nft_socket_type; static const struct nft_expr_ops nft_socket_ops = { .type = &nft_socket_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_socket)), .eval = nft_socket_eval, .init = nft_socket_init, .dump = nft_socket_dump, .validate = nft_socket_validate, .reduce = nft_socket_reduce, }; static struct nft_expr_type nft_socket_type __read_mostly = { .name = "socket", .ops = &nft_socket_ops, .policy = nft_socket_policy, .maxattr = NFTA_SOCKET_MAX, .owner = THIS_MODULE, }; static int __init nft_socket_module_init(void) { return nft_register_expr(&nft_socket_type); } static void __exit nft_socket_module_exit(void) { nft_unregister_expr(&nft_socket_type); } module_init(nft_socket_module_init); module_exit(nft_socket_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Máté Eckl"); MODULE_DESCRIPTION("nf_tables socket match module"); MODULE_ALIAS_NFT_EXPR("socket");
7 1 1 2 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 /* Kernel module to match connection tracking byte counter. * GPL (C) 2002 Martin Devera (devik@cdi.cz). */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/math64.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_connbytes.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_acct.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: Number of packets/bytes per connection matching"); MODULE_ALIAS("ipt_connbytes"); MODULE_ALIAS("ip6t_connbytes"); static bool connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_connbytes_info *sinfo = par->matchinfo; const struct nf_conn *ct; enum ip_conntrack_info ctinfo; u_int64_t what = 0; /* initialize to make gcc happy */ u_int64_t bytes = 0; u_int64_t pkts = 0; const struct nf_conn_acct *acct; const struct nf_conn_counter *counters; ct = nf_ct_get(skb, &ctinfo); if (!ct) return false; acct = nf_conn_acct_find(ct); if (!acct) return false; counters = acct->counter; switch (sinfo->what) { case XT_CONNBYTES_PKTS: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); break; case XT_CONNBYTES_DIR_REPLY: what = atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; case XT_CONNBYTES_DIR_BOTH: what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); what += atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; } break; case XT_CONNBYTES_BYTES: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); break; case XT_CONNBYTES_DIR_REPLY: what = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); break; case XT_CONNBYTES_DIR_BOTH: what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); what += atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); break; } break; case XT_CONNBYTES_AVGPKT: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); break; case XT_CONNBYTES_DIR_REPLY: bytes = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); pkts = atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; case XT_CONNBYTES_DIR_BOTH: bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes) + atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets) + atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; } if (pkts != 0) what = div64_u64(bytes, pkts); break; } if (sinfo->count.to >= sinfo->count.from) return what <= sinfo->count.to && what >= sinfo->count.from; else /* inverted */ return what < sinfo->count.to || what > sinfo->count.from; } static int connbytes_mt_check(const struct xt_mtchk_param *par) { const struct xt_connbytes_info *sinfo = par->matchinfo; int ret; if (sinfo->what != XT_CONNBYTES_PKTS && sinfo->what != XT_CONNBYTES_BYTES && sinfo->what != XT_CONNBYTES_AVGPKT) return -EINVAL; if (sinfo->direction != XT_CONNBYTES_DIR_ORIGINAL && sinfo->direction != XT_CONNBYTES_DIR_REPLY && sinfo->direction != XT_CONNBYTES_DIR_BOTH) return -EINVAL; ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info_ratelimited("cannot load conntrack support for proto=%u\n", par->family); return ret; } /* * This filter cannot function correctly unless connection tracking * accounting is enabled, so complain in the hope that someone notices. */ if (!nf_ct_acct_enabled(par->net)) { pr_warn("Forcing CT accounting to be enabled\n"); nf_ct_set_acct(par->net, true); } return ret; } static void connbytes_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_netns_put(par->net, par->family); } static struct xt_match connbytes_mt_reg __read_mostly = { .name = "connbytes", .revision = 0, .family = NFPROTO_UNSPEC, .checkentry = connbytes_mt_check, .match = connbytes_mt, .destroy = connbytes_mt_destroy, .matchsize = sizeof(struct xt_connbytes_info), .me = THIS_MODULE, }; static int __init connbytes_mt_init(void) { return xt_register_match(&connbytes_mt_reg); } static void __exit connbytes_mt_exit(void) { xt_unregister_match(&connbytes_mt_reg); } module_init(connbytes_mt_init); module_exit(connbytes_mt_exit);
27 27 26 8 9 1 1 3 1 1 1 1 4 4 4 4 4 4 7 7 1 6 3 1 2 2 1 2 1 1 12 11 2 4 19 1 1 15 2 14 3 3 3 3 5 2 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2018 Facebook */ #include <linux/bpf.h> #include <linux/err.h> #include <linux/sock_diag.h> #include <net/sock_reuseport.h> #include <linux/btf_ids.h> struct reuseport_array { struct bpf_map map; struct sock __rcu *ptrs[]; }; static struct reuseport_array *reuseport_array(struct bpf_map *map) { return (struct reuseport_array *)map; } /* The caller must hold the reuseport_lock */ void bpf_sk_reuseport_detach(struct sock *sk) { struct sock __rcu **socks; write_lock_bh(&sk->sk_callback_lock); socks = __locked_read_sk_user_data_with_flags(sk, SK_USER_DATA_BPF); if (socks) { WRITE_ONCE(sk->sk_user_data, NULL); /* * Do not move this NULL assignment outside of * sk->sk_callback_lock because there is * a race with reuseport_array_free() * which does not hold the reuseport_lock. */ RCU_INIT_POINTER(*socks, NULL); } write_unlock_bh(&sk->sk_callback_lock); } static int reuseport_array_alloc_check(union bpf_attr *attr) { if (attr->value_size != sizeof(u32) && attr->value_size != sizeof(u64)) return -EINVAL; return array_map_alloc_check(attr); } static void *reuseport_array_lookup_elem(struct bpf_map *map, void *key) { struct reuseport_array *array = reuseport_array(map); u32 index = *(u32 *)key; if (unlikely(index >= array->map.max_entries)) return NULL; return rcu_dereference(array->ptrs[index]); } /* Called from syscall only */ static long reuseport_array_delete_elem(struct bpf_map *map, void *key) { struct reuseport_array *array = reuseport_array(map); u32 index = *(u32 *)key; struct sock *sk; int err; if (index >= map->max_entries) return -E2BIG; if (!rcu_access_pointer(array->ptrs[index])) return -ENOENT; spin_lock_bh(&reuseport_lock); sk = rcu_dereference_protected(array->ptrs[index], lockdep_is_held(&reuseport_lock)); if (sk) { write_lock_bh(&sk->sk_callback_lock); WRITE_ONCE(sk->sk_user_data, NULL); RCU_INIT_POINTER(array->ptrs[index], NULL); write_unlock_bh(&sk->sk_callback_lock); err = 0; } else { err = -ENOENT; } spin_unlock_bh(&reuseport_lock); return err; } static void reuseport_array_free(struct bpf_map *map) { struct reuseport_array *array = reuseport_array(map); struct sock *sk; u32 i; /* * ops->map_*_elem() will not be able to access this * array now. Hence, this function only races with * bpf_sk_reuseport_detach() which was triggered by * close() or disconnect(). * * This function and bpf_sk_reuseport_detach() are * both removing sk from "array". Who removes it * first does not matter. * * The only concern here is bpf_sk_reuseport_detach() * may access "array" which is being freed here. * bpf_sk_reuseport_detach() access this "array" * through sk->sk_user_data _and_ with sk->sk_callback_lock * held which is enough because this "array" is not freed * until all sk->sk_user_data has stopped referencing this "array". * * Hence, due to the above, taking "reuseport_lock" is not * needed here. */ /* * Since reuseport_lock is not taken, sk is accessed under * rcu_read_lock() */ rcu_read_lock(); for (i = 0; i < map->max_entries; i++) { sk = rcu_dereference(array->ptrs[i]); if (sk) { write_lock_bh(&sk->sk_callback_lock); /* * No need for WRITE_ONCE(). At this point, * no one is reading it without taking the * sk->sk_callback_lock. */ sk->sk_user_data = NULL; write_unlock_bh(&sk->sk_callback_lock); RCU_INIT_POINTER(array->ptrs[i], NULL); } } rcu_read_unlock(); /* * Once reaching here, all sk->sk_user_data is not * referencing this "array". "array" can be freed now. */ bpf_map_area_free(array); } static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) { int numa_node = bpf_map_attr_numa_node(attr); struct reuseport_array *array; /* allocate all map elements and zero-initialize them */ array = bpf_map_area_alloc(struct_size(array, ptrs, attr->max_entries), numa_node); if (!array) return ERR_PTR(-ENOMEM); /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); return &array->map; } int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, void *value) { struct sock *sk; int err; if (map->value_size != sizeof(u64)) return -ENOSPC; rcu_read_lock(); sk = reuseport_array_lookup_elem(map, key); if (sk) { *(u64 *)value = __sock_gen_cookie(sk); err = 0; } else { err = -ENOENT; } rcu_read_unlock(); return err; } static int reuseport_array_update_check(const struct reuseport_array *array, const struct sock *nsk, const struct sock *osk, const struct sock_reuseport *nsk_reuse, u32 map_flags) { if (osk && map_flags == BPF_NOEXIST) return -EEXIST; if (!osk && map_flags == BPF_EXIST) return -ENOENT; if (nsk->sk_protocol != IPPROTO_UDP && nsk->sk_protocol != IPPROTO_TCP) return -ENOTSUPP; if (nsk->sk_family != AF_INET && nsk->sk_family != AF_INET6) return -ENOTSUPP; if (nsk->sk_type != SOCK_STREAM && nsk->sk_type != SOCK_DGRAM) return -ENOTSUPP; /* * sk must be hashed (i.e. listening in the TCP case or binded * in the UDP case) and * it must also be a SO_REUSEPORT sk (i.e. reuse cannot be NULL). * * Also, sk will be used in bpf helper that is protected by * rcu_read_lock(). */ if (!sock_flag(nsk, SOCK_RCU_FREE) || !sk_hashed(nsk) || !nsk_reuse) return -EINVAL; /* READ_ONCE because the sk->sk_callback_lock may not be held here */ if (READ_ONCE(nsk->sk_user_data)) return -EBUSY; return 0; } /* * Called from syscall only. * The "nsk" in the fd refcnt. * The "osk" and "reuse" are protected by reuseport_lock. */ int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct reuseport_array *array = reuseport_array(map); struct sock *free_osk = NULL, *osk, *nsk; struct sock_reuseport *reuse; u32 index = *(u32 *)key; uintptr_t sk_user_data; struct socket *socket; int err, fd; if (map_flags > BPF_EXIST) return -EINVAL; if (index >= map->max_entries) return -E2BIG; if (map->value_size == sizeof(u64)) { u64 fd64 = *(u64 *)value; if (fd64 > S32_MAX) return -EINVAL; fd = fd64; } else { fd = *(int *)value; } socket = sockfd_lookup(fd, &err); if (!socket) return err; nsk = socket->sk; if (!nsk) { err = -EINVAL; goto put_file; } /* Quick checks before taking reuseport_lock */ err = reuseport_array_update_check(array, nsk, rcu_access_pointer(array->ptrs[index]), rcu_access_pointer(nsk->sk_reuseport_cb), map_flags); if (err) goto put_file; spin_lock_bh(&reuseport_lock); /* * Some of the checks only need reuseport_lock * but it is done under sk_callback_lock also * for simplicity reason. */ write_lock_bh(&nsk->sk_callback_lock); osk = rcu_dereference_protected(array->ptrs[index], lockdep_is_held(&reuseport_lock)); reuse = rcu_dereference_protected(nsk->sk_reuseport_cb, lockdep_is_held(&reuseport_lock)); err = reuseport_array_update_check(array, nsk, osk, reuse, map_flags); if (err) goto put_file_unlock; sk_user_data = (uintptr_t)&array->ptrs[index] | SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF; WRITE_ONCE(nsk->sk_user_data, (void *)sk_user_data); rcu_assign_pointer(array->ptrs[index], nsk); free_osk = osk; err = 0; put_file_unlock: write_unlock_bh(&nsk->sk_callback_lock); if (free_osk) { write_lock_bh(&free_osk->sk_callback_lock); WRITE_ONCE(free_osk->sk_user_data, NULL); write_unlock_bh(&free_osk->sk_callback_lock); } spin_unlock_bh(&reuseport_lock); put_file: sockfd_put(socket); return err; } /* Called from syscall */ static int reuseport_array_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct reuseport_array *array = reuseport_array(map); u32 index = key ? *(u32 *)key : U32_MAX; u32 *next = (u32 *)next_key; if (index >= array->map.max_entries) { *next = 0; return 0; } if (index == array->map.max_entries - 1) return -ENOENT; *next = index + 1; return 0; } static u64 reuseport_array_mem_usage(const struct bpf_map *map) { struct reuseport_array *array; return struct_size(array, ptrs, map->max_entries); } BTF_ID_LIST_SINGLE(reuseport_array_map_btf_ids, struct, reuseport_array) const struct bpf_map_ops reuseport_array_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = reuseport_array_alloc_check, .map_alloc = reuseport_array_alloc, .map_free = reuseport_array_free, .map_lookup_elem = reuseport_array_lookup_elem, .map_get_next_key = reuseport_array_get_next_key, .map_delete_elem = reuseport_array_delete_elem, .map_mem_usage = reuseport_array_mem_usage, .map_btf_id = &reuseport_array_map_btf_ids[0], };
3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 // SPDX-License-Identifier: GPL-2.0-or-later /* * (c) 1999 Andreas Gal <gal@cs.uni-magdeburg.de> * (c) 2000-2001 Vojtech Pavlik <vojtech@ucw.cz> * (c) 2007-2009 Jiri Kosina * * HID debugging support */ /* * * Should you need to contact me, the author, you can do so either by * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail: * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/kfifo.h> #include <linux/sched/signal.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/poll.h> #include <linux/hid.h> #include <linux/hid-debug.h> static struct dentry *hid_debug_root; struct hid_usage_entry { unsigned page; unsigned usage; const char *description; }; static const struct hid_usage_entry hid_usage_table[] = { { 0x00, 0, "Undefined" }, { 0x01, 0, "GenericDesktop" }, { 0x01, 0x0001, "Pointer" }, { 0x01, 0x0002, "Mouse" }, { 0x01, 0x0004, "Joystick" }, { 0x01, 0x0005, "Gamepad" }, { 0x01, 0x0006, "Keyboard" }, { 0x01, 0x0007, "Keypad" }, { 0x01, 0x0008, "MultiaxisController" }, { 0x01, 0x0009, "TabletPCSystemControls" }, { 0x01, 0x000a, "WaterCoolingDevice" }, { 0x01, 0x000b, "ComputerChassisDevice" }, { 0x01, 0x000c, "WirelessRadioControls" }, { 0x01, 0x000d, "PortableDeviceControl" }, { 0x01, 0x000e, "SystemMultiAxisController" }, { 0x01, 0x000f, "SpatialController" }, { 0x01, 0x0010, "AssistiveControl" }, { 0x01, 0x0011, "DeviceDock" }, { 0x01, 0x0012, "DockableDevice" }, { 0x01, 0x0013, "CallStateManagementControl" }, { 0x01, 0x0030, "X" }, { 0x01, 0x0031, "Y" }, { 0x01, 0x0032, "Z" }, { 0x01, 0x0033, "Rx" }, { 0x01, 0x0034, "Ry" }, { 0x01, 0x0035, "Rz" }, { 0x01, 0x0036, "Slider" }, { 0x01, 0x0037, "Dial" }, { 0x01, 0x0038, "Wheel" }, { 0x01, 0x0039, "HatSwitch" }, { 0x01, 0x003a, "CountedBuffer" }, { 0x01, 0x003b, "ByteCount" }, { 0x01, 0x003c, "MotionWakeup" }, { 0x01, 0x003d, "Start" }, { 0x01, 0x003e, "Select" }, { 0x01, 0x0040, "Vx" }, { 0x01, 0x0041, "Vy" }, { 0x01, 0x0042, "Vz" }, { 0x01, 0x0043, "Vbrx" }, { 0x01, 0x0044, "Vbry" }, { 0x01, 0x0045, "Vbrz" }, { 0x01, 0x0046, "Vno" }, { 0x01, 0x0047, "FeatureNotification" }, { 0x01, 0x0048, "ResolutionMultiplier" }, { 0x01, 0x0049, "Qx" }, { 0x01, 0x004a, "Qy" }, { 0x01, 0x004b, "Qz" }, { 0x01, 0x004c, "Qw" }, { 0x01, 0x0080, "SystemControl" }, { 0x01, 0x0081, "SystemPowerDown" }, { 0x01, 0x0082, "SystemSleep" }, { 0x01, 0x0083, "SystemWakeUp" }, { 0x01, 0x0084, "SystemContextMenu" }, { 0x01, 0x0085, "SystemMainMenu" }, { 0x01, 0x0086, "SystemAppMenu" }, { 0x01, 0x0087, "SystemMenuHelp" }, { 0x01, 0x0088, "SystemMenuExit" }, { 0x01, 0x0089, "SystemMenuSelect" }, { 0x01, 0x008a, "SystemMenuRight" }, { 0x01, 0x008b, "SystemMenuLeft" }, { 0x01, 0x008c, "SystemMenuUp" }, { 0x01, 0x008d, "SystemMenuDown" }, { 0x01, 0x008e, "SystemColdRestart" }, { 0x01, 0x008f, "SystemWarmRestart" }, { 0x01, 0x0090, "DpadUp" }, { 0x01, 0x0091, "DpadDown" }, { 0x01, 0x0092, "DpadRight" }, { 0x01, 0x0093, "DpadLeft" }, { 0x01, 0x0094, "IndexTrigger" }, { 0x01, 0x0095, "PalmTrigger" }, { 0x01, 0x0096, "Thumbstick" }, { 0x01, 0x0097, "SystemFunctionShift" }, { 0x01, 0x0098, "SystemFunctionShiftLock" }, { 0x01, 0x0099, "SystemFunctionShiftLockIndicator" }, { 0x01, 0x009a, "SystemDismissNotification" }, { 0x01, 0x009b, "SystemDoNotDisturb" }, { 0x01, 0x00a0, "SystemDock" }, { 0x01, 0x00a1, "SystemUndock" }, { 0x01, 0x00a2, "SystemSetup" }, { 0x01, 0x00a3, "SystemBreak" }, { 0x01, 0x00a4, "SystemDebuggerBreak" }, { 0x01, 0x00a5, "ApplicationBreak" }, { 0x01, 0x00a6, "ApplicationDebuggerBreak" }, { 0x01, 0x00a7, "SystemSpeakerMute" }, { 0x01, 0x00a8, "SystemHibernate" }, { 0x01, 0x00a9, "SystemMicrophoneMute" }, { 0x01, 0x00b0, "SystemDisplayInvert" }, { 0x01, 0x00b1, "SystemDisplayInternal" }, { 0x01, 0x00b2, "SystemDisplayExternal" }, { 0x01, 0x00b3, "SystemDisplayBoth" }, { 0x01, 0x00b4, "SystemDisplayDual" }, { 0x01, 0x00b5, "SystemDisplayToggleIntExtMode" }, { 0x01, 0x00b6, "SystemDisplaySwapPrimarySecondary" }, { 0x01, 0x00b7, "SystemDisplayToggleLCDAutoscale" }, { 0x01, 0x00c0, "SensorZone" }, { 0x01, 0x00c1, "RPM" }, { 0x01, 0x00c2, "CoolantLevel" }, { 0x01, 0x00c3, "CoolantCriticalLevel" }, { 0x01, 0x00c4, "CoolantPump" }, { 0x01, 0x00c5, "ChassisEnclosure" }, { 0x01, 0x00c6, "WirelessRadioButton" }, { 0x01, 0x00c7, "WirelessRadioLED" }, { 0x01, 0x00c8, "WirelessRadioSliderSwitch" }, { 0x01, 0x00c9, "SystemDisplayRotationLockButton" }, { 0x01, 0x00ca, "SystemDisplayRotationLockSliderSwitch" }, { 0x01, 0x00cb, "ControlEnable" }, { 0x01, 0x00d0, "DockableDeviceUniqueID" }, { 0x01, 0x00d1, "DockableDeviceVendorID" }, { 0x01, 0x00d2, "DockableDevicePrimaryUsagePage" }, { 0x01, 0x00d3, "DockableDevicePrimaryUsageID" }, { 0x01, 0x00d4, "DockableDeviceDockingState" }, { 0x01, 0x00d5, "DockableDeviceDisplayOcclusion" }, { 0x01, 0x00d6, "DockableDeviceObjectType" }, { 0x01, 0x00e0, "CallActiveLED" }, { 0x01, 0x00e1, "CallMuteToggle" }, { 0x01, 0x00e2, "CallMuteLED" }, { 0x02, 0, "SimulationControls" }, { 0x02, 0x0001, "FlightSimulationDevice" }, { 0x02, 0x0002, "AutomobileSimulationDevice" }, { 0x02, 0x0003, "TankSimulationDevice" }, { 0x02, 0x0004, "SpaceshipSimulationDevice" }, { 0x02, 0x0005, "SubmarineSimulationDevice" }, { 0x02, 0x0006, "SailingSimulationDevice" }, { 0x02, 0x0007, "MotorcycleSimulationDevice" }, { 0x02, 0x0008, "SportsSimulationDevice" }, { 0x02, 0x0009, "AirplaneSimulationDevice" }, { 0x02, 0x000a, "HelicopterSimulationDevice" }, { 0x02, 0x000b, "MagicCarpetSimulationDevice" }, { 0x02, 0x000c, "BicycleSimulationDevice" }, { 0x02, 0x0020, "FlightControlStick" }, { 0x02, 0x0021, "FlightStick" }, { 0x02, 0x0022, "CyclicControl" }, { 0x02, 0x0023, "CyclicTrim" }, { 0x02, 0x0024, "FlightYoke" }, { 0x02, 0x0025, "TrackControl" }, { 0x02, 0x00b0, "Aileron" }, { 0x02, 0x00b1, "AileronTrim" }, { 0x02, 0x00b2, "AntiTorqueControl" }, { 0x02, 0x00b3, "AutopilotEnable" }, { 0x02, 0x00b4, "ChaffRelease" }, { 0x02, 0x00b5, "CollectiveControl" }, { 0x02, 0x00b6, "DiveBrake" }, { 0x02, 0x00b7, "ElectronicCountermeasures" }, { 0x02, 0x00b8, "Elevator" }, { 0x02, 0x00b9, "ElevatorTrim" }, { 0x02, 0x00ba, "Rudder" }, { 0x02, 0x00bb, "Throttle" }, { 0x02, 0x00bc, "FlightCommunications" }, { 0x02, 0x00bd, "FlareRelease" }, { 0x02, 0x00be, "LandingGear" }, { 0x02, 0x00bf, "ToeBrake" }, { 0x02, 0x00c0, "Trigger" }, { 0x02, 0x00c1, "WeaponsArm" }, { 0x02, 0x00c2, "WeaponsSelect" }, { 0x02, 0x00c3, "WingFlaps" }, { 0x02, 0x00c4, "Accelerator" }, { 0x02, 0x00c5, "Brake" }, { 0x02, 0x00c6, "Clutch" }, { 0x02, 0x00c7, "Shifter" }, { 0x02, 0x00c8, "Steering" }, { 0x02, 0x00c9, "TurretDirection" }, { 0x02, 0x00ca, "BarrelElevation" }, { 0x02, 0x00cb, "DivePlane" }, { 0x02, 0x00cc, "Ballast" }, { 0x02, 0x00cd, "BicycleCrank" }, { 0x02, 0x00ce, "HandleBars" }, { 0x02, 0x00cf, "FrontBrake" }, { 0x02, 0x00d0, "RearBrake" }, { 0x03, 0, "VRControls" }, { 0x03, 0x0001, "Belt" }, { 0x03, 0x0002, "BodySuit" }, { 0x03, 0x0003, "Flexor" }, { 0x03, 0x0004, "Glove" }, { 0x03, 0x0005, "HeadTracker" }, { 0x03, 0x0006, "HeadMountedDisplay" }, { 0x03, 0x0007, "HandTracker" }, { 0x03, 0x0008, "Oculometer" }, { 0x03, 0x0009, "Vest" }, { 0x03, 0x000a, "AnimatronicDevice" }, { 0x03, 0x0020, "StereoEnable" }, { 0x03, 0x0021, "DisplayEnable" }, { 0x04, 0, "SportControls" }, { 0x04, 0x0001, "BaseballBat" }, { 0x04, 0x0002, "GolfClub" }, { 0x04, 0x0003, "RowingMachine" }, { 0x04, 0x0004, "Treadmill" }, { 0x04, 0x0030, "Oar" }, { 0x04, 0x0031, "Slope" }, { 0x04, 0x0032, "Rate" }, { 0x04, 0x0033, "StickSpeed" }, { 0x04, 0x0034, "StickFaceAngle" }, { 0x04, 0x0035, "StickHeelToe" }, { 0x04, 0x0036, "StickFollowThrough" }, { 0x04, 0x0037, "StickTempo" }, { 0x04, 0x0038, "StickType" }, { 0x04, 0x0039, "StickHeight" }, { 0x04, 0x0050, "Putter" }, { 0x04, 0x0051, "1Iron" }, { 0x04, 0x0052, "2Iron" }, { 0x04, 0x0053, "3Iron" }, { 0x04, 0x0054, "4Iron" }, { 0x04, 0x0055, "5Iron" }, { 0x04, 0x0056, "6Iron" }, { 0x04, 0x0057, "7Iron" }, { 0x04, 0x0058, "8Iron" }, { 0x04, 0x0059, "9Iron" }, { 0x04, 0x005a, "10Iron" }, { 0x04, 0x005b, "11Iron" }, { 0x04, 0x005c, "SandWedge" }, { 0x04, 0x005d, "LoftWedge" }, { 0x04, 0x005e, "PowerWedge" }, { 0x04, 0x005f, "1Wood" }, { 0x04, 0x0060, "3Wood" }, { 0x04, 0x0061, "5Wood" }, { 0x04, 0x0062, "7Wood" }, { 0x04, 0x0063, "9Wood" }, { 0x05, 0, "GameControls" }, { 0x05, 0x0001, "3DGameController" }, { 0x05, 0x0002, "PinballDevice" }, { 0x05, 0x0003, "GunDevice" }, { 0x05, 0x0020, "PointofView" }, { 0x05, 0x0021, "TurnRightLeft" }, { 0x05, 0x0022, "PitchForwardBackward" }, { 0x05, 0x0023, "RollRightLeft" }, { 0x05, 0x0024, "MoveRightLeft" }, { 0x05, 0x0025, "MoveForwardBackward" }, { 0x05, 0x0026, "MoveUpDown" }, { 0x05, 0x0027, "LeanRightLeft" }, { 0x05, 0x0028, "LeanForwardBackward" }, { 0x05, 0x0029, "HeightofPOV" }, { 0x05, 0x002a, "Flipper" }, { 0x05, 0x002b, "SecondaryFlipper" }, { 0x05, 0x002c, "Bump" }, { 0x05, 0x002d, "NewGame" }, { 0x05, 0x002e, "ShootBall" }, { 0x05, 0x002f, "Player" }, { 0x05, 0x0030, "GunBolt" }, { 0x05, 0x0031, "GunClip" }, { 0x05, 0x0032, "GunSelector" }, { 0x05, 0x0033, "GunSingleShot" }, { 0x05, 0x0034, "GunBurst" }, { 0x05, 0x0035, "GunAutomatic" }, { 0x05, 0x0036, "GunSafety" }, { 0x05, 0x0037, "GamepadFireJump" }, { 0x05, 0x0039, "GamepadTrigger" }, { 0x05, 0x003a, "FormfittingGamepad" }, { 0x06, 0, "GenericDeviceControls" }, { 0x06, 0x0001, "BackgroundNonuserControls" }, { 0x06, 0x0020, "BatteryStrength" }, { 0x06, 0x0021, "WirelessChannel" }, { 0x06, 0x0022, "WirelessID" }, { 0x06, 0x0023, "DiscoverWirelessControl" }, { 0x06, 0x0024, "SecurityCodeCharacterEntered" }, { 0x06, 0x0025, "SecurityCodeCharacterErased" }, { 0x06, 0x0026, "SecurityCodeCleared" }, { 0x06, 0x0027, "SequenceID" }, { 0x06, 0x0028, "SequenceIDReset" }, { 0x06, 0x0029, "RFSignalStrength" }, { 0x06, 0x002a, "SoftwareVersion" }, { 0x06, 0x002b, "ProtocolVersion" }, { 0x06, 0x002c, "HardwareVersion" }, { 0x06, 0x002d, "Major" }, { 0x06, 0x002e, "Minor" }, { 0x06, 0x002f, "Revision" }, { 0x06, 0x0030, "Handedness" }, { 0x06, 0x0031, "EitherHand" }, { 0x06, 0x0032, "LeftHand" }, { 0x06, 0x0033, "RightHand" }, { 0x06, 0x0034, "BothHands" }, { 0x06, 0x0040, "GripPoseOffset" }, { 0x06, 0x0041, "PointerPoseOffset" }, { 0x07, 0, "KeyboardKeypad" }, { 0x07, 0x0001, "ErrorRollOver" }, { 0x07, 0x0002, "POSTFail" }, { 0x07, 0x0003, "ErrorUndefined" }, { 0x07, 0x0004, "KeyboardA" }, { 0x07, 0x0005, "KeyboardB" }, { 0x07, 0x0006, "KeyboardC" }, { 0x07, 0x0007, "KeyboardD" }, { 0x07, 0x0008, "KeyboardE" }, { 0x07, 0x0009, "KeyboardF" }, { 0x07, 0x000a, "KeyboardG" }, { 0x07, 0x000b, "KeyboardH" }, { 0x07, 0x000c, "KeyboardI" }, { 0x07, 0x000d, "KeyboardJ" }, { 0x07, 0x000e, "KeyboardK" }, { 0x07, 0x000f, "KeyboardL" }, { 0x07, 0x0010, "KeyboardM" }, { 0x07, 0x0011, "KeyboardN" }, { 0x07, 0x0012, "KeyboardO" }, { 0x07, 0x0013, "KeyboardP" }, { 0x07, 0x0014, "KeyboardQ" }, { 0x07, 0x0015, "KeyboardR" }, { 0x07, 0x0016, "KeyboardS" }, { 0x07, 0x0017, "KeyboardT" }, { 0x07, 0x0018, "KeyboardU" }, { 0x07, 0x0019, "KeyboardV" }, { 0x07, 0x001a, "KeyboardW" }, { 0x07, 0x001b, "KeyboardX" }, { 0x07, 0x001c, "KeyboardY" }, { 0x07, 0x001d, "KeyboardZ" }, { 0x07, 0x001e, "Keyboard1andBang" }, { 0x07, 0x001f, "Keyboard2andAt" }, { 0x07, 0x0020, "Keyboard3andHash" }, { 0x07, 0x0021, "Keyboard4andDollar" }, { 0x07, 0x0022, "Keyboard5andPercent" }, { 0x07, 0x0023, "Keyboard6andCaret" }, { 0x07, 0x0024, "Keyboard7andAmpersand" }, { 0x07, 0x0025, "Keyboard8andStar" }, { 0x07, 0x0026, "Keyboard9andLeftBracket" }, { 0x07, 0x0027, "Keyboard0andRightBracket" }, { 0x07, 0x0028, "KeyboardReturnEnter" }, { 0x07, 0x0029, "KeyboardEscape" }, { 0x07, 0x002a, "KeyboardDelete" }, { 0x07, 0x002b, "KeyboardTab" }, { 0x07, 0x002c, "KeyboardSpacebar" }, { 0x07, 0x002d, "KeyboardDashandUnderscore" }, { 0x07, 0x002e, "KeyboardEqualsandPlus" }, { 0x07, 0x002f, "KeyboardLeftBrace" }, { 0x07, 0x0030, "KeyboardRightBrace" }, { 0x07, 0x0031, "KeyboardBackslashandPipe" }, { 0x07, 0x0032, "KeyboardNonUSHashandTilde" }, { 0x07, 0x0033, "KeyboardSemiColonandColon" }, { 0x07, 0x0034, "KeyboardLeftAposandDouble" }, { 0x07, 0x0035, "KeyboardGraveAccentandTilde" }, { 0x07, 0x0036, "KeyboardCommaandLessThan" }, { 0x07, 0x0037, "KeyboardPeriodandGreaterThan" }, { 0x07, 0x0038, "KeyboardForwardSlashandQuestionMark" }, { 0x07, 0x0039, "KeyboardCapsLock" }, { 0x07, 0x003a, "KeyboardF1" }, { 0x07, 0x003b, "KeyboardF2" }, { 0x07, 0x003c, "KeyboardF3" }, { 0x07, 0x003d, "KeyboardF4" }, { 0x07, 0x003e, "KeyboardF5" }, { 0x07, 0x003f, "KeyboardF6" }, { 0x07, 0x0040, "KeyboardF7" }, { 0x07, 0x0041, "KeyboardF8" }, { 0x07, 0x0042, "KeyboardF9" }, { 0x07, 0x0043, "KeyboardF10" }, { 0x07, 0x0044, "KeyboardF11" }, { 0x07, 0x0045, "KeyboardF12" }, { 0x07, 0x0046, "KeyboardPrintScreen" }, { 0x07, 0x0047, "KeyboardScrollLock" }, { 0x07, 0x0048, "KeyboardPause" }, { 0x07, 0x0049, "KeyboardInsert" }, { 0x07, 0x004a, "KeyboardHome" }, { 0x07, 0x004b, "KeyboardPageUp" }, { 0x07, 0x004c, "KeyboardDeleteForward" }, { 0x07, 0x004d, "KeyboardEnd" }, { 0x07, 0x004e, "KeyboardPageDown" }, { 0x07, 0x004f, "KeyboardRightArrow" }, { 0x07, 0x0050, "KeyboardLeftArrow" }, { 0x07, 0x0051, "KeyboardDownArrow" }, { 0x07, 0x0052, "KeyboardUpArrow" }, { 0x07, 0x0053, "KeypadNumLockandClear" }, { 0x07, 0x0054, "KeypadForwardSlash" }, { 0x07, 0x0055, "KeypadStar" }, { 0x07, 0x0056, "KeypadDash" }, { 0x07, 0x0057, "KeypadPlus" }, { 0x07, 0x0058, "KeypadENTER" }, { 0x07, 0x0059, "Keypad1andEnd" }, { 0x07, 0x005a, "Keypad2andDownArrow" }, { 0x07, 0x005b, "Keypad3andPageDn" }, { 0x07, 0x005c, "Keypad4andLeftArrow" }, { 0x07, 0x005d, "Keypad5" }, { 0x07, 0x005e, "Keypad6andRightArrow" }, { 0x07, 0x005f, "Keypad7andHome" }, { 0x07, 0x0060, "Keypad8andUpArrow" }, { 0x07, 0x0061, "Keypad9andPageUp" }, { 0x07, 0x0062, "Keypad0andInsert" }, { 0x07, 0x0063, "KeypadPeriodandDelete" }, { 0x07, 0x0064, "KeyboardNonUSBackslashandPipe" }, { 0x07, 0x0065, "KeyboardApplication" }, { 0x07, 0x0066, "KeyboardPower" }, { 0x07, 0x0067, "KeypadEquals" }, { 0x07, 0x0068, "KeyboardF13" }, { 0x07, 0x0069, "KeyboardF14" }, { 0x07, 0x006a, "KeyboardF15" }, { 0x07, 0x006b, "KeyboardF16" }, { 0x07, 0x006c, "KeyboardF17" }, { 0x07, 0x006d, "KeyboardF18" }, { 0x07, 0x006e, "KeyboardF19" }, { 0x07, 0x006f, "KeyboardF20" }, { 0x07, 0x0070, "KeyboardF21" }, { 0x07, 0x0071, "KeyboardF22" }, { 0x07, 0x0072, "KeyboardF23" }, { 0x07, 0x0073, "KeyboardF24" }, { 0x07, 0x0074, "KeyboardExecute" }, { 0x07, 0x0075, "KeyboardHelp" }, { 0x07, 0x0076, "KeyboardMenu" }, { 0x07, 0x0077, "KeyboardSelect" }, { 0x07, 0x0078, "KeyboardStop" }, { 0x07, 0x0079, "KeyboardAgain" }, { 0x07, 0x007a, "KeyboardUndo" }, { 0x07, 0x007b, "KeyboardCut" }, { 0x07, 0x007c, "KeyboardCopy" }, { 0x07, 0x007d, "KeyboardPaste" }, { 0x07, 0x007e, "KeyboardFind" }, { 0x07, 0x007f, "KeyboardMute" }, { 0x07, 0x0080, "KeyboardVolumeUp" }, { 0x07, 0x0081, "KeyboardVolumeDown" }, { 0x07, 0x0082, "KeyboardLockingCapsLock" }, { 0x07, 0x0083, "KeyboardLockingNumLock" }, { 0x07, 0x0084, "KeyboardLockingScrollLock" }, { 0x07, 0x0085, "KeypadComma" }, { 0x07, 0x0086, "KeypadEqualSign" }, { 0x07, 0x0087, "KeyboardInternational1" }, { 0x07, 0x0088, "KeyboardInternational2" }, { 0x07, 0x0089, "KeyboardInternational3" }, { 0x07, 0x008a, "KeyboardInternational4" }, { 0x07, 0x008b, "KeyboardInternational5" }, { 0x07, 0x008c, "KeyboardInternational6" }, { 0x07, 0x008d, "KeyboardInternational7" }, { 0x07, 0x008e, "KeyboardInternational8" }, { 0x07, 0x008f, "KeyboardInternational9" }, { 0x07, 0x0090, "KeyboardLANG1" }, { 0x07, 0x0091, "KeyboardLANG2" }, { 0x07, 0x0092, "KeyboardLANG3" }, { 0x07, 0x0093, "KeyboardLANG4" }, { 0x07, 0x0094, "KeyboardLANG5" }, { 0x07, 0x0095, "KeyboardLANG6" }, { 0x07, 0x0096, "KeyboardLANG7" }, { 0x07, 0x0097, "KeyboardLANG8" }, { 0x07, 0x0098, "KeyboardLANG9" }, { 0x07, 0x0099, "KeyboardAlternateErase" }, { 0x07, 0x009a, "KeyboardSysReqAttention" }, { 0x07, 0x009b, "KeyboardCancel" }, { 0x07, 0x009c, "KeyboardClear" }, { 0x07, 0x009d, "KeyboardPrior" }, { 0x07, 0x009e, "KeyboardReturn" }, { 0x07, 0x009f, "KeyboardSeparator" }, { 0x07, 0x00a0, "KeyboardOut" }, { 0x07, 0x00a1, "KeyboardOper" }, { 0x07, 0x00a2, "KeyboardClearAgain" }, { 0x07, 0x00a3, "KeyboardCrSelProps" }, { 0x07, 0x00a4, "KeyboardExSel" }, { 0x07, 0x00b0, "KeypadDouble0" }, { 0x07, 0x00b1, "KeypadTriple0" }, { 0x07, 0x00b2, "ThousandsSeparator" }, { 0x07, 0x00b3, "DecimalSeparator" }, { 0x07, 0x00b4, "CurrencyUnit" }, { 0x07, 0x00b5, "CurrencySubunit" }, { 0x07, 0x00b6, "KeypadLeftBracket" }, { 0x07, 0x00b7, "KeypadRightBracket" }, { 0x07, 0x00b8, "KeypadLeftBrace" }, { 0x07, 0x00b9, "KeypadRightBrace" }, { 0x07, 0x00ba, "KeypadTab" }, { 0x07, 0x00bb, "KeypadBackspace" }, { 0x07, 0x00bc, "KeypadA" }, { 0x07, 0x00bd, "KeypadB" }, { 0x07, 0x00be, "KeypadC" }, { 0x07, 0x00bf, "KeypadD" }, { 0x07, 0x00c0, "KeypadE" }, { 0x07, 0x00c1, "KeypadF" }, { 0x07, 0x00c2, "KeypadXOR" }, { 0x07, 0x00c3, "KeypadCaret" }, { 0x07, 0x00c4, "KeypadPercentage" }, { 0x07, 0x00c5, "KeypadLess" }, { 0x07, 0x00c6, "KeypadGreater" }, { 0x07, 0x00c7, "KeypadAmpersand" }, { 0x07, 0x00c8, "KeypadDoubleAmpersand" }, { 0x07, 0x00c9, "KeypadBar" }, { 0x07, 0x00ca, "KeypadDoubleBar" }, { 0x07, 0x00cb, "KeypadColon" }, { 0x07, 0x00cc, "KeypadHash" }, { 0x07, 0x00cd, "KeypadSpace" }, { 0x07, 0x00ce, "KeypadAt" }, { 0x07, 0x00cf, "KeypadBang" }, { 0x07, 0x00d0, "KeypadMemoryStore" }, { 0x07, 0x00d1, "KeypadMemoryRecall" }, { 0x07, 0x00d2, "KeypadMemoryClear" }, { 0x07, 0x00d3, "KeypadMemoryAdd" }, { 0x07, 0x00d4, "KeypadMemorySubtract" }, { 0x07, 0x00d5, "KeypadMemoryMultiply" }, { 0x07, 0x00d6, "KeypadMemoryDivide" }, { 0x07, 0x00d7, "KeypadPlusMinus" }, { 0x07, 0x00d8, "KeypadClear" }, { 0x07, 0x00d9, "KeypadClearEntry" }, { 0x07, 0x00da, "KeypadBinary" }, { 0x07, 0x00db, "KeypadOctal" }, { 0x07, 0x00dc, "KeypadDecimal" }, { 0x07, 0x00dd, "KeypadHexadecimal" }, { 0x07, 0x00e0, "KeyboardLeftControl" }, { 0x07, 0x00e1, "KeyboardLeftShift" }, { 0x07, 0x00e2, "KeyboardLeftAlt" }, { 0x07, 0x00e3, "KeyboardLeftGUI" }, { 0x07, 0x00e4, "KeyboardRightControl" }, { 0x07, 0x00e5, "KeyboardRightShift" }, { 0x07, 0x00e6, "KeyboardRightAlt" }, { 0x07, 0x00e7, "KeyboardRightGUI" }, { 0x08, 0, "LED" }, { 0x08, 0x0001, "NumLock" }, { 0x08, 0x0002, "CapsLock" }, { 0x08, 0x0003, "ScrollLock" }, { 0x08, 0x0004, "Compose" }, { 0x08, 0x0005, "Kana" }, { 0x08, 0x0006, "Power" }, { 0x08, 0x0007, "Shift" }, { 0x08, 0x0008, "DoNotDisturb" }, { 0x08, 0x0009, "Mute" }, { 0x08, 0x000a, "ToneEnable" }, { 0x08, 0x000b, "HighCutFilter" }, { 0x08, 0x000c, "LowCutFilter" }, { 0x08, 0x000d, "EqualizerEnable" }, { 0x08, 0x000e, "SoundFieldOn" }, { 0x08, 0x000f, "SurroundOn" }, { 0x08, 0x0010, "Repeat" }, { 0x08, 0x0011, "Stereo" }, { 0x08, 0x0012, "SamplingRateDetect" }, { 0x08, 0x0013, "Spinning" }, { 0x08, 0x0014, "CAV" }, { 0x08, 0x0015, "CLV" }, { 0x08, 0x0016, "RecordingFormatDetect" }, { 0x08, 0x0017, "OffHook" }, { 0x08, 0x0018, "Ring" }, { 0x08, 0x0019, "MessageWaiting" }, { 0x08, 0x001a, "DataMode" }, { 0x08, 0x001b, "BatteryOperation" }, { 0x08, 0x001c, "BatteryOK" }, { 0x08, 0x001d, "BatteryLow" }, { 0x08, 0x001e, "Speaker" }, { 0x08, 0x001f, "Headset" }, { 0x08, 0x0020, "Hold" }, { 0x08, 0x0021, "Microphone" }, { 0x08, 0x0022, "Coverage" }, { 0x08, 0x0023, "NightMode" }, { 0x08, 0x0024, "SendCalls" }, { 0x08, 0x0025, "CallPickup" }, { 0x08, 0x0026, "Conference" }, { 0x08, 0x0027, "Standby" }, { 0x08, 0x0028, "CameraOn" }, { 0x08, 0x0029, "CameraOff" }, { 0x08, 0x002a, "OnLine" }, { 0x08, 0x002b, "OffLine" }, { 0x08, 0x002c, "Busy" }, { 0x08, 0x002d, "Ready" }, { 0x08, 0x002e, "PaperOut" }, { 0x08, 0x002f, "PaperJam" }, { 0x08, 0x0030, "Remote" }, { 0x08, 0x0031, "Forward" }, { 0x08, 0x0032, "Reverse" }, { 0x08, 0x0033, "Stop" }, { 0x08, 0x0034, "Rewind" }, { 0x08, 0x0035, "FastForward" }, { 0x08, 0x0036, "Play" }, { 0x08, 0x0037, "Pause" }, { 0x08, 0x0038, "Record" }, { 0x08, 0x0039, "Error" }, { 0x08, 0x003a, "UsageSelectedIndicator" }, { 0x08, 0x003b, "UsageInUseIndicator" }, { 0x08, 0x003c, "UsageMultiModeIndicator" }, { 0x08, 0x003d, "IndicatorOn" }, { 0x08, 0x003e, "IndicatorFlash" }, { 0x08, 0x003f, "IndicatorSlowBlink" }, { 0x08, 0x0040, "IndicatorFastBlink" }, { 0x08, 0x0041, "IndicatorOff" }, { 0x08, 0x0042, "FlashOnTime" }, { 0x08, 0x0043, "SlowBlinkOnTime" }, { 0x08, 0x0044, "SlowBlinkOffTime" }, { 0x08, 0x0045, "FastBlinkOnTime" }, { 0x08, 0x0046, "FastBlinkOffTime" }, { 0x08, 0x0047, "UsageIndicatorColor" }, { 0x08, 0x0048, "IndicatorRed" }, { 0x08, 0x0049, "IndicatorGreen" }, { 0x08, 0x004a, "IndicatorAmber" }, { 0x08, 0x004b, "GenericIndicator" }, { 0x08, 0x004c, "SystemSuspend" }, { 0x08, 0x004d, "ExternalPowerConnected" }, { 0x08, 0x004e, "IndicatorBlue" }, { 0x08, 0x004f, "IndicatorOrange" }, { 0x08, 0x0050, "GoodStatus" }, { 0x08, 0x0051, "WarningStatus" }, { 0x08, 0x0052, "RGBLED" }, { 0x08, 0x0053, "RedLEDChannel" }, { 0x08, 0x0054, "BlueLEDChannel" }, { 0x08, 0x0055, "GreenLEDChannel" }, { 0x08, 0x0056, "LEDIntensity" }, { 0x08, 0x0057, "SystemMicrophoneMute" }, { 0x08, 0x0060, "PlayerIndicator" }, { 0x08, 0x0061, "Player1" }, { 0x08, 0x0062, "Player2" }, { 0x08, 0x0063, "Player3" }, { 0x08, 0x0064, "Player4" }, { 0x08, 0x0065, "Player5" }, { 0x08, 0x0066, "Player6" }, { 0x08, 0x0067, "Player7" }, { 0x08, 0x0068, "Player8" }, { 0x09, 0, "Button" }, { 0x0a, 0, "Ordinal" }, { 0x0b, 0, "TelephonyDevice" }, { 0x0b, 0x0001, "Phone" }, { 0x0b, 0x0002, "AnsweringMachine" }, { 0x0b, 0x0003, "MessageControls" }, { 0x0b, 0x0004, "Handset" }, { 0x0b, 0x0005, "Headset" }, { 0x0b, 0x0006, "TelephonyKeyPad" }, { 0x0b, 0x0007, "ProgrammableButton" }, { 0x0b, 0x0020, "HookSwitch" }, { 0x0b, 0x0021, "Flash" }, { 0x0b, 0x0022, "Feature" }, { 0x0b, 0x0023, "Hold" }, { 0x0b, 0x0024, "Redial" }, { 0x0b, 0x0025, "Transfer" }, { 0x0b, 0x0026, "Drop" }, { 0x0b, 0x0027, "Park" }, { 0x0b, 0x0028, "ForwardCalls" }, { 0x0b, 0x0029, "AlternateFunction" }, { 0x0b, 0x002a, "Line" }, { 0x0b, 0x002b, "SpeakerPhone" }, { 0x0b, 0x002c, "Conference" }, { 0x0b, 0x002d, "RingEnable" }, { 0x0b, 0x002e, "RingSelect" }, { 0x0b, 0x002f, "PhoneMute" }, { 0x0b, 0x0030, "CallerID" }, { 0x0b, 0x0031, "Send" }, { 0x0b, 0x0050, "SpeedDial" }, { 0x0b, 0x0051, "StoreNumber" }, { 0x0b, 0x0052, "RecallNumber" }, { 0x0b, 0x0053, "PhoneDirectory" }, { 0x0b, 0x0070, "VoiceMail" }, { 0x0b, 0x0071, "ScreenCalls" }, { 0x0b, 0x0072, "DoNotDisturb" }, { 0x0b, 0x0073, "Message" }, { 0x0b, 0x0074, "AnswerOnOff" }, { 0x0b, 0x0090, "InsideDialTone" }, { 0x0b, 0x0091, "OutsideDialTone" }, { 0x0b, 0x0092, "InsideRingTone" }, { 0x0b, 0x0093, "OutsideRingTone" }, { 0x0b, 0x0094, "PriorityRingTone" }, { 0x0b, 0x0095, "InsideRingback" }, { 0x0b, 0x0096, "PriorityRingback" }, { 0x0b, 0x0097, "LineBusyTone" }, { 0x0b, 0x0098, "ReorderTone" }, { 0x0b, 0x0099, "CallWaitingTone" }, { 0x0b, 0x009a, "ConfirmationTone1" }, { 0x0b, 0x009b, "ConfirmationTone2" }, { 0x0b, 0x009c, "TonesOff" }, { 0x0b, 0x009d, "OutsideRingback" }, { 0x0b, 0x009e, "Ringer" }, { 0x0b, 0x00b0, "PhoneKey0" }, { 0x0b, 0x00b1, "PhoneKey1" }, { 0x0b, 0x00b2, "PhoneKey2" }, { 0x0b, 0x00b3, "PhoneKey3" }, { 0x0b, 0x00b4, "PhoneKey4" }, { 0x0b, 0x00b5, "PhoneKey5" }, { 0x0b, 0x00b6, "PhoneKey6" }, { 0x0b, 0x00b7, "PhoneKey7" }, { 0x0b, 0x00b8, "PhoneKey8" }, { 0x0b, 0x00b9, "PhoneKey9" }, { 0x0b, 0x00ba, "PhoneKeyStar" }, { 0x0b, 0x00bb, "PhoneKeyPound" }, { 0x0b, 0x00bc, "PhoneKeyA" }, { 0x0b, 0x00bd, "PhoneKeyB" }, { 0x0b, 0x00be, "PhoneKeyC" }, { 0x0b, 0x00bf, "PhoneKeyD" }, { 0x0b, 0x00c0, "PhoneCallHistoryKey" }, { 0x0b, 0x00c1, "PhoneCallerIDKey" }, { 0x0b, 0x00c2, "PhoneSettingsKey" }, { 0x0b, 0x00f0, "HostControl" }, { 0x0b, 0x00f1, "HostAvailable" }, { 0x0b, 0x00f2, "HostCallActive" }, { 0x0b, 0x00f3, "ActivateHandsetAudio" }, { 0x0b, 0x00f4, "RingType" }, { 0x0b, 0x00f5, "RedialablePhoneNumber" }, { 0x0b, 0x00f8, "StopRingTone" }, { 0x0b, 0x00f9, "PSTNRingTone" }, { 0x0b, 0x00fa, "HostRingTone" }, { 0x0b, 0x00fb, "AlertSoundError" }, { 0x0b, 0x00fc, "AlertSoundConfirm" }, { 0x0b, 0x00fd, "AlertSoundNotification" }, { 0x0b, 0x00fe, "SilentRing" }, { 0x0b, 0x0108, "EmailMessageWaiting" }, { 0x0b, 0x0109, "VoicemailMessageWaiting" }, { 0x0b, 0x010a, "HostHold" }, { 0x0b, 0x0110, "IncomingCallHistoryCount" }, { 0x0b, 0x0111, "OutgoingCallHistoryCount" }, { 0x0b, 0x0112, "IncomingCallHistory" }, { 0x0b, 0x0113, "OutgoingCallHistory" }, { 0x0b, 0x0114, "PhoneLocale" }, { 0x0b, 0x0140, "PhoneTimeSecond" }, { 0x0b, 0x0141, "PhoneTimeMinute" }, { 0x0b, 0x0142, "PhoneTimeHour" }, { 0x0b, 0x0143, "PhoneDateDay" }, { 0x0b, 0x0144, "PhoneDateMonth" }, { 0x0b, 0x0145, "PhoneDateYear" }, { 0x0b, 0x0146, "HandsetNickname" }, { 0x0b, 0x0147, "AddressBookID" }, { 0x0b, 0x014a, "CallDuration" }, { 0x0b, 0x014b, "DualModePhone" }, { 0x0c, 0, "Consumer" }, { 0x0c, 0x0001, "ConsumerControl" }, { 0x0c, 0x0002, "NumericKeyPad" }, { 0x0c, 0x0003, "ProgrammableButtons" }, { 0x0c, 0x0004, "Microphone" }, { 0x0c, 0x0005, "Headphone" }, { 0x0c, 0x0006, "GraphicEqualizer" }, { 0x0c, 0x0020, "10" }, { 0x0c, 0x0021, "100" }, { 0x0c, 0x0022, "AMPM" }, { 0x0c, 0x0030, "Power" }, { 0x0c, 0x0031, "Reset" }, { 0x0c, 0x0032, "Sleep" }, { 0x0c, 0x0033, "SleepAfter" }, { 0x0c, 0x0034, "SleepMode" }, { 0x0c, 0x0035, "Illumination" }, { 0x0c, 0x0036, "FunctionButtons" }, { 0x0c, 0x0040, "Menu" }, { 0x0c, 0x0041, "MenuPick" }, { 0x0c, 0x0042, "MenuUp" }, { 0x0c, 0x0043, "MenuDown" }, { 0x0c, 0x0044, "MenuLeft" }, { 0x0c, 0x0045, "MenuRight" }, { 0x0c, 0x0046, "MenuEscape" }, { 0x0c, 0x0047, "MenuValueIncrease" }, { 0x0c, 0x0048, "MenuValueDecrease" }, { 0x0c, 0x0060, "DataOnScreen" }, { 0x0c, 0x0061, "ClosedCaption" }, { 0x0c, 0x0062, "ClosedCaptionSelect" }, { 0x0c, 0x0063, "VCRTV" }, { 0x0c, 0x0064, "BroadcastMode" }, { 0x0c, 0x0065, "Snapshot" }, { 0x0c, 0x0066, "Still" }, { 0x0c, 0x0067, "PictureinPictureToggle" }, { 0x0c, 0x0068, "PictureinPictureSwap" }, { 0x0c, 0x0069, "RedMenuButton" }, { 0x0c, 0x006a, "GreenMenuButton" }, { 0x0c, 0x006b, "BlueMenuButton" }, { 0x0c, 0x006c, "YellowMenuButton" }, { 0x0c, 0x006d, "Aspect" }, { 0x0c, 0x006e, "3DModeSelect" }, { 0x0c, 0x006f, "DisplayBrightnessIncrement" }, { 0x0c, 0x0070, "DisplayBrightnessDecrement" }, { 0x0c, 0x0071, "DisplayBrightness" }, { 0x0c, 0x0072, "DisplayBacklightToggle" }, { 0x0c, 0x0073, "DisplaySetBrightnesstoMinimum" }, { 0x0c, 0x0074, "DisplaySetBrightnesstoMaximum" }, { 0x0c, 0x0075, "DisplaySetAutoBrightness" }, { 0x0c, 0x0076, "CameraAccessEnabled" }, { 0x0c, 0x0077, "CameraAccessDisabled" }, { 0x0c, 0x0078, "CameraAccessToggle" }, { 0x0c, 0x0079, "KeyboardBrightnessIncrement" }, { 0x0c, 0x007a, "KeyboardBrightnessDecrement" }, { 0x0c, 0x007b, "KeyboardBacklightSetLevel" }, { 0x0c, 0x007c, "KeyboardBacklightOOC" }, { 0x0c, 0x007d, "KeyboardBacklightSetMinimum" }, { 0x0c, 0x007e, "KeyboardBacklightSetMaximum" }, { 0x0c, 0x007f, "KeyboardBacklightAuto" }, { 0x0c, 0x0080, "Selection" }, { 0x0c, 0x0081, "AssignSelection" }, { 0x0c, 0x0082, "ModeStep" }, { 0x0c, 0x0083, "RecallLast" }, { 0x0c, 0x0084, "EnterChannel" }, { 0x0c, 0x0085, "OrderMovie" }, { 0x0c, 0x0086, "Channel" }, { 0x0c, 0x0087, "MediaSelection" }, { 0x0c, 0x0088, "MediaSelectComputer" }, { 0x0c, 0x0089, "MediaSelectTV" }, { 0x0c, 0x008a, "MediaSelectWWW" }, { 0x0c, 0x008b, "MediaSelectDVD" }, { 0x0c, 0x008c, "MediaSelectTelephone" }, { 0x0c, 0x008d, "MediaSelectProgramGuide" }, { 0x0c, 0x008e, "MediaSelectVideoPhone" }, { 0x0c, 0x008f, "MediaSelectGames" }, { 0x0c, 0x0090, "MediaSelectMessages" }, { 0x0c, 0x0091, "MediaSelectCD" }, { 0x0c, 0x0092, "MediaSelectVCR" }, { 0x0c, 0x0093, "MediaSelectTuner" }, { 0x0c, 0x0094, "Quit" }, { 0x0c, 0x0095, "Help" }, { 0x0c, 0x0096, "MediaSelectTape" }, { 0x0c, 0x0097, "MediaSelectCable" }, { 0x0c, 0x0098, "MediaSelectSatellite" }, { 0x0c, 0x0099, "MediaSelectSecurity" }, { 0x0c, 0x009a, "MediaSelectHome" }, { 0x0c, 0x009b, "MediaSelectCall" }, { 0x0c, 0x009c, "ChannelIncrement" }, { 0x0c, 0x009d, "ChannelDecrement" }, { 0x0c, 0x009e, "MediaSelectSAP" }, { 0x0c, 0x00a0, "VCRPlus" }, { 0x0c, 0x00a1, "Once" }, { 0x0c, 0x00a2, "Daily" }, { 0x0c, 0x00a3, "Weekly" }, { 0x0c, 0x00a4, "Monthly" }, { 0x0c, 0x00b0, "Play" }, { 0x0c, 0x00b1, "Pause" }, { 0x0c, 0x00b2, "Record" }, { 0x0c, 0x00b3, "FastForward" }, { 0x0c, 0x00b4, "Rewind" }, { 0x0c, 0x00b5, "ScanNextTrack" }, { 0x0c, 0x00b6, "ScanPreviousTrack" }, { 0x0c, 0x00b7, "Stop" }, { 0x0c, 0x00b8, "Eject" }, { 0x0c, 0x00b9, "RandomPlay" }, { 0x0c, 0x00ba, "SelectDisc" }, { 0x0c, 0x00bb, "EnterDisc" }, { 0x0c, 0x00bc, "Repeat" }, { 0x0c, 0x00bd, "Tracking" }, { 0x0c, 0x00be, "TrackNormal" }, { 0x0c, 0x00bf, "SlowTracking" }, { 0x0c, 0x00c0, "FrameForward" }, { 0x0c, 0x00c1, "FrameBack" }, { 0x0c, 0x00c2, "Mark" }, { 0x0c, 0x00c3, "ClearMark" }, { 0x0c, 0x00c4, "RepeatFromMark" }, { 0x0c, 0x00c5, "ReturnToMark" }, { 0x0c, 0x00c6, "SearchMarkForward" }, { 0x0c, 0x00c7, "SearchMarkBackwards" }, { 0x0c, 0x00c8, "CounterReset" }, { 0x0c, 0x00c9, "ShowCounter" }, { 0x0c, 0x00ca, "TrackingIncrement" }, { 0x0c, 0x00cb, "TrackingDecrement" }, { 0x0c, 0x00cc, "StopEject" }, { 0x0c, 0x00cd, "PlayPause" }, { 0x0c, 0x00ce, "PlaySkip" }, { 0x0c, 0x00cf, "VoiceCommand" }, { 0x0c, 0x00d0, "InvokeCaptureInterface" }, { 0x0c, 0x00d1, "StartorStopGameRecording" }, { 0x0c, 0x00d2, "HistoricalGameCapture" }, { 0x0c, 0x00d3, "CaptureGameScreenshot" }, { 0x0c, 0x00d4, "ShoworHideRecordingIndicator" }, { 0x0c, 0x00d5, "StartorStopMicrophoneCapture" }, { 0x0c, 0x00d6, "StartorStopCameraCapture" }, { 0x0c, 0x00d7, "StartorStopGameBroadcast" }, { 0x0c, 0x00d8, "StartorStopVoiceDictationSession" }, { 0x0c, 0x00d9, "InvokeDismissEmojiPicker" }, { 0x0c, 0x00e0, "Volume" }, { 0x0c, 0x00e1, "Balance" }, { 0x0c, 0x00e2, "Mute" }, { 0x0c, 0x00e3, "Bass" }, { 0x0c, 0x00e4, "Treble" }, { 0x0c, 0x00e5, "BassBoost" }, { 0x0c, 0x00e6, "SurroundMode" }, { 0x0c, 0x00e7, "Loudness" }, { 0x0c, 0x00e8, "MPX" }, { 0x0c, 0x00e9, "VolumeIncrement" }, { 0x0c, 0x00ea, "VolumeDecrement" }, { 0x0c, 0x00f0, "SpeedSelect" }, { 0x0c, 0x00f1, "PlaybackSpeed" }, { 0x0c, 0x00f2, "StandardPlay" }, { 0x0c, 0x00f3, "LongPlay" }, { 0x0c, 0x00f4, "ExtendedPlay" }, { 0x0c, 0x00f5, "Slow" }, { 0x0c, 0x0100, "FanEnable" }, { 0x0c, 0x0101, "FanSpeed" }, { 0x0c, 0x0102, "LightEnable" }, { 0x0c, 0x0103, "LightIlluminationLevel" }, { 0x0c, 0x0104, "ClimateControlEnable" }, { 0x0c, 0x0105, "RoomTemperature" }, { 0x0c, 0x0106, "SecurityEnable" }, { 0x0c, 0x0107, "FireAlarm" }, { 0x0c, 0x0108, "PoliceAlarm" }, { 0x0c, 0x0109, "Proximity" }, { 0x0c, 0x010a, "Motion" }, { 0x0c, 0x010b, "DuressAlarm" }, { 0x0c, 0x010c, "HoldupAlarm" }, { 0x0c, 0x010d, "MedicalAlarm" }, { 0x0c, 0x0150, "BalanceRight" }, { 0x0c, 0x0151, "BalanceLeft" }, { 0x0c, 0x0152, "BassIncrement" }, { 0x0c, 0x0153, "BassDecrement" }, { 0x0c, 0x0154, "TrebleIncrement" }, { 0x0c, 0x0155, "TrebleDecrement" }, { 0x0c, 0x0160, "SpeakerSystem" }, { 0x0c, 0x0161, "ChannelLeft" }, { 0x0c, 0x0162, "ChannelRight" }, { 0x0c, 0x0163, "ChannelCenter" }, { 0x0c, 0x0164, "ChannelFront" }, { 0x0c, 0x0165, "ChannelCenterFront" }, { 0x0c, 0x0166, "ChannelSide" }, { 0x0c, 0x0167, "ChannelSurround" }, { 0x0c, 0x0168, "ChannelLowFrequencyEnhancement" }, { 0x0c, 0x0169, "ChannelTop" }, { 0x0c, 0x016a, "ChannelUnknown" }, { 0x0c, 0x0170, "Subchannel" }, { 0x0c, 0x0171, "SubchannelIncrement" }, { 0x0c, 0x0172, "SubchannelDecrement" }, { 0x0c, 0x0173, "AlternateAudioIncrement" }, { 0x0c, 0x0174, "AlternateAudioDecrement" }, { 0x0c, 0x0180, "ApplicationLaunchButtons" }, { 0x0c, 0x0181, "ALLaunchButtonConfigurationTool" }, { 0x0c, 0x0182, "ALProgrammableButtonConfiguration" }, { 0x0c, 0x0183, "ALConsumerControlConfiguration" }, { 0x0c, 0x0184, "ALWordProcessor" }, { 0x0c, 0x0185, "ALTextEditor" }, { 0x0c, 0x0186, "ALSpreadsheet" }, { 0x0c, 0x0187, "ALGraphicsEditor" }, { 0x0c, 0x0188, "ALPresentationApp" }, { 0x0c, 0x0189, "ALDatabaseApp" }, { 0x0c, 0x018a, "ALEmailReader" }, { 0x0c, 0x018b, "ALNewsreader" }, { 0x0c, 0x018c, "ALVoicemail" }, { 0x0c, 0x018d, "ALContactsAddressBook" }, { 0x0c, 0x018e, "ALCalendarSchedule" }, { 0x0c, 0x018f, "ALTaskProjectManager" }, { 0x0c, 0x0190, "ALLogJournalTimecard" }, { 0x0c, 0x0191, "ALCheckbookFinance" }, { 0x0c, 0x0192, "ALCalculator" }, { 0x0c, 0x0193, "ALAVCapturePlayback" }, { 0x0c, 0x0194, "ALLocalMachineBrowser" }, { 0x0c, 0x0195, "ALLANWANBrowser" }, { 0x0c, 0x0196, "ALInternetBrowser" }, { 0x0c, 0x0197, "ALRemoteNetworkingISPConnect" }, { 0x0c, 0x0198, "ALNetworkConference" }, { 0x0c, 0x0199, "ALNetworkChat" }, { 0x0c, 0x019a, "ALTelephonyDialer" }, { 0x0c, 0x019b, "ALLogon" }, { 0x0c, 0x019c, "ALLogoff" }, { 0x0c, 0x019d, "ALLogonLogoff" }, { 0x0c, 0x019e, "ALTerminalLockScreensaver" }, { 0x0c, 0x019f, "ALControlPanel" }, { 0x0c, 0x01a0, "ALCommandLineProcessorRun" }, { 0x0c, 0x01a1, "ALProcessTaskManager" }, { 0x0c, 0x01a2, "ALSelectTaskApplication" }, { 0x0c, 0x01a3, "ALNextTaskApplication" }, { 0x0c, 0x01a4, "ALPreviousTaskApplication" }, { 0x0c, 0x01a5, "ALPreemptiveHaltTaskApplication" }, { 0x0c, 0x01a6, "ALIntegratedHelpCenter" }, { 0x0c, 0x01a7, "ALDocuments" }, { 0x0c, 0x01a8, "ALThesaurus" }, { 0x0c, 0x01a9, "ALDictionary" }, { 0x0c, 0x01aa, "ALDesktop" }, { 0x0c, 0x01ab, "ALSpellCheck" }, { 0x0c, 0x01ac, "ALGrammarCheck" }, { 0x0c, 0x01ad, "ALWirelessStatus" }, { 0x0c, 0x01ae, "ALKeyboardLayout" }, { 0x0c, 0x01af, "ALVirusProtection" }, { 0x0c, 0x01b0, "ALEncryption" }, { 0x0c, 0x01b1, "ALScreenSaver" }, { 0x0c, 0x01b2, "ALAlarms" }, { 0x0c, 0x01b3, "ALClock" }, { 0x0c, 0x01b4, "ALFileBrowser" }, { 0x0c, 0x01b5, "ALPowerStatus" }, { 0x0c, 0x01b6, "ALImageBrowser" }, { 0x0c, 0x01b7, "ALAudioBrowser" }, { 0x0c, 0x01b8, "ALMovieBrowser" }, { 0x0c, 0x01b9, "ALDigitalRightsManager" }, { 0x0c, 0x01ba, "ALDigitalWallet" }, { 0x0c, 0x01bc, "ALInstantMessaging" }, { 0x0c, 0x01bd, "ALOEMFeaturesTipsTutorialBrowser" }, { 0x0c, 0x01be, "ALOEMHelp" }, { 0x0c, 0x01bf, "ALOnlineCommunity" }, { 0x0c, 0x01c0, "ALEntertainmentContentBrowser" }, { 0x0c, 0x01c1, "ALOnlineShoppingBrowser" }, { 0x0c, 0x01c2, "ALSmartCardInformationHelp" }, { 0x0c, 0x01c3, "ALMarketMonitorFinanceBrowser" }, { 0x0c, 0x01c4, "ALCustomizedCorporateNewsBrowser" }, { 0x0c, 0x01c5, "ALOnlineActivityBrowser" }, { 0x0c, 0x01c6, "ALResearchSearchBrowser" }, { 0x0c, 0x01c7, "ALAudioPlayer" }, { 0x0c, 0x01c8, "ALMessageStatus" }, { 0x0c, 0x01c9, "ALContactSync" }, { 0x0c, 0x01ca, "ALNavigation" }, { 0x0c, 0x01cb, "ALContextawareDesktopAssistant" }, { 0x0c, 0x0200, "GenericGUIApplicationControls" }, { 0x0c, 0x0201, "ACNew" }, { 0x0c, 0x0202, "ACOpen" }, { 0x0c, 0x0203, "ACClose" }, { 0x0c, 0x0204, "ACExit" }, { 0x0c, 0x0205, "ACMaximize" }, { 0x0c, 0x0206, "ACMinimize" }, { 0x0c, 0x0207, "ACSave" }, { 0x0c, 0x0208, "ACPrint" }, { 0x0c, 0x0209, "ACProperties" }, { 0x0c, 0x021a, "ACUndo" }, { 0x0c, 0x021b, "ACCopy" }, { 0x0c, 0x021c, "ACCut" }, { 0x0c, 0x021d, "ACPaste" }, { 0x0c, 0x021e, "ACSelectAll" }, { 0x0c, 0x021f, "ACFind" }, { 0x0c, 0x0220, "ACFindandReplace" }, { 0x0c, 0x0221, "ACSearch" }, { 0x0c, 0x0222, "ACGoTo" }, { 0x0c, 0x0223, "ACHome" }, { 0x0c, 0x0224, "ACBack" }, { 0x0c, 0x0225, "ACForward" }, { 0x0c, 0x0226, "ACStop" }, { 0x0c, 0x0227, "ACRefresh" }, { 0x0c, 0x0228, "ACPreviousLink" }, { 0x0c, 0x0229, "ACNextLink" }, { 0x0c, 0x022a, "ACBookmarks" }, { 0x0c, 0x022b, "ACHistory" }, { 0x0c, 0x022c, "ACSubscriptions" }, { 0x0c, 0x022d, "ACZoomIn" }, { 0x0c, 0x022e, "ACZoomOut" }, { 0x0c, 0x022f, "ACZoom" }, { 0x0c, 0x0230, "ACFullScreenView" }, { 0x0c, 0x0231, "ACNormalView" }, { 0x0c, 0x0232, "ACViewToggle" }, { 0x0c, 0x0233, "ACScrollUp" }, { 0x0c, 0x0234, "ACScrollDown" }, { 0x0c, 0x0235, "ACScroll" }, { 0x0c, 0x0236, "ACPanLeft" }, { 0x0c, 0x0237, "ACPanRight" }, { 0x0c, 0x0238, "ACPan" }, { 0x0c, 0x0239, "ACNewWindow" }, { 0x0c, 0x023a, "ACTileHorizontally" }, { 0x0c, 0x023b, "ACTileVertically" }, { 0x0c, 0x023c, "ACFormat" }, { 0x0c, 0x023d, "ACEdit" }, { 0x0c, 0x023e, "ACBold" }, { 0x0c, 0x023f, "ACItalics" }, { 0x0c, 0x0240, "ACUnderline" }, { 0x0c, 0x0241, "ACStrikethrough" }, { 0x0c, 0x0242, "ACSubscript" }, { 0x0c, 0x0243, "ACSuperscript" }, { 0x0c, 0x0244, "ACAllCaps" }, { 0x0c, 0x0245, "ACRotate" }, { 0x0c, 0x0246, "ACResize" }, { 0x0c, 0x0247, "ACFlipHorizontal" }, { 0x0c, 0x0248, "ACFlipVertical" }, { 0x0c, 0x0249, "ACMirrorHorizontal" }, { 0x0c, 0x024a, "ACMirrorVertical" }, { 0x0c, 0x024b, "ACFontSelect" }, { 0x0c, 0x024c, "ACFontColor" }, { 0x0c, 0x024d, "ACFontSize" }, { 0x0c, 0x024e, "ACJustifyLeft" }, { 0x0c, 0x024f, "ACJustifyCenterH" }, { 0x0c, 0x0250, "ACJustifyRight" }, { 0x0c, 0x0251, "ACJustifyBlockH" }, { 0x0c, 0x0252, "ACJustifyTop" }, { 0x0c, 0x0253, "ACJustifyCenterV" }, { 0x0c, 0x0254, "ACJustifyBottom" }, { 0x0c, 0x0255, "ACJustifyBlockV" }, { 0x0c, 0x0256, "ACIndentDecrease" }, { 0x0c, 0x0257, "ACIndentIncrease" }, { 0x0c, 0x0258, "ACNumberedList" }, { 0x0c, 0x0259, "ACRestartNumbering" }, { 0x0c, 0x025a, "ACBulletedList" }, { 0x0c, 0x025b, "ACPromote" }, { 0x0c, 0x025c, "ACDemote" }, { 0x0c, 0x025d, "ACYes" }, { 0x0c, 0x025e, "ACNo" }, { 0x0c, 0x025f, "ACCancel" }, { 0x0c, 0x0260, "ACCatalog" }, { 0x0c, 0x0261, "ACBuyCheckout" }, { 0x0c, 0x0262, "ACAddtoCart" }, { 0x0c, 0x0263, "ACExpand" }, { 0x0c, 0x0264, "ACExpandAll" }, { 0x0c, 0x0265, "ACCollapse" }, { 0x0c, 0x0266, "ACCollapseAll" }, { 0x0c, 0x0267, "ACPrintPreview" }, { 0x0c, 0x0268, "ACPasteSpecial" }, { 0x0c, 0x0269, "ACInsertMode" }, { 0x0c, 0x026a, "ACDelete" }, { 0x0c, 0x026b, "ACLock" }, { 0x0c, 0x026c, "ACUnlock" }, { 0x0c, 0x026d, "ACProtect" }, { 0x0c, 0x026e, "ACUnprotect" }, { 0x0c, 0x026f, "ACAttachComment" }, { 0x0c, 0x0270, "ACDeleteComment" }, { 0x0c, 0x0271, "ACViewComment" }, { 0x0c, 0x0272, "ACSelectWord" }, { 0x0c, 0x0273, "ACSelectSentence" }, { 0x0c, 0x0274, "ACSelectParagraph" }, { 0x0c, 0x0275, "ACSelectColumn" }, { 0x0c, 0x0276, "ACSelectRow" }, { 0x0c, 0x0277, "ACSelectTable" }, { 0x0c, 0x0278, "ACSelectObject" }, { 0x0c, 0x0279, "ACRedoRepeat" }, { 0x0c, 0x027a, "ACSort" }, { 0x0c, 0x027b, "ACSortAscending" }, { 0x0c, 0x027c, "ACSortDescending" }, { 0x0c, 0x027d, "ACFilter" }, { 0x0c, 0x027e, "ACSetClock" }, { 0x0c, 0x027f, "ACViewClock" }, { 0x0c, 0x0280, "ACSelectTimeZone" }, { 0x0c, 0x0281, "ACEditTimeZones" }, { 0x0c, 0x0282, "ACSetAlarm" }, { 0x0c, 0x0283, "ACClearAlarm" }, { 0x0c, 0x0284, "ACSnoozeAlarm" }, { 0x0c, 0x0285, "ACResetAlarm" }, { 0x0c, 0x0286, "ACSynchronize" }, { 0x0c, 0x0287, "ACSendReceive" }, { 0x0c, 0x0288, "ACSendTo" }, { 0x0c, 0x0289, "ACReply" }, { 0x0c, 0x028a, "ACReplyAll" }, { 0x0c, 0x028b, "ACForwardMsg" }, { 0x0c, 0x028c, "ACSend" }, { 0x0c, 0x028d, "ACAttachFile" }, { 0x0c, 0x028e, "ACUpload" }, { 0x0c, 0x028f, "ACDownloadSaveTargetAs" }, { 0x0c, 0x0290, "ACSetBorders" }, { 0x0c, 0x0291, "ACInsertRow" }, { 0x0c, 0x0292, "ACInsertColumn" }, { 0x0c, 0x0293, "ACInsertFile" }, { 0x0c, 0x0294, "ACInsertPicture" }, { 0x0c, 0x0295, "ACInsertObject" }, { 0x0c, 0x0296, "ACInsertSymbol" }, { 0x0c, 0x0297, "ACSaveandClose" }, { 0x0c, 0x0298, "ACRename" }, { 0x0c, 0x0299, "ACMerge" }, { 0x0c, 0x029a, "ACSplit" }, { 0x0c, 0x029b, "ACDisributeHorizontally" }, { 0x0c, 0x029c, "ACDistributeVertically" }, { 0x0c, 0x029d, "ACNextKeyboardLayoutSelect" }, { 0x0c, 0x029e, "ACNavigationGuidance" }, { 0x0c, 0x029f, "ACDesktopShowAllWindows" }, { 0x0c, 0x02a0, "ACSoftKeyLeft" }, { 0x0c, 0x02a1, "ACSoftKeyRight" }, { 0x0c, 0x02a2, "ACDesktopShowAllApplications" }, { 0x0c, 0x02b0, "ACIdleKeepAlive" }, { 0x0c, 0x02c0, "ExtendedKeyboardAttributesCollection" }, { 0x0c, 0x02c1, "KeyboardFormFactor" }, { 0x0c, 0x02c2, "KeyboardKeyType" }, { 0x0c, 0x02c3, "KeyboardPhysicalLayout" }, { 0x0c, 0x02c4, "VendorSpecificKeyboardPhysicalLayout" }, { 0x0c, 0x02c5, "KeyboardIETFLanguageTagIndex" }, { 0x0c, 0x02c6, "ImplementedKeyboardInputAssistControls" }, { 0x0c, 0x02c7, "KeyboardInputAssistPrevious" }, { 0x0c, 0x02c8, "KeyboardInputAssistNext" }, { 0x0c, 0x02c9, "KeyboardInputAssistPreviousGroup" }, { 0x0c, 0x02ca, "KeyboardInputAssistNextGroup" }, { 0x0c, 0x02cb, "KeyboardInputAssistAccept" }, { 0x0c, 0x02cc, "KeyboardInputAssistCancel" }, { 0x0c, 0x02d0, "PrivacyScreenToggle" }, { 0x0c, 0x02d1, "PrivacyScreenLevelDecrement" }, { 0x0c, 0x02d2, "PrivacyScreenLevelIncrement" }, { 0x0c, 0x02d3, "PrivacyScreenLevelMinimum" }, { 0x0c, 0x02d4, "PrivacyScreenLevelMaximum" }, { 0x0c, 0x0500, "ContactEdited" }, { 0x0c, 0x0501, "ContactAdded" }, { 0x0c, 0x0502, "ContactRecordActive" }, { 0x0c, 0x0503, "ContactIndex" }, { 0x0c, 0x0504, "ContactNickname" }, { 0x0c, 0x0505, "ContactFirstName" }, { 0x0c, 0x0506, "ContactLastName" }, { 0x0c, 0x0507, "ContactFullName" }, { 0x0c, 0x0508, "ContactPhoneNumberPersonal" }, { 0x0c, 0x0509, "ContactPhoneNumberBusiness" }, { 0x0c, 0x050a, "ContactPhoneNumberMobile" }, { 0x0c, 0x050b, "ContactPhoneNumberPager" }, { 0x0c, 0x050c, "ContactPhoneNumberFax" }, { 0x0c, 0x050d, "ContactPhoneNumberOther" }, { 0x0c, 0x050e, "ContactEmailPersonal" }, { 0x0c, 0x050f, "ContactEmailBusiness" }, { 0x0c, 0x0510, "ContactEmailOther" }, { 0x0c, 0x0511, "ContactEmailMain" }, { 0x0c, 0x0512, "ContactSpeedDialNumber" }, { 0x0c, 0x0513, "ContactStatusFlag" }, { 0x0c, 0x0514, "ContactMisc" }, { 0x0d, 0, "Digitizers" }, { 0x0d, 0x0001, "Digitizer" }, { 0x0d, 0x0002, "Pen" }, { 0x0d, 0x0003, "LightPen" }, { 0x0d, 0x0004, "TouchScreen" }, { 0x0d, 0x0005, "TouchPad" }, { 0x0d, 0x0006, "Whiteboard" }, { 0x0d, 0x0007, "CoordinateMeasuringMachine" }, { 0x0d, 0x0008, "3DDigitizer" }, { 0x0d, 0x0009, "StereoPlotter" }, { 0x0d, 0x000a, "ArticulatedArm" }, { 0x0d, 0x000b, "Armature" }, { 0x0d, 0x000c, "MultiplePointDigitizer" }, { 0x0d, 0x000d, "FreeSpaceWand" }, { 0x0d, 0x000e, "DeviceConfiguration" }, { 0x0d, 0x000f, "CapacitiveHeatMapDigitizer" }, { 0x0d, 0x0020, "Stylus" }, { 0x0d, 0x0021, "Puck" }, { 0x0d, 0x0022, "Finger" }, { 0x0d, 0x0023, "Devicesettings" }, { 0x0d, 0x0024, "CharacterGesture" }, { 0x0d, 0x0030, "TipPressure" }, { 0x0d, 0x0031, "BarrelPressure" }, { 0x0d, 0x0032, "InRange" }, { 0x0d, 0x0033, "Touch" }, { 0x0d, 0x0034, "Untouch" }, { 0x0d, 0x0035, "Tap" }, { 0x0d, 0x0036, "Quality" }, { 0x0d, 0x0037, "DataValid" }, { 0x0d, 0x0038, "TransducerIndex" }, { 0x0d, 0x0039, "TabletFunctionKeys" }, { 0x0d, 0x003a, "ProgramChangeKeys" }, { 0x0d, 0x003b, "BatteryStrength" }, { 0x0d, 0x003c, "Invert" }, { 0x0d, 0x003d, "XTilt" }, { 0x0d, 0x003e, "YTilt" }, { 0x0d, 0x003f, "Azimuth" }, { 0x0d, 0x0040, "Altitude" }, { 0x0d, 0x0041, "Twist" }, { 0x0d, 0x0042, "TipSwitch" }, { 0x0d, 0x0043, "SecondaryTipSwitch" }, { 0x0d, 0x0044, "BarrelSwitch" }, { 0x0d, 0x0045, "Eraser" }, { 0x0d, 0x0046, "TabletPick" }, { 0x0d, 0x0047, "TouchValid" }, { 0x0d, 0x0048, "Width" }, { 0x0d, 0x0049, "Height" }, { 0x0d, 0x0051, "ContactIdentifier" }, { 0x0d, 0x0052, "DeviceMode" }, { 0x0d, 0x0053, "DeviceIdentifier" }, { 0x0d, 0x0054, "ContactCount" }, { 0x0d, 0x0055, "ContactCountMaximum" }, { 0x0d, 0x0056, "ScanTime" }, { 0x0d, 0x0057, "SurfaceSwitch" }, { 0x0d, 0x0058, "ButtonSwitch" }, { 0x0d, 0x0059, "PadType" }, { 0x0d, 0x005a, "SecondaryBarrelSwitch" }, { 0x0d, 0x005b, "TransducerSerialNumber" }, { 0x0d, 0x005c, "PreferredColor" }, { 0x0d, 0x005d, "PreferredColorisLocked" }, { 0x0d, 0x005e, "PreferredLineWidth" }, { 0x0d, 0x005f, "PreferredLineWidthisLocked" }, { 0x0d, 0x0060, "LatencyMode" }, { 0x0d, 0x0061, "GestureCharacterQuality" }, { 0x0d, 0x0062, "CharacterGestureDataLength" }, { 0x0d, 0x0063, "CharacterGestureData" }, { 0x0d, 0x0064, "GestureCharacterEncoding" }, { 0x0d, 0x0065, "UTF8CharacterGestureEncoding" }, { 0x0d, 0x0066, "UTF16LittleEndianCharacterGestureEncoding" }, { 0x0d, 0x0067, "UTF16BigEndianCharacterGestureEncoding" }, { 0x0d, 0x0068, "UTF32LittleEndianCharacterGestureEncoding" }, { 0x0d, 0x0069, "UTF32BigEndianCharacterGestureEncoding" }, { 0x0d, 0x006a, "CapacitiveHeatMapProtocolVendorID" }, { 0x0d, 0x006b, "CapacitiveHeatMapProtocolVersion" }, { 0x0d, 0x006c, "CapacitiveHeatMapFrameData" }, { 0x0d, 0x006d, "GestureCharacterEnable" }, { 0x0d, 0x006e, "TransducerSerialNumberPart2" }, { 0x0d, 0x006f, "NoPreferredColor" }, { 0x0d, 0x0070, "PreferredLineStyle" }, { 0x0d, 0x0071, "PreferredLineStyleisLocked" }, { 0x0d, 0x0072, "Ink" }, { 0x0d, 0x0073, "Pencil" }, { 0x0d, 0x0074, "Highlighter" }, { 0x0d, 0x0075, "ChiselMarker" }, { 0x0d, 0x0076, "Brush" }, { 0x0d, 0x0077, "NoPreference" }, { 0x0d, 0x0080, "DigitizerDiagnostic" }, { 0x0d, 0x0081, "DigitizerError" }, { 0x0d, 0x0082, "ErrNormalStatus" }, { 0x0d, 0x0083, "ErrTransducersExceeded" }, { 0x0d, 0x0084, "ErrFullTransFeaturesUnavailable" }, { 0x0d, 0x0085, "ErrChargeLow" }, { 0x0d, 0x0090, "TransducerSoftwareInfo" }, { 0x0d, 0x0091, "TransducerVendorId" }, { 0x0d, 0x0092, "TransducerProductId" }, { 0x0d, 0x0093, "DeviceSupportedProtocols" }, { 0x0d, 0x0094, "TransducerSupportedProtocols" }, { 0x0d, 0x0095, "NoProtocol" }, { 0x0d, 0x0096, "WacomAESProtocol" }, { 0x0d, 0x0097, "USIProtocol" }, { 0x0d, 0x0098, "MicrosoftPenProtocol" }, { 0x0d, 0x00a0, "SupportedReportRates" }, { 0x0d, 0x00a1, "ReportRate" }, { 0x0d, 0x00a2, "TransducerConnected" }, { 0x0d, 0x00a3, "SwitchDisabled" }, { 0x0d, 0x00a4, "SwitchUnimplemented" }, { 0x0d, 0x00a5, "TransducerSwitches" }, { 0x0d, 0x00a6, "TransducerIndexSelector" }, { 0x0d, 0x00b0, "ButtonPressThreshold" }, { 0x0e, 0, "Haptics" }, { 0x0e, 0x0001, "SimpleHapticController" }, { 0x0e, 0x0010, "WaveformList" }, { 0x0e, 0x0011, "DurationList" }, { 0x0e, 0x0020, "AutoTrigger" }, { 0x0e, 0x0021, "ManualTrigger" }, { 0x0e, 0x0022, "AutoTriggerAssociatedControl" }, { 0x0e, 0x0023, "Intensity" }, { 0x0e, 0x0024, "RepeatCount" }, { 0x0e, 0x0025, "RetriggerPeriod" }, { 0x0e, 0x0026, "WaveformVendorPage" }, { 0x0e, 0x0027, "WaveformVendorID" }, { 0x0e, 0x0028, "WaveformCutoffTime" }, { 0x0e, 0x1001, "WaveformNone" }, { 0x0e, 0x1002, "WaveformStop" }, { 0x0e, 0x1003, "WaveformClick" }, { 0x0e, 0x1004, "WaveformBuzzContinuous" }, { 0x0e, 0x1005, "WaveformRumbleContinuous" }, { 0x0e, 0x1006, "WaveformPress" }, { 0x0e, 0x1007, "WaveformRelease" }, { 0x0e, 0x1008, "WaveformHover" }, { 0x0e, 0x1009, "WaveformSuccess" }, { 0x0e, 0x100a, "WaveformError" }, { 0x0e, 0x100b, "WaveformInkContinuous" }, { 0x0e, 0x100c, "WaveformPencilContinuous" }, { 0x0e, 0x100d, "WaveformMarkerContinuous" }, { 0x0e, 0x100e, "WaveformChiselMarkerContinuous" }, { 0x0e, 0x100f, "WaveformBrushContinuous" }, { 0x0e, 0x1010, "WaveformEraserContinuous" }, { 0x0e, 0x1011, "WaveformSparkleContinuous" }, { 0x0f, 0, "PhysicalInputDevice" }, { 0x0f, 0x0001, "PhysicalInputDevice" }, { 0x0f, 0x0020, "Normal" }, { 0x0f, 0x0021, "SetEffectReport" }, { 0x0f, 0x0022, "EffectParameterBlockIndex" }, { 0x0f, 0x0023, "ParameterBlockOffset" }, { 0x0f, 0x0024, "ROMFlag" }, { 0x0f, 0x0025, "EffectType" }, { 0x0f, 0x0026, "ETConstantForce" }, { 0x0f, 0x0027, "ETRamp" }, { 0x0f, 0x0028, "ETCustomForce" }, { 0x0f, 0x0030, "ETSquare" }, { 0x0f, 0x0031, "ETSine" }, { 0x0f, 0x0032, "ETTriangle" }, { 0x0f, 0x0033, "ETSawtoothUp" }, { 0x0f, 0x0034, "ETSawtoothDown" }, { 0x0f, 0x0040, "ETSpring" }, { 0x0f, 0x0041, "ETDamper" }, { 0x0f, 0x0042, "ETInertia" }, { 0x0f, 0x0043, "ETFriction" }, { 0x0f, 0x0050, "Duration" }, { 0x0f, 0x0051, "SamplePeriod" }, { 0x0f, 0x0052, "Gain" }, { 0x0f, 0x0053, "TriggerButton" }, { 0x0f, 0x0054, "TriggerRepeatInterval" }, { 0x0f, 0x0055, "AxesEnable" }, { 0x0f, 0x0056, "DirectionEnable" }, { 0x0f, 0x0057, "Direction" }, { 0x0f, 0x0058, "TypeSpecificBlockOffset" }, { 0x0f, 0x0059, "BlockType" }, { 0x0f, 0x005a, "SetEnvelopeReport" }, { 0x0f, 0x005b, "AttackLevel" }, { 0x0f, 0x005c, "AttackTime" }, { 0x0f, 0x005d, "FadeLevel" }, { 0x0f, 0x005e, "FadeTime" }, { 0x0f, 0x005f, "SetConditionReport" }, { 0x0f, 0x0060, "CenterPointOffset" }, { 0x0f, 0x0061, "PositiveCoefficient" }, { 0x0f, 0x0062, "NegativeCoefficient" }, { 0x0f, 0x0063, "PositiveSaturation" }, { 0x0f, 0x0064, "NegativeSaturation" }, { 0x0f, 0x0065, "DeadBand" }, { 0x0f, 0x0066, "DownloadForceSample" }, { 0x0f, 0x0067, "IsochCustomForceEnable" }, { 0x0f, 0x0068, "CustomForceDataReport" }, { 0x0f, 0x0069, "CustomForceData" }, { 0x0f, 0x006a, "CustomForceVendorDefinedData" }, { 0x0f, 0x006b, "SetCustomForceReport" }, { 0x0f, 0x006c, "CustomForceDataOffset" }, { 0x0f, 0x006d, "SampleCount" }, { 0x0f, 0x006e, "SetPeriodicReport" }, { 0x0f, 0x006f, "Offset" }, { 0x0f, 0x0070, "Magnitude" }, { 0x0f, 0x0071, "Phase" }, { 0x0f, 0x0072, "Period" }, { 0x0f, 0x0073, "SetConstantForceReport" }, { 0x0f, 0x0074, "SetRampForceReport" }, { 0x0f, 0x0075, "RampStart" }, { 0x0f, 0x0076, "RampEnd" }, { 0x0f, 0x0077, "EffectOperationReport" }, { 0x0f, 0x0078, "EffectOperation" }, { 0x0f, 0x0079, "OpEffectStart" }, { 0x0f, 0x007a, "OpEffectStartSolo" }, { 0x0f, 0x007b, "OpEffectStop" }, { 0x0f, 0x007c, "LoopCount" }, { 0x0f, 0x007d, "DeviceGainReport" }, { 0x0f, 0x007e, "DeviceGain" }, { 0x0f, 0x007f, "ParameterBlockPoolsReport" }, { 0x0f, 0x0080, "RAMPoolSize" }, { 0x0f, 0x0081, "ROMPoolSize" }, { 0x0f, 0x0082, "ROMEffectBlockCount" }, { 0x0f, 0x0083, "SimultaneousEffectsMax" }, { 0x0f, 0x0084, "PoolAlignment" }, { 0x0f, 0x0085, "ParameterBlockMoveReport" }, { 0x0f, 0x0086, "MoveSource" }, { 0x0f, 0x0087, "MoveDestination" }, { 0x0f, 0x0088, "MoveLength" }, { 0x0f, 0x0089, "EffectParameterBlockLoadReport" }, { 0x0f, 0x008b, "EffectParameterBlockLoadStatus" }, { 0x0f, 0x008c, "BlockLoadSuccess" }, { 0x0f, 0x008d, "BlockLoadFull" }, { 0x0f, 0x008e, "BlockLoadError" }, { 0x0f, 0x008f, "BlockHandle" }, { 0x0f, 0x0090, "EffectParameterBlockFreeReport" }, { 0x0f, 0x0091, "TypeSpecificBlockHandle" }, { 0x0f, 0x0092, "PIDStateReport" }, { 0x0f, 0x0094, "EffectPlaying" }, { 0x0f, 0x0095, "PIDDeviceControlReport" }, { 0x0f, 0x0096, "PIDDeviceControl" }, { 0x0f, 0x0097, "DCEnableActuators" }, { 0x0f, 0x0098, "DCDisableActuators" }, { 0x0f, 0x0099, "DCStopAllEffects" }, { 0x0f, 0x009a, "DCReset" }, { 0x0f, 0x009b, "DCPause" }, { 0x0f, 0x009c, "DCContinue" }, { 0x0f, 0x009f, "DevicePaused" }, { 0x0f, 0x00a0, "ActuatorsEnabled" }, { 0x0f, 0x00a4, "SafetySwitch" }, { 0x0f, 0x00a5, "ActuatorOverrideSwitch" }, { 0x0f, 0x00a6, "ActuatorPower" }, { 0x0f, 0x00a7, "StartDelay" }, { 0x0f, 0x00a8, "ParameterBlockSize" }, { 0x0f, 0x00a9, "DeviceManagedPool" }, { 0x0f, 0x00aa, "SharedParameterBlocks" }, { 0x0f, 0x00ab, "CreateNewEffectParameterBlockReport" }, { 0x0f, 0x00ac, "RAMPoolAvailable" }, { 0x11, 0, "SoC" }, { 0x11, 0x0001, "SocControl" }, { 0x11, 0x0002, "FirmwareTransfer" }, { 0x11, 0x0003, "FirmwareFileId" }, { 0x11, 0x0004, "FileOffsetInBytes" }, { 0x11, 0x0005, "FileTransferSizeMaxInBytes" }, { 0x11, 0x0006, "FilePayload" }, { 0x11, 0x0007, "FilePayloadSizeInBytes" }, { 0x11, 0x0008, "FilePayloadContainsLastBytes" }, { 0x11, 0x0009, "FileTransferStop" }, { 0x11, 0x000a, "FileTransferTillEnd" }, { 0x12, 0, "EyeandHeadTrackers" }, { 0x12, 0x0001, "EyeTracker" }, { 0x12, 0x0002, "HeadTracker" }, { 0x12, 0x0010, "TrackingData" }, { 0x12, 0x0011, "Capabilities" }, { 0x12, 0x0012, "Configuration" }, { 0x12, 0x0013, "Status" }, { 0x12, 0x0014, "Control" }, { 0x12, 0x0020, "SensorTimestamp" }, { 0x12, 0x0021, "PositionX" }, { 0x12, 0x0022, "PositionY" }, { 0x12, 0x0023, "PositionZ" }, { 0x12, 0x0024, "GazePoint" }, { 0x12, 0x0025, "LeftEyePosition" }, { 0x12, 0x0026, "RightEyePosition" }, { 0x12, 0x0027, "HeadPosition" }, { 0x12, 0x0028, "HeadDirectionPoint" }, { 0x12, 0x0029, "RotationaboutXaxis" }, { 0x12, 0x002a, "RotationaboutYaxis" }, { 0x12, 0x002b, "RotationaboutZaxis" }, { 0x12, 0x0100, "TrackerQuality" }, { 0x12, 0x0101, "MinimumTrackingDistance" }, { 0x12, 0x0102, "OptimumTrackingDistance" }, { 0x12, 0x0103, "MaximumTrackingDistance" }, { 0x12, 0x0104, "MaximumScreenPlaneWidth" }, { 0x12, 0x0105, "MaximumScreenPlaneHeight" }, { 0x12, 0x0200, "DisplayManufacturerID" }, { 0x12, 0x0201, "DisplayProductID" }, { 0x12, 0x0202, "DisplaySerialNumber" }, { 0x12, 0x0203, "DisplayManufacturerDate" }, { 0x12, 0x0204, "CalibratedScreenWidth" }, { 0x12, 0x0205, "CalibratedScreenHeight" }, { 0x12, 0x0300, "SamplingFrequency" }, { 0x12, 0x0301, "ConfigurationStatus" }, { 0x12, 0x0400, "DeviceModeRequest" }, { 0x14, 0, "AuxiliaryDisplay" }, { 0x14, 0x0001, "AlphanumericDisplay" }, { 0x14, 0x0002, "AuxiliaryDisplay" }, { 0x14, 0x0020, "DisplayAttributesReport" }, { 0x14, 0x0021, "ASCIICharacterSet" }, { 0x14, 0x0022, "DataReadBack" }, { 0x14, 0x0023, "FontReadBack" }, { 0x14, 0x0024, "DisplayControlReport" }, { 0x14, 0x0025, "ClearDisplay" }, { 0x14, 0x0026, "DisplayEnable" }, { 0x14, 0x0027, "ScreenSaverDelay" }, { 0x14, 0x0028, "ScreenSaverEnable" }, { 0x14, 0x0029, "VerticalScroll" }, { 0x14, 0x002a, "HorizontalScroll" }, { 0x14, 0x002b, "CharacterReport" }, { 0x14, 0x002c, "DisplayData" }, { 0x14, 0x002d, "DisplayStatus" }, { 0x14, 0x002e, "StatNotReady" }, { 0x14, 0x002f, "StatReady" }, { 0x14, 0x0030, "ErrNotaloadablecharacter" }, { 0x14, 0x0031, "ErrFontdatacannotberead" }, { 0x14, 0x0032, "CursorPositionReport" }, { 0x14, 0x0033, "Row" }, { 0x14, 0x0034, "Column" }, { 0x14, 0x0035, "Rows" }, { 0x14, 0x0036, "Columns" }, { 0x14, 0x0037, "CursorPixelPositioning" }, { 0x14, 0x0038, "CursorMode" }, { 0x14, 0x0039, "CursorEnable" }, { 0x14, 0x003a, "CursorBlink" }, { 0x14, 0x003b, "FontReport" }, { 0x14, 0x003c, "FontData" }, { 0x14, 0x003d, "CharacterWidth" }, { 0x14, 0x003e, "CharacterHeight" }, { 0x14, 0x003f, "CharacterSpacingHorizontal" }, { 0x14, 0x0040, "CharacterSpacingVertical" }, { 0x14, 0x0041, "UnicodeCharacterSet" }, { 0x14, 0x0042, "Font7Segment" }, { 0x14, 0x0043, "7SegmentDirectMap" }, { 0x14, 0x0044, "Font14Segment" }, { 0x14, 0x0045, "14SegmentDirectMap" }, { 0x14, 0x0046, "DisplayBrightness" }, { 0x14, 0x0047, "DisplayContrast" }, { 0x14, 0x0048, "CharacterAttribute" }, { 0x14, 0x0049, "AttributeReadback" }, { 0x14, 0x004a, "AttributeData" }, { 0x14, 0x004b, "CharAttrEnhance" }, { 0x14, 0x004c, "CharAttrUnderline" }, { 0x14, 0x004d, "CharAttrBlink" }, { 0x14, 0x0080, "BitmapSizeX" }, { 0x14, 0x0081, "BitmapSizeY" }, { 0x14, 0x0082, "MaxBlitSize" }, { 0x14, 0x0083, "BitDepthFormat" }, { 0x14, 0x0084, "DisplayOrientation" }, { 0x14, 0x0085, "PaletteReport" }, { 0x14, 0x0086, "PaletteDataSize" }, { 0x14, 0x0087, "PaletteDataOffset" }, { 0x14, 0x0088, "PaletteData" }, { 0x14, 0x008a, "BlitReport" }, { 0x14, 0x008b, "BlitRectangleX1" }, { 0x14, 0x008c, "BlitRectangleY1" }, { 0x14, 0x008d, "BlitRectangleX2" }, { 0x14, 0x008e, "BlitRectangleY2" }, { 0x14, 0x008f, "BlitData" }, { 0x14, 0x0090, "SoftButton" }, { 0x14, 0x0091, "SoftButtonID" }, { 0x14, 0x0092, "SoftButtonSide" }, { 0x14, 0x0093, "SoftButtonOffset1" }, { 0x14, 0x0094, "SoftButtonOffset2" }, { 0x14, 0x0095, "SoftButtonReport" }, { 0x14, 0x00c2, "SoftKeys" }, { 0x14, 0x00cc, "DisplayDataExtensions" }, { 0x14, 0x00cf, "CharacterMapping" }, { 0x14, 0x00dd, "UnicodeEquivalent" }, { 0x14, 0x00df, "CharacterPageMapping" }, { 0x14, 0x00ff, "RequestReport" }, { 0x20, 0, "Sensors" }, { 0x20, 0x0001, "Sensor" }, { 0x20, 0x0010, "Biometric" }, { 0x20, 0x0011, "BiometricHumanPresence" }, { 0x20, 0x0012, "BiometricHumanProximity" }, { 0x20, 0x0013, "BiometricHumanTouch" }, { 0x20, 0x0014, "BiometricBloodPressure" }, { 0x20, 0x0015, "BiometricBodyTemperature" }, { 0x20, 0x0016, "BiometricHeartRate" }, { 0x20, 0x0017, "BiometricHeartRateVariability" }, { 0x20, 0x0018, "BiometricPeripheralOxygenSaturation" }, { 0x20, 0x0019, "BiometricRespiratoryRate" }, { 0x20, 0x0020, "Electrical" }, { 0x20, 0x0021, "ElectricalCapacitance" }, { 0x20, 0x0022, "ElectricalCurrent" }, { 0x20, 0x0023, "ElectricalPower" }, { 0x20, 0x0024, "ElectricalInductance" }, { 0x20, 0x0025, "ElectricalResistance" }, { 0x20, 0x0026, "ElectricalVoltage" }, { 0x20, 0x0027, "ElectricalPotentiometer" }, { 0x20, 0x0028, "ElectricalFrequency" }, { 0x20, 0x0029, "ElectricalPeriod" }, { 0x20, 0x0030, "Environmental" }, { 0x20, 0x0031, "EnvironmentalAtmosphericPressure" }, { 0x20, 0x0032, "EnvironmentalHumidity" }, { 0x20, 0x0033, "EnvironmentalTemperature" }, { 0x20, 0x0034, "EnvironmentalWindDirection" }, { 0x20, 0x0035, "EnvironmentalWindSpeed" }, { 0x20, 0x0036, "EnvironmentalAirQuality" }, { 0x20, 0x0037, "EnvironmentalHeatIndex" }, { 0x20, 0x0038, "EnvironmentalSurfaceTemperature" }, { 0x20, 0x0039, "EnvironmentalVolatileOrganicCompounds" }, { 0x20, 0x003a, "EnvironmentalObjectPresence" }, { 0x20, 0x003b, "EnvironmentalObjectProximity" }, { 0x20, 0x0040, "Light" }, { 0x20, 0x0041, "LightAmbientLight" }, { 0x20, 0x0042, "LightConsumerInfrared" }, { 0x20, 0x0043, "LightInfraredLight" }, { 0x20, 0x0044, "LightVisibleLight" }, { 0x20, 0x0045, "LightUltravioletLight" }, { 0x20, 0x0050, "Location" }, { 0x20, 0x0051, "LocationBroadcast" }, { 0x20, 0x0052, "LocationDeadReckoning" }, { 0x20, 0x0053, "LocationGPSGlobalPositioningSystem" }, { 0x20, 0x0054, "LocationLookup" }, { 0x20, 0x0055, "LocationOther" }, { 0x20, 0x0056, "LocationStatic" }, { 0x20, 0x0057, "LocationTriangulation" }, { 0x20, 0x0060, "Mechanical" }, { 0x20, 0x0061, "MechanicalBooleanSwitch" }, { 0x20, 0x0062, "MechanicalBooleanSwitchArray" }, { 0x20, 0x0063, "MechanicalMultivalueSwitch" }, { 0x20, 0x0064, "MechanicalForce" }, { 0x20, 0x0065, "MechanicalPressure" }, { 0x20, 0x0066, "MechanicalStrain" }, { 0x20, 0x0067, "MechanicalWeight" }, { 0x20, 0x0068, "MechanicalHapticVibrator" }, { 0x20, 0x0069, "MechanicalHallEffectSwitch" }, { 0x20, 0x0070, "Motion" }, { 0x20, 0x0071, "MotionAccelerometer1D" }, { 0x20, 0x0072, "MotionAccelerometer2D" }, { 0x20, 0x0073, "MotionAccelerometer3D" }, { 0x20, 0x0074, "MotionGyrometer1D" }, { 0x20, 0x0075, "MotionGyrometer2D" }, { 0x20, 0x0076, "MotionGyrometer3D" }, { 0x20, 0x0077, "MotionMotionDetector" }, { 0x20, 0x0078, "MotionSpeedometer" }, { 0x20, 0x0079, "MotionAccelerometer" }, { 0x20, 0x007a, "MotionGyrometer" }, { 0x20, 0x007b, "MotionGravityVector" }, { 0x20, 0x007c, "MotionLinearAccelerometer" }, { 0x20, 0x0080, "Orientation" }, { 0x20, 0x0081, "OrientationCompass1D" }, { 0x20, 0x0082, "OrientationCompass2D" }, { 0x20, 0x0083, "OrientationCompass3D" }, { 0x20, 0x0084, "OrientationInclinometer1D" }, { 0x20, 0x0085, "OrientationInclinometer2D" }, { 0x20, 0x0086, "OrientationInclinometer3D" }, { 0x20, 0x0087, "OrientationDistance1D" }, { 0x20, 0x0088, "OrientationDistance2D" }, { 0x20, 0x0089, "OrientationDistance3D" }, { 0x20, 0x008a, "OrientationDeviceOrientation" }, { 0x20, 0x008b, "OrientationCompass" }, { 0x20, 0x008c, "OrientationInclinometer" }, { 0x20, 0x008d, "OrientationDistance" }, { 0x20, 0x008e, "OrientationRelativeOrientation" }, { 0x20, 0x008f, "OrientationSimpleOrientation" }, { 0x20, 0x0090, "Scanner" }, { 0x20, 0x0091, "ScannerBarcode" }, { 0x20, 0x0092, "ScannerRFID" }, { 0x20, 0x0093, "ScannerNFC" }, { 0x20, 0x00a0, "Time" }, { 0x20, 0x00a1, "TimeAlarmTimer" }, { 0x20, 0x00a2, "TimeRealTimeClock" }, { 0x20, 0x00b0, "PersonalActivity" }, { 0x20, 0x00b1, "PersonalActivityActivityDetection" }, { 0x20, 0x00b2, "PersonalActivityDevicePosition" }, { 0x20, 0x00b3, "PersonalActivityFloorTracker" }, { 0x20, 0x00b4, "PersonalActivityPedometer" }, { 0x20, 0x00b5, "PersonalActivityStepDetection" }, { 0x20, 0x00c0, "OrientationExtended" }, { 0x20, 0x00c1, "OrientationExtendedGeomagneticOrientation" }, { 0x20, 0x00c2, "OrientationExtendedMagnetometer" }, { 0x20, 0x00d0, "Gesture" }, { 0x20, 0x00d1, "GestureChassisFlipGesture" }, { 0x20, 0x00d2, "GestureHingeFoldGesture" }, { 0x20, 0x00e0, "Other" }, { 0x20, 0x00e1, "OtherCustom" }, { 0x20, 0x00e2, "OtherGeneric" }, { 0x20, 0x00e3, "OtherGenericEnumerator" }, { 0x20, 0x00e4, "OtherHingeAngle" }, { 0x20, 0x00f0, "VendorReserved1" }, { 0x20, 0x00f1, "VendorReserved2" }, { 0x20, 0x00f2, "VendorReserved3" }, { 0x20, 0x00f3, "VendorReserved4" }, { 0x20, 0x00f4, "VendorReserved5" }, { 0x20, 0x00f5, "VendorReserved6" }, { 0x20, 0x00f6, "VendorReserved7" }, { 0x20, 0x00f7, "VendorReserved8" }, { 0x20, 0x00f8, "VendorReserved9" }, { 0x20, 0x00f9, "VendorReserved10" }, { 0x20, 0x00fa, "VendorReserved11" }, { 0x20, 0x00fb, "VendorReserved12" }, { 0x20, 0x00fc, "VendorReserved13" }, { 0x20, 0x00fd, "VendorReserved14" }, { 0x20, 0x00fe, "VendorReserved15" }, { 0x20, 0x00ff, "VendorReserved16" }, { 0x20, 0x0200, "Event" }, { 0x20, 0x0201, "EventSensorState" }, { 0x20, 0x0202, "EventSensorEvent" }, { 0x20, 0x0300, "Property" }, { 0x20, 0x0301, "PropertyFriendlyName" }, { 0x20, 0x0302, "PropertyPersistentUniqueID" }, { 0x20, 0x0303, "PropertySensorStatus" }, { 0x20, 0x0304, "PropertyMinimumReportInterval" }, { 0x20, 0x0305, "PropertySensorManufacturer" }, { 0x20, 0x0306, "PropertySensorModel" }, { 0x20, 0x0307, "PropertySensorSerialNumber" }, { 0x20, 0x0308, "PropertySensorDescription" }, { 0x20, 0x0309, "PropertySensorConnectionType" }, { 0x20, 0x030a, "PropertySensorDevicePath" }, { 0x20, 0x030b, "PropertyHardwareRevision" }, { 0x20, 0x030c, "PropertyFirmwareVersion" }, { 0x20, 0x030d, "PropertyReleaseDate" }, { 0x20, 0x030e, "PropertyReportInterval" }, { 0x20, 0x030f, "PropertyChangeSensitivityAbsolute" }, { 0x20, 0x0310, "PropertyChangeSensitivityPercentofRange" }, { 0x20, 0x0311, "PropertyChangeSensitivityPercentRelative" }, { 0x20, 0x0312, "PropertyAccuracy" }, { 0x20, 0x0313, "PropertyResolution" }, { 0x20, 0x0314, "PropertyMaximum" }, { 0x20, 0x0315, "PropertyMinimum" }, { 0x20, 0x0316, "PropertyReportingState" }, { 0x20, 0x0317, "PropertySamplingRate" }, { 0x20, 0x0318, "PropertyResponseCurve" }, { 0x20, 0x0319, "PropertyPowerState" }, { 0x20, 0x031a, "PropertyMaximumFIFOEvents" }, { 0x20, 0x031b, "PropertyReportLatency" }, { 0x20, 0x031c, "PropertyFlushFIFOEvents" }, { 0x20, 0x031d, "PropertyMaximumPowerConsumption" }, { 0x20, 0x031e, "PropertyIsPrimary" }, { 0x20, 0x031f, "PropertyHumanPresenceDetectionType" }, { 0x20, 0x0400, "DataFieldLocation" }, { 0x20, 0x0402, "DataFieldAltitudeAntennaSeaLevel" }, { 0x20, 0x0403, "DataFieldDifferentialReferenceStationID" }, { 0x20, 0x0404, "DataFieldAltitudeEllipsoidError" }, { 0x20, 0x0405, "DataFieldAltitudeEllipsoid" }, { 0x20, 0x0406, "DataFieldAltitudeSeaLevelError" }, { 0x20, 0x0407, "DataFieldAltitudeSeaLevel" }, { 0x20, 0x0408, "DataFieldDifferentialGPSDataAge" }, { 0x20, 0x0409, "DataFieldErrorRadius" }, { 0x20, 0x040a, "DataFieldFixQuality" }, { 0x20, 0x040b, "DataFieldFixType" }, { 0x20, 0x040c, "DataFieldGeoidalSeparation" }, { 0x20, 0x040d, "DataFieldGPSOperationMode" }, { 0x20, 0x040e, "DataFieldGPSSelectionMode" }, { 0x20, 0x040f, "DataFieldGPSStatus" }, { 0x20, 0x0410, "DataFieldPositionDilutionofPrecision" }, { 0x20, 0x0411, "DataFieldHorizontalDilutionofPrecision" }, { 0x20, 0x0412, "DataFieldVerticalDilutionofPrecision" }, { 0x20, 0x0413, "DataFieldLatitude" }, { 0x20, 0x0414, "DataFieldLongitude" }, { 0x20, 0x0415, "DataFieldTrueHeading" }, { 0x20, 0x0416, "DataFieldMagneticHeading" }, { 0x20, 0x0417, "DataFieldMagneticVariation" }, { 0x20, 0x0418, "DataFieldSpeed" }, { 0x20, 0x0419, "DataFieldSatellitesinView" }, { 0x20, 0x041a, "DataFieldSatellitesinViewAzimuth" }, { 0x20, 0x041b, "DataFieldSatellitesinViewElevation" }, { 0x20, 0x041c, "DataFieldSatellitesinViewIDs" }, { 0x20, 0x041d, "DataFieldSatellitesinViewPRNs" }, { 0x20, 0x041e, "DataFieldSatellitesinViewSNRatios" }, { 0x20, 0x041f, "DataFieldSatellitesUsedCount" }, { 0x20, 0x0420, "DataFieldSatellitesUsedPRNs" }, { 0x20, 0x0421, "DataFieldNMEASentence" }, { 0x20, 0x0422, "DataFieldAddressLine1" }, { 0x20, 0x0423, "DataFieldAddressLine2" }, { 0x20, 0x0424, "DataFieldCity" }, { 0x20, 0x0425, "DataFieldStateorProvince" }, { 0x20, 0x0426, "DataFieldCountryorRegion" }, { 0x20, 0x0427, "DataFieldPostalCode" }, { 0x20, 0x042a, "PropertyLocation" }, { 0x20, 0x042b, "PropertyLocationDesiredAccuracy" }, { 0x20, 0x0430, "DataFieldEnvironmental" }, { 0x20, 0x0431, "DataFieldAtmosphericPressure" }, { 0x20, 0x0433, "DataFieldRelativeHumidity" }, { 0x20, 0x0434, "DataFieldTemperature" }, { 0x20, 0x0435, "DataFieldWindDirection" }, { 0x20, 0x0436, "DataFieldWindSpeed" }, { 0x20, 0x0437, "DataFieldAirQualityIndex" }, { 0x20, 0x0438, "DataFieldEquivalentCO2" }, { 0x20, 0x0439, "DataFieldVolatileOrganicCompoundConcentration" }, { 0x20, 0x043a, "DataFieldObjectPresence" }, { 0x20, 0x043b, "DataFieldObjectProximityRange" }, { 0x20, 0x043c, "DataFieldObjectProximityOutofRange" }, { 0x20, 0x0440, "PropertyEnvironmental" }, { 0x20, 0x0441, "PropertyReferencePressure" }, { 0x20, 0x0450, "DataFieldMotion" }, { 0x20, 0x0451, "DataFieldMotionState" }, { 0x20, 0x0452, "DataFieldAcceleration" }, { 0x20, 0x0453, "DataFieldAccelerationAxisX" }, { 0x20, 0x0454, "DataFieldAccelerationAxisY" }, { 0x20, 0x0455, "DataFieldAccelerationAxisZ" }, { 0x20, 0x0456, "DataFieldAngularVelocity" }, { 0x20, 0x0457, "DataFieldAngularVelocityaboutXAxis" }, { 0x20, 0x0458, "DataFieldAngularVelocityaboutYAxis" }, { 0x20, 0x0459, "DataFieldAngularVelocityaboutZAxis" }, { 0x20, 0x045a, "DataFieldAngularPosition" }, { 0x20, 0x045b, "DataFieldAngularPositionaboutXAxis" }, { 0x20, 0x045c, "DataFieldAngularPositionaboutYAxis" }, { 0x20, 0x045d, "DataFieldAngularPositionaboutZAxis" }, { 0x20, 0x045e, "DataFieldMotionSpeed" }, { 0x20, 0x045f, "DataFieldMotionIntensity" }, { 0x20, 0x0470, "DataFieldOrientation" }, { 0x20, 0x0471, "DataFieldHeading" }, { 0x20, 0x0472, "DataFieldHeadingXAxis" }, { 0x20, 0x0473, "DataFieldHeadingYAxis" }, { 0x20, 0x0474, "DataFieldHeadingZAxis" }, { 0x20, 0x0475, "DataFieldHeadingCompensatedMagneticNorth" }, { 0x20, 0x0476, "DataFieldHeadingCompensatedTrueNorth" }, { 0x20, 0x0477, "DataFieldHeadingMagneticNorth" }, { 0x20, 0x0478, "DataFieldHeadingTrueNorth" }, { 0x20, 0x0479, "DataFieldDistance" }, { 0x20, 0x047a, "DataFieldDistanceXAxis" }, { 0x20, 0x047b, "DataFieldDistanceYAxis" }, { 0x20, 0x047c, "DataFieldDistanceZAxis" }, { 0x20, 0x047d, "DataFieldDistanceOutofRange" }, { 0x20, 0x047e, "DataFieldTilt" }, { 0x20, 0x047f, "DataFieldTiltXAxis" }, { 0x20, 0x0480, "DataFieldTiltYAxis" }, { 0x20, 0x0481, "DataFieldTiltZAxis" }, { 0x20, 0x0482, "DataFieldRotationMatrix" }, { 0x20, 0x0483, "DataFieldQuaternion" }, { 0x20, 0x0484, "DataFieldMagneticFlux" }, { 0x20, 0x0485, "DataFieldMagneticFluxXAxis" }, { 0x20, 0x0486, "DataFieldMagneticFluxYAxis" }, { 0x20, 0x0487, "DataFieldMagneticFluxZAxis" }, { 0x20, 0x0488, "DataFieldMagnetometerAccuracy" }, { 0x20, 0x0489, "DataFieldSimpleOrientationDirection" }, { 0x20, 0x0490, "DataFieldMechanical" }, { 0x20, 0x0491, "DataFieldBooleanSwitchState" }, { 0x20, 0x0492, "DataFieldBooleanSwitchArrayStates" }, { 0x20, 0x0493, "DataFieldMultivalueSwitchValue" }, { 0x20, 0x0494, "DataFieldForce" }, { 0x20, 0x0495, "DataFieldAbsolutePressure" }, { 0x20, 0x0496, "DataFieldGaugePressure" }, { 0x20, 0x0497, "DataFieldStrain" }, { 0x20, 0x0498, "DataFieldWeight" }, { 0x20, 0x04a0, "PropertyMechanical" }, { 0x20, 0x04a1, "PropertyVibrationState" }, { 0x20, 0x04a2, "PropertyForwardVibrationSpeed" }, { 0x20, 0x04a3, "PropertyBackwardVibrationSpeed" }, { 0x20, 0x04b0, "DataFieldBiometric" }, { 0x20, 0x04b1, "DataFieldHumanPresence" }, { 0x20, 0x04b2, "DataFieldHumanProximityRange" }, { 0x20, 0x04b3, "DataFieldHumanProximityOutofRange" }, { 0x20, 0x04b4, "DataFieldHumanTouchState" }, { 0x20, 0x04b5, "DataFieldBloodPressure" }, { 0x20, 0x04b6, "DataFieldBloodPressureDiastolic" }, { 0x20, 0x04b7, "DataFieldBloodPressureSystolic" }, { 0x20, 0x04b8, "DataFieldHeartRate" }, { 0x20, 0x04b9, "DataFieldRestingHeartRate" }, { 0x20, 0x04ba, "DataFieldHeartbeatInterval" }, { 0x20, 0x04bb, "DataFieldRespiratoryRate" }, { 0x20, 0x04bc, "DataFieldSpO2" }, { 0x20, 0x04bd, "DataFieldHumanAttentionDetected" }, { 0x20, 0x04be, "DataFieldHumanHeadAzimuth" }, { 0x20, 0x04bf, "DataFieldHumanHeadAltitude" }, { 0x20, 0x04c0, "DataFieldHumanHeadRoll" }, { 0x20, 0x04c1, "DataFieldHumanHeadPitch" }, { 0x20, 0x04c2, "DataFieldHumanHeadYaw" }, { 0x20, 0x04c3, "DataFieldHumanCorrelationId" }, { 0x20, 0x04d0, "DataFieldLight" }, { 0x20, 0x04d1, "DataFieldIlluminance" }, { 0x20, 0x04d2, "DataFieldColorTemperature" }, { 0x20, 0x04d3, "DataFieldChromaticity" }, { 0x20, 0x04d4, "DataFieldChromaticityX" }, { 0x20, 0x04d5, "DataFieldChromaticityY" }, { 0x20, 0x04d6, "DataFieldConsumerIRSentenceReceive" }, { 0x20, 0x04d7, "DataFieldInfraredLight" }, { 0x20, 0x04d8, "DataFieldRedLight" }, { 0x20, 0x04d9, "DataFieldGreenLight" }, { 0x20, 0x04da, "DataFieldBlueLight" }, { 0x20, 0x04db, "DataFieldUltravioletALight" }, { 0x20, 0x04dc, "DataFieldUltravioletBLight" }, { 0x20, 0x04dd, "DataFieldUltravioletIndex" }, { 0x20, 0x04de, "DataFieldNearInfraredLight" }, { 0x20, 0x04df, "PropertyLight" }, { 0x20, 0x04e0, "PropertyConsumerIRSentenceSend" }, { 0x20, 0x04e2, "PropertyAutoBrightnessPreferred" }, { 0x20, 0x04e3, "PropertyAutoColorPreferred" }, { 0x20, 0x04f0, "DataFieldScanner" }, { 0x20, 0x04f1, "DataFieldRFIDTag40Bit" }, { 0x20, 0x04f2, "DataFieldNFCSentenceReceive" }, { 0x20, 0x04f8, "PropertyScanner" }, { 0x20, 0x04f9, "PropertyNFCSentenceSend" }, { 0x20, 0x0500, "DataFieldElectrical" }, { 0x20, 0x0501, "DataFieldCapacitance" }, { 0x20, 0x0502, "DataFieldCurrent" }, { 0x20, 0x0503, "DataFieldElectricalPower" }, { 0x20, 0x0504, "DataFieldInductance" }, { 0x20, 0x0505, "DataFieldResistance" }, { 0x20, 0x0506, "DataFieldVoltage" }, { 0x20, 0x0507, "DataFieldFrequency" }, { 0x20, 0x0508, "DataFieldPeriod" }, { 0x20, 0x0509, "DataFieldPercentofRange" }, { 0x20, 0x0520, "DataFieldTime" }, { 0x20, 0x0521, "DataFieldYear" }, { 0x20, 0x0522, "DataFieldMonth" }, { 0x20, 0x0523, "DataFieldDay" }, { 0x20, 0x0524, "DataFieldDayofWeek" }, { 0x20, 0x0525, "DataFieldHour" }, { 0x20, 0x0526, "DataFieldMinute" }, { 0x20, 0x0527, "DataFieldSecond" }, { 0x20, 0x0528, "DataFieldMillisecond" }, { 0x20, 0x0529, "DataFieldTimestamp" }, { 0x20, 0x052a, "DataFieldJulianDayofYear" }, { 0x20, 0x052b, "DataFieldTimeSinceSystemBoot" }, { 0x20, 0x0530, "PropertyTime" }, { 0x20, 0x0531, "PropertyTimeZoneOffsetfromUTC" }, { 0x20, 0x0532, "PropertyTimeZoneName" }, { 0x20, 0x0533, "PropertyDaylightSavingsTimeObserved" }, { 0x20, 0x0534, "PropertyTimeTrimAdjustment" }, { 0x20, 0x0535, "PropertyArmAlarm" }, { 0x20, 0x0540, "DataFieldCustom" }, { 0x20, 0x0541, "DataFieldCustomUsage" }, { 0x20, 0x0542, "DataFieldCustomBooleanArray" }, { 0x20, 0x0543, "DataFieldCustomValue" }, { 0x20, 0x0544, "DataFieldCustomValue1" }, { 0x20, 0x0545, "DataFieldCustomValue2" }, { 0x20, 0x0546, "DataFieldCustomValue3" }, { 0x20, 0x0547, "DataFieldCustomValue4" }, { 0x20, 0x0548, "DataFieldCustomValue5" }, { 0x20, 0x0549, "DataFieldCustomValue6" }, { 0x20, 0x054a, "DataFieldCustomValue7" }, { 0x20, 0x054b, "DataFieldCustomValue8" }, { 0x20, 0x054c, "DataFieldCustomValue9" }, { 0x20, 0x054d, "DataFieldCustomValue10" }, { 0x20, 0x054e, "DataFieldCustomValue11" }, { 0x20, 0x054f, "DataFieldCustomValue12" }, { 0x20, 0x0550, "DataFieldCustomValue13" }, { 0x20, 0x0551, "DataFieldCustomValue14" }, { 0x20, 0x0552, "DataFieldCustomValue15" }, { 0x20, 0x0553, "DataFieldCustomValue16" }, { 0x20, 0x0554, "DataFieldCustomValue17" }, { 0x20, 0x0555, "DataFieldCustomValue18" }, { 0x20, 0x0556, "DataFieldCustomValue19" }, { 0x20, 0x0557, "DataFieldCustomValue20" }, { 0x20, 0x0558, "DataFieldCustomValue21" }, { 0x20, 0x0559, "DataFieldCustomValue22" }, { 0x20, 0x055a, "DataFieldCustomValue23" }, { 0x20, 0x055b, "DataFieldCustomValue24" }, { 0x20, 0x055c, "DataFieldCustomValue25" }, { 0x20, 0x055d, "DataFieldCustomValue26" }, { 0x20, 0x055e, "DataFieldCustomValue27" }, { 0x20, 0x055f, "DataFieldCustomValue28" }, { 0x20, 0x0560, "DataFieldGeneric" }, { 0x20, 0x0561, "DataFieldGenericGUIDorPROPERTYKEY" }, { 0x20, 0x0562, "DataFieldGenericCategoryGUID" }, { 0x20, 0x0563, "DataFieldGenericTypeGUID" }, { 0x20, 0x0564, "DataFieldGenericEventPROPERTYKEY" }, { 0x20, 0x0565, "DataFieldGenericPropertyPROPERTYKEY" }, { 0x20, 0x0566, "DataFieldGenericDataFieldPROPERTYKEY" }, { 0x20, 0x0567, "DataFieldGenericEvent" }, { 0x20, 0x0568, "DataFieldGenericProperty" }, { 0x20, 0x0569, "DataFieldGenericDataField" }, { 0x20, 0x056a, "DataFieldEnumeratorTableRowIndex" }, { 0x20, 0x056b, "DataFieldEnumeratorTableRowCount" }, { 0x20, 0x056c, "DataFieldGenericGUIDorPROPERTYKEYkind" }, { 0x20, 0x056d, "DataFieldGenericGUID" }, { 0x20, 0x056e, "DataFieldGenericPROPERTYKEY" }, { 0x20, 0x056f, "DataFieldGenericTopLevelCollectionID" }, { 0x20, 0x0570, "DataFieldGenericReportID" }, { 0x20, 0x0571, "DataFieldGenericReportItemPositionIndex" }, { 0x20, 0x0572, "DataFieldGenericFirmwareVARTYPE" }, { 0x20, 0x0573, "DataFieldGenericUnitofMeasure" }, { 0x20, 0x0574, "DataFieldGenericUnitExponent" }, { 0x20, 0x0575, "DataFieldGenericReportSize" }, { 0x20, 0x0576, "DataFieldGenericReportCount" }, { 0x20, 0x0580, "PropertyGeneric" }, { 0x20, 0x0581, "PropertyEnumeratorTableRowIndex" }, { 0x20, 0x0582, "PropertyEnumeratorTableRowCount" }, { 0x20, 0x0590, "DataFieldPersonalActivity" }, { 0x20, 0x0591, "DataFieldActivityType" }, { 0x20, 0x0592, "DataFieldActivityState" }, { 0x20, 0x0593, "DataFieldDevicePosition" }, { 0x20, 0x0594, "DataFieldStepCount" }, { 0x20, 0x0595, "DataFieldStepCountReset" }, { 0x20, 0x0596, "DataFieldStepDuration" }, { 0x20, 0x0597, "DataFieldStepType" }, { 0x20, 0x05a0, "PropertyMinimumActivityDetectionInterval" }, { 0x20, 0x05a1, "PropertySupportedActivityTypes" }, { 0x20, 0x05a2, "PropertySubscribedActivityTypes" }, { 0x20, 0x05a3, "PropertySupportedStepTypes" }, { 0x20, 0x05a4, "PropertySubscribedStepTypes" }, { 0x20, 0x05a5, "PropertyFloorHeight" }, { 0x20, 0x05b0, "DataFieldCustomTypeID" }, { 0x20, 0x05c0, "PropertyCustom" }, { 0x20, 0x05c1, "PropertyCustomValue1" }, { 0x20, 0x05c2, "PropertyCustomValue2" }, { 0x20, 0x05c3, "PropertyCustomValue3" }, { 0x20, 0x05c4, "PropertyCustomValue4" }, { 0x20, 0x05c5, "PropertyCustomValue5" }, { 0x20, 0x05c6, "PropertyCustomValue6" }, { 0x20, 0x05c7, "PropertyCustomValue7" }, { 0x20, 0x05c8, "PropertyCustomValue8" }, { 0x20, 0x05c9, "PropertyCustomValue9" }, { 0x20, 0x05ca, "PropertyCustomValue10" }, { 0x20, 0x05cb, "PropertyCustomValue11" }, { 0x20, 0x05cc, "PropertyCustomValue12" }, { 0x20, 0x05cd, "PropertyCustomValue13" }, { 0x20, 0x05ce, "PropertyCustomValue14" }, { 0x20, 0x05cf, "PropertyCustomValue15" }, { 0x20, 0x05d0, "PropertyCustomValue16" }, { 0x20, 0x05e0, "DataFieldHinge" }, { 0x20, 0x05e1, "DataFieldHingeAngle" }, { 0x20, 0x05f0, "DataFieldGestureSensor" }, { 0x20, 0x05f1, "DataFieldGestureState" }, { 0x20, 0x05f2, "DataFieldHingeFoldInitialAngle" }, { 0x20, 0x05f3, "DataFieldHingeFoldFinalAngle" }, { 0x20, 0x05f4, "DataFieldHingeFoldContributingPanel" }, { 0x20, 0x05f5, "DataFieldHingeFoldType" }, { 0x20, 0x0800, "SensorStateUndefined" }, { 0x20, 0x0801, "SensorStateReady" }, { 0x20, 0x0802, "SensorStateNotAvailable" }, { 0x20, 0x0803, "SensorStateNoData" }, { 0x20, 0x0804, "SensorStateInitializing" }, { 0x20, 0x0805, "SensorStateAccessDenied" }, { 0x20, 0x0806, "SensorStateError" }, { 0x20, 0x0810, "SensorEventUnknown" }, { 0x20, 0x0811, "SensorEventStateChanged" }, { 0x20, 0x0812, "SensorEventPropertyChanged" }, { 0x20, 0x0813, "SensorEventDataUpdated" }, { 0x20, 0x0814, "SensorEventPollResponse" }, { 0x20, 0x0815, "SensorEventChangeSensitivity" }, { 0x20, 0x0816, "SensorEventRangeMaximumReached" }, { 0x20, 0x0817, "SensorEventRangeMinimumReached" }, { 0x20, 0x0818, "SensorEventHighThresholdCrossUpward" }, { 0x20, 0x0819, "SensorEventHighThresholdCrossDownward" }, { 0x20, 0x081a, "SensorEventLowThresholdCrossUpward" }, { 0x20, 0x081b, "SensorEventLowThresholdCrossDownward" }, { 0x20, 0x081c, "SensorEventZeroThresholdCrossUpward" }, { 0x20, 0x081d, "SensorEventZeroThresholdCrossDownward" }, { 0x20, 0x081e, "SensorEventPeriodExceeded" }, { 0x20, 0x081f, "SensorEventFrequencyExceeded" }, { 0x20, 0x0820, "SensorEventComplexTrigger" }, { 0x20, 0x0830, "ConnectionTypePCIntegrated" }, { 0x20, 0x0831, "ConnectionTypePCAttached" }, { 0x20, 0x0832, "ConnectionTypePCExternal" }, { 0x20, 0x0840, "ReportingStateReportNoEvents" }, { 0x20, 0x0841, "ReportingStateReportAllEvents" }, { 0x20, 0x0842, "ReportingStateReportThresholdEvents" }, { 0x20, 0x0843, "ReportingStateWakeOnNoEvents" }, { 0x20, 0x0844, "ReportingStateWakeOnAllEvents" }, { 0x20, 0x0845, "ReportingStateWakeOnThresholdEvents" }, { 0x20, 0x0846, "ReportingStateAnytime" }, { 0x20, 0x0850, "PowerStateUndefined" }, { 0x20, 0x0851, "PowerStateD0FullPower" }, { 0x20, 0x0852, "PowerStateD1LowPower" }, { 0x20, 0x0853, "PowerStateD2StandbyPowerwithWakeup" }, { 0x20, 0x0854, "PowerStateD3SleepwithWakeup" }, { 0x20, 0x0855, "PowerStateD4PowerOff" }, { 0x20, 0x0860, "AccuracyDefault" }, { 0x20, 0x0861, "AccuracyHigh" }, { 0x20, 0x0862, "AccuracyMedium" }, { 0x20, 0x0863, "AccuracyLow" }, { 0x20, 0x0870, "FixQualityNoFix" }, { 0x20, 0x0871, "FixQualityGPS" }, { 0x20, 0x0872, "FixQualityDGPS" }, { 0x20, 0x0880, "FixTypeNoFix" }, { 0x20, 0x0881, "FixTypeGPSSPSModeFixValid" }, { 0x20, 0x0882, "FixTypeDGPSSPSModeFixValid" }, { 0x20, 0x0883, "FixTypeGPSPPSModeFixValid" }, { 0x20, 0x0884, "FixTypeRealTimeKinematic" }, { 0x20, 0x0885, "FixTypeFloatRTK" }, { 0x20, 0x0886, "FixTypeEstimateddeadreckoned" }, { 0x20, 0x0887, "FixTypeManualInputMode" }, { 0x20, 0x0888, "FixTypeSimulatorMode" }, { 0x20, 0x0890, "GPSOperationModeManual" }, { 0x20, 0x0891, "GPSOperationModeAutomatic" }, { 0x20, 0x08a0, "GPSSelectionModeAutonomous" }, { 0x20, 0x08a1, "GPSSelectionModeDGPS" }, { 0x20, 0x08a2, "GPSSelectionModeEstimateddeadreckoned" }, { 0x20, 0x08a3, "GPSSelectionModeManualInput" }, { 0x20, 0x08a4, "GPSSelectionModeSimulator" }, { 0x20, 0x08a5, "GPSSelectionModeDataNotValid" }, { 0x20, 0x08b0, "GPSStatusDataValid" }, { 0x20, 0x08b1, "GPSStatusDataNotValid" }, { 0x20, 0x08c0, "DayofWeekSunday" }, { 0x20, 0x08c1, "DayofWeekMonday" }, { 0x20, 0x08c2, "DayofWeekTuesday" }, { 0x20, 0x08c3, "DayofWeekWednesday" }, { 0x20, 0x08c4, "DayofWeekThursday" }, { 0x20, 0x08c5, "DayofWeekFriday" }, { 0x20, 0x08c6, "DayofWeekSaturday" }, { 0x20, 0x08d0, "KindCategory" }, { 0x20, 0x08d1, "KindType" }, { 0x20, 0x08d2, "KindEvent" }, { 0x20, 0x08d3, "KindProperty" }, { 0x20, 0x08d4, "KindDataField" }, { 0x20, 0x08e0, "MagnetometerAccuracyLow" }, { 0x20, 0x08e1, "MagnetometerAccuracyMedium" }, { 0x20, 0x08e2, "MagnetometerAccuracyHigh" }, { 0x20, 0x08f0, "SimpleOrientationDirectionNotRotated" }, { 0x20, 0x08f1, "SimpleOrientationDirectionRotated90DegreesCCW" }, { 0x20, 0x08f2, "SimpleOrientationDirectionRotated180DegreesCCW" }, { 0x20, 0x08f3, "SimpleOrientationDirectionRotated270DegreesCCW" }, { 0x20, 0x08f4, "SimpleOrientationDirectionFaceUp" }, { 0x20, 0x08f5, "SimpleOrientationDirectionFaceDown" }, { 0x20, 0x0900, "VT_NULL" }, { 0x20, 0x0901, "VT_BOOL" }, { 0x20, 0x0902, "VT_UI1" }, { 0x20, 0x0903, "VT_I1" }, { 0x20, 0x0904, "VT_UI2" }, { 0x20, 0x0905, "VT_I2" }, { 0x20, 0x0906, "VT_UI4" }, { 0x20, 0x0907, "VT_I4" }, { 0x20, 0x0908, "VT_UI8" }, { 0x20, 0x0909, "VT_I8" }, { 0x20, 0x090a, "VT_R4" }, { 0x20, 0x090b, "VT_R8" }, { 0x20, 0x090c, "VT_WSTR" }, { 0x20, 0x090d, "VT_STR" }, { 0x20, 0x090e, "VT_CLSID" }, { 0x20, 0x090f, "VT_VECTORVT_UI1" }, { 0x20, 0x0910, "VT_F16E0" }, { 0x20, 0x0911, "VT_F16E1" }, { 0x20, 0x0912, "VT_F16E2" }, { 0x20, 0x0913, "VT_F16E3" }, { 0x20, 0x0914, "VT_F16E4" }, { 0x20, 0x0915, "VT_F16E5" }, { 0x20, 0x0916, "VT_F16E6" }, { 0x20, 0x0917, "VT_F16E7" }, { 0x20, 0x0918, "VT_F16E8" }, { 0x20, 0x0919, "VT_F16E9" }, { 0x20, 0x091a, "VT_F16EA" }, { 0x20, 0x091b, "VT_F16EB" }, { 0x20, 0x091c, "VT_F16EC" }, { 0x20, 0x091d, "VT_F16ED" }, { 0x20, 0x091e, "VT_F16EE" }, { 0x20, 0x091f, "VT_F16EF" }, { 0x20, 0x0920, "VT_F32E0" }, { 0x20, 0x0921, "VT_F32E1" }, { 0x20, 0x0922, "VT_F32E2" }, { 0x20, 0x0923, "VT_F32E3" }, { 0x20, 0x0924, "VT_F32E4" }, { 0x20, 0x0925, "VT_F32E5" }, { 0x20, 0x0926, "VT_F32E6" }, { 0x20, 0x0927, "VT_F32E7" }, { 0x20, 0x0928, "VT_F32E8" }, { 0x20, 0x0929, "VT_F32E9" }, { 0x20, 0x092a, "VT_F32EA" }, { 0x20, 0x092b, "VT_F32EB" }, { 0x20, 0x092c, "VT_F32EC" }, { 0x20, 0x092d, "VT_F32ED" }, { 0x20, 0x092e, "VT_F32EE" }, { 0x20, 0x092f, "VT_F32EF" }, { 0x20, 0x0930, "ActivityTypeUnknown" }, { 0x20, 0x0931, "ActivityTypeStationary" }, { 0x20, 0x0932, "ActivityTypeFidgeting" }, { 0x20, 0x0933, "ActivityTypeWalking" }, { 0x20, 0x0934, "ActivityTypeRunning" }, { 0x20, 0x0935, "ActivityTypeInVehicle" }, { 0x20, 0x0936, "ActivityTypeBiking" }, { 0x20, 0x0937, "ActivityTypeIdle" }, { 0x20, 0x0940, "UnitNotSpecified" }, { 0x20, 0x0941, "UnitLux" }, { 0x20, 0x0942, "UnitDegreesKelvin" }, { 0x20, 0x0943, "UnitDegreesCelsius" }, { 0x20, 0x0944, "UnitPascal" }, { 0x20, 0x0945, "UnitNewton" }, { 0x20, 0x0946, "UnitMetersSecond" }, { 0x20, 0x0947, "UnitKilogram" }, { 0x20, 0x0948, "UnitMeter" }, { 0x20, 0x0949, "UnitMetersSecondSecond" }, { 0x20, 0x094a, "UnitFarad" }, { 0x20, 0x094b, "UnitAmpere" }, { 0x20, 0x094c, "UnitWatt" }, { 0x20, 0x094d, "UnitHenry" }, { 0x20, 0x094e, "UnitOhm" }, { 0x20, 0x094f, "UnitVolt" }, { 0x20, 0x0950, "UnitHertz" }, { 0x20, 0x0951, "UnitBar" }, { 0x20, 0x0952, "UnitDegreesAnticlockwise" }, { 0x20, 0x0953, "UnitDegreesClockwise" }, { 0x20, 0x0954, "UnitDegrees" }, { 0x20, 0x0955, "UnitDegreesSecond" }, { 0x20, 0x0956, "UnitDegreesSecondSecond" }, { 0x20, 0x0957, "UnitKnot" }, { 0x20, 0x0958, "UnitPercent" }, { 0x20, 0x0959, "UnitSecond" }, { 0x20, 0x095a, "UnitMillisecond" }, { 0x20, 0x095b, "UnitG" }, { 0x20, 0x095c, "UnitBytes" }, { 0x20, 0x095d, "UnitMilligauss" }, { 0x20, 0x095e, "UnitBits" }, { 0x20, 0x0960, "ActivityStateNoStateChange" }, { 0x20, 0x0961, "ActivityStateStartActivity" }, { 0x20, 0x0962, "ActivityStateEndActivity" }, { 0x20, 0x0970, "Exponent0" }, { 0x20, 0x0971, "Exponent1" }, { 0x20, 0x0972, "Exponent2" }, { 0x20, 0x0973, "Exponent3" }, { 0x20, 0x0974, "Exponent4" }, { 0x20, 0x0975, "Exponent5" }, { 0x20, 0x0976, "Exponent6" }, { 0x20, 0x0977, "Exponent7" }, { 0x20, 0x0978, "Exponent8" }, { 0x20, 0x0979, "Exponent9" }, { 0x20, 0x097a, "ExponentA" }, { 0x20, 0x097b, "ExponentB" }, { 0x20, 0x097c, "ExponentC" }, { 0x20, 0x097d, "ExponentD" }, { 0x20, 0x097e, "ExponentE" }, { 0x20, 0x097f, "ExponentF" }, { 0x20, 0x0980, "DevicePositionUnknown" }, { 0x20, 0x0981, "DevicePositionUnchanged" }, { 0x20, 0x0982, "DevicePositionOnDesk" }, { 0x20, 0x0983, "DevicePositionInHand" }, { 0x20, 0x0984, "DevicePositionMovinginBag" }, { 0x20, 0x0985, "DevicePositionStationaryinBag" }, { 0x20, 0x0990, "StepTypeUnknown" }, { 0x20, 0x0991, "StepTypeWalking" }, { 0x20, 0x0992, "StepTypeRunning" }, { 0x20, 0x09a0, "GestureStateUnknown" }, { 0x20, 0x09a1, "GestureStateStarted" }, { 0x20, 0x09a2, "GestureStateCompleted" }, { 0x20, 0x09a3, "GestureStateCancelled" }, { 0x20, 0x09b0, "HingeFoldContributingPanelUnknown" }, { 0x20, 0x09b1, "HingeFoldContributingPanelPanel1" }, { 0x20, 0x09b2, "HingeFoldContributingPanelPanel2" }, { 0x20, 0x09b3, "HingeFoldContributingPanelBoth" }, { 0x20, 0x09b4, "HingeFoldTypeUnknown" }, { 0x20, 0x09b5, "HingeFoldTypeIncreasing" }, { 0x20, 0x09b6, "HingeFoldTypeDecreasing" }, { 0x20, 0x09c0, "HumanPresenceDetectionTypeVendorDefinedNonBiometric" }, { 0x20, 0x09c1, "HumanPresenceDetectionTypeVendorDefinedBiometric" }, { 0x20, 0x09c2, "HumanPresenceDetectionTypeFacialBiometric" }, { 0x20, 0x09c3, "HumanPresenceDetectionTypeAudioBiometric" }, { 0x20, 0x1000, "ModifierChangeSensitivityAbsolute" }, { 0x20, 0x2000, "ModifierMaximum" }, { 0x20, 0x3000, "ModifierMinimum" }, { 0x20, 0x4000, "ModifierAccuracy" }, { 0x20, 0x5000, "ModifierResolution" }, { 0x20, 0x6000, "ModifierThresholdHigh" }, { 0x20, 0x7000, "ModifierThresholdLow" }, { 0x20, 0x8000, "ModifierCalibrationOffset" }, { 0x20, 0x9000, "ModifierCalibrationMultiplier" }, { 0x20, 0xa000, "ModifierReportInterval" }, { 0x20, 0xb000, "ModifierFrequencyMax" }, { 0x20, 0xc000, "ModifierPeriodMax" }, { 0x20, 0xd000, "ModifierChangeSensitivityPercentofRange" }, { 0x20, 0xe000, "ModifierChangeSensitivityPercentRelative" }, { 0x20, 0xf000, "ModifierVendorReserved" }, { 0x40, 0, "MedicalInstrument" }, { 0x40, 0x0001, "MedicalUltrasound" }, { 0x40, 0x0020, "VCRAcquisition" }, { 0x40, 0x0021, "FreezeThaw" }, { 0x40, 0x0022, "ClipStore" }, { 0x40, 0x0023, "Update" }, { 0x40, 0x0024, "Next" }, { 0x40, 0x0025, "Save" }, { 0x40, 0x0026, "Print" }, { 0x40, 0x0027, "MicrophoneEnable" }, { 0x40, 0x0040, "Cine" }, { 0x40, 0x0041, "TransmitPower" }, { 0x40, 0x0042, "Volume" }, { 0x40, 0x0043, "Focus" }, { 0x40, 0x0044, "Depth" }, { 0x40, 0x0060, "SoftStepPrimary" }, { 0x40, 0x0061, "SoftStepSecondary" }, { 0x40, 0x0070, "DepthGainCompensation" }, { 0x40, 0x0080, "ZoomSelect" }, { 0x40, 0x0081, "ZoomAdjust" }, { 0x40, 0x0082, "SpectralDopplerModeSelect" }, { 0x40, 0x0083, "SpectralDopplerAdjust" }, { 0x40, 0x0084, "ColorDopplerModeSelect" }, { 0x40, 0x0085, "ColorDopplerAdjust" }, { 0x40, 0x0086, "MotionModeSelect" }, { 0x40, 0x0087, "MotionModeAdjust" }, { 0x40, 0x0088, "2DModeSelect" }, { 0x40, 0x0089, "2DModeAdjust" }, { 0x40, 0x00a0, "SoftControlSelect" }, { 0x40, 0x00a1, "SoftControlAdjust" }, { 0x41, 0, "BrailleDisplay" }, { 0x41, 0x0001, "BrailleDisplay" }, { 0x41, 0x0002, "BrailleRow" }, { 0x41, 0x0003, "8DotBrailleCell" }, { 0x41, 0x0004, "6DotBrailleCell" }, { 0x41, 0x0005, "NumberofBrailleCells" }, { 0x41, 0x0006, "ScreenReaderControl" }, { 0x41, 0x0007, "ScreenReaderIdentifier" }, { 0x41, 0x00fa, "RouterSet1" }, { 0x41, 0x00fb, "RouterSet2" }, { 0x41, 0x00fc, "RouterSet3" }, { 0x41, 0x0100, "RouterKey" }, { 0x41, 0x0101, "RowRouterKey" }, { 0x41, 0x0200, "BrailleButtons" }, { 0x41, 0x0201, "BrailleKeyboardDot1" }, { 0x41, 0x0202, "BrailleKeyboardDot2" }, { 0x41, 0x0203, "BrailleKeyboardDot3" }, { 0x41, 0x0204, "BrailleKeyboardDot4" }, { 0x41, 0x0205, "BrailleKeyboardDot5" }, { 0x41, 0x0206, "BrailleKeyboardDot6" }, { 0x41, 0x0207, "BrailleKeyboardDot7" }, { 0x41, 0x0208, "BrailleKeyboardDot8" }, { 0x41, 0x0209, "BrailleKeyboardSpace" }, { 0x41, 0x020a, "BrailleKeyboardLeftSpace" }, { 0x41, 0x020b, "BrailleKeyboardRightSpace" }, { 0x41, 0x020c, "BrailleFaceControls" }, { 0x41, 0x020d, "BrailleLeftControls" }, { 0x41, 0x020e, "BrailleRightControls" }, { 0x41, 0x020f, "BrailleTopControls" }, { 0x41, 0x0210, "BrailleJoystickCenter" }, { 0x41, 0x0211, "BrailleJoystickUp" }, { 0x41, 0x0212, "BrailleJoystickDown" }, { 0x41, 0x0213, "BrailleJoystickLeft" }, { 0x41, 0x0214, "BrailleJoystickRight" }, { 0x41, 0x0215, "BrailleDPadCenter" }, { 0x41, 0x0216, "BrailleDPadUp" }, { 0x41, 0x0217, "BrailleDPadDown" }, { 0x41, 0x0218, "BrailleDPadLeft" }, { 0x41, 0x0219, "BrailleDPadRight" }, { 0x41, 0x021a, "BraillePanLeft" }, { 0x41, 0x021b, "BraillePanRight" }, { 0x41, 0x021c, "BrailleRockerUp" }, { 0x41, 0x021d, "BrailleRockerDown" }, { 0x41, 0x021e, "BrailleRockerPress" }, { 0x59, 0, "LightingAndIllumination" }, { 0x59, 0x0001, "LampArray" }, { 0x59, 0x0002, "LampArrayAttributesReport" }, { 0x59, 0x0003, "LampCount" }, { 0x59, 0x0004, "BoundingBoxWidthInMicrometers" }, { 0x59, 0x0005, "BoundingBoxHeightInMicrometers" }, { 0x59, 0x0006, "BoundingBoxDepthInMicrometers" }, { 0x59, 0x0007, "LampArrayKind" }, { 0x59, 0x0008, "MinUpdateIntervalInMicroseconds" }, { 0x59, 0x0020, "LampAttributesRequestReport" }, { 0x59, 0x0021, "LampId" }, { 0x59, 0x0022, "LampAttributesResponseReport" }, { 0x59, 0x0023, "PositionXInMicrometers" }, { 0x59, 0x0024, "PositionYInMicrometers" }, { 0x59, 0x0025, "PositionZInMicrometers" }, { 0x59, 0x0026, "LampPurposes" }, { 0x59, 0x0027, "UpdateLatencyInMicroseconds" }, { 0x59, 0x0028, "RedLevelCount" }, { 0x59, 0x0029, "GreenLevelCount" }, { 0x59, 0x002a, "BlueLevelCount" }, { 0x59, 0x002b, "IntensityLevelCount" }, { 0x59, 0x002c, "IsProgrammable" }, { 0x59, 0x002d, "InputBinding" }, { 0x59, 0x0050, "LampMultiUpdateReport" }, { 0x59, 0x0051, "RedUpdateChannel" }, { 0x59, 0x0052, "GreenUpdateChannel" }, { 0x59, 0x0053, "BlueUpdateChannel" }, { 0x59, 0x0054, "IntensityUpdateChannel" }, { 0x59, 0x0055, "LampUpdateFlags" }, { 0x59, 0x0060, "LampRangeUpdateReport" }, { 0x59, 0x0061, "LampIdStart" }, { 0x59, 0x0062, "LampIdEnd" }, { 0x59, 0x0070, "LampArrayControlReport" }, { 0x59, 0x0071, "AutonomousMode" }, { 0x80, 0, "Monitor" }, { 0x80, 0x0001, "MonitorControl" }, { 0x80, 0x0002, "EDIDInformation" }, { 0x80, 0x0003, "VDIFInformation" }, { 0x80, 0x0004, "VESAVersion" }, { 0x81, 0, "MonitorEnumerated" }, { 0x82, 0, "VESAVirtualControls" }, { 0x82, 0x0001, "Degauss" }, { 0x82, 0x0010, "Brightness" }, { 0x82, 0x0012, "Contrast" }, { 0x82, 0x0016, "RedVideoGain" }, { 0x82, 0x0018, "GreenVideoGain" }, { 0x82, 0x001a, "BlueVideoGain" }, { 0x82, 0x001c, "Focus" }, { 0x82, 0x0020, "HorizontalPosition" }, { 0x82, 0x0022, "HorizontalSize" }, { 0x82, 0x0024, "HorizontalPincushion" }, { 0x82, 0x0026, "HorizontalPincushionBalance" }, { 0x82, 0x0028, "HorizontalMisconvergence" }, { 0x82, 0x002a, "HorizontalLinearity" }, { 0x82, 0x002c, "HorizontalLinearityBalance" }, { 0x82, 0x0030, "VerticalPosition" }, { 0x82, 0x0032, "VerticalSize" }, { 0x82, 0x0034, "VerticalPincushion" }, { 0x82, 0x0036, "VerticalPincushionBalance" }, { 0x82, 0x0038, "VerticalMisconvergence" }, { 0x82, 0x003a, "VerticalLinearity" }, { 0x82, 0x003c, "VerticalLinearityBalance" }, { 0x82, 0x0040, "ParallelogramDistortionKeyBalance" }, { 0x82, 0x0042, "TrapezoidalDistortionKey" }, { 0x82, 0x0044, "TiltRotation" }, { 0x82, 0x0046, "TopCornerDistortionControl" }, { 0x82, 0x0048, "TopCornerDistortionBalance" }, { 0x82, 0x004a, "BottomCornerDistortionControl" }, { 0x82, 0x004c, "BottomCornerDistortionBalance" }, { 0x82, 0x0056, "HorizontalMoire" }, { 0x82, 0x0058, "VerticalMoire" }, { 0x82, 0x005e, "InputLevelSelect" }, { 0x82, 0x0060, "InputSourceSelect" }, { 0x82, 0x006c, "RedVideoBlackLevel" }, { 0x82, 0x006e, "GreenVideoBlackLevel" }, { 0x82, 0x0070, "BlueVideoBlackLevel" }, { 0x82, 0x00a2, "AutoSizeCenter" }, { 0x82, 0x00a4, "PolarityHorizontalSynchronization" }, { 0x82, 0x00a6, "PolarityVerticalSynchronization" }, { 0x82, 0x00a8, "SynchronizationType" }, { 0x82, 0x00aa, "ScreenOrientation" }, { 0x82, 0x00ac, "HorizontalFrequency" }, { 0x82, 0x00ae, "VerticalFrequency" }, { 0x82, 0x00b0, "Settings" }, { 0x82, 0x00ca, "OnScreenDisplay" }, { 0x82, 0x00d4, "StereoMode" }, { 0x84, 0, "Power" }, { 0x84, 0x0001, "iName" }, { 0x84, 0x0002, "PresentStatus" }, { 0x84, 0x0003, "ChangedStatus" }, { 0x84, 0x0004, "UPS" }, { 0x84, 0x0005, "PowerSupply" }, { 0x84, 0x0010, "BatterySystem" }, { 0x84, 0x0011, "BatterySystemId" }, { 0x84, 0x0012, "Battery" }, { 0x84, 0x0013, "BatteryId" }, { 0x84, 0x0014, "Charger" }, { 0x84, 0x0015, "ChargerId" }, { 0x84, 0x0016, "PowerConverter" }, { 0x84, 0x0017, "PowerConverterId" }, { 0x84, 0x0018, "OutletSystem" }, { 0x84, 0x0019, "OutletSystemId" }, { 0x84, 0x001a, "Input" }, { 0x84, 0x001b, "InputId" }, { 0x84, 0x001c, "Output" }, { 0x84, 0x001d, "OutputId" }, { 0x84, 0x001e, "Flow" }, { 0x84, 0x001f, "FlowId" }, { 0x84, 0x0020, "Outlet" }, { 0x84, 0x0021, "OutletId" }, { 0x84, 0x0022, "Gang" }, { 0x84, 0x0023, "GangId" }, { 0x84, 0x0024, "PowerSummary" }, { 0x84, 0x0025, "PowerSummaryId" }, { 0x84, 0x0030, "Voltage" }, { 0x84, 0x0031, "Current" }, { 0x84, 0x0032, "Frequency" }, { 0x84, 0x0033, "ApparentPower" }, { 0x84, 0x0034, "ActivePower" }, { 0x84, 0x0035, "PercentLoad" }, { 0x84, 0x0036, "Temperature" }, { 0x84, 0x0037, "Humidity" }, { 0x84, 0x0038, "BadCount" }, { 0x84, 0x0040, "ConfigVoltage" }, { 0x84, 0x0041, "ConfigCurrent" }, { 0x84, 0x0042, "ConfigFrequency" }, { 0x84, 0x0043, "ConfigApparentPower" }, { 0x84, 0x0044, "ConfigActivePower" }, { 0x84, 0x0045, "ConfigPercentLoad" }, { 0x84, 0x0046, "ConfigTemperature" }, { 0x84, 0x0047, "ConfigHumidity" }, { 0x84, 0x0050, "SwitchOnControl" }, { 0x84, 0x0051, "SwitchOffControl" }, { 0x84, 0x0052, "ToggleControl" }, { 0x84, 0x0053, "LowVoltageTransfer" }, { 0x84, 0x0054, "HighVoltageTransfer" }, { 0x84, 0x0055, "DelayBeforeReboot" }, { 0x84, 0x0056, "DelayBeforeStartup" }, { 0x84, 0x0057, "DelayBeforeShutdown" }, { 0x84, 0x0058, "Test" }, { 0x84, 0x0059, "ModuleReset" }, { 0x84, 0x005a, "AudibleAlarmControl" }, { 0x84, 0x0060, "Present" }, { 0x84, 0x0061, "Good" }, { 0x84, 0x0062, "InternalFailure" }, { 0x84, 0x0063, "VoltagOutOfRange" }, { 0x84, 0x0064, "FrequencyOutOfRange" }, { 0x84, 0x0065, "Overload" }, { 0x84, 0x0066, "OverCharged" }, { 0x84, 0x0067, "OverTemperature" }, { 0x84, 0x0068, "ShutdownRequested" }, { 0x84, 0x0069, "ShutdownImminent" }, { 0x84, 0x006b, "SwitchOnOff" }, { 0x84, 0x006c, "Switchable" }, { 0x84, 0x006d, "Used" }, { 0x84, 0x006e, "Boost" }, { 0x84, 0x006f, "Buck" }, { 0x84, 0x0070, "Initialized" }, { 0x84, 0x0071, "Tested" }, { 0x84, 0x0072, "AwaitingPower" }, { 0x84, 0x0073, "CommunicationLost" }, { 0x84, 0x00fd, "iManufacturer" }, { 0x84, 0x00fe, "iProduct" }, { 0x84, 0x00ff, "iSerialNumber" }, { 0x85, 0, "BatterySystem" }, { 0x85, 0x0001, "SmartBatteryBatteryMode" }, { 0x85, 0x0002, "SmartBatteryBatteryStatus" }, { 0x85, 0x0003, "SmartBatteryAlarmWarning" }, { 0x85, 0x0004, "SmartBatteryChargerMode" }, { 0x85, 0x0005, "SmartBatteryChargerStatus" }, { 0x85, 0x0006, "SmartBatteryChargerSpecInfo" }, { 0x85, 0x0007, "SmartBatterySelectorState" }, { 0x85, 0x0008, "SmartBatterySelectorPresets" }, { 0x85, 0x0009, "SmartBatterySelectorInfo" }, { 0x85, 0x0010, "OptionalMfgFunction1" }, { 0x85, 0x0011, "OptionalMfgFunction2" }, { 0x85, 0x0012, "OptionalMfgFunction3" }, { 0x85, 0x0013, "OptionalMfgFunction4" }, { 0x85, 0x0014, "OptionalMfgFunction5" }, { 0x85, 0x0015, "ConnectionToSMBus" }, { 0x85, 0x0016, "OutputConnection" }, { 0x85, 0x0017, "ChargerConnection" }, { 0x85, 0x0018, "BatteryInsertion" }, { 0x85, 0x0019, "UseNext" }, { 0x85, 0x001a, "OKToUse" }, { 0x85, 0x001b, "BatterySupported" }, { 0x85, 0x001c, "SelectorRevision" }, { 0x85, 0x001d, "ChargingIndicator" }, { 0x85, 0x0028, "ManufacturerAccess" }, { 0x85, 0x0029, "RemainingCapacityLimit" }, { 0x85, 0x002a, "RemainingTimeLimit" }, { 0x85, 0x002b, "AtRate" }, { 0x85, 0x002c, "CapacityMode" }, { 0x85, 0x002d, "BroadcastToCharger" }, { 0x85, 0x002e, "PrimaryBattery" }, { 0x85, 0x002f, "ChargeController" }, { 0x85, 0x0040, "TerminateCharge" }, { 0x85, 0x0041, "TerminateDischarge" }, { 0x85, 0x0042, "BelowRemainingCapacityLimit" }, { 0x85, 0x0043, "RemainingTimeLimitExpired" }, { 0x85, 0x0044, "Charging" }, { 0x85, 0x0045, "Discharging" }, { 0x85, 0x0046, "FullyCharged" }, { 0x85, 0x0047, "FullyDischarged" }, { 0x85, 0x0048, "ConditioningFlag" }, { 0x85, 0x0049, "AtRateOK" }, { 0x85, 0x004a, "SmartBatteryErrorCode" }, { 0x85, 0x004b, "NeedReplacement" }, { 0x85, 0x0060, "AtRateTimeToFull" }, { 0x85, 0x0061, "AtRateTimeToEmpty" }, { 0x85, 0x0062, "AverageCurrent" }, { 0x85, 0x0063, "MaxError" }, { 0x85, 0x0064, "RelativeStateOfCharge" }, { 0x85, 0x0065, "AbsoluteStateOfCharge" }, { 0x85, 0x0066, "RemainingCapacity" }, { 0x85, 0x0067, "FullChargeCapacity" }, { 0x85, 0x0068, "RunTimeToEmpty" }, { 0x85, 0x0069, "AverageTimeToEmpty" }, { 0x85, 0x006a, "AverageTimeToFull" }, { 0x85, 0x006b, "CycleCount" }, { 0x85, 0x0080, "BatteryPackModelLevel" }, { 0x85, 0x0081, "InternalChargeController" }, { 0x85, 0x0082, "PrimaryBatterySupport" }, { 0x85, 0x0083, "DesignCapacity" }, { 0x85, 0x0084, "SpecificationInfo" }, { 0x85, 0x0085, "ManufactureDate" }, { 0x85, 0x0086, "SerialNumber" }, { 0x85, 0x0087, "iManufacturerName" }, { 0x85, 0x0088, "iDeviceName" }, { 0x85, 0x0089, "iDeviceChemistry" }, { 0x85, 0x008a, "ManufacturerData" }, { 0x85, 0x008b, "Rechargable" }, { 0x85, 0x008c, "WarningCapacityLimit" }, { 0x85, 0x008d, "CapacityGranularity1" }, { 0x85, 0x008e, "CapacityGranularity2" }, { 0x85, 0x008f, "iOEMInformation" }, { 0x85, 0x00c0, "InhibitCharge" }, { 0x85, 0x00c1, "EnablePolling" }, { 0x85, 0x00c2, "ResetToZero" }, { 0x85, 0x00d0, "ACPresent" }, { 0x85, 0x00d1, "BatteryPresent" }, { 0x85, 0x00d2, "PowerFail" }, { 0x85, 0x00d3, "AlarmInhibited" }, { 0x85, 0x00d4, "ThermistorUnderRange" }, { 0x85, 0x00d5, "ThermistorHot" }, { 0x85, 0x00d6, "ThermistorCold" }, { 0x85, 0x00d7, "ThermistorOverRange" }, { 0x85, 0x00d8, "VoltageOutOfRange" }, { 0x85, 0x00d9, "CurrentOutOfRange" }, { 0x85, 0x00da, "CurrentNotRegulated" }, { 0x85, 0x00db, "VoltageNotRegulated" }, { 0x85, 0x00dc, "MasterMode" }, { 0x85, 0x00f0, "ChargerSelectorSupport" }, { 0x85, 0x00f1, "ChargerSpec" }, { 0x85, 0x00f2, "Level2" }, { 0x85, 0x00f3, "Level3" }, { 0x8c, 0, "BarcodeScanner" }, { 0x8c, 0x0001, "BarcodeBadgeReader" }, { 0x8c, 0x0002, "BarcodeScanner" }, { 0x8c, 0x0003, "DumbBarCodeScanner" }, { 0x8c, 0x0004, "CordlessScannerBase" }, { 0x8c, 0x0005, "BarCodeScannerCradle" }, { 0x8c, 0x0010, "AttributeReport" }, { 0x8c, 0x0011, "SettingsReport" }, { 0x8c, 0x0012, "ScannedDataReport" }, { 0x8c, 0x0013, "RawScannedDataReport" }, { 0x8c, 0x0014, "TriggerReport" }, { 0x8c, 0x0015, "StatusReport" }, { 0x8c, 0x0016, "UPCEANControlReport" }, { 0x8c, 0x0017, "EAN23LabelControlReport" }, { 0x8c, 0x0018, "Code39ControlReport" }, { 0x8c, 0x0019, "Interleaved2of5ControlReport" }, { 0x8c, 0x001a, "Standard2of5ControlReport" }, { 0x8c, 0x001b, "MSIPlesseyControlReport" }, { 0x8c, 0x001c, "CodabarControlReport" }, { 0x8c, 0x001d, "Code128ControlReport" }, { 0x8c, 0x001e, "Misc1DControlReport" }, { 0x8c, 0x001f, "2DControlReport" }, { 0x8c, 0x0030, "AimingPointerMode" }, { 0x8c, 0x0031, "BarCodePresentSensor" }, { 0x8c, 0x0032, "Class1ALaser" }, { 0x8c, 0x0033, "Class2Laser" }, { 0x8c, 0x0034, "HeaterPresent" }, { 0x8c, 0x0035, "ContactScanner" }, { 0x8c, 0x0036, "ElectronicArticleSurveillanceNotification" }, { 0x8c, 0x0037, "ConstantElectronicArticleSurveillance" }, { 0x8c, 0x0038, "ErrorIndication" }, { 0x8c, 0x0039, "FixedBeeper" }, { 0x8c, 0x003a, "GoodDecodeIndication" }, { 0x8c, 0x003b, "HandsFreeScanning" }, { 0x8c, 0x003c, "IntrinsicallySafe" }, { 0x8c, 0x003d, "KlasseEinsLaser" }, { 0x8c, 0x003e, "LongRangeScanner" }, { 0x8c, 0x003f, "MirrorSpeedControl" }, { 0x8c, 0x0040, "NotOnFileIndication" }, { 0x8c, 0x0041, "ProgrammableBeeper" }, { 0x8c, 0x0042, "Triggerless" }, { 0x8c, 0x0043, "Wand" }, { 0x8c, 0x0044, "WaterResistant" }, { 0x8c, 0x0045, "MultiRangeScanner" }, { 0x8c, 0x0046, "ProximitySensor" }, { 0x8c, 0x004d, "FragmentDecoding" }, { 0x8c, 0x004e, "ScannerReadConfidence" }, { 0x8c, 0x004f, "DataPrefix" }, { 0x8c, 0x0050, "PrefixAIMI" }, { 0x8c, 0x0051, "PrefixNone" }, { 0x8c, 0x0052, "PrefixProprietary" }, { 0x8c, 0x0055, "ActiveTime" }, { 0x8c, 0x0056, "AimingLaserPattern" }, { 0x8c, 0x0057, "BarCodePresent" }, { 0x8c, 0x0058, "BeeperState" }, { 0x8c, 0x0059, "LaserOnTime" }, { 0x8c, 0x005a, "LaserState" }, { 0x8c, 0x005b, "LockoutTime" }, { 0x8c, 0x005c, "MotorState" }, { 0x8c, 0x005d, "MotorTimeout" }, { 0x8c, 0x005e, "PowerOnResetScanner" }, { 0x8c, 0x005f, "PreventReadofBarcodes" }, { 0x8c, 0x0060, "InitiateBarcodeRead" }, { 0x8c, 0x0061, "TriggerState" }, { 0x8c, 0x0062, "TriggerMode" }, { 0x8c, 0x0063, "TriggerModeBlinkingLaserOn" }, { 0x8c, 0x0064, "TriggerModeContinuousLaserOn" }, { 0x8c, 0x0065, "TriggerModeLaseronwhilePulled" }, { 0x8c, 0x0066, "TriggerModeLaserstaysonafterrelease" }, { 0x8c, 0x006d, "CommitParameterstoNVM" }, { 0x8c, 0x006e, "ParameterScanning" }, { 0x8c, 0x006f, "ParametersChanged" }, { 0x8c, 0x0070, "Setparameterdefaultvalues" }, { 0x8c, 0x0075, "ScannerInCradle" }, { 0x8c, 0x0076, "ScannerInRange" }, { 0x8c, 0x007a, "AimDuration" }, { 0x8c, 0x007b, "GoodReadLampDuration" }, { 0x8c, 0x007c, "GoodReadLampIntensity" }, { 0x8c, 0x007d, "GoodReadLED" }, { 0x8c, 0x007e, "GoodReadToneFrequency" }, { 0x8c, 0x007f, "GoodReadToneLength" }, { 0x8c, 0x0080, "GoodReadToneVolume" }, { 0x8c, 0x0082, "NoReadMessage" }, { 0x8c, 0x0083, "NotonFileVolume" }, { 0x8c, 0x0084, "PowerupBeep" }, { 0x8c, 0x0085, "SoundErrorBeep" }, { 0x8c, 0x0086, "SoundGoodReadBeep" }, { 0x8c, 0x0087, "SoundNotOnFileBeep" }, { 0x8c, 0x0088, "GoodReadWhentoWrite" }, { 0x8c, 0x0089, "GRWTIAfterDecode" }, { 0x8c, 0x008a, "GRWTIBeepLampaftertransmit" }, { 0x8c, 0x008b, "GRWTINoBeepLampuseatall" }, { 0x8c, 0x0091, "BooklandEAN" }, { 0x8c, 0x0092, "ConvertEAN8to13Type" }, { 0x8c, 0x0093, "ConvertUPCAtoEAN13" }, { 0x8c, 0x0094, "ConvertUPCEtoA" }, { 0x8c, 0x0095, "EAN13" }, { 0x8c, 0x0096, "EAN8" }, { 0x8c, 0x0097, "EAN99128Mandatory" }, { 0x8c, 0x0098, "EAN99P5128Optional" }, { 0x8c, 0x0099, "EnableEANTwoLabel" }, { 0x8c, 0x009a, "UPCEAN" }, { 0x8c, 0x009b, "UPCEANCouponCode" }, { 0x8c, 0x009c, "UPCEANPeriodicals" }, { 0x8c, 0x009d, "UPCA" }, { 0x8c, 0x009e, "UPCAwith128Mandatory" }, { 0x8c, 0x009f, "UPCAwith128Optional" }, { 0x8c, 0x00a0, "UPCAwithP5Optional" }, { 0x8c, 0x00a1, "UPCE" }, { 0x8c, 0x00a2, "UPCE1" }, { 0x8c, 0x00a9, "Periodical" }, { 0x8c, 0x00aa, "PeriodicalAutoDiscriminate2" }, { 0x8c, 0x00ab, "PeriodicalOnlyDecodewith2" }, { 0x8c, 0x00ac, "PeriodicalIgnore2" }, { 0x8c, 0x00ad, "PeriodicalAutoDiscriminate5" }, { 0x8c, 0x00ae, "PeriodicalOnlyDecodewith5" }, { 0x8c, 0x00af, "PeriodicalIgnore5" }, { 0x8c, 0x00b0, "Check" }, { 0x8c, 0x00b1, "CheckDisablePrice" }, { 0x8c, 0x00b2, "CheckEnable4digitPrice" }, { 0x8c, 0x00b3, "CheckEnable5digitPrice" }, { 0x8c, 0x00b4, "CheckEnableEuropean4digitPrice" }, { 0x8c, 0x00b5, "CheckEnableEuropean5digitPrice" }, { 0x8c, 0x00b7, "EANTwoLabel" }, { 0x8c, 0x00b8, "EANThreeLabel" }, { 0x8c, 0x00b9, "EAN8FlagDigit1" }, { 0x8c, 0x00ba, "EAN8FlagDigit2" }, { 0x8c, 0x00bb, "EAN8FlagDigit3" }, { 0x8c, 0x00bc, "EAN13FlagDigit1" }, { 0x8c, 0x00bd, "EAN13FlagDigit2" }, { 0x8c, 0x00be, "EAN13FlagDigit3" }, { 0x8c, 0x00bf, "AddEAN23LabelDefinition" }, { 0x8c, 0x00c0, "ClearallEAN23LabelDefinitions" }, { 0x8c, 0x00c3, "Codabar" }, { 0x8c, 0x00c4, "Code128" }, { 0x8c, 0x00c7, "Code39" }, { 0x8c, 0x00c8, "Code93" }, { 0x8c, 0x00c9, "FullASCIIConversion" }, { 0x8c, 0x00ca, "Interleaved2of5" }, { 0x8c, 0x00cb, "ItalianPharmacyCode" }, { 0x8c, 0x00cc, "MSIPlessey" }, { 0x8c, 0x00cd, "Standard2of5IATA" }, { 0x8c, 0x00ce, "Standard2of5" }, { 0x8c, 0x00d3, "TransmitStartStop" }, { 0x8c, 0x00d4, "TriOptic" }, { 0x8c, 0x00d5, "UCCEAN128" }, { 0x8c, 0x00d6, "CheckDigit" }, { 0x8c, 0x00d7, "CheckDigitDisable" }, { 0x8c, 0x00d8, "CheckDigitEnableInterleaved2of5OPCC" }, { 0x8c, 0x00d9, "CheckDigitEnableInterleaved2of5USS" }, { 0x8c, 0x00da, "CheckDigitEnableStandard2of5OPCC" }, { 0x8c, 0x00db, "CheckDigitEnableStandard2of5USS" }, { 0x8c, 0x00dc, "CheckDigitEnableOneMSIPlessey" }, { 0x8c, 0x00dd, "CheckDigitEnableTwoMSIPlessey" }, { 0x8c, 0x00de, "CheckDigitCodabarEnable" }, { 0x8c, 0x00df, "CheckDigitCode39Enable" }, { 0x8c, 0x00f0, "TransmitCheckDigit" }, { 0x8c, 0x00f1, "DisableCheckDigitTransmit" }, { 0x8c, 0x00f2, "EnableCheckDigitTransmit" }, { 0x8c, 0x00fb, "SymbologyIdentifier1" }, { 0x8c, 0x00fc, "SymbologyIdentifier2" }, { 0x8c, 0x00fd, "SymbologyIdentifier3" }, { 0x8c, 0x00fe, "DecodedData" }, { 0x8c, 0x00ff, "DecodeDataContinued" }, { 0x8c, 0x0100, "BarSpaceData" }, { 0x8c, 0x0101, "ScannerDataAccuracy" }, { 0x8c, 0x0102, "RawDataPolarity" }, { 0x8c, 0x0103, "PolarityInvertedBarCode" }, { 0x8c, 0x0104, "PolarityNormalBarCode" }, { 0x8c, 0x0106, "MinimumLengthtoDecode" }, { 0x8c, 0x0107, "MaximumLengthtoDecode" }, { 0x8c, 0x0108, "DiscreteLengthtoDecode1" }, { 0x8c, 0x0109, "DiscreteLengthtoDecode2" }, { 0x8c, 0x010a, "DataLengthMethod" }, { 0x8c, 0x010b, "DLMethodReadany" }, { 0x8c, 0x010c, "DLMethodCheckinRange" }, { 0x8c, 0x010d, "DLMethodCheckforDiscrete" }, { 0x8c, 0x0110, "AztecCode" }, { 0x8c, 0x0111, "BC412" }, { 0x8c, 0x0112, "ChannelCode" }, { 0x8c, 0x0113, "Code16" }, { 0x8c, 0x0114, "Code32" }, { 0x8c, 0x0115, "Code49" }, { 0x8c, 0x0116, "CodeOne" }, { 0x8c, 0x0117, "Colorcode" }, { 0x8c, 0x0118, "DataMatrix" }, { 0x8c, 0x0119, "MaxiCode" }, { 0x8c, 0x011a, "MicroPDF" }, { 0x8c, 0x011b, "PDF417" }, { 0x8c, 0x011c, "PosiCode" }, { 0x8c, 0x011d, "QRCode" }, { 0x8c, 0x011e, "SuperCode" }, { 0x8c, 0x011f, "UltraCode" }, { 0x8c, 0x0120, "USD5SlugCode" }, { 0x8c, 0x0121, "VeriCode" }, { 0x8d, 0, "Scales" }, { 0x8d, 0x0001, "Scales" }, { 0x8d, 0x0020, "ScaleDevice" }, { 0x8d, 0x0021, "ScaleClass" }, { 0x8d, 0x0022, "ScaleClassIMetric" }, { 0x8d, 0x0023, "ScaleClassIIMetric" }, { 0x8d, 0x0024, "ScaleClassIIIMetric" }, { 0x8d, 0x0025, "ScaleClassIIILMetric" }, { 0x8d, 0x0026, "ScaleClassIVMetric" }, { 0x8d, 0x0027, "ScaleClassIIIEnglish" }, { 0x8d, 0x0028, "ScaleClassIIILEnglish" }, { 0x8d, 0x0029, "ScaleClassIVEnglish" }, { 0x8d, 0x002a, "ScaleClassGeneric" }, { 0x8d, 0x0030, "ScaleAttributeReport" }, { 0x8d, 0x0031, "ScaleControlReport" }, { 0x8d, 0x0032, "ScaleDataReport" }, { 0x8d, 0x0033, "ScaleStatusReport" }, { 0x8d, 0x0034, "ScaleWeightLimitReport" }, { 0x8d, 0x0035, "ScaleStatisticsReport" }, { 0x8d, 0x0040, "DataWeight" }, { 0x8d, 0x0041, "DataScaling" }, { 0x8d, 0x0050, "WeightUnit" }, { 0x8d, 0x0051, "WeightUnitMilligram" }, { 0x8d, 0x0052, "WeightUnitGram" }, { 0x8d, 0x0053, "WeightUnitKilogram" }, { 0x8d, 0x0054, "WeightUnitCarats" }, { 0x8d, 0x0055, "WeightUnitTaels" }, { 0x8d, 0x0056, "WeightUnitGrains" }, { 0x8d, 0x0057, "WeightUnitPennyweights" }, { 0x8d, 0x0058, "WeightUnitMetricTon" }, { 0x8d, 0x0059, "WeightUnitAvoirTon" }, { 0x8d, 0x005a, "WeightUnitTroyOunce" }, { 0x8d, 0x005b, "WeightUnitOunce" }, { 0x8d, 0x005c, "WeightUnitPound" }, { 0x8d, 0x0060, "CalibrationCount" }, { 0x8d, 0x0061, "ReZeroCount" }, { 0x8d, 0x0070, "ScaleStatus" }, { 0x8d, 0x0071, "ScaleStatusFault" }, { 0x8d, 0x0072, "ScaleStatusStableatCenterofZero" }, { 0x8d, 0x0073, "ScaleStatusInMotion" }, { 0x8d, 0x0074, "ScaleStatusWeightStable" }, { 0x8d, 0x0075, "ScaleStatusUnderZero" }, { 0x8d, 0x0076, "ScaleStatusOverWeightLimit" }, { 0x8d, 0x0077, "ScaleStatusRequiresCalibration" }, { 0x8d, 0x0078, "ScaleStatusRequiresRezeroing" }, { 0x8d, 0x0080, "ZeroScale" }, { 0x8d, 0x0081, "EnforcedZeroReturn" }, { 0x8e, 0, "MagneticStripeReader" }, { 0x8e, 0x0001, "MSRDeviceReadOnly" }, { 0x8e, 0x0011, "Track1Length" }, { 0x8e, 0x0012, "Track2Length" }, { 0x8e, 0x0013, "Track3Length" }, { 0x8e, 0x0014, "TrackJISLength" }, { 0x8e, 0x0020, "TrackData" }, { 0x8e, 0x0021, "Track1Data" }, { 0x8e, 0x0022, "Track2Data" }, { 0x8e, 0x0023, "Track3Data" }, { 0x8e, 0x0024, "TrackJISData" }, { 0x90, 0, "CameraControl" }, { 0x90, 0x0020, "CameraAutofocus" }, { 0x90, 0x0021, "CameraShutter" }, { 0x91, 0, "Arcade" }, { 0x91, 0x0001, "GeneralPurposeIOCard" }, { 0x91, 0x0002, "CoinDoor" }, { 0x91, 0x0003, "WatchdogTimer" }, { 0x91, 0x0030, "GeneralPurposeAnalogInputState" }, { 0x91, 0x0031, "GeneralPurposeDigitalInputState" }, { 0x91, 0x0032, "GeneralPurposeOpticalInputState" }, { 0x91, 0x0033, "GeneralPurposeDigitalOutputState" }, { 0x91, 0x0034, "NumberofCoinDoors" }, { 0x91, 0x0035, "CoinDrawerDropCount" }, { 0x91, 0x0036, "CoinDrawerStart" }, { 0x91, 0x0037, "CoinDrawerService" }, { 0x91, 0x0038, "CoinDrawerTilt" }, { 0x91, 0x0039, "CoinDoorTest" }, { 0x91, 0x0040, "CoinDoorLockout" }, { 0x91, 0x0041, "WatchdogTimeout" }, { 0x91, 0x0042, "WatchdogAction" }, { 0x91, 0x0043, "WatchdogReboot" }, { 0x91, 0x0044, "WatchdogRestart" }, { 0x91, 0x0045, "AlarmInput" }, { 0x91, 0x0046, "CoinDoorCounter" }, { 0x91, 0x0047, "IODirectionMapping" }, { 0x91, 0x0048, "SetIODirectionMapping" }, { 0x91, 0x0049, "ExtendedOpticalInputState" }, { 0x91, 0x004a, "PinPadInputState" }, { 0x91, 0x004b, "PinPadStatus" }, { 0x91, 0x004c, "PinPadOutput" }, { 0x91, 0x004d, "PinPadCommand" }, { 0xf1d0, 0, "FIDOAlliance" }, { 0xf1d0, 0x0001, "U2FAuthenticatorDevice" }, { 0xf1d0, 0x0020, "InputReportData" }, { 0xf1d0, 0x0021, "OutputReportData" }, /* pages 0xff00 to 0xffff are vendor-specific */ { 0xffff, 0, "Vendor-specific-FF" }, { 0, 0, NULL } }; /* Either output directly into simple seq_file, or (if f == NULL) * allocate a separate buffer that will then be passed to the 'events' * ringbuffer. * * This is because these functions can be called both for "one-shot" * "rdesc" while resolving, or for blocking "events". * * This holds both for resolv_usage_page() and hid_resolv_usage(). */ static char *resolv_usage_page(unsigned page, struct seq_file *f) { const struct hid_usage_entry *p; char *buf = NULL; if (!f) { buf = kzalloc(HID_DEBUG_BUFSIZE, GFP_ATOMIC); if (!buf) return ERR_PTR(-ENOMEM); } for (p = hid_usage_table; p->description; p++) if (p->page == page) { if (!f) { snprintf(buf, HID_DEBUG_BUFSIZE, "%s", p->description); return buf; } else { seq_printf(f, "%s", p->description); return NULL; } } if (!f) snprintf(buf, HID_DEBUG_BUFSIZE, "%04x", page); else seq_printf(f, "%04x", page); return buf; } char *hid_resolv_usage(unsigned usage, struct seq_file *f) { const struct hid_usage_entry *p; const struct hid_usage_entry *m; char *buf = NULL; int len = 0; const char *modifier = NULL; unsigned int usage_modifier = usage & 0xF000; unsigned int usage_actual = usage & 0xFFFF; buf = resolv_usage_page(usage >> 16, f); if (IS_ERR(buf)) { pr_err("error allocating HID debug buffer\n"); return NULL; } if (!f) { len = strlen(buf); len += scnprintf(buf + len, HID_DEBUG_BUFSIZE - len, "."); } else { seq_printf(f, "."); } for (p = hid_usage_table; p->description; p++) if (p->page == (usage >> 16)) { if (p->page == 0x20 && usage_modifier) { for (m = p; m->description; m++) { if (p->page == m->page && m->usage == usage_modifier) { modifier = m->description; break; } } if (modifier) usage_actual = usage_actual & 0x0FFF; } if (!modifier) modifier = ""; for(++p; p->description && p->usage != 0; p++) if (p->usage == usage_actual) { if (!f) snprintf(buf + len, HID_DEBUG_BUFSIZE - len, "%s%s", p->description, modifier); else seq_printf(f, "%s%s", p->description, modifier); return buf; } break; } if (!f) snprintf(buf + len, HID_DEBUG_BUFSIZE - len, "%04x", usage & 0xffff); else seq_printf(f, "%04x", usage & 0xffff); return buf; } EXPORT_SYMBOL_GPL(hid_resolv_usage); static void tab(int n, struct seq_file *f) { seq_printf(f, "%*s", n, ""); } void hid_dump_field(struct hid_field *field, int n, struct seq_file *f) { int j; if (field->physical) { tab(n, f); seq_printf(f, "Physical("); hid_resolv_usage(field->physical, f); seq_printf(f, ")\n"); } if (field->logical) { tab(n, f); seq_printf(f, "Logical("); hid_resolv_usage(field->logical, f); seq_printf(f, ")\n"); } if (field->application) { tab(n, f); seq_printf(f, "Application("); hid_resolv_usage(field->application, f); seq_printf(f, ")\n"); } tab(n, f); seq_printf(f, "Usage(%d)\n", field->maxusage); for (j = 0; j < field->maxusage; j++) { tab(n+2, f); hid_resolv_usage(field->usage[j].hid, f); seq_printf(f, "\n"); } if (field->logical_minimum != field->logical_maximum) { tab(n, f); seq_printf(f, "Logical Minimum(%d)\n", field->logical_minimum); tab(n, f); seq_printf(f, "Logical Maximum(%d)\n", field->logical_maximum); } if (field->physical_minimum != field->physical_maximum) { tab(n, f); seq_printf(f, "Physical Minimum(%d)\n", field->physical_minimum); tab(n, f); seq_printf(f, "Physical Maximum(%d)\n", field->physical_maximum); } if (field->unit_exponent) { tab(n, f); seq_printf(f, "Unit Exponent(%d)\n", field->unit_exponent); } if (field->unit) { static const char *systems[5] = { "None", "SI Linear", "SI Rotation", "English Linear", "English Rotation" }; static const char *units[5][8] = { { "None", "None", "None", "None", "None", "None", "None", "None" }, { "None", "Centimeter", "Gram", "Seconds", "Kelvin", "Ampere", "Candela", "None" }, { "None", "Radians", "Gram", "Seconds", "Kelvin", "Ampere", "Candela", "None" }, { "None", "Inch", "Slug", "Seconds", "Fahrenheit", "Ampere", "Candela", "None" }, { "None", "Degrees", "Slug", "Seconds", "Fahrenheit", "Ampere", "Candela", "None" } }; int i; int sys; __u32 data = field->unit; /* First nibble tells us which system we're in. */ sys = data & 0xf; data >>= 4; if(sys > 4) { tab(n, f); seq_printf(f, "Unit(Invalid)\n"); } else { int earlier_unit = 0; tab(n, f); seq_printf(f, "Unit(%s : ", systems[sys]); for (i=1 ; i<sizeof(__u32)*2 ; i++) { char nibble = data & 0xf; data >>= 4; if (nibble != 0) { if(earlier_unit++ > 0) seq_printf(f, "*"); seq_printf(f, "%s", units[sys][i]); if(nibble != 1) { /* This is a _signed_ nibble(!) */ int val = nibble & 0x7; if(nibble & 0x08) val = -((0x7 & ~val) +1); seq_printf(f, "^%d", val); } } } seq_printf(f, ")\n"); } } tab(n, f); seq_printf(f, "Report Size(%u)\n", field->report_size); tab(n, f); seq_printf(f, "Report Count(%u)\n", field->report_count); tab(n, f); seq_printf(f, "Report Offset(%u)\n", field->report_offset); tab(n, f); seq_printf(f, "Flags( "); j = field->flags; seq_printf(f, "%s", HID_MAIN_ITEM_CONSTANT & j ? "Constant " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_VARIABLE & j ? "Variable " : "Array "); seq_printf(f, "%s", HID_MAIN_ITEM_RELATIVE & j ? "Relative " : "Absolute "); seq_printf(f, "%s", HID_MAIN_ITEM_WRAP & j ? "Wrap " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_NONLINEAR & j ? "NonLinear " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_NO_PREFERRED & j ? "NoPreferredState " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_NULL_STATE & j ? "NullState " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_VOLATILE & j ? "Volatile " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_BUFFERED_BYTE & j ? "BufferedByte " : ""); seq_printf(f, ")\n"); } EXPORT_SYMBOL_GPL(hid_dump_field); void hid_dump_device(struct hid_device *device, struct seq_file *f) { struct hid_report_enum *report_enum; struct hid_report *report; struct list_head *list; unsigned i,k; static const char *table[] = {"INPUT", "OUTPUT", "FEATURE"}; for (i = 0; i < HID_REPORT_TYPES; i++) { report_enum = device->report_enum + i; list = report_enum->report_list.next; while (list != &report_enum->report_list) { report = (struct hid_report *) list; tab(2, f); seq_printf(f, "%s", table[i]); if (report->id) seq_printf(f, "(%d)", report->id); seq_printf(f, "[%s]", table[report->type]); seq_printf(f, "\n"); for (k = 0; k < report->maxfield; k++) { tab(4, f); seq_printf(f, "Field(%d)\n", k); hid_dump_field(report->field[k], 6, f); } list = list->next; } } } EXPORT_SYMBOL_GPL(hid_dump_device); /* enqueue string to 'events' ring buffer */ void hid_debug_event(struct hid_device *hdev, char *buf) { struct hid_debug_list *list; unsigned long flags; spin_lock_irqsave(&hdev->debug_list_lock, flags); list_for_each_entry(list, &hdev->debug_list, node) kfifo_in(&list->hid_debug_fifo, buf, strlen(buf)); spin_unlock_irqrestore(&hdev->debug_list_lock, flags); wake_up_interruptible(&hdev->debug_wait); } EXPORT_SYMBOL_GPL(hid_debug_event); void hid_dump_report(struct hid_device *hid, int type, u8 *data, int size) { struct hid_report_enum *report_enum; char *buf; unsigned int i; buf = kmalloc(HID_DEBUG_BUFSIZE, GFP_ATOMIC); if (!buf) return; report_enum = hid->report_enum + type; /* dump the report */ snprintf(buf, HID_DEBUG_BUFSIZE - 1, "\nreport (size %u) (%snumbered) = ", size, report_enum->numbered ? "" : "un"); hid_debug_event(hid, buf); for (i = 0; i < size; i++) { snprintf(buf, HID_DEBUG_BUFSIZE - 1, " %02x", data[i]); hid_debug_event(hid, buf); } hid_debug_event(hid, "\n"); kfree(buf); } EXPORT_SYMBOL_GPL(hid_dump_report); void hid_dump_input(struct hid_device *hdev, struct hid_usage *usage, __s32 value) { char *buf; int len; buf = hid_resolv_usage(usage->hid, NULL); if (!buf) return; len = strlen(buf); snprintf(buf + len, HID_DEBUG_BUFSIZE - len - 1, " = %d\n", value); hid_debug_event(hdev, buf); kfree(buf); wake_up_interruptible(&hdev->debug_wait); } EXPORT_SYMBOL_GPL(hid_dump_input); static const char *events[EV_MAX + 1] = { [EV_SYN] = "Sync", [EV_KEY] = "Key", [EV_REL] = "Relative", [EV_ABS] = "Absolute", [EV_MSC] = "Misc", [EV_LED] = "LED", [EV_SND] = "Sound", [EV_REP] = "Repeat", [EV_FF] = "ForceFeedback", [EV_PWR] = "Power", [EV_FF_STATUS] = "ForceFeedbackStatus", [EV_SW] = "Software", }; static const char *syncs[SYN_CNT] = { [SYN_REPORT] = "Report", [SYN_CONFIG] = "Config", [SYN_MT_REPORT] = "MT Report", [SYN_DROPPED] = "Dropped", }; static const char *keys[KEY_MAX + 1] = { [KEY_RESERVED] = "Reserved", [KEY_ESC] = "Esc", [KEY_1] = "1", [KEY_2] = "2", [KEY_3] = "3", [KEY_4] = "4", [KEY_5] = "5", [KEY_6] = "6", [KEY_7] = "7", [KEY_8] = "8", [KEY_9] = "9", [KEY_0] = "0", [KEY_MINUS] = "Minus", [KEY_EQUAL] = "Equal", [KEY_BACKSPACE] = "Backspace", [KEY_TAB] = "Tab", [KEY_Q] = "Q", [KEY_W] = "W", [KEY_E] = "E", [KEY_R] = "R", [KEY_T] = "T", [KEY_Y] = "Y", [KEY_U] = "U", [KEY_I] = "I", [KEY_O] = "O", [KEY_P] = "P", [KEY_LEFTBRACE] = "LeftBrace", [KEY_RIGHTBRACE] = "RightBrace", [KEY_ENTER] = "Enter", [KEY_LEFTCTRL] = "LeftControl", [KEY_A] = "A", [KEY_S] = "S", [KEY_D] = "D", [KEY_F] = "F", [KEY_G] = "G", [KEY_H] = "H", [KEY_J] = "J", [KEY_K] = "K", [KEY_L] = "L", [KEY_SEMICOLON] = "Semicolon", [KEY_APOSTROPHE] = "Apostrophe", [KEY_GRAVE] = "Grave", [KEY_LEFTSHIFT] = "LeftShift", [KEY_BACKSLASH] = "BackSlash", [KEY_Z] = "Z", [KEY_X] = "X", [KEY_C] = "C", [KEY_V] = "V", [KEY_B] = "B", [KEY_N] = "N", [KEY_M] = "M", [KEY_COMMA] = "Comma", [KEY_DOT] = "Dot", [KEY_SLASH] = "Slash", [KEY_RIGHTSHIFT] = "RightShift", [KEY_KPASTERISK] = "KPAsterisk", [KEY_LEFTALT] = "LeftAlt", [KEY_SPACE] = "Space", [KEY_CAPSLOCK] = "CapsLock", [KEY_F1] = "F1", [KEY_F2] = "F2", [KEY_F3] = "F3", [KEY_F4] = "F4", [KEY_F5] = "F5", [KEY_F6] = "F6", [KEY_F7] = "F7", [KEY_F8] = "F8", [KEY_F9] = "F9", [KEY_F10] = "F10", [KEY_NUMLOCK] = "NumLock", [KEY_SCROLLLOCK] = "ScrollLock", [KEY_KP7] = "KP7", [KEY_KP8] = "KP8", [KEY_KP9] = "KP9", [KEY_KPMINUS] = "KPMinus", [KEY_KP4] = "KP4", [KEY_KP5] = "KP5", [KEY_KP6] = "KP6", [KEY_KPPLUS] = "KPPlus", [KEY_KP1] = "KP1", [KEY_KP2] = "KP2", [KEY_KP3] = "KP3", [KEY_KP0] = "KP0", [KEY_KPDOT] = "KPDot", [KEY_ZENKAKUHANKAKU] = "Zenkaku/Hankaku", [KEY_102ND] = "102nd", [KEY_F11] = "F11", [KEY_F12] = "F12", [KEY_RO] = "RO", [KEY_KATAKANA] = "Katakana", [KEY_HIRAGANA] = "HIRAGANA", [KEY_HENKAN] = "Henkan", [KEY_KATAKANAHIRAGANA] = "Katakana/Hiragana", [KEY_MUHENKAN] = "Muhenkan", [KEY_KPJPCOMMA] = "KPJpComma", [KEY_KPENTER] = "KPEnter", [KEY_RIGHTCTRL] = "RightCtrl", [KEY_KPSLASH] = "KPSlash", [KEY_SYSRQ] = "SysRq", [KEY_RIGHTALT] = "RightAlt", [KEY_LINEFEED] = "LineFeed", [KEY_HOME] = "Home", [KEY_UP] = "Up", [KEY_PAGEUP] = "PageUp", [KEY_LEFT] = "Left", [KEY_RIGHT] = "Right", [KEY_END] = "End", [KEY_DOWN] = "Down", [KEY_PAGEDOWN] = "PageDown", [KEY_INSERT] = "Insert", [KEY_DELETE] = "Delete", [KEY_MACRO] = "Macro", [KEY_MUTE] = "Mute", [KEY_VOLUMEDOWN] = "VolumeDown", [KEY_VOLUMEUP] = "VolumeUp", [KEY_POWER] = "Power", [KEY_KPEQUAL] = "KPEqual", [KEY_KPPLUSMINUS] = "KPPlusMinus", [KEY_PAUSE] = "Pause", [KEY_KPCOMMA] = "KPComma", [KEY_HANGUEL] = "Hangeul", [KEY_HANJA] = "Hanja", [KEY_YEN] = "Yen", [KEY_LEFTMETA] = "LeftMeta", [KEY_RIGHTMETA] = "RightMeta", [KEY_COMPOSE] = "Compose", [KEY_STOP] = "Stop", [KEY_AGAIN] = "Again", [KEY_PROPS] = "Props", [KEY_UNDO] = "Undo", [KEY_FRONT] = "Front", [KEY_COPY] = "Copy", [KEY_OPEN] = "Open", [KEY_PASTE] = "Paste", [KEY_FIND] = "Find", [KEY_CUT] = "Cut", [KEY_HELP] = "Help", [KEY_MENU] = "Menu", [KEY_CALC] = "Calc", [KEY_SETUP] = "Setup", [KEY_SLEEP] = "Sleep", [KEY_WAKEUP] = "WakeUp", [KEY_FILE] = "File", [KEY_SENDFILE] = "SendFile", [KEY_DELETEFILE] = "DeleteFile", [KEY_XFER] = "X-fer", [KEY_PROG1] = "Prog1", [KEY_PROG2] = "Prog2", [KEY_WWW] = "WWW", [KEY_MSDOS] = "MSDOS", [KEY_COFFEE] = "Coffee", [KEY_ROTATE_DISPLAY] = "RotateDisplay", [KEY_CYCLEWINDOWS] = "CycleWindows", [KEY_MAIL] = "Mail", [KEY_BOOKMARKS] = "Bookmarks", [KEY_COMPUTER] = "Computer", [KEY_BACK] = "Back", [KEY_FORWARD] = "Forward", [KEY_CLOSECD] = "CloseCD", [KEY_EJECTCD] = "EjectCD", [KEY_EJECTCLOSECD] = "EjectCloseCD", [KEY_NEXTSONG] = "NextSong", [KEY_PLAYPAUSE] = "PlayPause", [KEY_PREVIOUSSONG] = "PreviousSong", [KEY_STOPCD] = "StopCD", [KEY_RECORD] = "Record", [KEY_REWIND] = "Rewind", [KEY_PHONE] = "Phone", [KEY_ISO] = "ISOKey", [KEY_CONFIG] = "Config", [KEY_HOMEPAGE] = "HomePage", [KEY_REFRESH] = "Refresh", [KEY_EXIT] = "Exit", [KEY_MOVE] = "Move", [KEY_EDIT] = "Edit", [KEY_SCROLLUP] = "ScrollUp", [KEY_SCROLLDOWN] = "ScrollDown", [KEY_KPLEFTPAREN] = "KPLeftParenthesis", [KEY_KPRIGHTPAREN] = "KPRightParenthesis", [KEY_NEW] = "New", [KEY_REDO] = "Redo", [KEY_F13] = "F13", [KEY_F14] = "F14", [KEY_F15] = "F15", [KEY_F16] = "F16", [KEY_F17] = "F17", [KEY_F18] = "F18", [KEY_F19] = "F19", [KEY_F20] = "F20", [KEY_F21] = "F21", [KEY_F22] = "F22", [KEY_F23] = "F23", [KEY_F24] = "F24", [KEY_PLAYCD] = "PlayCD", [KEY_PAUSECD] = "PauseCD", [KEY_PROG3] = "Prog3", [KEY_PROG4] = "Prog4", [KEY_ALL_APPLICATIONS] = "AllApplications", [KEY_SUSPEND] = "Suspend", [KEY_CLOSE] = "Close", [KEY_PLAY] = "Play", [KEY_FASTFORWARD] = "FastForward", [KEY_BASSBOOST] = "BassBoost", [KEY_PRINT] = "Print", [KEY_HP] = "HP", [KEY_CAMERA] = "Camera", [KEY_SOUND] = "Sound", [KEY_QUESTION] = "Question", [KEY_EMAIL] = "Email", [KEY_CHAT] = "Chat", [KEY_SEARCH] = "Search", [KEY_CONNECT] = "Connect", [KEY_FINANCE] = "Finance", [KEY_SPORT] = "Sport", [KEY_SHOP] = "Shop", [KEY_ALTERASE] = "AlternateErase", [KEY_CANCEL] = "Cancel", [KEY_BRIGHTNESSDOWN] = "BrightnessDown", [KEY_BRIGHTNESSUP] = "BrightnessUp", [KEY_MEDIA] = "Media", [KEY_UNKNOWN] = "Unknown", [BTN_DPAD_UP] = "BtnDPadUp", [BTN_DPAD_DOWN] = "BtnDPadDown", [BTN_DPAD_LEFT] = "BtnDPadLeft", [BTN_DPAD_RIGHT] = "BtnDPadRight", [BTN_0] = "Btn0", [BTN_1] = "Btn1", [BTN_2] = "Btn2", [BTN_3] = "Btn3", [BTN_4] = "Btn4", [BTN_5] = "Btn5", [BTN_6] = "Btn6", [BTN_7] = "Btn7", [BTN_8] = "Btn8", [BTN_9] = "Btn9", [BTN_LEFT] = "LeftBtn", [BTN_RIGHT] = "RightBtn", [BTN_MIDDLE] = "MiddleBtn", [BTN_SIDE] = "SideBtn", [BTN_EXTRA] = "ExtraBtn", [BTN_FORWARD] = "ForwardBtn", [BTN_BACK] = "BackBtn", [BTN_TASK] = "TaskBtn", [BTN_TRIGGER] = "Trigger", [BTN_THUMB] = "ThumbBtn", [BTN_THUMB2] = "ThumbBtn2", [BTN_TOP] = "TopBtn", [BTN_TOP2] = "TopBtn2", [BTN_PINKIE] = "PinkieBtn", [BTN_BASE] = "BaseBtn", [BTN_BASE2] = "BaseBtn2", [BTN_BASE3] = "BaseBtn3", [BTN_BASE4] = "BaseBtn4", [BTN_BASE5] = "BaseBtn5", [BTN_BASE6] = "BaseBtn6", [BTN_DEAD] = "BtnDead", [BTN_A] = "BtnA", [BTN_B] = "BtnB", [BTN_C] = "BtnC", [BTN_X] = "BtnX", [BTN_Y] = "BtnY", [BTN_Z] = "BtnZ", [BTN_TL] = "BtnTL", [BTN_TR] = "BtnTR", [BTN_TL2] = "BtnTL2", [BTN_TR2] = "BtnTR2", [BTN_SELECT] = "BtnSelect", [BTN_START] = "BtnStart", [BTN_MODE] = "BtnMode", [BTN_THUMBL] = "BtnThumbL", [BTN_THUMBR] = "BtnThumbR", [BTN_TOOL_PEN] = "ToolPen", [BTN_TOOL_RUBBER] = "ToolRubber", [BTN_TOOL_BRUSH] = "ToolBrush", [BTN_TOOL_PENCIL] = "ToolPencil", [BTN_TOOL_AIRBRUSH] = "ToolAirbrush", [BTN_TOOL_FINGER] = "ToolFinger", [BTN_TOOL_MOUSE] = "ToolMouse", [BTN_TOOL_LENS] = "ToolLens", [BTN_TOUCH] = "Touch", [BTN_STYLUS] = "Stylus", [BTN_STYLUS2] = "Stylus2", [BTN_TOOL_DOUBLETAP] = "ToolDoubleTap", [BTN_TOOL_TRIPLETAP] = "ToolTripleTap", [BTN_TOOL_QUADTAP] = "ToolQuadrupleTap", [BTN_GEAR_DOWN] = "WheelBtn", [BTN_GEAR_UP] = "Gear up", [KEY_OK] = "Ok", [KEY_SELECT] = "Select", [KEY_GOTO] = "Goto", [KEY_CLEAR] = "Clear", [KEY_POWER2] = "Power2", [KEY_OPTION] = "Option", [KEY_INFO] = "Info", [KEY_TIME] = "Time", [KEY_VENDOR] = "Vendor", [KEY_ARCHIVE] = "Archive", [KEY_PROGRAM] = "Program", [KEY_CHANNEL] = "Channel", [KEY_FAVORITES] = "Favorites", [KEY_EPG] = "EPG", [KEY_PVR] = "PVR", [KEY_MHP] = "MHP", [KEY_LANGUAGE] = "Language", [KEY_TITLE] = "Title", [KEY_SUBTITLE] = "Subtitle", [KEY_ANGLE] = "Angle", [KEY_MODE] = "Mode", [KEY_KEYBOARD] = "Keyboard", [KEY_PC] = "PC", [KEY_TV] = "TV", [KEY_TV2] = "TV2", [KEY_VCR] = "VCR", [KEY_VCR2] = "VCR2", [KEY_SAT] = "Sat", [KEY_SAT2] = "Sat2", [KEY_CD] = "CD", [KEY_TAPE] = "Tape", [KEY_RADIO] = "Radio", [KEY_TUNER] = "Tuner", [KEY_PLAYER] = "Player", [KEY_TEXT] = "Text", [KEY_DVD] = "DVD", [KEY_AUX] = "Aux", [KEY_MP3] = "MP3", [KEY_AUDIO] = "Audio", [KEY_VIDEO] = "Video", [KEY_DIRECTORY] = "Directory", [KEY_LIST] = "List", [KEY_MEMO] = "Memo", [KEY_CALENDAR] = "Calendar", [KEY_RED] = "Red", [KEY_GREEN] = "Green", [KEY_YELLOW] = "Yellow", [KEY_BLUE] = "Blue", [KEY_CHANNELUP] = "ChannelUp", [KEY_CHANNELDOWN] = "ChannelDown", [KEY_FIRST] = "First", [KEY_LAST] = "Last", [KEY_AB] = "AB", [KEY_NEXT] = "Next", [KEY_RESTART] = "Restart", [KEY_SLOW] = "Slow", [KEY_SHUFFLE] = "Shuffle", [KEY_BREAK] = "Break", [KEY_PREVIOUS] = "Previous", [KEY_DIGITS] = "Digits", [KEY_TEEN] = "TEEN", [KEY_TWEN] = "TWEN", [KEY_DEL_EOL] = "DeleteEOL", [KEY_DEL_EOS] = "DeleteEOS", [KEY_INS_LINE] = "InsertLine", [KEY_DEL_LINE] = "DeleteLine", [KEY_SEND] = "Send", [KEY_REPLY] = "Reply", [KEY_FORWARDMAIL] = "ForwardMail", [KEY_SAVE] = "Save", [KEY_DOCUMENTS] = "Documents", [KEY_SPELLCHECK] = "SpellCheck", [KEY_LOGOFF] = "Logoff", [KEY_FN] = "Fn", [KEY_FN_ESC] = "Fn+ESC", [KEY_FN_1] = "Fn+1", [KEY_FN_2] = "Fn+2", [KEY_FN_B] = "Fn+B", [KEY_FN_D] = "Fn+D", [KEY_FN_E] = "Fn+E", [KEY_FN_F] = "Fn+F", [KEY_FN_S] = "Fn+S", [KEY_FN_F1] = "Fn+F1", [KEY_FN_F2] = "Fn+F2", [KEY_FN_F3] = "Fn+F3", [KEY_FN_F4] = "Fn+F4", [KEY_FN_F5] = "Fn+F5", [KEY_FN_F6] = "Fn+F6", [KEY_FN_F7] = "Fn+F7", [KEY_FN_F8] = "Fn+F8", [KEY_FN_F9] = "Fn+F9", [KEY_FN_F10] = "Fn+F10", [KEY_FN_F11] = "Fn+F11", [KEY_FN_F12] = "Fn+F12", [KEY_KBDILLUMTOGGLE] = "KbdIlluminationToggle", [KEY_KBDILLUMDOWN] = "KbdIlluminationDown", [KEY_KBDILLUMUP] = "KbdIlluminationUp", [KEY_SWITCHVIDEOMODE] = "SwitchVideoMode", [KEY_BUTTONCONFIG] = "ButtonConfig", [KEY_TASKMANAGER] = "TaskManager", [KEY_JOURNAL] = "Journal", [KEY_CONTROLPANEL] = "ControlPanel", [KEY_APPSELECT] = "AppSelect", [KEY_SCREENSAVER] = "ScreenSaver", [KEY_VOICECOMMAND] = "VoiceCommand", [KEY_ASSISTANT] = "Assistant", [KEY_KBD_LAYOUT_NEXT] = "KbdLayoutNext", [KEY_EMOJI_PICKER] = "EmojiPicker", [KEY_CAMERA_ACCESS_ENABLE] = "CameraAccessEnable", [KEY_CAMERA_ACCESS_DISABLE] = "CameraAccessDisable", [KEY_CAMERA_ACCESS_TOGGLE] = "CameraAccessToggle", [KEY_ACCESSIBILITY] = "Accessibility", [KEY_DO_NOT_DISTURB] = "DoNotDisturb", [KEY_DICTATE] = "Dictate", [KEY_MICMUTE] = "MicrophoneMute", [KEY_BRIGHTNESS_MIN] = "BrightnessMin", [KEY_BRIGHTNESS_MAX] = "BrightnessMax", [KEY_BRIGHTNESS_AUTO] = "BrightnessAuto", [KEY_KBDINPUTASSIST_PREV] = "KbdInputAssistPrev", [KEY_KBDINPUTASSIST_NEXT] = "KbdInputAssistNext", [KEY_KBDINPUTASSIST_PREVGROUP] = "KbdInputAssistPrevGroup", [KEY_KBDINPUTASSIST_NEXTGROUP] = "KbdInputAssistNextGroup", [KEY_KBDINPUTASSIST_ACCEPT] = "KbdInputAssistAccept", [KEY_KBDINPUTASSIST_CANCEL] = "KbdInputAssistCancel", [KEY_MACRO1] = "Macro1", [KEY_MACRO2] = "Macro2", [KEY_MACRO3] = "Macro3", [KEY_MACRO4] = "Macro4", [KEY_MACRO5] = "Macro5", [KEY_MACRO6] = "Macro6", [KEY_MACRO7] = "Macro7", [KEY_MACRO8] = "Macro8", [KEY_MACRO9] = "Macro9", [KEY_MACRO10] = "Macro10", [KEY_MACRO11] = "Macro11", [KEY_MACRO12] = "Macro12", [KEY_MACRO13] = "Macro13", [KEY_MACRO14] = "Macro14", [KEY_MACRO15] = "Macro15", [KEY_MACRO16] = "Macro16", [KEY_MACRO17] = "Macro17", [KEY_MACRO18] = "Macro18", [KEY_MACRO19] = "Macro19", [KEY_MACRO20] = "Macro20", [KEY_MACRO21] = "Macro21", [KEY_MACRO22] = "Macro22", [KEY_MACRO23] = "Macro23", [KEY_MACRO24] = "Macro24", [KEY_MACRO25] = "Macro25", [KEY_MACRO26] = "Macro26", [KEY_MACRO27] = "Macro27", [KEY_MACRO28] = "Macro28", [KEY_MACRO29] = "Macro29", [KEY_MACRO30] = "Macro30", [BTN_TRIGGER_HAPPY1] = "TriggerHappy1", [BTN_TRIGGER_HAPPY2] = "TriggerHappy2", [BTN_TRIGGER_HAPPY3] = "TriggerHappy3", [BTN_TRIGGER_HAPPY4] = "TriggerHappy4", [BTN_TRIGGER_HAPPY5] = "TriggerHappy5", [BTN_TRIGGER_HAPPY6] = "TriggerHappy6", [BTN_TRIGGER_HAPPY7] = "TriggerHappy7", [BTN_TRIGGER_HAPPY8] = "TriggerHappy8", [BTN_TRIGGER_HAPPY9] = "TriggerHappy9", [BTN_TRIGGER_HAPPY10] = "TriggerHappy10", [BTN_TRIGGER_HAPPY11] = "TriggerHappy11", [BTN_TRIGGER_HAPPY12] = "TriggerHappy12", [BTN_TRIGGER_HAPPY13] = "TriggerHappy13", [BTN_TRIGGER_HAPPY14] = "TriggerHappy14", [BTN_TRIGGER_HAPPY15] = "TriggerHappy15", [BTN_TRIGGER_HAPPY16] = "TriggerHappy16", [BTN_TRIGGER_HAPPY17] = "TriggerHappy17", [BTN_TRIGGER_HAPPY18] = "TriggerHappy18", [BTN_TRIGGER_HAPPY19] = "TriggerHappy19", [BTN_TRIGGER_HAPPY20] = "TriggerHappy20", [BTN_TRIGGER_HAPPY21] = "TriggerHappy21", [BTN_TRIGGER_HAPPY22] = "TriggerHappy22", [BTN_TRIGGER_HAPPY23] = "TriggerHappy23", [BTN_TRIGGER_HAPPY24] = "TriggerHappy24", [BTN_TRIGGER_HAPPY25] = "TriggerHappy25", [BTN_TRIGGER_HAPPY26] = "TriggerHappy26", [BTN_TRIGGER_HAPPY27] = "TriggerHappy27", [BTN_TRIGGER_HAPPY28] = "TriggerHappy28", [BTN_TRIGGER_HAPPY29] = "TriggerHappy29", [BTN_TRIGGER_HAPPY30] = "TriggerHappy30", [BTN_TRIGGER_HAPPY31] = "TriggerHappy31", [BTN_TRIGGER_HAPPY32] = "TriggerHappy32", [BTN_TRIGGER_HAPPY33] = "TriggerHappy33", [BTN_TRIGGER_HAPPY34] = "TriggerHappy34", [BTN_TRIGGER_HAPPY35] = "TriggerHappy35", [BTN_TRIGGER_HAPPY36] = "TriggerHappy36", [BTN_TRIGGER_HAPPY37] = "TriggerHappy37", [BTN_TRIGGER_HAPPY38] = "TriggerHappy38", [BTN_TRIGGER_HAPPY39] = "TriggerHappy39", [BTN_TRIGGER_HAPPY40] = "TriggerHappy40", [BTN_STYLUS3] = "Stylus3", [BTN_TOOL_QUINTTAP] = "ToolQuintTap", [KEY_10CHANNELSDOWN] = "10ChannelsDown", [KEY_10CHANNELSUP] = "10ChannelsUp", [KEY_3D_MODE] = "3DMode", [KEY_ADDRESSBOOK] = "Addressbook", [KEY_ALS_TOGGLE] = "ALSToggle", [KEY_ASPECT_RATIO] = "AspectRatio", [KEY_ATTENDANT_OFF] = "AttendantOff", [KEY_ATTENDANT_ON] = "AttendantOn", [KEY_ATTENDANT_TOGGLE] = "AttendantToggle", [KEY_AUDIO_DESC] = "AudioDesc", [KEY_AUTOPILOT_ENGAGE_TOGGLE] = "AutoPiloteEngage", [KEY_BATTERY] = "Battery", [KEY_BLUETOOTH] = "BlueTooth", [KEY_BRIGHTNESS_CYCLE] = "BrightnessCycle", [KEY_BRIGHTNESS_MENU] = "BrightnessMenu", [KEY_BRL_DOT1] = "BrlDot1", [KEY_BRL_DOT10] = "BrlDot10", [KEY_BRL_DOT2] = "BrlDot2", [KEY_BRL_DOT3] = "BrlDot3", [KEY_BRL_DOT4] = "BrlDot4", [KEY_BRL_DOT5] = "BrlDot5", [KEY_BRL_DOT6] = "BrlDot6", [KEY_BRL_DOT7] = "BrlDot7", [KEY_BRL_DOT8] = "BrlDot8", [KEY_BRL_DOT9] = "BrlDot9", [KEY_CAMERA_DOWN] = "CameraDown", [KEY_CAMERA_FOCUS] = "CameraFocus", [KEY_CAMERA_LEFT] = "CameraLeft", [KEY_CAMERA_RIGHT] = "CameraRight", [KEY_CAMERA_UP] = "CameraUp", [KEY_CAMERA_ZOOMIN] = "CameraZoomIn", [KEY_CAMERA_ZOOMOUT] = "CameraZoomOut", [KEY_CLEARVU_SONAR] = "ClearVUSonar", [KEY_CONTEXT_MENU] = "ContextMenu", [KEY_DATA] = "Data", [KEY_DATABASE] = "DataBase", [KEY_DISPLAY_OFF] = "DisplayOff", [KEY_DISPLAYTOGGLE] = "DisplayToggle", [KEY_DOLLAR] = "Dollar", [KEY_DUAL_RANGE_RADAR] = "DualRangeRadat", [KEY_EDITOR] = "Editor", [KEY_EURO] = "Euro", [KEY_FASTREVERSE] = "FastReverse", [KEY_FISHING_CHART] = "FishingChart", [KEY_FN_RIGHT_SHIFT] = "FnRightShift", [KEY_FRAMEBACK] = "FrameBack", [KEY_FRAMEFORWARD] = "FrameForward", [KEY_FULL_SCREEN] = "FullScreen", [KEY_GAMES] = "Games", [KEY_GRAPHICSEDITOR] = "GraphicsEditor", [KEY_HANGUP_PHONE] = "HangUpPhone", [KEY_IMAGES] = "Images", [KEY_KBD_LCD_MENU1] = "KbdLcdMenu1", [KEY_KBD_LCD_MENU2] = "KbdLcdMenu2", [KEY_KBD_LCD_MENU3] = "KbdLcdMenu3", [KEY_KBD_LCD_MENU4] = "KbdLcdMenu4", [KEY_KBD_LCD_MENU5] = "KbdLcdMenu5", [KEY_LEFT_DOWN] = "LeftDown", [KEY_LEFT_UP] = "LeftUp", [KEY_LIGHTS_TOGGLE] = "LightToggle", [KEY_MACRO_PRESET1] = "MacroPreset1", [KEY_MACRO_PRESET2] = "MacroPreset2", [KEY_MACRO_PRESET3] = "MacroPrest3", [KEY_MACRO_PRESET_CYCLE] = "MacroPresetCycle", [KEY_MACRO_RECORD_START] = "MacroRecordStart", [KEY_MACRO_RECORD_STOP] = "MacroRecordStop", [KEY_MARK_WAYPOINT] = "MarkWayPoint", [KEY_MEDIA_REPEAT] = "MediaRepeat", [KEY_MEDIA_TOP_MENU] = "MediaTopMenu", [KEY_MESSENGER] = "Messenger", [KEY_NAV_CHART] = "NavChar", [KEY_NAV_INFO] = "NavInfo", [KEY_NEWS] = "News", [KEY_NEXT_ELEMENT] = "NextElement", [KEY_NEXT_FAVORITE] = "NextFavorite", [KEY_NOTIFICATION_CENTER] = "NotificationCenter", [KEY_NUMERIC_0] = "Numeric0", [KEY_NUMERIC_1] = "Numeric1", [KEY_NUMERIC_11] = "Numceric11", [KEY_NUMERIC_12] = "Numeric12", [KEY_NUMERIC_2] = "Numeric2", [KEY_NUMERIC_3] = "Numeric3", [KEY_NUMERIC_4] = "Numeric4", [KEY_NUMERIC_5] = "Numeric5", [KEY_NUMERIC_6] = "Numeric6", [KEY_NUMERIC_7] = "Numeric7", [KEY_NUMERIC_8] = "Numeric8", [KEY_NUMERIC_9] = "Numeric9", [KEY_NUMERIC_A] = "NumericA", [KEY_NUMERIC_B] = "NumericB", [KEY_NUMERIC_C] = "NumericC", [KEY_NUMERIC_D] = "NumericD", [KEY_NUMERIC_POUND] = "NumericPound", [KEY_NUMERIC_STAR] = "NumericStar", [KEY_ONSCREEN_KEYBOARD] = "OnScreenKeyBoard", [KEY_PAUSE_RECORD] = "PauseRecord", [KEY_PICKUP_PHONE] = "PickUpPhone", [KEY_PRESENTATION] = "Presentation", [KEY_PREVIOUS_ELEMENT] = "PreviousElement", [KEY_PRIVACY_SCREEN_TOGGLE] = "PrivacyScreenToggle", [KEY_RADAR_OVERLAY] = "RadarOverLay", [KEY_RFKILL] = "RFKill", [KEY_RIGHT_DOWN] = "RightDown", [KEY_RIGHT_UP] = "RightUp", [KEY_ROOT_MENU] = "RootMenu", [KEY_ROTATE_LOCK_TOGGLE] = "RotateLockToggle", [KEY_SCALE] = "Scale", [KEY_SELECTIVE_SCREENSHOT] = "SelectiveScreenshot", [KEY_SIDEVU_SONAR] = "SideVUSonar", [KEY_SINGLE_RANGE_RADAR] = "SingleRangeRadar", [KEY_SLOWREVERSE] = "SlowReverse", [KEY_SOS] = "SOS", [KEY_SPREADSHEET] = "SpreadSheet", [KEY_STOP_RECORD] = "StopRecord", [KEY_TOUCHPAD_OFF] = "TouchPadOff", [KEY_TOUCHPAD_ON] = "TouchPadOn", [KEY_TOUCHPAD_TOGGLE] = "TouchPadToggle", [KEY_TRADITIONAL_SONAR] = "TraditionalSonar", [KEY_UNMUTE] = "Unmute", [KEY_UWB] = "UWB", [KEY_VIDEO_NEXT] = "VideoNext", [KEY_VIDEOPHONE] = "VideoPhone", [KEY_VIDEO_PREV] = "VideoPrev", [KEY_VOD] = "VOD", [KEY_VOICEMAIL] = "VoiceMail", [KEY_WLAN] = "WLAN", [KEY_WORDPROCESSOR] = "WordProcessor", [KEY_WPS_BUTTON] = "WPSButton", [KEY_WWAN] = "WWAN", [KEY_ZOOMIN] = "ZoomIn", [KEY_ZOOMOUT] = "ZoomOut", [KEY_ZOOMRESET] = "ZoomReset", }; static const char *relatives[REL_MAX + 1] = { [REL_X] = "X", [REL_Y] = "Y", [REL_Z] = "Z", [REL_RX] = "Rx", [REL_RY] = "Ry", [REL_RZ] = "Rz", [REL_HWHEEL] = "HWheel", [REL_DIAL] = "Dial", [REL_WHEEL] = "Wheel", [REL_MISC] = "Misc", [REL_WHEEL_HI_RES] = "WheelHiRes", [REL_HWHEEL_HI_RES] = "HWheelHiRes" }; static const char *absolutes[ABS_CNT] = { [ABS_X] = "X", [ABS_Y] = "Y", [ABS_Z] = "Z", [ABS_RX] = "Rx", [ABS_RY] = "Ry", [ABS_RZ] = "Rz", [ABS_THROTTLE] = "Throttle", [ABS_RUDDER] = "Rudder", [ABS_WHEEL] = "Wheel", [ABS_GAS] = "Gas", [ABS_BRAKE] = "Brake", [ABS_HAT0X] = "Hat0X", [ABS_HAT0Y] = "Hat0Y", [ABS_HAT1X] = "Hat1X", [ABS_HAT1Y] = "Hat1Y", [ABS_HAT2X] = "Hat2X", [ABS_HAT2Y] = "Hat2Y", [ABS_HAT3X] = "Hat3X", [ABS_HAT3Y] = "Hat 3Y", [ABS_PRESSURE] = "Pressure", [ABS_DISTANCE] = "Distance", [ABS_TILT_X] = "XTilt", [ABS_TILT_Y] = "YTilt", [ABS_TOOL_WIDTH] = "ToolWidth", [ABS_VOLUME] = "Volume", [ABS_PROFILE] = "Profile", [ABS_MISC] = "Misc", [ABS_MT_SLOT] = "MTSlot", [ABS_MT_TOUCH_MAJOR] = "MTMajor", [ABS_MT_TOUCH_MINOR] = "MTMinor", [ABS_MT_WIDTH_MAJOR] = "MTMajorW", [ABS_MT_WIDTH_MINOR] = "MTMinorW", [ABS_MT_ORIENTATION] = "MTOrientation", [ABS_MT_POSITION_X] = "MTPositionX", [ABS_MT_POSITION_Y] = "MTPositionY", [ABS_MT_TOOL_TYPE] = "MTToolType", [ABS_MT_BLOB_ID] = "MTBlobID", [ABS_MT_TRACKING_ID] = "MTTrackingID", [ABS_MT_PRESSURE] = "MTPressure", [ABS_MT_DISTANCE] = "MTDistance", [ABS_MT_TOOL_X] = "MTToolX", [ABS_MT_TOOL_Y] = "MTToolY", }; static const char *misc[MSC_MAX + 1] = { [MSC_SERIAL] = "Serial", [MSC_PULSELED] = "Pulseled", [MSC_GESTURE] = "Gesture", [MSC_RAW] = "RawData", [MSC_SCAN] = "Scan", [MSC_TIMESTAMP] = "TimeStamp", }; static const char *leds[LED_MAX + 1] = { [LED_NUML] = "NumLock", [LED_CAPSL] = "CapsLock", [LED_SCROLLL] = "ScrollLock", [LED_COMPOSE] = "Compose", [LED_KANA] = "Kana", [LED_SLEEP] = "Sleep", [LED_SUSPEND] = "Suspend", [LED_MUTE] = "Mute", [LED_MISC] = "Misc", [LED_MAIL] = "Mail", [LED_CHARGING] = "Charging", }; static const char *repeats[REP_MAX + 1] = { [REP_DELAY] = "Delay", [REP_PERIOD] = "Period" }; static const char *sounds[SND_MAX + 1] = { [SND_CLICK] = "Click", [SND_BELL] = "Bell", [SND_TONE] = "Tone" }; static const char *software[SW_CNT] = { [SW_LID] = "Lid", [SW_TABLET_MODE] = "TabletMode", [SW_HEADPHONE_INSERT] = "HeadPhoneInsert", [SW_RFKILL_ALL] = "RFKillAll", [SW_MICROPHONE_INSERT] = "MicrophoneInsert", [SW_DOCK] = "Dock", [SW_LINEOUT_INSERT] = "LineOutInsert", [SW_JACK_PHYSICAL_INSERT] = "JackPhysicalInsert", [SW_VIDEOOUT_INSERT] = "VideoOutInsert", [SW_CAMERA_LENS_COVER] = "CameraLensCover", [SW_KEYPAD_SLIDE] = "KeyPadSlide", [SW_FRONT_PROXIMITY] = "FrontProximity", [SW_ROTATE_LOCK] = "RotateLock", [SW_LINEIN_INSERT] = "LineInInsert", [SW_MUTE_DEVICE] = "MuteDevice", [SW_PEN_INSERTED] = "PenInserted", [SW_MACHINE_COVER] = "MachineCover", }; static const char *force[FF_CNT] = { [FF_RUMBLE] = "FF_RUMBLE", [FF_PERIODIC] = "FF_PERIODIC", [FF_CONSTANT] = "FF_CONSTANT", [FF_SPRING] = "FF_SPRING", [FF_FRICTION] = "FF_FRICTION", [FF_DAMPER] = "FF_DAMPER", [FF_INERTIA] = "FF_INERTIA", [FF_RAMP] = "FF_RAMP", [FF_SQUARE] = "FF_SQUARE", [FF_TRIANGLE] = "FF_TRIANGLE", [FF_SINE] = "FF_SINE", [FF_SAW_UP] = "FF_SAW_UP", [FF_SAW_DOWN] = "FF_SAW_DOWN", [FF_CUSTOM] = "FF_CUSTOM", [FF_GAIN] = "FF_GAIN", [FF_AUTOCENTER] = "FF_AUTOCENTER", [FF_MAX] = "FF_MAX", }; static const char *force_status[FF_STATUS_MAX + 1] = { [FF_STATUS_STOPPED] = "FF_STATUS_STOPPED", [FF_STATUS_PLAYING] = "FF_STATUS_PLAYING", }; static const char **names[EV_MAX + 1] = { [EV_SYN] = syncs, [EV_KEY] = keys, [EV_REL] = relatives, [EV_ABS] = absolutes, [EV_MSC] = misc, [EV_LED] = leds, [EV_SND] = sounds, [EV_REP] = repeats, [EV_SW] = software, [EV_FF] = force, [EV_FF_STATUS] = force_status, }; static void hid_resolv_event(__u8 type, __u16 code, struct seq_file *f) { if (events[type]) seq_printf(f, "%s.", events[type]); else seq_printf(f, "%02x.", type); if (names[type] && names[type][code]) seq_printf(f, "%s", names[type][code]); else seq_printf(f, "%04x", code); } static void hid_dump_input_mapping(struct hid_device *hid, struct seq_file *f) { int i, j, k; struct hid_report *report; struct hid_usage *usage; for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { list_for_each_entry(report, &hid->report_enum[k].report_list, list) { for (i = 0; i < report->maxfield; i++) { for ( j = 0; j < report->field[i]->maxusage; j++) { usage = report->field[i]->usage + j; hid_resolv_usage(usage->hid, f); seq_printf(f, " ---> "); hid_resolv_event(usage->type, usage->code, f); seq_printf(f, "\n"); } } } } } static int hid_debug_rdesc_show(struct seq_file *f, void *p) { struct hid_device *hdev = f->private; const __u8 *rdesc = hdev->rdesc; unsigned rsize = hdev->rsize; int i; if (!rdesc) { rdesc = hdev->dev_rdesc; rsize = hdev->dev_rsize; } /* dump HID report descriptor */ for (i = 0; i < rsize; i++) seq_printf(f, "%02x ", rdesc[i]); seq_printf(f, "\n\n"); /* dump parsed data and input mappings */ if (down_interruptible(&hdev->driver_input_lock)) return 0; hid_dump_device(hdev, f); seq_printf(f, "\n"); hid_dump_input_mapping(hdev, f); up(&hdev->driver_input_lock); return 0; } static int hid_debug_events_open(struct inode *inode, struct file *file) { int err = 0; struct hid_debug_list *list; unsigned long flags; if (!(list = kzalloc(sizeof(struct hid_debug_list), GFP_KERNEL))) { err = -ENOMEM; goto out; } err = kfifo_alloc(&list->hid_debug_fifo, HID_DEBUG_FIFOSIZE, GFP_KERNEL); if (err) { kfree(list); goto out; } list->hdev = (struct hid_device *) inode->i_private; kref_get(&list->hdev->ref); file->private_data = list; mutex_init(&list->read_mutex); spin_lock_irqsave(&list->hdev->debug_list_lock, flags); list_add_tail(&list->node, &list->hdev->debug_list); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); out: return err; } static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct hid_debug_list *list = file->private_data; int ret = 0, copied; DECLARE_WAITQUEUE(wait, current); mutex_lock(&list->read_mutex); if (kfifo_is_empty(&list->hid_debug_fifo)) { add_wait_queue(&list->hdev->debug_wait, &wait); set_current_state(TASK_INTERRUPTIBLE); while (kfifo_is_empty(&list->hid_debug_fifo)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } /* if list->hdev is NULL we cannot remove_wait_queue(). * if list->hdev->debug is 0 then hid_debug_unregister() * was already called and list->hdev is being destroyed. * if we add remove_wait_queue() here we can hit a race. */ if (!list->hdev || !list->hdev->debug) { ret = -EIO; set_current_state(TASK_RUNNING); goto out; } if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; break; } /* allow O_NONBLOCK from other threads */ mutex_unlock(&list->read_mutex); schedule(); mutex_lock(&list->read_mutex); set_current_state(TASK_INTERRUPTIBLE); } __set_current_state(TASK_RUNNING); remove_wait_queue(&list->hdev->debug_wait, &wait); if (ret) goto out; } /* pass the fifo content to userspace, locking is not needed with only * one concurrent reader and one concurrent writer */ ret = kfifo_to_user(&list->hid_debug_fifo, buffer, count, &copied); if (ret) goto out; ret = copied; out: mutex_unlock(&list->read_mutex); return ret; } static __poll_t hid_debug_events_poll(struct file *file, poll_table *wait) { struct hid_debug_list *list = file->private_data; poll_wait(file, &list->hdev->debug_wait, wait); if (!kfifo_is_empty(&list->hid_debug_fifo)) return EPOLLIN | EPOLLRDNORM; if (!list->hdev->debug) return EPOLLERR | EPOLLHUP; return 0; } static int hid_debug_events_release(struct inode *inode, struct file *file) { struct hid_debug_list *list = file->private_data; unsigned long flags; spin_lock_irqsave(&list->hdev->debug_list_lock, flags); list_del(&list->node); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); kfifo_free(&list->hid_debug_fifo); kref_put(&list->hdev->ref, hiddev_free); kfree(list); return 0; } DEFINE_SHOW_ATTRIBUTE(hid_debug_rdesc); static const struct file_operations hid_debug_events_fops = { .owner = THIS_MODULE, .open = hid_debug_events_open, .read = hid_debug_events_read, .poll = hid_debug_events_poll, .release = hid_debug_events_release, .llseek = noop_llseek, }; void hid_debug_register(struct hid_device *hdev, const char *name) { hdev->debug_dir = debugfs_create_dir(name, hid_debug_root); hdev->debug_rdesc = debugfs_create_file("rdesc", 0400, hdev->debug_dir, hdev, &hid_debug_rdesc_fops); hdev->debug_events = debugfs_create_file("events", 0400, hdev->debug_dir, hdev, &hid_debug_events_fops); hdev->debug = 1; } void hid_debug_unregister(struct hid_device *hdev) { hdev->debug = 0; wake_up_interruptible(&hdev->debug_wait); debugfs_remove(hdev->debug_rdesc); debugfs_remove(hdev->debug_events); debugfs_remove(hdev->debug_dir); } void hid_debug_init(void) { hid_debug_root = debugfs_create_dir("hid", NULL); } void hid_debug_exit(void) { debugfs_remove_recursive(hid_debug_root); }
111 111 111 111 111 111 108 111 111 111 111 111 113 113 113 16 13 3 3 3 111 111 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 // SPDX-License-Identifier: GPL-2.0-or-later /* incoming call handling * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/net.h> #include <linux/skbuff.h> #include <linux/errqueue.h> #include <linux/udp.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/icmp.h> #include <linux/gfp.h> #include <linux/circ_buf.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include <net/ip.h> #include "ar-internal.h" static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call, unsigned long user_call_ID) { } /* * Preallocate a single service call, connection and peer and, if possible, * give them a user ID and attach the user's side of the ID to them. */ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, struct rxrpc_backlog *b, rxrpc_notify_rx_t notify_rx, rxrpc_user_attach_call_t user_attach_call, unsigned long user_call_ID, gfp_t gfp, unsigned int debug_id) { struct rxrpc_call *call, *xcall; struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); struct rb_node *parent, **pp; int max, tmp; unsigned int size = RXRPC_BACKLOG_MAX; unsigned int head, tail, call_head, call_tail; max = rx->sk.sk_max_ack_backlog; tmp = rx->sk.sk_ack_backlog; if (tmp >= max) { _leave(" = -ENOBUFS [full %u]", max); return -ENOBUFS; } max -= tmp; /* We don't need more conns and peers than we have calls, but on the * other hand, we shouldn't ever use more peers than conns or conns * than calls. */ call_head = b->call_backlog_head; call_tail = READ_ONCE(b->call_backlog_tail); tmp = CIRC_CNT(call_head, call_tail, size); if (tmp >= max) { _leave(" = -ENOBUFS [enough %u]", tmp); return -ENOBUFS; } max = tmp + 1; head = b->peer_backlog_head; tail = READ_ONCE(b->peer_backlog_tail); if (CIRC_CNT(head, tail, size) < max) { struct rxrpc_peer *peer; peer = rxrpc_alloc_peer(rx->local, gfp, rxrpc_peer_new_prealloc); if (!peer) return -ENOMEM; b->peer_backlog[head] = peer; smp_store_release(&b->peer_backlog_head, (head + 1) & (size - 1)); } head = b->conn_backlog_head; tail = READ_ONCE(b->conn_backlog_tail); if (CIRC_CNT(head, tail, size) < max) { struct rxrpc_connection *conn; conn = rxrpc_prealloc_service_connection(rxnet, gfp); if (!conn) return -ENOMEM; b->conn_backlog[head] = conn; smp_store_release(&b->conn_backlog_head, (head + 1) & (size - 1)); } /* Now it gets complicated, because calls get registered with the * socket here, with a user ID preassigned by the user. */ call = rxrpc_alloc_call(rx, gfp, debug_id); if (!call) return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); rxrpc_set_call_state(call, RXRPC_CALL_SERVER_PREALLOC); __set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), user_call_ID, rxrpc_call_new_prealloc_service); write_lock(&rx->call_lock); /* Check the user ID isn't already in use */ pp = &rx->calls.rb_node; parent = NULL; while (*pp) { parent = *pp; xcall = rb_entry(parent, struct rxrpc_call, sock_node); if (user_call_ID < xcall->user_call_ID) pp = &(*pp)->rb_left; else if (user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else goto id_in_use; } call->user_call_ID = user_call_ID; call->notify_rx = notify_rx; if (user_attach_call) { rxrpc_get_call(call, rxrpc_call_get_kernel_service); user_attach_call(call, user_call_ID); } rxrpc_get_call(call, rxrpc_call_get_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); set_bit(RXRPC_CALL_HAS_USERID, &call->flags); list_add(&call->sock_link, &rx->sock_calls); write_unlock(&rx->call_lock); rxnet = call->rxnet; spin_lock(&rxnet->call_lock); list_add_tail_rcu(&call->link, &rxnet->calls); spin_unlock(&rxnet->call_lock); b->call_backlog[call_head] = call; smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); _leave(" = 0 [%d -> %lx]", call->debug_id, user_call_ID); return 0; id_in_use: write_unlock(&rx->call_lock); rxrpc_cleanup_call(call); _leave(" = -EBADSLT"); return -EBADSLT; } /* * Allocate the preallocation buffers for incoming service calls. These must * be charged manually. */ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) { struct rxrpc_backlog *b = rx->backlog; if (!b) { b = kzalloc(sizeof(struct rxrpc_backlog), gfp); if (!b) return -ENOMEM; rx->backlog = b; } return 0; } /* * Discard the preallocation on a service. */ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) { struct rxrpc_backlog *b = rx->backlog; struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); unsigned int size = RXRPC_BACKLOG_MAX, head, tail; if (!b) return; rx->backlog = NULL; /* Make sure that there aren't any incoming calls in progress before we * clear the preallocation buffers. */ spin_lock_irq(&rx->incoming_lock); spin_unlock_irq(&rx->incoming_lock); head = b->peer_backlog_head; tail = b->peer_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_peer *peer = b->peer_backlog[tail]; rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_peer); kfree(peer); tail = (tail + 1) & (size - 1); } head = b->conn_backlog_head; tail = b->conn_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_connection *conn = b->conn_backlog[tail]; write_lock(&rxnet->conn_lock); list_del(&conn->link); list_del(&conn->proc_link); write_unlock(&rxnet->conn_lock); kfree(conn); if (atomic_dec_and_test(&rxnet->nr_conns)) wake_up_var(&rxnet->nr_conns); tail = (tail + 1) & (size - 1); } head = b->call_backlog_head; tail = b->call_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_call *call = b->call_backlog[tail]; rcu_assign_pointer(call->socket, rx); if (rx->discard_new_call) { _debug("discard %lx", call->user_call_ID); rx->discard_new_call(call, call->user_call_ID); if (call->notify_rx) call->notify_rx = rxrpc_dummy_notify; rxrpc_put_call(call, rxrpc_call_put_kernel); } rxrpc_call_completed(call); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put_discard_prealloc); tail = (tail + 1) & (size - 1); } kfree(b); } /* * Allocate a new incoming call from the prealloc pool, along with a connection * and a peer as necessary. */ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_local *local, struct rxrpc_peer *peer, struct rxrpc_connection *conn, const struct rxrpc_security *sec, struct sockaddr_rxrpc *peer_srx, struct sk_buff *skb) { struct rxrpc_backlog *b = rx->backlog; struct rxrpc_call *call; unsigned short call_head, conn_head, peer_head; unsigned short call_tail, conn_tail, peer_tail; unsigned short call_count, conn_count; /* #calls >= #conns >= #peers must hold true. */ call_head = smp_load_acquire(&b->call_backlog_head); call_tail = b->call_backlog_tail; call_count = CIRC_CNT(call_head, call_tail, RXRPC_BACKLOG_MAX); conn_head = smp_load_acquire(&b->conn_backlog_head); conn_tail = b->conn_backlog_tail; conn_count = CIRC_CNT(conn_head, conn_tail, RXRPC_BACKLOG_MAX); ASSERTCMP(conn_count, >=, call_count); peer_head = smp_load_acquire(&b->peer_backlog_head); peer_tail = b->peer_backlog_tail; ASSERTCMP(CIRC_CNT(peer_head, peer_tail, RXRPC_BACKLOG_MAX), >=, conn_count); if (call_count == 0) return NULL; if (!conn) { if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_service_conn)) peer = NULL; if (!peer) { peer = b->peer_backlog[peer_tail]; peer->srx = *peer_srx; b->peer_backlog[peer_tail] = NULL; smp_store_release(&b->peer_backlog_tail, (peer_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); rxrpc_new_incoming_peer(local, peer); } /* Now allocate and set up the connection */ conn = b->conn_backlog[conn_tail]; b->conn_backlog[conn_tail] = NULL; smp_store_release(&b->conn_backlog_tail, (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); conn->local = rxrpc_get_local(local, rxrpc_local_get_prealloc_conn); conn->peer = peer; rxrpc_see_connection(conn, rxrpc_conn_see_new_service_conn); rxrpc_new_incoming_connection(rx, conn, sec, skb); } else { rxrpc_get_connection(conn, rxrpc_conn_get_service_conn); atomic_inc(&conn->active); } /* And now we can allocate and set up a new call */ call = b->call_backlog[call_tail]; b->call_backlog[call_tail] = NULL; smp_store_release(&b->call_backlog_tail, (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); rxrpc_see_call(call, rxrpc_call_see_accept); call->local = rxrpc_get_local(conn->local, rxrpc_local_get_call); call->conn = conn; call->security = conn->security; call->security_ix = conn->security_ix; call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_accept); call->dest_srx = peer->srx; call->cong_ssthresh = call->peer->cong_ssthresh; call->tx_last_sent = ktime_get_real(); return call; } /* * Set up a new incoming call. Called from the I/O thread. * * If this is for a kernel service, when we allocate the call, it will have * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the * retainer ref obtained from the backlog buffer. Prealloc calls for userspace * services only have the ref from the backlog buffer. * * If we want to report an error, we mark the skb with the packet type and * abort code and return false. */ bool rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_peer *peer, struct rxrpc_connection *conn, struct sockaddr_rxrpc *peer_srx, struct sk_buff *skb) { const struct rxrpc_security *sec = NULL; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_call *call = NULL; struct rxrpc_sock *rx; _enter(""); /* Don't set up a call for anything other than a DATA packet. */ if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA) return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call); read_lock_irq(&local->services_lock); /* Weed out packets to services we're not offering. Packets that would * begin a call are explicitly rejected and the rest are just * discarded. */ rx = local->service; if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && sp->hdr.serviceId != rx->second_service) ) { if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.seq == 1) goto unsupported_service; goto discard; } if (!conn) { sec = rxrpc_get_incoming_security(rx, skb); if (!sec) goto unsupported_security; } spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { rxrpc_direct_abort(skb, rxrpc_abort_shut_down, RX_INVALID_OPERATION, -ESHUTDOWN); goto no_call; } call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, peer_srx, skb); if (!call) { skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; goto no_call; } trace_rxrpc_receive(call, rxrpc_receive_incoming, sp->hdr.serial, sp->hdr.seq); /* Make the call live. */ rxrpc_incoming_call(rx, call, skb); conn = call->conn; if (rx->notify_new_call) rx->notify_new_call(&rx->sk, call, call->user_call_ID); spin_lock(&conn->state_lock); if (conn->state == RXRPC_CONN_SERVICE_UNSECURED) { conn->state = RXRPC_CONN_SERVICE_CHALLENGING; set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); rxrpc_queue_conn(call->conn, rxrpc_conn_queue_challenge); } spin_unlock(&conn->state_lock); spin_unlock(&rx->incoming_lock); read_unlock_irq(&local->services_lock); if (hlist_unhashed(&call->error_link)) { spin_lock_irq(&call->peer->lock); hlist_add_head(&call->error_link, &call->peer->error_targets); spin_unlock_irq(&call->peer->lock); } _leave(" = %p{%d}", call, call->debug_id); rxrpc_queue_rx_call_packet(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); return true; unsupported_service: read_unlock_irq(&local->services_lock); return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, RX_INVALID_OPERATION, -EOPNOTSUPP); unsupported_security: read_unlock_irq(&local->services_lock); return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, RX_INVALID_OPERATION, -EKEYREJECTED); no_call: spin_unlock(&rx->incoming_lock); read_unlock_irq(&local->services_lock); _leave(" = f [%u]", skb->mark); return false; discard: read_unlock_irq(&local->services_lock); return true; } /* * Charge up socket with preallocated calls, attaching user call IDs. */ int rxrpc_user_charge_accept(struct rxrpc_sock *rx, unsigned long user_call_ID) { struct rxrpc_backlog *b = rx->backlog; if (rx->sk.sk_state == RXRPC_CLOSE) return -ESHUTDOWN; return rxrpc_service_prealloc_one(rx, b, NULL, NULL, user_call_ID, GFP_KERNEL, atomic_inc_return(&rxrpc_debug_id)); } /* * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls * @sock: The socket on which to preallocate * @notify_rx: Event notification function for the call * @user_attach_call: Func to attach call to user_call_ID * @user_call_ID: The tag to attach to the preallocated call * @gfp: The allocation conditions. * @debug_id: The tracing debug ID. * * Charge up the socket with preallocated calls, each with a user ID. A * function should be provided to effect the attachment from the user's side. * The user is given a ref to hold on the call. * * Note that the call may be come connected before this function returns. */ int rxrpc_kernel_charge_accept(struct socket *sock, rxrpc_notify_rx_t notify_rx, rxrpc_user_attach_call_t user_attach_call, unsigned long user_call_ID, gfp_t gfp, unsigned int debug_id) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); struct rxrpc_backlog *b = rx->backlog; if (sock->sk->sk_state == RXRPC_CLOSE) return -ESHUTDOWN; return rxrpc_service_prealloc_one(rx, b, notify_rx, user_attach_call, user_call_ID, gfp, debug_id); } EXPORT_SYMBOL(rxrpc_kernel_charge_accept);
4 2 3 2 3 3 3 2 2 2 2 2 2 2 3 2 2 3 3 2 15 14 1 13 1 12 1 10 1 11 11 10 1 10 9 9 9 9 9 9 9 9 9 9 9 9 9 8 8 3 1 2 2 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 // SPDX-License-Identifier: GPL-2.0-only #include <net/netdev_queues.h> #include "netlink.h" #include "common.h" struct rings_req_info { struct ethnl_req_info base; }; struct rings_reply_data { struct ethnl_reply_data base; struct ethtool_ringparam ringparam; struct kernel_ethtool_ringparam kernel_ringparam; u32 supported_ring_params; }; #define RINGS_REPDATA(__reply_base) \ container_of(__reply_base, struct rings_reply_data, base) const struct nla_policy ethnl_rings_get_policy[] = { [ETHTOOL_A_RINGS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; static int rings_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct rings_reply_data *data = RINGS_REPDATA(reply_base); struct net_device *dev = reply_base->dev; int ret; if (!dev->ethtool_ops->get_ringparam) return -EOPNOTSUPP; data->supported_ring_params = dev->ethtool_ops->supported_ring_params; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; data->kernel_ringparam.tcp_data_split = dev->cfg->hds_config; data->kernel_ringparam.hds_thresh = dev->cfg->hds_thresh; dev->ethtool_ops->get_ringparam(dev, &data->ringparam, &data->kernel_ringparam, info->extack); ethnl_ops_complete(dev); return 0; } static int rings_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { return nla_total_size(sizeof(u32)) + /* _RINGS_RX_MAX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI_MAX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO_MAX */ nla_total_size(sizeof(u32)) + /* _RINGS_TX_MAX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO */ nla_total_size(sizeof(u32)) + /* _RINGS_TX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */ nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */ nla_total_size(sizeof(u32) + /* _RINGS_CQE_SIZE */ nla_total_size(sizeof(u8)) + /* _RINGS_TX_PUSH */ nla_total_size(sizeof(u8))) + /* _RINGS_RX_PUSH */ nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN */ nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN_MAX */ nla_total_size(sizeof(u32)) + /* _RINGS_HDS_THRESH */ nla_total_size(sizeof(u32)); /* _RINGS_HDS_THRESH_MAX*/ } static int rings_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct rings_reply_data *data = RINGS_REPDATA(reply_base); const struct kernel_ethtool_ringparam *kr = &data->kernel_ringparam; const struct ethtool_ringparam *ringparam = &data->ringparam; u32 supported_ring_params = data->supported_ring_params; WARN_ON(kr->tcp_data_split > ETHTOOL_TCP_DATA_SPLIT_ENABLED); if ((ringparam->rx_max_pending && (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MAX, ringparam->rx_max_pending) || nla_put_u32(skb, ETHTOOL_A_RINGS_RX, ringparam->rx_pending))) || (ringparam->rx_mini_max_pending && (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MINI_MAX, ringparam->rx_mini_max_pending) || nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MINI, ringparam->rx_mini_pending))) || (ringparam->rx_jumbo_max_pending && (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_JUMBO_MAX, ringparam->rx_jumbo_max_pending) || nla_put_u32(skb, ETHTOOL_A_RINGS_RX_JUMBO, ringparam->rx_jumbo_pending))) || (ringparam->tx_max_pending && (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_MAX, ringparam->tx_max_pending) || nla_put_u32(skb, ETHTOOL_A_RINGS_TX, ringparam->tx_pending))) || (kr->rx_buf_len && (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN, kr->rx_buf_len))) || (kr->tcp_data_split && (nla_put_u8(skb, ETHTOOL_A_RINGS_TCP_DATA_SPLIT, kr->tcp_data_split))) || (kr->cqe_size && (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))) || nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push) || nla_put_u8(skb, ETHTOOL_A_RINGS_RX_PUSH, !!kr->rx_push) || ((supported_ring_params & ETHTOOL_RING_USE_TX_PUSH_BUF_LEN) && (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, kr->tx_push_buf_max_len) || nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, kr->tx_push_buf_len))) || ((supported_ring_params & ETHTOOL_RING_USE_HDS_THRS) && (nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH, kr->hds_thresh) || nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH_MAX, kr->hds_thresh_max)))) return -EMSGSIZE; return 0; } /* RINGS_SET */ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_RINGS_RX] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_RX_MINI] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1), [ETHTOOL_A_RINGS_TCP_DATA_SPLIT] = NLA_POLICY_MAX(NLA_U8, ETHTOOL_TCP_DATA_SPLIT_ENABLED), [ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1), [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_HDS_THRESH] = { .type = NLA_U32 }, }; static int ethnl_set_rings_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; struct nlattr **tb = info->attrs; if (tb[ETHTOOL_A_RINGS_RX_BUF_LEN] && !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_BUF_LEN)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_RX_BUF_LEN], "setting rx buf len not supported"); return -EOPNOTSUPP; } if (tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT] && !(ops->supported_ring_params & ETHTOOL_RING_USE_TCP_DATA_SPLIT)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT], "setting TCP data split is not supported"); return -EOPNOTSUPP; } if (tb[ETHTOOL_A_RINGS_HDS_THRESH] && !(ops->supported_ring_params & ETHTOOL_RING_USE_HDS_THRS)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_HDS_THRESH], "setting hds-thresh is not supported"); return -EOPNOTSUPP; } if (tb[ETHTOOL_A_RINGS_CQE_SIZE] && !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_CQE_SIZE], "setting cqe size not supported"); return -EOPNOTSUPP; } if (tb[ETHTOOL_A_RINGS_TX_PUSH] && !(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_TX_PUSH], "setting tx push not supported"); return -EOPNOTSUPP; } if (tb[ETHTOOL_A_RINGS_RX_PUSH] && !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_PUSH)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_RX_PUSH], "setting rx push not supported"); return -EOPNOTSUPP; } if (tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] && !(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH_BUF_LEN)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], "setting tx push buf len is not supported"); return -EOPNOTSUPP; } return ops->get_ringparam && ops->set_ringparam ? 1 : -EOPNOTSUPP; } static int ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) { struct kernel_ethtool_ringparam kernel_ringparam; struct net_device *dev = req_info->dev; struct ethtool_ringparam ringparam; struct nlattr **tb = info->attrs; const struct nlattr *err_attr; bool mod = false; int ret; ethtool_ringparam_get_cfg(dev, &ringparam, &kernel_ringparam, info->extack); ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); ethnl_update_u32(&ringparam.rx_mini_pending, tb[ETHTOOL_A_RINGS_RX_MINI], &mod); ethnl_update_u32(&ringparam.rx_jumbo_pending, tb[ETHTOOL_A_RINGS_RX_JUMBO], &mod); ethnl_update_u32(&ringparam.tx_pending, tb[ETHTOOL_A_RINGS_TX], &mod); ethnl_update_u32(&kernel_ringparam.rx_buf_len, tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod); ethnl_update_u8(&kernel_ringparam.tcp_data_split, tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT], &mod); ethnl_update_u32(&kernel_ringparam.cqe_size, tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod); ethnl_update_u8(&kernel_ringparam.tx_push, tb[ETHTOOL_A_RINGS_TX_PUSH], &mod); ethnl_update_u8(&kernel_ringparam.rx_push, tb[ETHTOOL_A_RINGS_RX_PUSH], &mod); ethnl_update_u32(&kernel_ringparam.tx_push_buf_len, tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], &mod); ethnl_update_u32(&kernel_ringparam.hds_thresh, tb[ETHTOOL_A_RINGS_HDS_THRESH], &mod); if (!mod) return 0; if (kernel_ringparam.tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED && dev_xdp_sb_prog_count(dev)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT], "tcp-data-split can not be enabled with single buffer XDP"); return -EINVAL; } if (dev_get_min_mp_channel_count(dev)) { if (kernel_ringparam.tcp_data_split != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { NL_SET_ERR_MSG(info->extack, "can't disable tcp-data-split while device has memory provider enabled"); return -EINVAL; } else if (kernel_ringparam.hds_thresh) { NL_SET_ERR_MSG(info->extack, "can't set non-zero hds_thresh while device is memory provider enabled"); return -EINVAL; } } /* ensure new ring parameters are within limits */ if (ringparam.rx_pending > ringparam.rx_max_pending) err_attr = tb[ETHTOOL_A_RINGS_RX]; else if (ringparam.rx_mini_pending > ringparam.rx_mini_max_pending) err_attr = tb[ETHTOOL_A_RINGS_RX_MINI]; else if (ringparam.rx_jumbo_pending > ringparam.rx_jumbo_max_pending) err_attr = tb[ETHTOOL_A_RINGS_RX_JUMBO]; else if (ringparam.tx_pending > ringparam.tx_max_pending) err_attr = tb[ETHTOOL_A_RINGS_TX]; else if (kernel_ringparam.hds_thresh > kernel_ringparam.hds_thresh_max) err_attr = tb[ETHTOOL_A_RINGS_HDS_THRESH]; else err_attr = NULL; if (err_attr) { NL_SET_ERR_MSG_ATTR(info->extack, err_attr, "requested ring size exceeds maximum"); return -EINVAL; } if (kernel_ringparam.tx_push_buf_len > kernel_ringparam.tx_push_buf_max_len) { NL_SET_ERR_MSG_ATTR_FMT(info->extack, tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], "Requested TX push buffer exceeds the maximum of %u", kernel_ringparam.tx_push_buf_max_len); return -EINVAL; } dev->cfg_pending->hds_config = kernel_ringparam.tcp_data_split; dev->cfg_pending->hds_thresh = kernel_ringparam.hds_thresh; ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); return ret < 0 ? ret : 1; } const struct ethnl_request_ops ethnl_rings_request_ops = { .request_cmd = ETHTOOL_MSG_RINGS_GET, .reply_cmd = ETHTOOL_MSG_RINGS_GET_REPLY, .hdr_attr = ETHTOOL_A_RINGS_HEADER, .req_info_size = sizeof(struct rings_req_info), .reply_data_size = sizeof(struct rings_reply_data), .prepare_data = rings_prepare_data, .reply_size = rings_reply_size, .fill_reply = rings_fill_reply, .set_validate = ethnl_set_rings_validate, .set = ethnl_set_rings, .set_ntf_cmd = ETHTOOL_MSG_RINGS_NTF, };
2 122 106 111 111 111 111 111 111 111 111 111 111 111 111 111 111 2 2 50 3 25 34 11 29 10 9 10 10 10 10 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 /* * net/tipc/link.c: TIPC link code * * Copyright (c) 1996-2007, 2012-2016, Ericsson AB * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "core.h" #include "subscr.h" #include "link.h" #include "bcast.h" #include "socket.h" #include "name_distr.h" #include "discover.h" #include "netlink.h" #include "monitor.h" #include "trace.h" #include "crypto.h" #include <linux/pkt_sched.h> struct tipc_stats { u32 sent_pkts; u32 recv_pkts; u32 sent_states; u32 recv_states; u32 sent_probes; u32 recv_probes; u32 sent_nacks; u32 recv_nacks; u32 sent_acks; u32 sent_bundled; u32 sent_bundles; u32 recv_bundled; u32 recv_bundles; u32 retransmitted; u32 sent_fragmented; u32 sent_fragments; u32 recv_fragmented; u32 recv_fragments; u32 link_congs; /* # port sends blocked by congestion */ u32 deferred_recv; u32 duplicates; u32 max_queue_sz; /* send queue size high water mark */ u32 accu_queue_sz; /* used for send queue size profiling */ u32 queue_sz_counts; /* used for send queue size profiling */ u32 msg_length_counts; /* used for message length profiling */ u32 msg_lengths_total; /* used for message length profiling */ u32 msg_length_profile[7]; /* used for msg. length profiling */ }; /** * struct tipc_link - TIPC link data structure * @addr: network address of link's peer node * @name: link name character string * @net: pointer to namespace struct * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint * @bearer_id: local bearer id used by link * @tolerance: minimum link continuity loss needed to reset link [in ms] * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM * @peer_caps: bitmap describing capabilities of peer node * @silent_intv_cnt: # of timer intervals without any reception from peer * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') * @mon_state: cookie with information needed by link monitor * @mtu: current maximum packet size for this link * @advertised_mtu: advertised own mtu when link is being established * @backlogq: queue for messages waiting to be sent * @ackers: # of peers that needs to ack each packet before it can be released * @acked: # last packet acked by a certain peer. Used for broadcast. * @rcv_nxt: next sequence number to expect for inbound messages * @inputq: buffer queue for messages to be delivered upwards * @namedq: buffer queue for name table messages to be delivered upwards * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate * @reasm_buf: head of partially reassembled inbound message fragments * @stats: collects statistics regarding link activity * @session: session to be used by link * @snd_nxt_state: next send seq number * @rcv_nxt_state: next rcv seq number * @in_session: have received ACTIVATE_MSG from peer * @active: link is active * @if_name: associated interface name * @rst_cnt: link reset counter * @drop_point: seq number for failover handling (FIXME) * @failover_reasm_skb: saved failover msg ptr (FIXME) * @failover_deferdq: deferred message queue for failover processing (FIXME) * @transmq: the link's transmit queue * @backlog: link's backlog by priority (importance) * @snd_nxt: next sequence number to be used * @rcv_unacked: # messages read by user, but not yet acked back to peer * @deferdq: deferred receive queue * @window: sliding window size for congestion handling * @min_win: minimal send window to be used by link * @ssthresh: slow start threshold for congestion handling * @max_win: maximal send window to be used by link * @cong_acks: congestion acks for congestion avoidance (FIXME) * @checkpoint: seq number for congestion window size handling * @reasm_tnlmsg: fragmentation/reassembly area for tunnel protocol message * @last_gap: last gap ack blocks for bcast (FIXME) * @last_ga: ptr to gap ack blocks * @bc_rcvlink: the peer specific link used for broadcast reception * @bc_sndlink: the namespace global link used for broadcast sending * @nack_state: bcast nack state * @bc_peer_is_up: peer has acked the bcast init msg */ struct tipc_link { u32 addr; char name[TIPC_MAX_LINK_NAME]; struct net *net; /* Management and link supervision data */ u16 peer_session; u16 session; u16 snd_nxt_state; u16 rcv_nxt_state; u32 peer_bearer_id; u32 bearer_id; u32 tolerance; u32 abort_limit; u32 state; u16 peer_caps; bool in_session; bool active; u32 silent_intv_cnt; char if_name[TIPC_MAX_IF_NAME]; u32 priority; char net_plane; struct tipc_mon_state mon_state; u16 rst_cnt; /* Failover/synch */ u16 drop_point; struct sk_buff *failover_reasm_skb; struct sk_buff_head failover_deferdq; /* Max packet negotiation */ u16 mtu; u16 advertised_mtu; /* Sending */ struct sk_buff_head transmq; struct sk_buff_head backlogq; struct { u16 len; u16 limit; struct sk_buff *target_bskb; } backlog[5]; u16 snd_nxt; /* Reception */ u16 rcv_nxt; u32 rcv_unacked; struct sk_buff_head deferdq; struct sk_buff_head *inputq; struct sk_buff_head *namedq; /* Congestion handling */ struct sk_buff_head wakeupq; u16 window; u16 min_win; u16 ssthresh; u16 max_win; u16 cong_acks; u16 checkpoint; /* Fragmentation/reassembly */ struct sk_buff *reasm_buf; struct sk_buff *reasm_tnlmsg; /* Broadcast */ u16 ackers; u16 acked; u16 last_gap; struct tipc_gap_ack_blks *last_ga; struct tipc_link *bc_rcvlink; struct tipc_link *bc_sndlink; u8 nack_state; bool bc_peer_is_up; /* Statistics */ struct tipc_stats stats; }; /* * Error message prefixes */ static const char *link_co_err = "Link tunneling error, "; static const char *link_rst_msg = "Resetting link "; /* Send states for broadcast NACKs */ enum { BC_NACK_SND_CONDITIONAL, BC_NACK_SND_UNCONDITIONAL, BC_NACK_SND_SUPPRESS, }; #define TIPC_BC_RETR_LIM (jiffies + msecs_to_jiffies(10)) #define TIPC_UC_RETR_TIME (jiffies + msecs_to_jiffies(1)) /* Link FSM states: */ enum { LINK_ESTABLISHED = 0xe, LINK_ESTABLISHING = 0xe << 4, LINK_RESET = 0x1 << 8, LINK_RESETTING = 0x2 << 12, LINK_PEER_RESET = 0xd << 16, LINK_FAILINGOVER = 0xf << 20, LINK_SYNCHING = 0xc << 24 }; static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, bool probe_reply, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq); static void link_print(struct tipc_link *l, const char *str); static int tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static void tipc_link_build_bc_init_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga, struct tipc_link *l, u8 start_index); static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr); static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, u16 acked, u16 gap, struct tipc_gap_ack_blks *ga, struct sk_buff_head *xmitq, bool *retransmitted, int *rc); static void tipc_link_update_cwin(struct tipc_link *l, int released, bool retransmitted); /* * Simple non-static link routines (i.e. referenced outside this file) */ bool tipc_link_is_up(struct tipc_link *l) { return l->state & (LINK_ESTABLISHED | LINK_SYNCHING); } bool tipc_link_peer_is_down(struct tipc_link *l) { return l->state == LINK_PEER_RESET; } bool tipc_link_is_reset(struct tipc_link *l) { return l->state & (LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING); } bool tipc_link_is_establishing(struct tipc_link *l) { return l->state == LINK_ESTABLISHING; } bool tipc_link_is_synching(struct tipc_link *l) { return l->state == LINK_SYNCHING; } bool tipc_link_is_failingover(struct tipc_link *l) { return l->state == LINK_FAILINGOVER; } bool tipc_link_is_blocked(struct tipc_link *l) { return l->state & (LINK_RESETTING | LINK_PEER_RESET | LINK_FAILINGOVER); } static bool link_is_bc_sndlink(struct tipc_link *l) { return !l->bc_sndlink; } static bool link_is_bc_rcvlink(struct tipc_link *l) { return ((l->bc_rcvlink == l) && !link_is_bc_sndlink(l)); } void tipc_link_set_active(struct tipc_link *l, bool active) { l->active = active; } u32 tipc_link_id(struct tipc_link *l) { return l->peer_bearer_id << 16 | l->bearer_id; } int tipc_link_min_win(struct tipc_link *l) { return l->min_win; } int tipc_link_max_win(struct tipc_link *l) { return l->max_win; } int tipc_link_prio(struct tipc_link *l) { return l->priority; } unsigned long tipc_link_tolerance(struct tipc_link *l) { return l->tolerance; } struct sk_buff_head *tipc_link_inputq(struct tipc_link *l) { return l->inputq; } char tipc_link_plane(struct tipc_link *l) { return l->net_plane; } struct net *tipc_link_net(struct tipc_link *l) { return l->net; } void tipc_link_update_caps(struct tipc_link *l, u16 capabilities) { l->peer_caps = capabilities; } void tipc_link_add_bc_peer(struct tipc_link *snd_l, struct tipc_link *uc_l, struct sk_buff_head *xmitq) { struct tipc_link *rcv_l = uc_l->bc_rcvlink; snd_l->ackers++; rcv_l->acked = snd_l->snd_nxt - 1; snd_l->state = LINK_ESTABLISHED; tipc_link_build_bc_init_msg(uc_l, xmitq); } void tipc_link_remove_bc_peer(struct tipc_link *snd_l, struct tipc_link *rcv_l, struct sk_buff_head *xmitq) { u16 ack = snd_l->snd_nxt - 1; snd_l->ackers--; rcv_l->bc_peer_is_up = true; rcv_l->state = LINK_ESTABLISHED; tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq, NULL); trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!"); tipc_link_reset(rcv_l); rcv_l->state = LINK_RESET; if (!snd_l->ackers) { trace_tipc_link_reset(snd_l, TIPC_DUMP_ALL, "zero ackers!"); tipc_link_reset(snd_l); snd_l->state = LINK_RESET; __skb_queue_purge(xmitq); } } int tipc_link_bc_peers(struct tipc_link *l) { return l->ackers; } static u16 link_bc_rcv_gap(struct tipc_link *l) { struct sk_buff *skb = skb_peek(&l->deferdq); u16 gap = 0; if (more(l->snd_nxt, l->rcv_nxt)) gap = l->snd_nxt - l->rcv_nxt; if (skb) gap = buf_seqno(skb) - l->rcv_nxt; return gap; } void tipc_link_set_mtu(struct tipc_link *l, int mtu) { l->mtu = mtu; } int tipc_link_mtu(struct tipc_link *l) { return l->mtu; } int tipc_link_mss(struct tipc_link *l) { #ifdef CONFIG_TIPC_CRYPTO return l->mtu - INT_H_SIZE - EMSG_OVERHEAD; #else return l->mtu - INT_H_SIZE; #endif } u16 tipc_link_rcv_nxt(struct tipc_link *l) { return l->rcv_nxt; } u16 tipc_link_acked(struct tipc_link *l) { return l->acked; } char *tipc_link_name(struct tipc_link *l) { return l->name; } u32 tipc_link_state(struct tipc_link *l) { return l->state; } /** * tipc_link_create - create a new link * @net: pointer to associated network namespace * @if_name: associated interface name * @bearer_id: id (index) of associated bearer * @tolerance: link tolerance to be used by link * @net_plane: network plane (A,B,c..) this link belongs to * @mtu: mtu to be advertised by link * @priority: priority to be used by link * @min_win: minimal send window to be used by link * @max_win: maximal send window to be used by link * @session: session to be used by link * @peer: node id of peer node * @peer_caps: bitmap describing peer node capabilities * @bc_sndlink: the namespace global link used for broadcast sending * @bc_rcvlink: the peer specific link used for broadcast reception * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link * @self: local unicast link id * @peer_id: 128-bit ID of peer * * Return: true if link was created, otherwise false */ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, int tolerance, char net_plane, u32 mtu, int priority, u32 min_win, u32 max_win, u32 session, u32 self, u32 peer, u8 *peer_id, u16 peer_caps, struct tipc_link *bc_sndlink, struct tipc_link *bc_rcvlink, struct sk_buff_head *inputq, struct sk_buff_head *namedq, struct tipc_link **link) { char peer_str[NODE_ID_STR_LEN] = {0,}; char self_str[NODE_ID_STR_LEN] = {0,}; struct tipc_link *l; l = kzalloc(sizeof(*l), GFP_ATOMIC); if (!l) return false; *link = l; l->session = session; /* Set link name for unicast links only */ if (peer_id) { tipc_nodeid2string(self_str, tipc_own_id(net)); if (strlen(self_str) > 16) sprintf(self_str, "%x", self); tipc_nodeid2string(peer_str, peer_id); if (strlen(peer_str) > 16) sprintf(peer_str, "%x", peer); } /* Peer i/f name will be completed by reset/activate message */ snprintf(l->name, sizeof(l->name), "%s:%s-%s:unknown", self_str, if_name, peer_str); strcpy(l->if_name, if_name); l->addr = peer; l->peer_caps = peer_caps; l->net = net; l->in_session = false; l->bearer_id = bearer_id; l->tolerance = tolerance; if (bc_rcvlink) bc_rcvlink->tolerance = tolerance; l->net_plane = net_plane; l->advertised_mtu = mtu; l->mtu = mtu; l->priority = priority; tipc_link_set_queue_limits(l, min_win, max_win); l->ackers = 1; l->bc_sndlink = bc_sndlink; l->bc_rcvlink = bc_rcvlink; l->inputq = inputq; l->namedq = namedq; l->state = LINK_RESETTING; __skb_queue_head_init(&l->transmq); __skb_queue_head_init(&l->backlogq); __skb_queue_head_init(&l->deferdq); __skb_queue_head_init(&l->failover_deferdq); skb_queue_head_init(&l->wakeupq); skb_queue_head_init(l->inputq); return true; } /** * tipc_link_bc_create - create new link to be used for broadcast * @net: pointer to associated network namespace * @mtu: mtu to be used initially if no peers * @min_win: minimal send window to be used by link * @max_win: maximal send window to be used by link * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link * @ownnode: identity of own node * @peer: node id of peer node * @peer_id: 128-bit ID of peer * @peer_caps: bitmap describing peer node capabilities * @bc_sndlink: the namespace global link used for broadcast sending * * Return: true if link was created, otherwise false */ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id, int mtu, u32 min_win, u32 max_win, u16 peer_caps, struct sk_buff_head *inputq, struct sk_buff_head *namedq, struct tipc_link *bc_sndlink, struct tipc_link **link) { struct tipc_link *l; if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, min_win, max_win, 0, ownnode, peer, NULL, peer_caps, bc_sndlink, NULL, inputq, namedq, link)) return false; l = *link; if (peer_id) { char peer_str[NODE_ID_STR_LEN] = {0,}; tipc_nodeid2string(peer_str, peer_id); if (strlen(peer_str) > 16) sprintf(peer_str, "%x", peer); /* Broadcast receiver link name: "broadcast-link:<peer>" */ snprintf(l->name, sizeof(l->name), "%s:%s", tipc_bclink_name, peer_str); } else { strcpy(l->name, tipc_bclink_name); } trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!"); tipc_link_reset(l); l->state = LINK_RESET; l->ackers = 0; l->bc_rcvlink = l; /* Broadcast send link is always up */ if (link_is_bc_sndlink(l)) l->state = LINK_ESTABLISHED; /* Disable replicast if even a single peer doesn't support it */ if (link_is_bc_rcvlink(l) && !(peer_caps & TIPC_BCAST_RCAST)) tipc_bcast_toggle_rcast(net, false); return true; } /** * tipc_link_fsm_evt - link finite state machine * @l: pointer to link * @evt: state machine event to be processed */ int tipc_link_fsm_evt(struct tipc_link *l, int evt) { int rc = 0; int old_state = l->state; switch (l->state) { case LINK_RESETTING: switch (evt) { case LINK_PEER_RESET_EVT: l->state = LINK_PEER_RESET; break; case LINK_RESET_EVT: l->state = LINK_RESET; break; case LINK_FAILURE_EVT: case LINK_FAILOVER_BEGIN_EVT: case LINK_ESTABLISH_EVT: case LINK_FAILOVER_END_EVT: case LINK_SYNCH_BEGIN_EVT: case LINK_SYNCH_END_EVT: default: goto illegal_evt; } break; case LINK_RESET: switch (evt) { case LINK_PEER_RESET_EVT: l->state = LINK_ESTABLISHING; break; case LINK_FAILOVER_BEGIN_EVT: l->state = LINK_FAILINGOVER; break; case LINK_FAILURE_EVT: case LINK_RESET_EVT: case LINK_ESTABLISH_EVT: case LINK_FAILOVER_END_EVT: break; case LINK_SYNCH_BEGIN_EVT: case LINK_SYNCH_END_EVT: default: goto illegal_evt; } break; case LINK_PEER_RESET: switch (evt) { case LINK_RESET_EVT: l->state = LINK_ESTABLISHING; break; case LINK_PEER_RESET_EVT: case LINK_ESTABLISH_EVT: case LINK_FAILURE_EVT: break; case LINK_SYNCH_BEGIN_EVT: case LINK_SYNCH_END_EVT: case LINK_FAILOVER_BEGIN_EVT: case LINK_FAILOVER_END_EVT: default: goto illegal_evt; } break; case LINK_FAILINGOVER: switch (evt) { case LINK_FAILOVER_END_EVT: l->state = LINK_RESET; break; case LINK_PEER_RESET_EVT: case LINK_RESET_EVT: case LINK_ESTABLISH_EVT: case LINK_FAILURE_EVT: break; case LINK_FAILOVER_BEGIN_EVT: case LINK_SYNCH_BEGIN_EVT: case LINK_SYNCH_END_EVT: default: goto illegal_evt; } break; case LINK_ESTABLISHING: switch (evt) { case LINK_ESTABLISH_EVT: l->state = LINK_ESTABLISHED; break; case LINK_FAILOVER_BEGIN_EVT: l->state = LINK_FAILINGOVER; break; case LINK_RESET_EVT: l->state = LINK_RESET; break; case LINK_FAILURE_EVT: case LINK_PEER_RESET_EVT: case LINK_SYNCH_BEGIN_EVT: case LINK_FAILOVER_END_EVT: break; case LINK_SYNCH_END_EVT: default: goto illegal_evt; } break; case LINK_ESTABLISHED: switch (evt) { case LINK_PEER_RESET_EVT: l->state = LINK_PEER_RESET; rc |= TIPC_LINK_DOWN_EVT; break; case LINK_FAILURE_EVT: l->state = LINK_RESETTING; rc |= TIPC_LINK_DOWN_EVT; break; case LINK_RESET_EVT: l->state = LINK_RESET; break; case LINK_ESTABLISH_EVT: case LINK_SYNCH_END_EVT: break; case LINK_SYNCH_BEGIN_EVT: l->state = LINK_SYNCHING; break; case LINK_FAILOVER_BEGIN_EVT: case LINK_FAILOVER_END_EVT: default: goto illegal_evt; } break; case LINK_SYNCHING: switch (evt) { case LINK_PEER_RESET_EVT: l->state = LINK_PEER_RESET; rc |= TIPC_LINK_DOWN_EVT; break; case LINK_FAILURE_EVT: l->state = LINK_RESETTING; rc |= TIPC_LINK_DOWN_EVT; break; case LINK_RESET_EVT: l->state = LINK_RESET; break; case LINK_ESTABLISH_EVT: case LINK_SYNCH_BEGIN_EVT: break; case LINK_SYNCH_END_EVT: l->state = LINK_ESTABLISHED; break; case LINK_FAILOVER_BEGIN_EVT: case LINK_FAILOVER_END_EVT: default: goto illegal_evt; } break; default: pr_err("Unknown FSM state %x in %s\n", l->state, l->name); } trace_tipc_link_fsm(l->name, old_state, l->state, evt); return rc; illegal_evt: pr_err("Illegal FSM event %x in state %x on link %s\n", evt, l->state, l->name); trace_tipc_link_fsm(l->name, old_state, l->state, evt); return rc; } /* link_profile_stats - update statistical profiling of traffic */ static void link_profile_stats(struct tipc_link *l) { struct sk_buff *skb; struct tipc_msg *msg; int length; /* Update counters used in statistical profiling of send traffic */ l->stats.accu_queue_sz += skb_queue_len(&l->transmq); l->stats.queue_sz_counts++; skb = skb_peek(&l->transmq); if (!skb) return; msg = buf_msg(skb); length = msg_size(msg); if (msg_user(msg) == MSG_FRAGMENTER) { if (msg_type(msg) != FIRST_FRAGMENT) return; length = msg_size(msg_inner_hdr(msg)); } l->stats.msg_lengths_total += length; l->stats.msg_length_counts++; if (length <= 64) l->stats.msg_length_profile[0]++; else if (length <= 256) l->stats.msg_length_profile[1]++; else if (length <= 1024) l->stats.msg_length_profile[2]++; else if (length <= 4096) l->stats.msg_length_profile[3]++; else if (length <= 16384) l->stats.msg_length_profile[4]++; else if (length <= 32768) l->stats.msg_length_profile[5]++; else l->stats.msg_length_profile[6]++; } /** * tipc_link_too_silent - check if link is "too silent" * @l: tipc link to be checked * * Return: true if the link 'silent_intv_cnt' is about to reach the * 'abort_limit' value, otherwise false */ bool tipc_link_too_silent(struct tipc_link *l) { return (l->silent_intv_cnt + 2 > l->abort_limit); } /* tipc_link_timeout - perform periodic task as instructed from node timeout */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { int mtyp = 0; int rc = 0; bool state = false; bool probe = false; bool setup = false; u16 bc_snt = l->bc_sndlink->snd_nxt - 1; u16 bc_acked = l->bc_rcvlink->acked; struct tipc_mon_state *mstate = &l->mon_state; trace_tipc_link_timeout(l, TIPC_DUMP_NONE, " "); trace_tipc_link_too_silent(l, TIPC_DUMP_ALL, " "); switch (l->state) { case LINK_ESTABLISHED: case LINK_SYNCHING: mtyp = STATE_MSG; link_profile_stats(l); tipc_mon_get_state(l->net, l->addr, mstate, l->bearer_id); if (mstate->reset || (l->silent_intv_cnt > l->abort_limit)) return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); state = bc_acked != bc_snt; state |= l->bc_rcvlink->rcv_unacked; state |= l->rcv_unacked; state |= !skb_queue_empty(&l->transmq); probe = mstate->probing; probe |= l->silent_intv_cnt; if (probe || mstate->monitoring) l->silent_intv_cnt++; probe |= !skb_queue_empty(&l->deferdq); if (l->snd_nxt == l->checkpoint) { tipc_link_update_cwin(l, 0, 0); probe = true; } l->checkpoint = l->snd_nxt; break; case LINK_RESET: setup = l->rst_cnt++ <= 4; setup |= !(l->rst_cnt % 16); mtyp = RESET_MSG; break; case LINK_ESTABLISHING: setup = true; mtyp = ACTIVATE_MSG; break; case LINK_PEER_RESET: case LINK_RESETTING: case LINK_FAILINGOVER: break; default: break; } if (state || probe || setup) tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq); return rc; } /** * link_schedule_user - schedule a message sender for wakeup after congestion * @l: congested link * @hdr: header of message that is being sent * Create pseudo msg to send back to user when congestion abates */ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) { u32 dnode = tipc_own_addr(l->net); u32 dport = msg_origport(hdr); struct sk_buff *skb; /* Create and schedule wakeup pseudo message */ skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, dnode, l->addr, dport, 0, 0); if (!skb) return -ENOBUFS; msg_set_dest_droppable(buf_msg(skb), true); TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr); skb_queue_tail(&l->wakeupq, skb); l->stats.link_congs++; trace_tipc_link_conges(l, TIPC_DUMP_ALL, "wakeup scheduled!"); return -ELINKCONG; } /** * link_prepare_wakeup - prepare users for wakeup after congestion * @l: congested link * Wake up a number of waiting users, as permitted by available space * in the send queue */ static void link_prepare_wakeup(struct tipc_link *l) { struct sk_buff_head *wakeupq = &l->wakeupq; struct sk_buff_head *inputq = l->inputq; struct sk_buff *skb, *tmp; struct sk_buff_head tmpq; int avail[5] = {0,}; int imp = 0; __skb_queue_head_init(&tmpq); for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) avail[imp] = l->backlog[imp].limit - l->backlog[imp].len; skb_queue_walk_safe(wakeupq, skb, tmp) { imp = TIPC_SKB_CB(skb)->chain_imp; if (avail[imp] <= 0) continue; avail[imp]--; __skb_unlink(skb, wakeupq); __skb_queue_tail(&tmpq, skb); } spin_lock_bh(&inputq->lock); skb_queue_splice_tail(&tmpq, inputq); spin_unlock_bh(&inputq->lock); } /** * tipc_link_set_skb_retransmit_time - set the time at which retransmission of * the given skb should be next attempted * @skb: skb to set a future retransmission time for * @l: link the skb will be transmitted on */ static void tipc_link_set_skb_retransmit_time(struct sk_buff *skb, struct tipc_link *l) { if (link_is_bc_sndlink(l)) TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; else TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; } void tipc_link_reset(struct tipc_link *l) { struct sk_buff_head list; u32 imp; __skb_queue_head_init(&list); l->in_session = false; /* Force re-synch of peer session number before establishing */ l->peer_session--; l->session++; l->mtu = l->advertised_mtu; spin_lock_bh(&l->wakeupq.lock); skb_queue_splice_init(&l->wakeupq, &list); spin_unlock_bh(&l->wakeupq.lock); spin_lock_bh(&l->inputq->lock); skb_queue_splice_init(&list, l->inputq); spin_unlock_bh(&l->inputq->lock); __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); __skb_queue_purge(&l->backlogq); __skb_queue_purge(&l->failover_deferdq); for (imp = 0; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) { l->backlog[imp].len = 0; l->backlog[imp].target_bskb = NULL; } kfree_skb(l->reasm_buf); kfree_skb(l->reasm_tnlmsg); kfree_skb(l->failover_reasm_skb); l->reasm_buf = NULL; l->reasm_tnlmsg = NULL; l->failover_reasm_skb = NULL; l->rcv_unacked = 0; l->snd_nxt = 1; l->rcv_nxt = 1; l->snd_nxt_state = 1; l->rcv_nxt_state = 1; l->acked = 0; l->last_gap = 0; kfree(l->last_ga); l->last_ga = NULL; l->silent_intv_cnt = 0; l->rst_cnt = 0; l->bc_peer_is_up = false; memset(&l->mon_state, 0, sizeof(l->mon_state)); tipc_link_reset_stats(l); } /** * tipc_link_xmit(): enqueue buffer list according to queue situation * @l: link to use * @list: chain of buffers containing message * @xmitq: returned list of packets to be sent by caller * * Consumes the buffer chain. * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS */ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff_head *xmitq) { struct sk_buff_head *backlogq = &l->backlogq; struct sk_buff_head *transmq = &l->transmq; struct sk_buff *skb, *_skb; u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; u16 ack = l->rcv_nxt - 1; u16 seqno = l->snd_nxt; int pkt_cnt = skb_queue_len(list); unsigned int mss = tipc_link_mss(l); unsigned int cwin = l->window; unsigned int mtu = l->mtu; struct tipc_msg *hdr; bool new_bundle; int rc = 0; int imp; if (pkt_cnt <= 0) return 0; hdr = buf_msg(skb_peek(list)); if (unlikely(msg_size(hdr) > mtu)) { pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n", skb_queue_len(list), msg_user(hdr), msg_type(hdr), msg_size(hdr), mtu); __skb_queue_purge(list); return -EMSGSIZE; } imp = msg_importance(hdr); /* Allow oversubscription of one data msg per source at congestion */ if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) { if (imp == TIPC_SYSTEM_IMPORTANCE) { pr_warn("%s<%s>, link overflow", link_rst_msg, l->name); return -ENOBUFS; } rc = link_schedule_user(l, hdr); } if (pkt_cnt > 1) { l->stats.sent_fragmented++; l->stats.sent_fragments += pkt_cnt; } /* Prepare each packet for sending, and add to relevant queue: */ while ((skb = __skb_dequeue(list))) { if (likely(skb_queue_len(transmq) < cwin)) { hdr = buf_msg(skb); msg_set_seqno(hdr, seqno); msg_set_ack(hdr, ack); msg_set_bcast_ack(hdr, bc_ack); _skb = skb_clone(skb, GFP_ATOMIC); if (!_skb) { kfree_skb(skb); __skb_queue_purge(list); return -ENOBUFS; } __skb_queue_tail(transmq, skb); tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; l->rcv_unacked = 0; l->stats.sent_pkts++; seqno++; continue; } if (tipc_msg_try_bundle(l->backlog[imp].target_bskb, &skb, mss, l->addr, &new_bundle)) { if (skb) { /* Keep a ref. to the skb for next try */ l->backlog[imp].target_bskb = skb; l->backlog[imp].len++; __skb_queue_tail(backlogq, skb); } else { if (new_bundle) { l->stats.sent_bundles++; l->stats.sent_bundled++; } l->stats.sent_bundled++; } continue; } l->backlog[imp].target_bskb = NULL; l->backlog[imp].len += (1 + skb_queue_len(list)); __skb_queue_tail(backlogq, skb); skb_queue_splice_tail_init(list, backlogq); } l->snd_nxt = seqno; return rc; } static void tipc_link_update_cwin(struct tipc_link *l, int released, bool retransmitted) { int bklog_len = skb_queue_len(&l->backlogq); struct sk_buff_head *txq = &l->transmq; int txq_len = skb_queue_len(txq); u16 cwin = l->window; /* Enter fast recovery */ if (unlikely(retransmitted)) { l->ssthresh = max_t(u16, l->window / 2, 300); l->window = min_t(u16, l->ssthresh, l->window); return; } /* Enter slow start */ if (unlikely(!released)) { l->ssthresh = max_t(u16, l->window / 2, 300); l->window = l->min_win; return; } /* Don't increase window if no pressure on the transmit queue */ if (txq_len + bklog_len < cwin) return; /* Don't increase window if there are holes the transmit queue */ if (txq_len && l->snd_nxt - buf_seqno(skb_peek(txq)) != txq_len) return; l->cong_acks += released; /* Slow start */ if (cwin <= l->ssthresh) { l->window = min_t(u16, cwin + released, l->max_win); return; } /* Congestion avoidance */ if (l->cong_acks < cwin) return; l->window = min_t(u16, ++cwin, l->max_win); l->cong_acks = 0; } static void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) { u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; struct sk_buff_head *txq = &l->transmq; struct sk_buff *skb, *_skb; u16 ack = l->rcv_nxt - 1; u16 seqno = l->snd_nxt; struct tipc_msg *hdr; u16 cwin = l->window; u32 imp; while (skb_queue_len(txq) < cwin) { skb = skb_peek(&l->backlogq); if (!skb) break; _skb = skb_clone(skb, GFP_ATOMIC); if (!_skb) break; __skb_dequeue(&l->backlogq); hdr = buf_msg(skb); imp = msg_importance(hdr); l->backlog[imp].len--; if (unlikely(skb == l->backlog[imp].target_bskb)) l->backlog[imp].target_bskb = NULL; __skb_queue_tail(&l->transmq, skb); tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; msg_set_seqno(hdr, seqno); msg_set_ack(hdr, ack); msg_set_bcast_ack(hdr, bc_ack); l->rcv_unacked = 0; l->stats.sent_pkts++; seqno++; } l->snd_nxt = seqno; } /** * link_retransmit_failure() - Detect repeated retransmit failures * @l: tipc link sender * @r: tipc link receiver (= l in case of unicast) * @rc: returned code * * Return: true if the repeated retransmit failures happens, otherwise * false */ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, int *rc) { struct sk_buff *skb = skb_peek(&l->transmq); struct tipc_msg *hdr; if (!skb) return false; if (!TIPC_SKB_CB(skb)->retr_cnt) return false; if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp + msecs_to_jiffies(r->tolerance * 10))) return false; hdr = buf_msg(skb); if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr))) return false; pr_warn("Retransmission failure on link <%s>\n", l->name); link_print(l, "State of link "); pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); pr_info("sqno %u, prev: %x, dest: %x\n", msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr)); pr_info("retr_stamp %d, retr_cnt %d\n", jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp), TIPC_SKB_CB(skb)->retr_cnt); trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); if (link_is_bc_sndlink(l)) { r->state = LINK_RESET; *rc |= TIPC_LINK_DOWN_EVT; } else { *rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } return true; } /* tipc_data_input - deliver data and name distr msgs to upper layer * * Consumes buffer if message is of right type * Node lock must be held */ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq; struct tipc_msg *hdr = buf_msg(skb); switch (msg_user(hdr)) { case TIPC_LOW_IMPORTANCE: case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) { skb_queue_tail(mc_inputq, skb); return true; } fallthrough; case CONN_MANAGER: skb_queue_tail(inputq, skb); return true; case GROUP_PROTOCOL: skb_queue_tail(mc_inputq, skb); return true; case NAME_DISTRIBUTOR: l->bc_rcvlink->state = LINK_ESTABLISHED; skb_queue_tail(l->namedq, skb); return true; case MSG_BUNDLER: case TUNNEL_PROTOCOL: case MSG_FRAGMENTER: case BCAST_PROTOCOL: return false; #ifdef CONFIG_TIPC_CRYPTO case MSG_CRYPTO: if (sysctl_tipc_key_exchange_enabled && TIPC_SKB_CB(skb)->decrypted) { tipc_crypto_msg_rcv(l->net, skb); return true; } fallthrough; #endif default: pr_warn("Dropping received illegal msg type\n"); kfree_skb(skb); return true; } } /* tipc_link_input - process packet that has passed link protocol check * * Consumes buffer */ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq, struct sk_buff **reasm_skb) { struct tipc_msg *hdr = buf_msg(skb); struct sk_buff *iskb; struct sk_buff_head tmpq; int usr = msg_user(hdr); int pos = 0; if (usr == MSG_BUNDLER) { skb_queue_head_init(&tmpq); l->stats.recv_bundles++; l->stats.recv_bundled += msg_msgcnt(hdr); while (tipc_msg_extract(skb, &iskb, &pos)) tipc_data_input(l, iskb, &tmpq); tipc_skb_queue_splice_tail(&tmpq, inputq); return 0; } else if (usr == MSG_FRAGMENTER) { l->stats.recv_fragments++; if (tipc_buf_append(reasm_skb, &skb)) { l->stats.recv_fragmented++; tipc_data_input(l, skb, inputq); } else if (!*reasm_skb && !link_is_bc_rcvlink(l)) { pr_warn_ratelimited("Unable to build fragment list\n"); return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } return 0; } else if (usr == BCAST_PROTOCOL) { tipc_bcast_lock(l->net); tipc_link_bc_init_rcv(l->bc_rcvlink, hdr); tipc_bcast_unlock(l->net); } kfree_skb(skb); return 0; } /* tipc_link_tnl_rcv() - receive TUNNEL_PROTOCOL message, drop or process the * inner message along with the ones in the old link's * deferdq * @l: tunnel link * @skb: TUNNEL_PROTOCOL message * @inputq: queue to put messages ready for delivery */ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { struct sk_buff **reasm_skb = &l->failover_reasm_skb; struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg; struct sk_buff_head *fdefq = &l->failover_deferdq; struct tipc_msg *hdr = buf_msg(skb); struct sk_buff *iskb; int ipos = 0; int rc = 0; u16 seqno; if (msg_type(hdr) == SYNCH_MSG) { kfree_skb(skb); return 0; } /* Not a fragment? */ if (likely(!msg_nof_fragms(hdr))) { if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) { pr_warn_ratelimited("Unable to extract msg, defq: %d\n", skb_queue_len(fdefq)); return 0; } kfree_skb(skb); } else { /* Set fragment type for buf_append */ if (msg_fragm_no(hdr) == 1) msg_set_type(hdr, FIRST_FRAGMENT); else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr)) msg_set_type(hdr, FRAGMENT); else msg_set_type(hdr, LAST_FRAGMENT); if (!tipc_buf_append(reasm_tnlmsg, &skb)) { /* Successful but non-complete reassembly? */ if (*reasm_tnlmsg || link_is_bc_rcvlink(l)) return 0; pr_warn_ratelimited("Unable to reassemble tunnel msg\n"); return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } iskb = skb; } do { seqno = buf_seqno(iskb); if (unlikely(less(seqno, l->drop_point))) { kfree_skb(iskb); continue; } if (unlikely(seqno != l->drop_point)) { __tipc_skb_queue_sorted(fdefq, seqno, iskb); continue; } l->drop_point++; if (!tipc_data_input(l, iskb, inputq)) rc |= tipc_link_input(l, iskb, inputq, reasm_skb); if (unlikely(rc)) break; } while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point))); return rc; } /** * tipc_get_gap_ack_blks - get Gap ACK blocks from PROTOCOL/STATE_MSG * @ga: returned pointer to the Gap ACK blocks if any * @l: the tipc link * @hdr: the PROTOCOL/STATE_MSG header * @uc: desired Gap ACK blocks type, i.e. unicast (= 1) or broadcast (= 0) * * Return: the total Gap ACK blocks size */ u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l, struct tipc_msg *hdr, bool uc) { struct tipc_gap_ack_blks *p; u16 sz = 0; /* Does peer support the Gap ACK blocks feature? */ if (l->peer_caps & TIPC_GAP_ACK_BLOCK) { p = (struct tipc_gap_ack_blks *)msg_data(hdr); sz = ntohs(p->len); /* Sanity check */ if (sz == struct_size(p, gacks, size_add(p->ugack_cnt, p->bgack_cnt))) { /* Good, check if the desired type exists */ if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt)) goto ok; /* Backward compatible: peer might not support bc, but uc? */ } else if (uc && sz == struct_size(p, gacks, p->ugack_cnt)) { if (p->ugack_cnt) { p->bgack_cnt = 0; goto ok; } } } /* Other cases: ignore! */ p = NULL; ok: *ga = p; return sz; } static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga, struct tipc_link *l, u8 start_index) { struct tipc_gap_ack *gacks = &ga->gacks[start_index]; struct sk_buff *skb = skb_peek(&l->deferdq); u16 expect, seqno = 0; u8 n = 0; if (!skb) return 0; expect = buf_seqno(skb); skb_queue_walk(&l->deferdq, skb) { seqno = buf_seqno(skb); if (unlikely(more(seqno, expect))) { gacks[n].ack = htons(expect - 1); gacks[n].gap = htons(seqno - expect); if (++n >= MAX_GAP_ACK_BLKS / 2) { pr_info_ratelimited("Gacks on %s: %d, ql: %d!\n", l->name, n, skb_queue_len(&l->deferdq)); return n; } } else if (unlikely(less(seqno, expect))) { pr_warn("Unexpected skb in deferdq!\n"); continue; } expect = seqno + 1; } /* last block */ gacks[n].ack = htons(seqno); gacks[n].gap = 0; n++; return n; } /* tipc_build_gap_ack_blks - build Gap ACK blocks * @l: tipc unicast link * @hdr: the tipc message buffer to store the Gap ACK blocks after built * * The function builds Gap ACK blocks for both the unicast & broadcast receiver * links of a certain peer, the buffer after built has the network data format * as found at the struct tipc_gap_ack_blks definition. * * returns the actual allocated memory size */ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr) { struct tipc_link *bcl = l->bc_rcvlink; struct tipc_gap_ack_blks *ga; u16 len; ga = (struct tipc_gap_ack_blks *)msg_data(hdr); /* Start with broadcast link first */ tipc_bcast_lock(bcl->net); msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); ga->bgack_cnt = __tipc_build_gap_ack_blks(ga, bcl, 0); tipc_bcast_unlock(bcl->net); /* Now for unicast link, but an explicit NACK only (???) */ ga->ugack_cnt = (msg_seq_gap(hdr)) ? __tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0; /* Total len */ len = struct_size(ga, gacks, size_add(ga->bgack_cnt, ga->ugack_cnt)); ga->len = htons(len); return len; } /* tipc_link_advance_transmq - advance TIPC link transmq queue by releasing * acked packets, also doing retransmissions if * gaps found * @l: tipc link with transmq queue to be advanced * @r: tipc link "receiver" i.e. in case of broadcast (= "l" if unicast) * @acked: seqno of last packet acked by peer without any gaps before * @gap: # of gap packets * @ga: buffer pointer to Gap ACK blocks from peer * @xmitq: queue for accumulating the retransmitted packets if any * @retransmitted: returned boolean value if a retransmission is really issued * @rc: returned code e.g. TIPC_LINK_DOWN_EVT if a repeated retransmit failures * happens (- unlikely case) * * Return: the number of packets released from the link transmq */ static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, u16 acked, u16 gap, struct tipc_gap_ack_blks *ga, struct sk_buff_head *xmitq, bool *retransmitted, int *rc) { struct tipc_gap_ack_blks *last_ga = r->last_ga, *this_ga = NULL; struct tipc_gap_ack *gacks = NULL; struct sk_buff *skb, *_skb, *tmp; struct tipc_msg *hdr; u32 qlen = skb_queue_len(&l->transmq); u16 nacked = acked, ngap = gap, gack_cnt = 0; u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; u16 ack = l->rcv_nxt - 1; u16 seqno, n = 0; u16 end = r->acked, start = end, offset = r->last_gap; u16 si = (last_ga) ? last_ga->start_index : 0; bool is_uc = !link_is_bc_sndlink(l); bool bc_has_acked = false; trace_tipc_link_retrans(r, acked + 1, acked + gap, &l->transmq); /* Determine Gap ACK blocks if any for the particular link */ if (ga && is_uc) { /* Get the Gap ACKs, uc part */ gack_cnt = ga->ugack_cnt; gacks = &ga->gacks[ga->bgack_cnt]; } else if (ga) { /* Copy the Gap ACKs, bc part, for later renewal if needed */ this_ga = kmemdup(ga, struct_size(ga, gacks, ga->bgack_cnt), GFP_ATOMIC); if (likely(this_ga)) { this_ga->start_index = 0; /* Start with the bc Gap ACKs */ gack_cnt = this_ga->bgack_cnt; gacks = &this_ga->gacks[0]; } else { /* Hmm, we can get in trouble..., simply ignore it */ pr_warn_ratelimited("Ignoring bc Gap ACKs, no memory\n"); } } /* Advance the link transmq */ skb_queue_walk_safe(&l->transmq, skb, tmp) { seqno = buf_seqno(skb); next_gap_ack: if (less_eq(seqno, nacked)) { if (is_uc) goto release; /* Skip packets peer has already acked */ if (!more(seqno, r->acked)) continue; /* Get the next of last Gap ACK blocks */ while (more(seqno, end)) { if (!last_ga || si >= last_ga->bgack_cnt) break; start = end + offset + 1; end = ntohs(last_ga->gacks[si].ack); offset = ntohs(last_ga->gacks[si].gap); si++; WARN_ONCE(more(start, end) || (!offset && si < last_ga->bgack_cnt) || si > MAX_GAP_ACK_BLKS, "Corrupted Gap ACK: %d %d %d %d %d\n", start, end, offset, si, last_ga->bgack_cnt); } /* Check against the last Gap ACK block */ if (tipc_in_range(seqno, start, end)) continue; /* Update/release the packet peer is acking */ bc_has_acked = true; if (--TIPC_SKB_CB(skb)->ackers) continue; release: /* release skb */ __skb_unlink(skb, &l->transmq); kfree_skb(skb); } else if (less_eq(seqno, nacked + ngap)) { /* First gap: check if repeated retrans failures? */ if (unlikely(seqno == acked + 1 && link_retransmit_failure(l, r, rc))) { /* Ignore this bc Gap ACKs if any */ kfree(this_ga); this_ga = NULL; break; } /* retransmit skb if unrestricted*/ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) continue; tipc_link_set_skb_retransmit_time(skb, l); _skb = pskb_copy(skb, GFP_ATOMIC); if (!_skb) continue; hdr = buf_msg(_skb); msg_set_ack(hdr, ack); msg_set_bcast_ack(hdr, bc_ack); _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; if (!is_uc) r->stats.retransmitted++; *retransmitted = true; /* Increase actual retrans counter & mark first time */ if (!TIPC_SKB_CB(skb)->retr_cnt++) TIPC_SKB_CB(skb)->retr_stamp = jiffies; } else { /* retry with Gap ACK blocks if any */ if (n >= gack_cnt) break; nacked = ntohs(gacks[n].ack); ngap = ntohs(gacks[n].gap); n++; goto next_gap_ack; } } /* Renew last Gap ACK blocks for bc if needed */ if (bc_has_acked) { if (this_ga) { kfree(last_ga); r->last_ga = this_ga; r->last_gap = gap; } else if (last_ga) { if (less(acked, start)) { si--; offset = start - acked - 1; } else if (less(acked, end)) { acked = end; } if (si < last_ga->bgack_cnt) { last_ga->start_index = si; r->last_gap = offset; } else { kfree(last_ga); r->last_ga = NULL; r->last_gap = 0; } } else { r->last_gap = 0; } r->acked = acked; } else { kfree(this_ga); } return qlen - skb_queue_len(&l->transmq); } /* tipc_link_build_state_msg: prepare link state message for transmission * * Note that sending of broadcast ack is coordinated among nodes, to reduce * risk of ack storms towards the sender */ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { if (!l) return 0; /* Broadcast ACK must be sent via a unicast link => defer to caller */ if (link_is_bc_rcvlink(l)) { if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf) return 0; l->rcv_unacked = 0; /* Use snd_nxt to store peer's snd_nxt in broadcast rcv link */ l->snd_nxt = l->rcv_nxt; return TIPC_LINK_SND_STATE; } /* Unicast ACK */ l->rcv_unacked = 0; l->stats.sent_acks++; tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); return 0; } /* tipc_link_build_reset_msg: prepare link RESET or ACTIVATE message */ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { int mtyp = RESET_MSG; struct sk_buff *skb; if (l->state == LINK_ESTABLISHING) mtyp = ACTIVATE_MSG; tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq); /* Inform peer that this endpoint is going down if applicable */ skb = skb_peek_tail(xmitq); if (skb && (l->state == LINK_RESET)) msg_set_peer_stopping(buf_msg(skb), 1); } /* tipc_link_build_nack_msg: prepare link nack message for transmission * Note that sending of broadcast NACK is coordinated among nodes, to * reduce the risk of NACK storms towards the sender */ static int tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { u32 def_cnt = ++l->stats.deferred_recv; struct sk_buff_head *dfq = &l->deferdq; u32 defq_len = skb_queue_len(dfq); int match1, match2; if (link_is_bc_rcvlink(l)) { match1 = def_cnt & 0xf; match2 = tipc_own_addr(l->net) & 0xf; if (match1 == match2) return TIPC_LINK_SND_STATE; return 0; } if (defq_len >= 3 && !((defq_len - 3) % 16)) { u16 rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, rcvgap, 0, 0, xmitq); } return 0; } /* tipc_link_rcv - process TIPC packets/messages arriving from off-node * @l: the link that should handle the message * @skb: TIPC packet * @xmitq: queue to place packets to be sent after this call */ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct sk_buff_head *defq = &l->deferdq; struct tipc_msg *hdr = buf_msg(skb); u16 seqno, rcv_nxt, win_lim; int released = 0; int rc = 0; /* Verify and update link state */ if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) return tipc_link_proto_rcv(l, skb, xmitq); /* Don't send probe at next timeout expiration */ l->silent_intv_cnt = 0; do { hdr = buf_msg(skb); seqno = msg_seqno(hdr); rcv_nxt = l->rcv_nxt; win_lim = rcv_nxt + TIPC_MAX_LINK_WIN; if (unlikely(!tipc_link_is_up(l))) { if (l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; kfree_skb(skb); break; } /* Drop if outside receive window */ if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) { l->stats.duplicates++; kfree_skb(skb); break; } released += tipc_link_advance_transmq(l, l, msg_ack(hdr), 0, NULL, NULL, NULL, NULL); /* Defer delivery if sequence gap */ if (unlikely(seqno != rcv_nxt)) { if (!__tipc_skb_queue_sorted(defq, seqno, skb)) l->stats.duplicates++; rc |= tipc_link_build_nack_msg(l, xmitq); break; } /* Deliver packet */ l->rcv_nxt++; l->stats.recv_pkts++; if (unlikely(msg_user(hdr) == TUNNEL_PROTOCOL)) rc |= tipc_link_tnl_rcv(l, skb, l->inputq); else if (!tipc_data_input(l, skb, l->inputq)) rc |= tipc_link_input(l, skb, l->inputq, &l->reasm_buf); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) rc |= tipc_link_build_state_msg(l, xmitq); if (unlikely(rc & ~TIPC_LINK_SND_STATE)) break; } while ((skb = __tipc_skb_dequeue(defq, l->rcv_nxt))); /* Forward queues and wake up waiting users */ if (released) { tipc_link_update_cwin(l, released, 0); tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(&l->wakeupq))) link_prepare_wakeup(l); } return rc; } static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, bool probe_reply, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq) { struct tipc_mon_state *mstate = &l->mon_state; struct sk_buff_head *dfq = &l->deferdq; struct tipc_link *bcl = l->bc_rcvlink; struct tipc_msg *hdr; struct sk_buff *skb; bool node_up = tipc_link_is_up(bcl); u16 glen = 0, bc_rcvgap = 0; int dlen = 0; void *data; /* Don't send protocol message during reset or link failover */ if (tipc_link_is_blocked(l)) return; if (!tipc_link_is_up(l) && (mtyp == STATE_MSG)) return; if ((probe || probe_reply) && !skb_queue_empty(dfq)) rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, tipc_max_domain_size + MAX_GAP_ACK_BLKS_SZ, l->addr, tipc_own_addr(l->net), 0, 0, 0); if (!skb) return; hdr = buf_msg(skb); data = msg_data(hdr); msg_set_session(hdr, l->session); msg_set_bearer_id(hdr, l->bearer_id); msg_set_net_plane(hdr, l->net_plane); msg_set_next_sent(hdr, l->snd_nxt); msg_set_ack(hdr, l->rcv_nxt - 1); msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); msg_set_bc_ack_invalid(hdr, !node_up); msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); msg_set_link_tolerance(hdr, tolerance); msg_set_linkprio(hdr, priority); msg_set_redundant_link(hdr, node_up); msg_set_seq_gap(hdr, 0); msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2); if (mtyp == STATE_MSG) { if (l->peer_caps & TIPC_LINK_PROTO_SEQNO) msg_set_seqno(hdr, l->snd_nxt_state++); msg_set_seq_gap(hdr, rcvgap); bc_rcvgap = link_bc_rcv_gap(bcl); msg_set_bc_gap(hdr, bc_rcvgap); msg_set_probe(hdr, probe); msg_set_is_keepalive(hdr, probe || probe_reply); if (l->peer_caps & TIPC_GAP_ACK_BLOCK) glen = tipc_build_gap_ack_blks(l, hdr); tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id); msg_set_size(hdr, INT_H_SIZE + glen + dlen); skb_trim(skb, INT_H_SIZE + glen + dlen); l->stats.sent_states++; l->rcv_unacked = 0; } else { /* RESET_MSG or ACTIVATE_MSG */ if (mtyp == ACTIVATE_MSG) { msg_set_dest_session_valid(hdr, 1); msg_set_dest_session(hdr, l->peer_session); } msg_set_max_pkt(hdr, l->advertised_mtu); strcpy(data, l->if_name); msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME); skb_trim(skb, INT_H_SIZE + TIPC_MAX_IF_NAME); } if (probe) l->stats.sent_probes++; if (rcvgap) l->stats.sent_nacks++; if (bc_rcvgap) bcl->stats.sent_nacks++; skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, skb); trace_tipc_proto_build(skb, false, l->name); } void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { u32 onode = tipc_own_addr(l->net); struct tipc_msg *hdr, *ihdr; struct sk_buff_head tnlq; struct sk_buff *skb; u32 dnode = l->addr; __skb_queue_head_init(&tnlq); skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, INT_H_SIZE, BASIC_H_SIZE, dnode, onode, 0, 0, 0); if (!skb) { pr_warn("%sunable to create tunnel packet\n", link_co_err); return; } hdr = buf_msg(skb); msg_set_msgcnt(hdr, 1); msg_set_bearer_id(hdr, l->peer_bearer_id); ihdr = (struct tipc_msg *)msg_data(hdr); tipc_msg_init(onode, ihdr, TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, BASIC_H_SIZE, dnode); msg_set_errcode(ihdr, TIPC_ERR_NO_PORT); __skb_queue_tail(&tnlq, skb); tipc_link_xmit(l, &tnlq, xmitq); } /* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets * with contents of the link's transmit and backlog queues. */ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq) { struct sk_buff *skb, *tnlskb; struct tipc_msg *hdr, tnlhdr; struct sk_buff_head *queue = &l->transmq; struct sk_buff_head tmpxq, tnlq, frags; u16 pktlen, pktcnt, seqno = l->snd_nxt; bool pktcnt_need_update = false; u16 syncpt; int rc; if (!tnl) return; __skb_queue_head_init(&tnlq); /* Link Synching: * From now on, send only one single ("dummy") SYNCH message * to peer. The SYNCH message does not contain any data, just * a header conveying the synch point to the peer. */ if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG, INT_H_SIZE, 0, l->addr, tipc_own_addr(l->net), 0, 0, 0); if (!tnlskb) { pr_warn("%sunable to create dummy SYNCH_MSG\n", link_co_err); return; } hdr = buf_msg(tnlskb); syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1; msg_set_syncpt(hdr, syncpt); msg_set_bearer_id(hdr, l->peer_bearer_id); __skb_queue_tail(&tnlq, tnlskb); tipc_link_xmit(tnl, &tnlq, xmitq); return; } __skb_queue_head_init(&tmpxq); __skb_queue_head_init(&frags); /* At least one packet required for safe algorithm => add dummy */ skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net), 0, 0, TIPC_ERR_NO_PORT); if (!skb) { pr_warn("%sunable to create tunnel packet\n", link_co_err); return; } __skb_queue_tail(&tnlq, skb); tipc_link_xmit(l, &tnlq, &tmpxq); __skb_queue_purge(&tmpxq); /* Initialize reusable tunnel packet header */ tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL, mtyp, INT_H_SIZE, l->addr); if (mtyp == SYNCH_MSG) pktcnt = l->snd_nxt - buf_seqno(skb_peek(&l->transmq)); else pktcnt = skb_queue_len(&l->transmq); pktcnt += skb_queue_len(&l->backlogq); msg_set_msgcnt(&tnlhdr, pktcnt); msg_set_bearer_id(&tnlhdr, l->peer_bearer_id); tnl: /* Wrap each packet into a tunnel packet */ skb_queue_walk(queue, skb) { hdr = buf_msg(skb); if (queue == &l->backlogq) msg_set_seqno(hdr, seqno++); pktlen = msg_size(hdr); /* Tunnel link MTU is not large enough? This could be * due to: * 1) Link MTU has just changed or set differently; * 2) Or FAILOVER on the top of a SYNCH message * * The 2nd case should not happen if peer supports * TIPC_TUNNEL_ENHANCED */ if (pktlen > tnl->mtu - INT_H_SIZE) { if (mtyp == FAILOVER_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu, &frags); if (rc) { pr_warn("%sunable to frag msg: rc %d\n", link_co_err, rc); return; } pktcnt += skb_queue_len(&frags) - 1; pktcnt_need_update = true; skb_queue_splice_tail_init(&frags, &tnlq); continue; } /* Unluckily, peer doesn't have TIPC_TUNNEL_ENHANCED * => Just warn it and return! */ pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n", link_co_err, msg_user(hdr), msg_type(hdr), msg_size(hdr)); return; } msg_set_size(&tnlhdr, pktlen + INT_H_SIZE); tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); if (!tnlskb) { pr_warn("%sunable to send packet\n", link_co_err); return; } skb_copy_to_linear_data(tnlskb, &tnlhdr, INT_H_SIZE); skb_copy_to_linear_data_offset(tnlskb, INT_H_SIZE, hdr, pktlen); __skb_queue_tail(&tnlq, tnlskb); } if (queue != &l->backlogq) { queue = &l->backlogq; goto tnl; } if (pktcnt_need_update) skb_queue_walk(&tnlq, skb) { hdr = buf_msg(skb); msg_set_msgcnt(hdr, pktcnt); } tipc_link_xmit(tnl, &tnlq, xmitq); if (mtyp == FAILOVER_MSG) { struct sk_buff_head *fdefq = &tnl->failover_deferdq; tnl->drop_point = l->rcv_nxt; tnl->failover_reasm_skb = l->reasm_buf; l->reasm_buf = NULL; /* Failover the link's deferdq */ if (unlikely(!skb_queue_empty(fdefq))) { pr_warn("Link failover deferdq not empty: %d!\n", skb_queue_len(fdefq)); __skb_queue_purge(fdefq); } skb_queue_splice_init(&l->deferdq, fdefq); } } /** * tipc_link_failover_prepare() - prepare tnl for link failover * * This is a special version of the precursor - tipc_link_tnl_prepare(), * see the tipc_node_link_failover() for details * * @l: failover link * @tnl: tunnel link * @xmitq: queue for messages to be xmited */ void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl, struct sk_buff_head *xmitq) { struct sk_buff_head *fdefq = &tnl->failover_deferdq; tipc_link_create_dummy_tnl_msg(tnl, xmitq); /* This failover link endpoint was never established before, * so it has not received anything from peer. * Otherwise, it must be a normal failover situation or the * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes * would have to start over from scratch instead. */ tnl->drop_point = 1; tnl->failover_reasm_skb = NULL; /* Initiate the link's failover deferdq */ if (unlikely(!skb_queue_empty(fdefq))) { pr_warn("Link failover deferdq not empty: %d!\n", skb_queue_len(fdefq)); __skb_queue_purge(fdefq); } } /* tipc_link_validate_msg(): validate message against current link state * Returns true if message should be accepted, otherwise false */ bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr) { u16 curr_session = l->peer_session; u16 session = msg_session(hdr); int mtyp = msg_type(hdr); if (msg_user(hdr) != LINK_PROTOCOL) return true; switch (mtyp) { case RESET_MSG: if (!l->in_session) return true; /* Accept only RESET with new session number */ return more(session, curr_session); case ACTIVATE_MSG: if (!l->in_session) return true; /* Accept only ACTIVATE with new or current session number */ return !less(session, curr_session); case STATE_MSG: /* Accept only STATE with current session number */ if (!l->in_session) return false; if (session != curr_session) return false; /* Extra sanity check */ if (!tipc_link_is_up(l) && msg_ack(hdr)) return false; if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO)) return true; /* Accept only STATE with new sequence number */ return !less(msg_seqno(hdr), l->rcv_nxt_state); default: return false; } } /* tipc_link_proto_rcv(): receive link level protocol message : * Note that network plane id propagates through the network, and may * change at any time. The node with lowest numerical id determines * network plane */ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); struct tipc_gap_ack_blks *ga = NULL; bool reply = msg_probe(hdr), retransmitted = false; u32 dlen = msg_data_sz(hdr), glen = 0, msg_max; u16 peers_snd_nxt = msg_next_sent(hdr); u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); u16 gap = msg_seq_gap(hdr); u16 ack = msg_ack(hdr); u16 rcv_nxt = l->rcv_nxt; u16 rcvgap = 0; int mtyp = msg_type(hdr); int rc = 0, released; char *if_name; void *data; trace_tipc_proto_rcv(skb, false, l->name); if (dlen > U16_MAX) goto exit; if (tipc_link_is_blocked(l) || !xmitq) goto exit; if (tipc_own_addr(l->net) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); if (skb_linearize(skb)) goto exit; hdr = buf_msg(skb); data = msg_data(hdr); if (!tipc_link_validate_msg(l, hdr)) { trace_tipc_skb_dump(skb, false, "PROTO invalid (1)!"); trace_tipc_link_dump(l, TIPC_DUMP_NONE, "PROTO invalid (1)!"); goto exit; } switch (mtyp) { case RESET_MSG: case ACTIVATE_MSG: msg_max = msg_max_pkt(hdr); if (msg_max < tipc_bearer_min_mtu(l->net, l->bearer_id)) break; /* Complete own link name with peer's interface name */ if_name = strrchr(l->name, ':') + 1; if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME) break; if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME) break; strncpy(if_name, data, TIPC_MAX_IF_NAME); /* Update own tolerance if peer indicates a non-zero value */ if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; l->bc_rcvlink->tolerance = peers_tol; } /* Update own priority if peer's priority is higher */ if (tipc_in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) l->priority = peers_prio; /* If peer is going down we want full re-establish cycle */ if (msg_peer_stopping(hdr)) { rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); break; } /* If this endpoint was re-created while peer was ESTABLISHING * it doesn't know current session number. Force re-synch. */ if (mtyp == ACTIVATE_MSG && msg_dest_session_valid(hdr) && l->session != msg_dest_session(hdr)) { if (less(l->session, msg_dest_session(hdr))) l->session = msg_dest_session(hdr) + 1; break; } /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ if (mtyp == RESET_MSG || !tipc_link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); /* ACTIVATE_MSG takes up link if it was already locally reset */ if (mtyp == ACTIVATE_MSG && l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; l->peer_session = msg_session(hdr); l->in_session = true; l->peer_bearer_id = msg_bearer_id(hdr); if (l->mtu > msg_max) l->mtu = msg_max; break; case STATE_MSG: /* Validate Gap ACK blocks, drop if invalid */ glen = tipc_get_gap_ack_blks(&ga, l, hdr, true); if (glen > dlen) break; l->rcv_nxt_state = msg_seqno(hdr) + 1; /* Update own tolerance if peer indicates a non-zero value */ if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; l->bc_rcvlink->tolerance = peers_tol; } /* Update own prio if peer indicates a different value */ if ((peers_prio != l->priority) && tipc_in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) { l->priority = peers_prio; rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } l->silent_intv_cnt = 0; l->stats.recv_states++; if (msg_probe(hdr)) l->stats.recv_probes++; if (!tipc_link_is_up(l)) { if (l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; break; } tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr, &l->mon_state, l->bearer_id); /* Send NACK if peer has sent pkts we haven't received yet */ if ((reply || msg_is_keepalive(hdr)) && more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l) && skb_queue_empty(&l->deferdq)) rcvgap = peers_snd_nxt - l->rcv_nxt; if (rcvgap || reply) tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, rcvgap, 0, 0, xmitq); released = tipc_link_advance_transmq(l, l, ack, gap, ga, xmitq, &retransmitted, &rc); if (gap) l->stats.recv_nacks++; if (released || retransmitted) tipc_link_update_cwin(l, released, retransmitted); if (released) tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(&l->wakeupq))) link_prepare_wakeup(l); } exit: kfree_skb(skb); return rc; } /* tipc_link_build_bc_proto_msg() - create broadcast protocol message */ static bool tipc_link_build_bc_proto_msg(struct tipc_link *l, bool bcast, u16 peers_snd_nxt, struct sk_buff_head *xmitq) { struct sk_buff *skb; struct tipc_msg *hdr; struct sk_buff *dfrd_skb = skb_peek(&l->deferdq); u16 ack = l->rcv_nxt - 1; u16 gap_to = peers_snd_nxt - 1; skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, 0, l->addr, tipc_own_addr(l->net), 0, 0, 0); if (!skb) return false; hdr = buf_msg(skb); msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); msg_set_bcast_ack(hdr, ack); msg_set_bcgap_after(hdr, ack); if (dfrd_skb) gap_to = buf_seqno(dfrd_skb) - 1; msg_set_bcgap_to(hdr, gap_to); msg_set_non_seq(hdr, bcast); __skb_queue_tail(xmitq, skb); return true; } /* tipc_link_build_bc_init_msg() - synchronize broadcast link endpoints. * * Give a newly added peer node the sequence number where it should * start receiving and acking broadcast packets. */ static void tipc_link_build_bc_init_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { struct sk_buff_head list; __skb_queue_head_init(&list); if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list)) return; msg_set_bc_ack_invalid(buf_msg(skb_peek(&list)), true); tipc_link_xmit(l, &list, xmitq); } /* tipc_link_bc_init_rcv - receive initial broadcast synch data from peer */ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) { int mtyp = msg_type(hdr); u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); if (tipc_link_is_up(l)) return; if (msg_user(hdr) == BCAST_PROTOCOL) { l->rcv_nxt = peers_snd_nxt; l->state = LINK_ESTABLISHED; return; } if (l->peer_caps & TIPC_BCAST_SYNCH) return; if (msg_peer_node_is_up(hdr)) return; /* Compatibility: accept older, less safe initial synch data */ if ((mtyp == RESET_MSG) || (mtyp == ACTIVATE_MSG)) l->rcv_nxt = peers_snd_nxt; } /* tipc_link_bc_sync_rcv - update rcv link according to peer's send state */ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, struct sk_buff_head *xmitq) { u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); int rc = 0; if (!tipc_link_is_up(l)) return rc; if (!msg_peer_node_is_up(hdr)) return rc; /* Open when peer acknowledges our bcast init msg (pkt #1) */ if (msg_ack(hdr)) l->bc_peer_is_up = true; if (!l->bc_peer_is_up) return rc; /* Ignore if peers_snd_nxt goes beyond receive window */ if (more(peers_snd_nxt, l->rcv_nxt + l->window)) return rc; l->snd_nxt = peers_snd_nxt; if (link_bc_rcv_gap(l)) rc |= TIPC_LINK_SND_STATE; /* Return now if sender supports nack via STATE messages */ if (l->peer_caps & TIPC_BCAST_STATE_NACK) return rc; /* Otherwise, be backwards compatible */ if (!more(peers_snd_nxt, l->rcv_nxt)) { l->nack_state = BC_NACK_SND_CONDITIONAL; return 0; } /* Don't NACK if one was recently sent or peeked */ if (l->nack_state == BC_NACK_SND_SUPPRESS) { l->nack_state = BC_NACK_SND_UNCONDITIONAL; return 0; } /* Conditionally delay NACK sending until next synch rcv */ if (l->nack_state == BC_NACK_SND_CONDITIONAL) { l->nack_state = BC_NACK_SND_UNCONDITIONAL; if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN) return 0; } /* Send NACK now but suppress next one */ tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq); l->nack_state = BC_NACK_SND_SUPPRESS; return 0; } int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap, struct tipc_gap_ack_blks *ga, struct sk_buff_head *xmitq, struct sk_buff_head *retrq) { struct tipc_link *l = r->bc_sndlink; bool unused = false; int rc = 0; if (!tipc_link_is_up(r) || !r->bc_peer_is_up) return 0; if (gap) { l->stats.recv_nacks++; r->stats.recv_nacks++; } if (less(acked, r->acked) || (acked == r->acked && !gap && !ga)) return 0; trace_tipc_link_bc_ack(r, acked, gap, &l->transmq); tipc_link_advance_transmq(l, r, acked, gap, ga, retrq, &unused, &rc); tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(&l->wakeupq))) link_prepare_wakeup(l); return rc; } /* tipc_link_bc_nack_rcv(): receive broadcast nack message * This function is here for backwards compatibility, since * no BCAST_PROTOCOL/STATE messages occur from TIPC v2.5. */ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); u32 dnode = msg_destnode(hdr); int mtyp = msg_type(hdr); u16 acked = msg_bcast_ack(hdr); u16 from = acked + 1; u16 to = msg_bcgap_to(hdr); u16 peers_snd_nxt = to + 1; int rc = 0; kfree_skb(skb); if (!tipc_link_is_up(l) || !l->bc_peer_is_up) return 0; if (mtyp != STATE_MSG) return 0; if (dnode == tipc_own_addr(l->net)) { rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq, xmitq); l->stats.recv_nacks++; return rc; } /* Msg for other node => suppress own NACK at next sync if applicable */ if (more(peers_snd_nxt, l->rcv_nxt) && !less(l->rcv_nxt, from)) l->nack_state = BC_NACK_SND_SUPPRESS; return 0; } void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win) { int max_bulk = TIPC_MAX_PUBL / (l->mtu / ITEM_SIZE); l->min_win = min_win; l->ssthresh = max_win; l->max_win = max_win; l->window = min_win; l->backlog[TIPC_LOW_IMPORTANCE].limit = min_win * 2; l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = min_win * 4; l->backlog[TIPC_HIGH_IMPORTANCE].limit = min_win * 6; l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = min_win * 8; l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } /** * tipc_link_reset_stats - reset link statistics * @l: pointer to link */ void tipc_link_reset_stats(struct tipc_link *l) { memset(&l->stats, 0, sizeof(l->stats)); } static void link_print(struct tipc_link *l, const char *str) { struct sk_buff *hskb = skb_peek(&l->transmq); u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt - 1; u16 tail = l->snd_nxt - 1; pr_info("%s Link <%s> state %x\n", str, l->name, l->state); pr_info("XMTQ: %u [%u-%u], BKLGQ: %u, SNDNX: %u, RCVNX: %u\n", skb_queue_len(&l->transmq), head, tail, skb_queue_len(&l->backlogq), l->snd_nxt, l->rcv_nxt); } /* Parse and validate nested (link) properties valid for media, bearer and link */ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) { int err; err = nla_parse_nested_deprecated(props, TIPC_NLA_PROP_MAX, prop, tipc_nl_prop_policy, NULL); if (err) return err; if (props[TIPC_NLA_PROP_PRIO]) { u32 prio; prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (prio > TIPC_MAX_LINK_PRI) return -EINVAL; } if (props[TIPC_NLA_PROP_TOL]) { u32 tol; tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL)) return -EINVAL; } if (props[TIPC_NLA_PROP_WIN]) { u32 max_win; max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (max_win < TIPC_DEF_LINK_WIN || max_win > TIPC_MAX_LINK_WIN) return -EINVAL; } return 0; } static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) { int i; struct nlattr *stats; struct nla_map { u32 key; u32 val; }; struct nla_map map[] = { {TIPC_NLA_STATS_RX_INFO, 0}, {TIPC_NLA_STATS_RX_FRAGMENTS, s->recv_fragments}, {TIPC_NLA_STATS_RX_FRAGMENTED, s->recv_fragmented}, {TIPC_NLA_STATS_RX_BUNDLES, s->recv_bundles}, {TIPC_NLA_STATS_RX_BUNDLED, s->recv_bundled}, {TIPC_NLA_STATS_TX_INFO, 0}, {TIPC_NLA_STATS_TX_FRAGMENTS, s->sent_fragments}, {TIPC_NLA_STATS_TX_FRAGMENTED, s->sent_fragmented}, {TIPC_NLA_STATS_TX_BUNDLES, s->sent_bundles}, {TIPC_NLA_STATS_TX_BUNDLED, s->sent_bundled}, {TIPC_NLA_STATS_MSG_PROF_TOT, (s->msg_length_counts) ? s->msg_length_counts : 1}, {TIPC_NLA_STATS_MSG_LEN_CNT, s->msg_length_counts}, {TIPC_NLA_STATS_MSG_LEN_TOT, s->msg_lengths_total}, {TIPC_NLA_STATS_MSG_LEN_P0, s->msg_length_profile[0]}, {TIPC_NLA_STATS_MSG_LEN_P1, s->msg_length_profile[1]}, {TIPC_NLA_STATS_MSG_LEN_P2, s->msg_length_profile[2]}, {TIPC_NLA_STATS_MSG_LEN_P3, s->msg_length_profile[3]}, {TIPC_NLA_STATS_MSG_LEN_P4, s->msg_length_profile[4]}, {TIPC_NLA_STATS_MSG_LEN_P5, s->msg_length_profile[5]}, {TIPC_NLA_STATS_MSG_LEN_P6, s->msg_length_profile[6]}, {TIPC_NLA_STATS_RX_STATES, s->recv_states}, {TIPC_NLA_STATS_RX_PROBES, s->recv_probes}, {TIPC_NLA_STATS_RX_NACKS, s->recv_nacks}, {TIPC_NLA_STATS_RX_DEFERRED, s->deferred_recv}, {TIPC_NLA_STATS_TX_STATES, s->sent_states}, {TIPC_NLA_STATS_TX_PROBES, s->sent_probes}, {TIPC_NLA_STATS_TX_NACKS, s->sent_nacks}, {TIPC_NLA_STATS_TX_ACKS, s->sent_acks}, {TIPC_NLA_STATS_RETRANSMITTED, s->retransmitted}, {TIPC_NLA_STATS_DUPLICATES, s->duplicates}, {TIPC_NLA_STATS_LINK_CONGS, s->link_congs}, {TIPC_NLA_STATS_MAX_QUEUE, s->max_queue_sz}, {TIPC_NLA_STATS_AVG_QUEUE, s->queue_sz_counts ? (s->accu_queue_sz / s->queue_sz_counts) : 0} }; stats = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS); if (!stats) return -EMSGSIZE; for (i = 0; i < ARRAY_SIZE(map); i++) if (nla_put_u32(skb, map[i].key, map[i].val)) goto msg_full; nla_nest_end(skb, stats); return 0; msg_full: nla_nest_cancel(skb, stats); return -EMSGSIZE; } /* Caller should hold appropriate locks to protect the link */ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, struct tipc_link *link, int nlflags) { u32 self = tipc_own_addr(net); struct nlattr *attrs; struct nlattr *prop; void *hdr; int err; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, nlflags, TIPC_NL_LINK_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, tipc_cluster_mask(self))) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->stats.recv_pkts)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->stats.sent_pkts)) goto attr_msg_full; if (tipc_link_is_up(link)) if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) goto attr_msg_full; if (link->active) if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE)) goto attr_msg_full; prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, link->tolerance)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, link->window)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) goto prop_msg_full; nla_nest_end(msg->skb, prop); err = __tipc_nl_add_stats(msg->skb, &link->stats); if (err) goto attr_msg_full; nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; prop_msg_full: nla_nest_cancel(msg->skb, prop); attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, struct tipc_stats *stats) { int i; struct nlattr *nest; struct nla_map { __u32 key; __u32 val; }; struct nla_map map[] = { {TIPC_NLA_STATS_RX_INFO, stats->recv_pkts}, {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, {TIPC_NLA_STATS_TX_INFO, stats->sent_pkts}, {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks}, {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted}, {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ? (stats->accu_queue_sz / stats->queue_sz_counts) : 0} }; nest = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS); if (!nest) return -EMSGSIZE; for (i = 0; i < ARRAY_SIZE(map); i++) if (nla_put_u32(skb, map[i].key, map[i].val)) goto msg_full; nla_nest_end(skb, nest); return 0; msg_full: nla_nest_cancel(skb, nest); return -EMSGSIZE; } int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, struct tipc_link *bcl) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; u32 bc_mode = tipc_bcast_get_mode(net); u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net); if (!bcl) return 0; tipc_bcast_lock(net); hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); if (!hdr) { tipc_bcast_unlock(net); return -EMSGSIZE; } attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK); if (!attrs) goto msg_full; /* The broadcast link is always up */ if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) goto attr_msg_full; if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST)) goto attr_msg_full; if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, 0)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, 0)) goto attr_msg_full; prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->max_win)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode)) goto prop_msg_full; if (bc_mode & BCLINK_MODE_SEL) if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST_RATIO, bc_ratio)) goto prop_msg_full; nla_nest_end(msg->skb, prop); err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); if (err) goto attr_msg_full; tipc_bcast_unlock(net); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; prop_msg_full: nla_nest_cancel(msg->skb, prop); attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: tipc_bcast_unlock(net); genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, struct sk_buff_head *xmitq) { l->tolerance = tol; if (l->bc_rcvlink) l->bc_rcvlink->tolerance = tol; if (tipc_link_is_up(l)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); } void tipc_link_set_prio(struct tipc_link *l, u32 prio, struct sk_buff_head *xmitq) { l->priority = prio; tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq); } void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) { l->abort_limit = limit; } /** * tipc_link_dump - dump TIPC link data * @l: tipc link to be dumped * @dqueues: bitmask to decide if any link queue to be dumped? * - TIPC_DUMP_NONE: don't dump link queues * - TIPC_DUMP_TRANSMQ: dump link transmq queue * - TIPC_DUMP_BACKLOGQ: dump link backlog queue * - TIPC_DUMP_DEFERDQ: dump link deferd queue * - TIPC_DUMP_INPUTQ: dump link input queue * - TIPC_DUMP_WAKEUP: dump link wakeup queue * - TIPC_DUMP_ALL: dump all the link queues above * @buf: returned buffer of dump data in format */ int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf) { int i = 0; size_t sz = (dqueues) ? LINK_LMAX : LINK_LMIN; struct sk_buff_head *list; struct sk_buff *hskb, *tskb; u32 len; if (!l) { i += scnprintf(buf, sz, "link data: (null)\n"); return i; } i += scnprintf(buf, sz, "link data: %x", l->addr); i += scnprintf(buf + i, sz - i, " %x", l->state); i += scnprintf(buf + i, sz - i, " %u", l->in_session); i += scnprintf(buf + i, sz - i, " %u", l->session); i += scnprintf(buf + i, sz - i, " %u", l->peer_session); i += scnprintf(buf + i, sz - i, " %u", l->snd_nxt); i += scnprintf(buf + i, sz - i, " %u", l->rcv_nxt); i += scnprintf(buf + i, sz - i, " %u", l->snd_nxt_state); i += scnprintf(buf + i, sz - i, " %u", l->rcv_nxt_state); i += scnprintf(buf + i, sz - i, " %x", l->peer_caps); i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt); i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt); i += scnprintf(buf + i, sz - i, " %u", 0); i += scnprintf(buf + i, sz - i, " %u", 0); i += scnprintf(buf + i, sz - i, " %u", l->acked); list = &l->transmq; len = skb_queue_len(list); hskb = skb_peek(list); tskb = skb_peek_tail(list); i += scnprintf(buf + i, sz - i, " | %u %u %u", len, (hskb) ? msg_seqno(buf_msg(hskb)) : 0, (tskb) ? msg_seqno(buf_msg(tskb)) : 0); list = &l->deferdq; len = skb_queue_len(list); hskb = skb_peek(list); tskb = skb_peek_tail(list); i += scnprintf(buf + i, sz - i, " | %u %u %u", len, (hskb) ? msg_seqno(buf_msg(hskb)) : 0, (tskb) ? msg_seqno(buf_msg(tskb)) : 0); list = &l->backlogq; len = skb_queue_len(list); hskb = skb_peek(list); tskb = skb_peek_tail(list); i += scnprintf(buf + i, sz - i, " | %u %u %u", len, (hskb) ? msg_seqno(buf_msg(hskb)) : 0, (tskb) ? msg_seqno(buf_msg(tskb)) : 0); list = l->inputq; len = skb_queue_len(list); hskb = skb_peek(list); tskb = skb_peek_tail(list); i += scnprintf(buf + i, sz - i, " | %u %u %u\n", len, (hskb) ? msg_seqno(buf_msg(hskb)) : 0, (tskb) ? msg_seqno(buf_msg(tskb)) : 0); if (dqueues & TIPC_DUMP_TRANSMQ) { i += scnprintf(buf + i, sz - i, "transmq: "); i += tipc_list_dump(&l->transmq, false, buf + i); } if (dqueues & TIPC_DUMP_BACKLOGQ) { i += scnprintf(buf + i, sz - i, "backlogq: <%u %u %u %u %u>, ", l->backlog[TIPC_LOW_IMPORTANCE].len, l->backlog[TIPC_MEDIUM_IMPORTANCE].len, l->backlog[TIPC_HIGH_IMPORTANCE].len, l->backlog[TIPC_CRITICAL_IMPORTANCE].len, l->backlog[TIPC_SYSTEM_IMPORTANCE].len); i += tipc_list_dump(&l->backlogq, false, buf + i); } if (dqueues & TIPC_DUMP_DEFERDQ) { i += scnprintf(buf + i, sz - i, "deferdq: "); i += tipc_list_dump(&l->deferdq, false, buf + i); } if (dqueues & TIPC_DUMP_INPUTQ) { i += scnprintf(buf + i, sz - i, "inputq: "); i += tipc_list_dump(l->inputq, false, buf + i); } if (dqueues & TIPC_DUMP_WAKEUP) { i += scnprintf(buf + i, sz - i, "wakeup: "); i += tipc_list_dump(&l->wakeupq, false, buf + i); } return i; }
10 10 10 3 3 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 // SPDX-License-Identifier: GPL-2.0-or-later /* * kernel/stop_machine.c * * Copyright (C) 2008, 2005 IBM Corporation. * Copyright (C) 2008, 2005 Rusty Russell rusty@rustcorp.com.au * Copyright (C) 2010 SUSE Linux Products GmbH * Copyright (C) 2010 Tejun Heo <tj@kernel.org> */ #include <linux/compiler.h> #include <linux/completion.h> #include <linux/cpu.h> #include <linux/init.h> #include <linux/kthread.h> #include <linux/export.h> #include <linux/percpu.h> #include <linux/sched.h> #include <linux/stop_machine.h> #include <linux/interrupt.h> #include <linux/kallsyms.h> #include <linux/smpboot.h> #include <linux/atomic.h> #include <linux/nmi.h> #include <linux/sched/wake_q.h> /* * Structure to determine completion condition and record errors. May * be shared by works on different cpus. */ struct cpu_stop_done { atomic_t nr_todo; /* nr left to execute */ int ret; /* collected return value */ struct completion completion; /* fired if nr_todo reaches 0 */ }; /* the actual stopper, one per every possible cpu, enabled on online cpus */ struct cpu_stopper { struct task_struct *thread; raw_spinlock_t lock; bool enabled; /* is this stopper enabled? */ struct list_head works; /* list of pending works */ struct cpu_stop_work stop_work; /* for stop_cpus */ unsigned long caller; cpu_stop_fn_t fn; }; static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); static bool stop_machine_initialized = false; void print_stop_info(const char *log_lvl, struct task_struct *task) { /* * If @task is a stopper task, it cannot migrate and task_cpu() is * stable. */ struct cpu_stopper *stopper = per_cpu_ptr(&cpu_stopper, task_cpu(task)); if (task != stopper->thread) return; printk("%sStopper: %pS <- %pS\n", log_lvl, stopper->fn, (void *)stopper->caller); } /* static data for stop_cpus */ static DEFINE_MUTEX(stop_cpus_mutex); static bool stop_cpus_in_progress; static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) { memset(done, 0, sizeof(*done)); atomic_set(&done->nr_todo, nr_todo); init_completion(&done->completion); } /* signal completion unless @done is NULL */ static void cpu_stop_signal_done(struct cpu_stop_done *done) { if (atomic_dec_and_test(&done->nr_todo)) complete(&done->completion); } static void __cpu_stop_queue_work(struct cpu_stopper *stopper, struct cpu_stop_work *work, struct wake_q_head *wakeq) { list_add_tail(&work->list, &stopper->works); wake_q_add(wakeq, stopper->thread); } /* queue @work to @stopper. if offline, @work is completed immediately */ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); DEFINE_WAKE_Q(wakeq); unsigned long flags; bool enabled; preempt_disable(); raw_spin_lock_irqsave(&stopper->lock, flags); enabled = stopper->enabled; if (enabled) __cpu_stop_queue_work(stopper, work, &wakeq); else if (work->done) cpu_stop_signal_done(work->done); raw_spin_unlock_irqrestore(&stopper->lock, flags); wake_up_q(&wakeq); preempt_enable(); return enabled; } /** * stop_one_cpu - stop a cpu * @cpu: cpu to stop * @fn: function to execute * @arg: argument to @fn * * Execute @fn(@arg) on @cpu. @fn is run in a process context with * the highest priority preempting any task on the cpu and * monopolizing it. This function returns after the execution is * complete. * * This function doesn't guarantee @cpu stays online till @fn * completes. If @cpu goes down in the middle, execution may happen * partially or fully on different cpus. @fn should either be ready * for that or the caller should ensure that @cpu stays online until * this function completes. * * CONTEXT: * Might sleep. * * RETURNS: * -ENOENT if @fn(@arg) was not executed because @cpu was offline; * otherwise, the return value of @fn. */ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) { struct cpu_stop_done done; struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done, .caller = _RET_IP_ }; cpu_stop_init_done(&done, 1); if (!cpu_stop_queue_work(cpu, &work)) return -ENOENT; /* * In case @cpu == smp_proccessor_id() we can avoid a sleep+wakeup * cycle by doing a preemption: */ cond_resched(); wait_for_completion(&done.completion); return done.ret; } /* This controls the threads on each CPU. */ enum multi_stop_state { /* Dummy starting state for thread. */ MULTI_STOP_NONE, /* Awaiting everyone to be scheduled. */ MULTI_STOP_PREPARE, /* Disable interrupts. */ MULTI_STOP_DISABLE_IRQ, /* Run the function */ MULTI_STOP_RUN, /* Exit */ MULTI_STOP_EXIT, }; struct multi_stop_data { cpu_stop_fn_t fn; void *data; /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ unsigned int num_threads; const struct cpumask *active_cpus; enum multi_stop_state state; atomic_t thread_ack; }; static void set_state(struct multi_stop_data *msdata, enum multi_stop_state newstate) { /* Reset ack counter. */ atomic_set(&msdata->thread_ack, msdata->num_threads); smp_wmb(); WRITE_ONCE(msdata->state, newstate); } /* Last one to ack a state moves to the next state. */ static void ack_state(struct multi_stop_data *msdata) { if (atomic_dec_and_test(&msdata->thread_ack)) set_state(msdata, msdata->state + 1); } notrace void __weak stop_machine_yield(const struct cpumask *cpumask) { cpu_relax(); } /* This is the cpu_stop function which stops the CPU. */ static int multi_cpu_stop(void *data) { struct multi_stop_data *msdata = data; enum multi_stop_state newstate, curstate = MULTI_STOP_NONE; int cpu = smp_processor_id(), err = 0; const struct cpumask *cpumask; unsigned long flags; bool is_active; /* * When called from stop_machine_from_inactive_cpu(), irq might * already be disabled. Save the state and restore it on exit. */ local_save_flags(flags); if (!msdata->active_cpus) { cpumask = cpu_online_mask; is_active = cpu == cpumask_first(cpumask); } else { cpumask = msdata->active_cpus; is_active = cpumask_test_cpu(cpu, cpumask); } /* Simple state machine */ do { /* Chill out and ensure we re-read multi_stop_state. */ stop_machine_yield(cpumask); newstate = READ_ONCE(msdata->state); if (newstate != curstate) { curstate = newstate; switch (curstate) { case MULTI_STOP_DISABLE_IRQ: local_irq_disable(); hard_irq_disable(); break; case MULTI_STOP_RUN: if (is_active) err = msdata->fn(msdata->data); break; default: break; } ack_state(msdata); } else if (curstate > MULTI_STOP_PREPARE) { /* * At this stage all other CPUs we depend on must spin * in the same loop. Any reason for hard-lockup should * be detected and reported on their side. */ touch_nmi_watchdog(); rcu_momentary_eqs(); } } while (curstate != MULTI_STOP_EXIT); local_irq_restore(flags); return err; } static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, int cpu2, struct cpu_stop_work *work2) { struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1); struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); DEFINE_WAKE_Q(wakeq); int err; retry: /* * The waking up of stopper threads has to happen in the same * scheduling context as the queueing. Otherwise, there is a * possibility of one of the above stoppers being woken up by another * CPU, and preempting us. This will cause us to not wake up the other * stopper forever. */ preempt_disable(); raw_spin_lock_irq(&stopper1->lock); raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); if (!stopper1->enabled || !stopper2->enabled) { err = -ENOENT; goto unlock; } /* * Ensure that if we race with __stop_cpus() the stoppers won't get * queued up in reverse order leading to system deadlock. * * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has * queued a work on cpu1 but not on cpu2, we hold both locks. * * It can be falsely true but it is safe to spin until it is cleared, * queue_stop_cpus_work() does everything under preempt_disable(). */ if (unlikely(stop_cpus_in_progress)) { err = -EDEADLK; goto unlock; } err = 0; __cpu_stop_queue_work(stopper1, work1, &wakeq); __cpu_stop_queue_work(stopper2, work2, &wakeq); unlock: raw_spin_unlock(&stopper2->lock); raw_spin_unlock_irq(&stopper1->lock); if (unlikely(err == -EDEADLK)) { preempt_enable(); while (stop_cpus_in_progress) cpu_relax(); goto retry; } wake_up_q(&wakeq); preempt_enable(); return err; } /** * stop_two_cpus - stops two cpus * @cpu1: the cpu to stop * @cpu2: the other cpu to stop * @fn: function to execute * @arg: argument to @fn * * Stops both the current and specified CPU and runs @fn on one of them. * * returns when both are completed. */ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg) { struct cpu_stop_done done; struct cpu_stop_work work1, work2; struct multi_stop_data msdata; msdata = (struct multi_stop_data){ .fn = fn, .data = arg, .num_threads = 2, .active_cpus = cpumask_of(cpu1), }; work1 = work2 = (struct cpu_stop_work){ .fn = multi_cpu_stop, .arg = &msdata, .done = &done, .caller = _RET_IP_, }; cpu_stop_init_done(&done, 2); set_state(&msdata, MULTI_STOP_PREPARE); if (cpu1 > cpu2) swap(cpu1, cpu2); if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2)) return -ENOENT; wait_for_completion(&done.completion); return done.ret; } /** * stop_one_cpu_nowait - stop a cpu but don't wait for completion * @cpu: cpu to stop * @fn: function to execute * @arg: argument to @fn * @work_buf: pointer to cpu_stop_work structure * * Similar to stop_one_cpu() but doesn't wait for completion. The * caller is responsible for ensuring @work_buf is currently unused * and will remain untouched until stopper starts executing @fn. * * CONTEXT: * Don't care. * * RETURNS: * true if cpu_stop_work was queued successfully and @fn will be called, * false otherwise. */ bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, struct cpu_stop_work *work_buf) { *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, .caller = _RET_IP_, }; return cpu_stop_queue_work(cpu, work_buf); } static bool queue_stop_cpus_work(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg, struct cpu_stop_done *done) { struct cpu_stop_work *work; unsigned int cpu; bool queued = false; /* * Disable preemption while queueing to avoid getting * preempted by a stopper which might wait for other stoppers * to enter @fn which can lead to deadlock. */ preempt_disable(); stop_cpus_in_progress = true; barrier(); for_each_cpu(cpu, cpumask) { work = &per_cpu(cpu_stopper.stop_work, cpu); work->fn = fn; work->arg = arg; work->done = done; work->caller = _RET_IP_; if (cpu_stop_queue_work(cpu, work)) queued = true; } barrier(); stop_cpus_in_progress = false; preempt_enable(); return queued; } static int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) { struct cpu_stop_done done; cpu_stop_init_done(&done, cpumask_weight(cpumask)); if (!queue_stop_cpus_work(cpumask, fn, arg, &done)) return -ENOENT; wait_for_completion(&done.completion); return done.ret; } /** * stop_cpus - stop multiple cpus * @cpumask: cpus to stop * @fn: function to execute * @arg: argument to @fn * * Execute @fn(@arg) on online cpus in @cpumask. On each target cpu, * @fn is run in a process context with the highest priority * preempting any task on the cpu and monopolizing it. This function * returns after all executions are complete. * * This function doesn't guarantee the cpus in @cpumask stay online * till @fn completes. If some cpus go down in the middle, execution * on the cpu may happen partially or fully on different cpus. @fn * should either be ready for that or the caller should ensure that * the cpus stay online until this function completes. * * All stop_cpus() calls are serialized making it safe for @fn to wait * for all cpus to start executing it. * * CONTEXT: * Might sleep. * * RETURNS: * -ENOENT if @fn(@arg) was not executed at all because all cpus in * @cpumask were offline; otherwise, 0 if all executions of @fn * returned 0, any non zero return value if any returned non zero. */ static int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) { int ret; /* static works are used, process one request at a time */ mutex_lock(&stop_cpus_mutex); ret = __stop_cpus(cpumask, fn, arg); mutex_unlock(&stop_cpus_mutex); return ret; } static int cpu_stop_should_run(unsigned int cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); unsigned long flags; int run; raw_spin_lock_irqsave(&stopper->lock, flags); run = !list_empty(&stopper->works); raw_spin_unlock_irqrestore(&stopper->lock, flags); return run; } static void cpu_stopper_thread(unsigned int cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); struct cpu_stop_work *work; repeat: work = NULL; raw_spin_lock_irq(&stopper->lock); if (!list_empty(&stopper->works)) { work = list_first_entry(&stopper->works, struct cpu_stop_work, list); list_del_init(&work->list); } raw_spin_unlock_irq(&stopper->lock); if (work) { cpu_stop_fn_t fn = work->fn; void *arg = work->arg; struct cpu_stop_done *done = work->done; int ret; /* cpu stop callbacks must not sleep, make in_atomic() == T */ stopper->caller = work->caller; stopper->fn = fn; preempt_count_inc(); ret = fn(arg); if (done) { if (ret) done->ret = ret; cpu_stop_signal_done(done); } preempt_count_dec(); stopper->fn = NULL; stopper->caller = 0; WARN_ONCE(preempt_count(), "cpu_stop: %ps(%p) leaked preempt count\n", fn, arg); goto repeat; } } void stop_machine_park(int cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); /* * Lockless. cpu_stopper_thread() will take stopper->lock and flush * the pending works before it parks, until then it is fine to queue * the new works. */ stopper->enabled = false; kthread_park(stopper->thread); } static void cpu_stop_create(unsigned int cpu) { sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu)); } static void cpu_stop_park(unsigned int cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); WARN_ON(!list_empty(&stopper->works)); } void stop_machine_unpark(int cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); stopper->enabled = true; kthread_unpark(stopper->thread); } static struct smp_hotplug_thread cpu_stop_threads = { .store = &cpu_stopper.thread, .thread_should_run = cpu_stop_should_run, .thread_fn = cpu_stopper_thread, .thread_comm = "migration/%u", .create = cpu_stop_create, .park = cpu_stop_park, .selfparking = true, }; static int __init cpu_stop_init(void) { unsigned int cpu; for_each_possible_cpu(cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); raw_spin_lock_init(&stopper->lock); INIT_LIST_HEAD(&stopper->works); } BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads)); stop_machine_unpark(raw_smp_processor_id()); stop_machine_initialized = true; return 0; } early_initcall(cpu_stop_init); int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { struct multi_stop_data msdata = { .fn = fn, .data = data, .num_threads = num_online_cpus(), .active_cpus = cpus, }; lockdep_assert_cpus_held(); if (!stop_machine_initialized) { /* * Handle the case where stop_machine() is called * early in boot before stop_machine() has been * initialized. */ unsigned long flags; int ret; WARN_ON_ONCE(msdata.num_threads != 1); local_irq_save(flags); hard_irq_disable(); ret = (*fn)(data); local_irq_restore(flags); return ret; } /* Set the initial state and stop all online cpus. */ set_state(&msdata, MULTI_STOP_PREPARE); return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata); } int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { int ret; /* No CPUs can come up or down during this. */ cpus_read_lock(); ret = stop_machine_cpuslocked(fn, data, cpus); cpus_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(stop_machine); #ifdef CONFIG_SCHED_SMT int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data) { const struct cpumask *smt_mask = cpu_smt_mask(cpu); struct multi_stop_data msdata = { .fn = fn, .data = data, .num_threads = cpumask_weight(smt_mask), .active_cpus = smt_mask, }; lockdep_assert_cpus_held(); /* Set the initial state and stop all online cpus. */ set_state(&msdata, MULTI_STOP_PREPARE); return stop_cpus(smt_mask, multi_cpu_stop, &msdata); } EXPORT_SYMBOL_GPL(stop_core_cpuslocked); #endif /** * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU * @fn: the function to run * @data: the data ptr for the @fn() * @cpus: the cpus to run the @fn() on (NULL = any online cpu) * * This is identical to stop_machine() but can be called from a CPU which * is not active. The local CPU is in the process of hotplug (so no other * CPU hotplug can start) and not marked active and doesn't have enough * context to sleep. * * This function provides stop_machine() functionality for such state by * using busy-wait for synchronization and executing @fn directly for local * CPU. * * CONTEXT: * Local CPU is inactive. Temporarily stops all active CPUs. * * RETURNS: * 0 if all executions of @fn returned 0, any non zero return value if any * returned non zero. */ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { struct multi_stop_data msdata = { .fn = fn, .data = data, .active_cpus = cpus }; struct cpu_stop_done done; int ret; /* Local CPU must be inactive and CPU hotplug in progress. */ BUG_ON(cpu_active(raw_smp_processor_id())); msdata.num_threads = num_active_cpus() + 1; /* +1 for local */ /* No proper task established and can't sleep - busy wait for lock. */ while (!mutex_trylock(&stop_cpus_mutex)) cpu_relax(); /* Schedule work on other CPUs and execute directly for local CPU */ set_state(&msdata, MULTI_STOP_PREPARE); cpu_stop_init_done(&done, num_active_cpus()); queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata, &done); ret = multi_cpu_stop(&msdata); /* Busy wait for completion. */ while (!completion_done(&done.completion)) cpu_relax(); mutex_unlock(&stop_cpus_mutex); return ret ?: done.ret; }
5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 // SPDX-License-Identifier: GPL-2.0-only /* * sd.c Copyright (C) 1992 Drew Eckhardt * Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale * * Linux scsi disk driver * Initial versions: Drew Eckhardt * Subsequent revisions: Eric Youngdale * Modification history: * - Drew Eckhardt <drew@colorado.edu> original * - Eric Youngdale <eric@andante.org> add scatter-gather, multiple * outstanding request, and other enhancements. * Support loadable low-level scsi drivers. * - Jirka Hanika <geo@ff.cuni.cz> support more scsi disks using * eight major numbers. * - Richard Gooch <rgooch@atnf.csiro.au> support devfs. * - Torben Mathiasen <tmm@image.dk> Resource allocation fixes in * sd_init and cleanups. * - Alex Davis <letmein@erols.com> Fix problem where partition info * not being read in sd_open. Fix problem where removable media * could be ejected after sd_open. * - Douglas Gilbert <dgilbert@interlog.com> cleanup for lk 2.5.x * - Badari Pulavarty <pbadari@us.ibm.com>, Matthew Wilcox * <willy@debian.org>, Kurt Garloff <garloff@suse.de>: * Support 32k/1M disks. * * Logging policy (needs CONFIG_SCSI_LOGGING defined): * - setting up transfer: SCSI_LOG_HLQUEUE levels 1 and 2 * - end of transfer (bh + scsi_lib): SCSI_LOG_HLCOMPLETE level 1 * - entering sd_ioctl: SCSI_LOG_IOCTL level 1 * - entering other commands: SCSI_LOG_HLQUEUE level 3 * Note: when the logging level is set by the user, it must be greater * than the level indicated above to trigger output. */ #include <linux/bio-integrity.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/hdreg.h> #include <linux/errno.h> #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/blkpg.h> #include <linux/blk-pm.h> #include <linux/delay.h> #include <linux/rw_hint.h> #include <linux/major.h> #include <linux/mutex.h> #include <linux/string_helpers.h> #include <linux/slab.h> #include <linux/sed-opal.h> #include <linux/pm_runtime.h> #include <linux/pr.h> #include <linux/t10-pi.h> #include <linux/uaccess.h> #include <linux/unaligned.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_dbg.h> #include <scsi/scsi_device.h> #include <scsi/scsi_devinfo.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_host.h> #include <scsi/scsi_ioctl.h> #include <scsi/scsicam.h> #include <scsi/scsi_common.h> #include "sd.h" #include "scsi_priv.h" #include "scsi_logging.h" MODULE_AUTHOR("Eric Youngdale"); MODULE_DESCRIPTION("SCSI disk (sd) driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK0_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK1_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK2_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK3_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK4_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK5_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK6_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK7_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK8_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK9_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK10_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK11_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK12_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK13_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK14_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK15_MAJOR); MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK); MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); MODULE_ALIAS_SCSI_DEVICE(TYPE_ZBC); #define SD_MINORS 16 static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, unsigned int mode); static void sd_config_write_same(struct scsi_disk *sdkp, struct queue_limits *lim); static int sd_revalidate_disk(struct gendisk *); static void sd_unlock_native_capacity(struct gendisk *disk); static void sd_shutdown(struct device *); static void scsi_disk_release(struct device *cdev); static DEFINE_IDA(sd_index_ida); static mempool_t *sd_page_pool; static struct lock_class_key sd_bio_compl_lkclass; static const char *sd_cache_types[] = { "write through", "none", "write back", "write back, no read (daft)" }; static void sd_set_flush_flag(struct scsi_disk *sdkp, struct queue_limits *lim) { if (sdkp->WCE) { lim->features |= BLK_FEAT_WRITE_CACHE; if (sdkp->DPOFUA) lim->features |= BLK_FEAT_FUA; else lim->features &= ~BLK_FEAT_FUA; } else { lim->features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA); } } static ssize_t cache_type_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int ct, rcd, wce, sp; struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; char buffer[64]; char *buffer_data; struct scsi_mode_data data; struct scsi_sense_hdr sshdr; static const char temp[] = "temporary "; int len, ret; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC) /* no cache control on RBC devices; theoretically they * can do it, but there's probably so many exceptions * it's not worth the risk */ return -EINVAL; if (strncmp(buf, temp, sizeof(temp) - 1) == 0) { buf += sizeof(temp) - 1; sdkp->cache_override = 1; } else { sdkp->cache_override = 0; } ct = sysfs_match_string(sd_cache_types, buf); if (ct < 0) return -EINVAL; rcd = ct & 0x01 ? 1 : 0; wce = (ct & 0x02) && !sdkp->write_prot ? 1 : 0; if (sdkp->cache_override) { struct queue_limits lim; sdkp->WCE = wce; sdkp->RCD = rcd; lim = queue_limits_start_update(sdkp->disk->queue); sd_set_flush_flag(sdkp, &lim); ret = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (ret) return ret; return count; } if (scsi_mode_sense(sdp, 0x08, 8, 0, buffer, sizeof(buffer), SD_TIMEOUT, sdkp->max_retries, &data, NULL)) return -EINVAL; len = min_t(size_t, sizeof(buffer), data.length - data.header_length - data.block_descriptor_length); buffer_data = buffer + data.header_length + data.block_descriptor_length; buffer_data[2] &= ~0x05; buffer_data[2] |= wce << 2 | rcd; sp = buffer_data[0] & 0x80 ? 1 : 0; buffer_data[0] &= ~0x80; /* * Ensure WP, DPOFUA, and RESERVED fields are cleared in * received mode parameter buffer before doing MODE SELECT. */ data.device_specific = 0; ret = scsi_mode_select(sdp, 1, sp, buffer_data, len, SD_TIMEOUT, sdkp->max_retries, &data, &sshdr); if (ret) { if (ret > 0 && scsi_sense_valid(&sshdr)) sd_print_sense_hdr(sdkp, &sshdr); return -EINVAL; } sd_revalidate_disk(sdkp->disk); return count; } static ssize_t manage_start_stop_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; return sysfs_emit(buf, "%u\n", sdp->manage_system_start_stop && sdp->manage_runtime_start_stop && sdp->manage_shutdown); } static DEVICE_ATTR_RO(manage_start_stop); static ssize_t manage_system_start_stop_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; return sysfs_emit(buf, "%u\n", sdp->manage_system_start_stop); } static ssize_t manage_system_start_stop_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; bool v; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (kstrtobool(buf, &v)) return -EINVAL; sdp->manage_system_start_stop = v; return count; } static DEVICE_ATTR_RW(manage_system_start_stop); static ssize_t manage_runtime_start_stop_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; return sysfs_emit(buf, "%u\n", sdp->manage_runtime_start_stop); } static ssize_t manage_runtime_start_stop_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; bool v; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (kstrtobool(buf, &v)) return -EINVAL; sdp->manage_runtime_start_stop = v; return count; } static DEVICE_ATTR_RW(manage_runtime_start_stop); static ssize_t manage_shutdown_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; return sysfs_emit(buf, "%u\n", sdp->manage_shutdown); } static ssize_t manage_shutdown_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; bool v; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (kstrtobool(buf, &v)) return -EINVAL; sdp->manage_shutdown = v; return count; } static DEVICE_ATTR_RW(manage_shutdown); static ssize_t allow_restart_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->device->allow_restart); } static ssize_t allow_restart_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { bool v; struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC) return -EINVAL; if (kstrtobool(buf, &v)) return -EINVAL; sdp->allow_restart = v; return count; } static DEVICE_ATTR_RW(allow_restart); static ssize_t cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); int ct = sdkp->RCD + 2*sdkp->WCE; return sprintf(buf, "%s\n", sd_cache_types[ct]); } static DEVICE_ATTR_RW(cache_type); static ssize_t FUA_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->DPOFUA); } static DEVICE_ATTR_RO(FUA); static ssize_t protection_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->protection_type); } static ssize_t protection_type_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); unsigned int val; int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; err = kstrtouint(buf, 10, &val); if (err) return err; if (val <= T10_PI_TYPE3_PROTECTION) sdkp->protection_type = val; return count; } static DEVICE_ATTR_RW(protection_type); static ssize_t protection_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; unsigned int dif, dix; dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type); dix = scsi_host_dix_capable(sdp->host, sdkp->protection_type); if (!dix && scsi_host_dix_capable(sdp->host, T10_PI_TYPE0_PROTECTION)) { dif = 0; dix = 1; } if (!dif && !dix) return sprintf(buf, "none\n"); return sprintf(buf, "%s%u\n", dix ? "dix" : "dif", dif); } static DEVICE_ATTR_RO(protection_mode); static ssize_t app_tag_own_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->ATO); } static DEVICE_ATTR_RO(app_tag_own); static ssize_t thin_provisioning_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->lbpme); } static DEVICE_ATTR_RO(thin_provisioning); /* sysfs_match_string() requires dense arrays */ static const char *lbp_mode[] = { [SD_LBP_FULL] = "full", [SD_LBP_UNMAP] = "unmap", [SD_LBP_WS16] = "writesame_16", [SD_LBP_WS10] = "writesame_10", [SD_LBP_ZERO] = "writesame_zero", [SD_LBP_DISABLE] = "disabled", }; static ssize_t provisioning_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%s\n", lbp_mode[sdkp->provisioning_mode]); } static ssize_t provisioning_mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; struct queue_limits lim; int mode, err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (sdp->type != TYPE_DISK) return -EINVAL; mode = sysfs_match_string(lbp_mode, buf); if (mode < 0) return -EINVAL; lim = queue_limits_start_update(sdkp->disk->queue); sd_config_discard(sdkp, &lim, mode); err = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (err) return err; return count; } static DEVICE_ATTR_RW(provisioning_mode); /* sysfs_match_string() requires dense arrays */ static const char *zeroing_mode[] = { [SD_ZERO_WRITE] = "write", [SD_ZERO_WS] = "writesame", [SD_ZERO_WS16_UNMAP] = "writesame_16_unmap", [SD_ZERO_WS10_UNMAP] = "writesame_10_unmap", }; static ssize_t zeroing_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%s\n", zeroing_mode[sdkp->zeroing_mode]); } static ssize_t zeroing_mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); int mode; if (!capable(CAP_SYS_ADMIN)) return -EACCES; mode = sysfs_match_string(zeroing_mode, buf); if (mode < 0) return -EINVAL; sdkp->zeroing_mode = mode; return count; } static DEVICE_ATTR_RW(zeroing_mode); static ssize_t max_medium_access_timeouts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->max_medium_access_timeouts); } static ssize_t max_medium_access_timeouts_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; err = kstrtouint(buf, 10, &sdkp->max_medium_access_timeouts); return err ? err : count; } static DEVICE_ATTR_RW(max_medium_access_timeouts); static ssize_t max_write_same_blocks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->max_ws_blocks); } static ssize_t max_write_same_blocks_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; struct queue_limits lim; unsigned long max; int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC) return -EINVAL; err = kstrtoul(buf, 10, &max); if (err) return err; if (max == 0) sdp->no_write_same = 1; else if (max <= SD_MAX_WS16_BLOCKS) { sdp->no_write_same = 0; sdkp->max_ws_blocks = max; } lim = queue_limits_start_update(sdkp->disk->queue); sd_config_write_same(sdkp, &lim); err = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (err) return err; return count; } static DEVICE_ATTR_RW(max_write_same_blocks); static ssize_t zoned_cap_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); if (sdkp->device->type == TYPE_ZBC) return sprintf(buf, "host-managed\n"); if (sdkp->zoned == 1) return sprintf(buf, "host-aware\n"); if (sdkp->zoned == 2) return sprintf(buf, "drive-managed\n"); return sprintf(buf, "none\n"); } static DEVICE_ATTR_RO(zoned_cap); static ssize_t max_retries_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdev = sdkp->device; int retries, err; err = kstrtoint(buf, 10, &retries); if (err) return err; if (retries == SCSI_CMD_RETRIES_NO_LIMIT || retries <= SD_MAX_RETRIES) { sdkp->max_retries = retries; return count; } sdev_printk(KERN_ERR, sdev, "max_retries must be between -1 and %d\n", SD_MAX_RETRIES); return -EINVAL; } static ssize_t max_retries_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%d\n", sdkp->max_retries); } static DEVICE_ATTR_RW(max_retries); static struct attribute *sd_disk_attrs[] = { &dev_attr_cache_type.attr, &dev_attr_FUA.attr, &dev_attr_allow_restart.attr, &dev_attr_manage_start_stop.attr, &dev_attr_manage_system_start_stop.attr, &dev_attr_manage_runtime_start_stop.attr, &dev_attr_manage_shutdown.attr, &dev_attr_protection_type.attr, &dev_attr_protection_mode.attr, &dev_attr_app_tag_own.attr, &dev_attr_thin_provisioning.attr, &dev_attr_provisioning_mode.attr, &dev_attr_zeroing_mode.attr, &dev_attr_max_write_same_blocks.attr, &dev_attr_max_medium_access_timeouts.attr, &dev_attr_zoned_cap.attr, &dev_attr_max_retries.attr, NULL, }; ATTRIBUTE_GROUPS(sd_disk); static struct class sd_disk_class = { .name = "scsi_disk", .dev_release = scsi_disk_release, .dev_groups = sd_disk_groups, }; /* * Don't request a new module, as that could deadlock in multipath * environment. */ static void sd_default_probe(dev_t devt) { } /* * Device no to disk mapping: * * major disc2 disc p1 * |............|.............|....|....| <- dev_t * 31 20 19 8 7 4 3 0 * * Inside a major, we have 16k disks, however mapped non- * contiguously. The first 16 disks are for major0, the next * ones with major1, ... Disk 256 is for major0 again, disk 272 * for major1, ... * As we stay compatible with our numbering scheme, we can reuse * the well-know SCSI majors 8, 65--71, 136--143. */ static int sd_major(int major_idx) { switch (major_idx) { case 0: return SCSI_DISK0_MAJOR; case 1 ... 7: return SCSI_DISK1_MAJOR + major_idx - 1; case 8 ... 15: return SCSI_DISK8_MAJOR + major_idx - 8; default: BUG(); return 0; /* shut up gcc */ } } #ifdef CONFIG_BLK_SED_OPAL static int sd_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, bool send) { struct scsi_disk *sdkp = data; struct scsi_device *sdev = sdkp->device; u8 cdb[12] = { 0, }; const struct scsi_exec_args exec_args = { .req_flags = BLK_MQ_REQ_PM, }; int ret; cdb[0] = send ? SECURITY_PROTOCOL_OUT : SECURITY_PROTOCOL_IN; cdb[1] = secp; put_unaligned_be16(spsp, &cdb[2]); put_unaligned_be32(len, &cdb[6]); ret = scsi_execute_cmd(sdev, cdb, send ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, buffer, len, SD_TIMEOUT, sdkp->max_retries, &exec_args); return ret <= 0 ? ret : -EIO; } #endif /* CONFIG_BLK_SED_OPAL */ /* * Look up the DIX operation based on whether the command is read or * write and whether dix and dif are enabled. */ static unsigned int sd_prot_op(bool write, bool dix, bool dif) { /* Lookup table: bit 2 (write), bit 1 (dix), bit 0 (dif) */ static const unsigned int ops[] = { /* wrt dix dif */ SCSI_PROT_NORMAL, /* 0 0 0 */ SCSI_PROT_READ_STRIP, /* 0 0 1 */ SCSI_PROT_READ_INSERT, /* 0 1 0 */ SCSI_PROT_READ_PASS, /* 0 1 1 */ SCSI_PROT_NORMAL, /* 1 0 0 */ SCSI_PROT_WRITE_INSERT, /* 1 0 1 */ SCSI_PROT_WRITE_STRIP, /* 1 1 0 */ SCSI_PROT_WRITE_PASS, /* 1 1 1 */ }; return ops[write << 2 | dix << 1 | dif]; } /* * Returns a mask of the protection flags that are valid for a given DIX * operation. */ static unsigned int sd_prot_flag_mask(unsigned int prot_op) { static const unsigned int flag_mask[] = { [SCSI_PROT_NORMAL] = 0, [SCSI_PROT_READ_STRIP] = SCSI_PROT_TRANSFER_PI | SCSI_PROT_GUARD_CHECK | SCSI_PROT_REF_CHECK | SCSI_PROT_REF_INCREMENT, [SCSI_PROT_READ_INSERT] = SCSI_PROT_REF_INCREMENT | SCSI_PROT_IP_CHECKSUM, [SCSI_PROT_READ_PASS] = SCSI_PROT_TRANSFER_PI | SCSI_PROT_GUARD_CHECK | SCSI_PROT_REF_CHECK | SCSI_PROT_REF_INCREMENT | SCSI_PROT_IP_CHECKSUM, [SCSI_PROT_WRITE_INSERT] = SCSI_PROT_TRANSFER_PI | SCSI_PROT_REF_INCREMENT, [SCSI_PROT_WRITE_STRIP] = SCSI_PROT_GUARD_CHECK | SCSI_PROT_REF_CHECK | SCSI_PROT_REF_INCREMENT | SCSI_PROT_IP_CHECKSUM, [SCSI_PROT_WRITE_PASS] = SCSI_PROT_TRANSFER_PI | SCSI_PROT_GUARD_CHECK | SCSI_PROT_REF_CHECK | SCSI_PROT_REF_INCREMENT | SCSI_PROT_IP_CHECKSUM, }; return flag_mask[prot_op]; } static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd, unsigned int dix, unsigned int dif) { struct request *rq = scsi_cmd_to_rq(scmd); struct bio *bio = rq->bio; unsigned int prot_op = sd_prot_op(rq_data_dir(rq), dix, dif); unsigned int protect = 0; if (dix) { /* DIX Type 0, 1, 2, 3 */ if (bio_integrity_flagged(bio, BIP_IP_CHECKSUM)) scmd->prot_flags |= SCSI_PROT_IP_CHECKSUM; if (bio_integrity_flagged(bio, BIP_CHECK_GUARD)) scmd->prot_flags |= SCSI_PROT_GUARD_CHECK; } if (dif != T10_PI_TYPE3_PROTECTION) { /* DIX/DIF Type 0, 1, 2 */ scmd->prot_flags |= SCSI_PROT_REF_INCREMENT; if (bio_integrity_flagged(bio, BIP_CHECK_REFTAG)) scmd->prot_flags |= SCSI_PROT_REF_CHECK; } if (dif) { /* DIX/DIF Type 1, 2, 3 */ scmd->prot_flags |= SCSI_PROT_TRANSFER_PI; if (bio_integrity_flagged(bio, BIP_DISK_NOCHECK)) protect = 3 << 5; /* Disable target PI checking */ else protect = 1 << 5; /* Enable target PI checking */ } scsi_set_prot_op(scmd, prot_op); scsi_set_prot_type(scmd, dif); scmd->prot_flags &= sd_prot_flag_mask(prot_op); return protect; } static void sd_disable_discard(struct scsi_disk *sdkp) { sdkp->provisioning_mode = SD_LBP_DISABLE; blk_queue_disable_discard(sdkp->disk->queue); } static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, unsigned int mode) { unsigned int logical_block_size = sdkp->device->sector_size; unsigned int max_blocks = 0; lim->discard_alignment = sdkp->unmap_alignment * logical_block_size; lim->discard_granularity = max(sdkp->physical_block_size, sdkp->unmap_granularity * logical_block_size); sdkp->provisioning_mode = mode; switch (mode) { case SD_LBP_FULL: case SD_LBP_DISABLE: break; case SD_LBP_UNMAP: max_blocks = min_not_zero(sdkp->max_unmap_blocks, (u32)SD_MAX_WS16_BLOCKS); break; case SD_LBP_WS16: if (sdkp->device->unmap_limit_for_ws) max_blocks = sdkp->max_unmap_blocks; else max_blocks = sdkp->max_ws_blocks; max_blocks = min_not_zero(max_blocks, (u32)SD_MAX_WS16_BLOCKS); break; case SD_LBP_WS10: if (sdkp->device->unmap_limit_for_ws) max_blocks = sdkp->max_unmap_blocks; else max_blocks = sdkp->max_ws_blocks; max_blocks = min_not_zero(max_blocks, (u32)SD_MAX_WS10_BLOCKS); break; case SD_LBP_ZERO: max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)SD_MAX_WS10_BLOCKS); break; } lim->max_hw_discard_sectors = max_blocks * (logical_block_size >> SECTOR_SHIFT); } static void *sd_set_special_bvec(struct request *rq, unsigned int data_len) { struct page *page; page = mempool_alloc(sd_page_pool, GFP_ATOMIC); if (!page) return NULL; clear_highpage(page); bvec_set_page(&rq->special_vec, page, data_len, 0); rq->rq_flags |= RQF_SPECIAL_PAYLOAD; return bvec_virt(&rq->special_vec); } static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) { struct scsi_device *sdp = cmd->device; struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_disk *sdkp = scsi_disk(rq->q->disk); u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq)); u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); unsigned int data_len = 24; char *buf; buf = sd_set_special_bvec(rq, data_len); if (!buf) return BLK_STS_RESOURCE; cmd->cmd_len = 10; cmd->cmnd[0] = UNMAP; cmd->cmnd[8] = 24; put_unaligned_be16(6 + 16, &buf[0]); put_unaligned_be16(16, &buf[2]); put_unaligned_be64(lba, &buf[8]); put_unaligned_be32(nr_blocks, &buf[16]); cmd->allowed = sdkp->max_retries; cmd->transfersize = data_len; rq->timeout = SD_TIMEOUT; return scsi_alloc_sgtables(cmd); } static void sd_config_atomic(struct scsi_disk *sdkp, struct queue_limits *lim) { unsigned int logical_block_size = sdkp->device->sector_size, physical_block_size_sectors, max_atomic, unit_min, unit_max; if ((!sdkp->max_atomic && !sdkp->max_atomic_with_boundary) || sdkp->protection_type == T10_PI_TYPE2_PROTECTION) return; physical_block_size_sectors = sdkp->physical_block_size / sdkp->device->sector_size; unit_min = rounddown_pow_of_two(sdkp->atomic_granularity ? sdkp->atomic_granularity : physical_block_size_sectors); /* * Only use atomic boundary when we have the odd scenario of * sdkp->max_atomic == 0, which the spec does permit. */ if (sdkp->max_atomic) { max_atomic = sdkp->max_atomic; unit_max = rounddown_pow_of_two(sdkp->max_atomic); sdkp->use_atomic_write_boundary = 0; } else { max_atomic = sdkp->max_atomic_with_boundary; unit_max = rounddown_pow_of_two(sdkp->max_atomic_boundary); sdkp->use_atomic_write_boundary = 1; } /* * Ensure compliance with granularity and alignment. For now, keep it * simple and just don't support atomic writes for values mismatched * with max_{boundary}atomic, physical block size, and * atomic_granularity itself. * * We're really being distrustful by checking unit_max also... */ if (sdkp->atomic_granularity > 1) { if (unit_min > 1 && unit_min % sdkp->atomic_granularity) return; if (unit_max > 1 && unit_max % sdkp->atomic_granularity) return; } if (sdkp->atomic_alignment > 1) { if (unit_min > 1 && unit_min % sdkp->atomic_alignment) return; if (unit_max > 1 && unit_max % sdkp->atomic_alignment) return; } lim->atomic_write_hw_max = max_atomic * logical_block_size; lim->atomic_write_hw_boundary = 0; lim->atomic_write_hw_unit_min = unit_min * logical_block_size; lim->atomic_write_hw_unit_max = unit_max * logical_block_size; lim->features |= BLK_FEAT_ATOMIC_WRITES; } static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap) { struct scsi_device *sdp = cmd->device; struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_disk *sdkp = scsi_disk(rq->q->disk); u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq)); u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); u32 data_len = sdp->sector_size; if (!sd_set_special_bvec(rq, data_len)) return BLK_STS_RESOURCE; cmd->cmd_len = 16; cmd->cmnd[0] = WRITE_SAME_16; if (unmap) cmd->cmnd[1] = 0x8; /* UNMAP */ put_unaligned_be64(lba, &cmd->cmnd[2]); put_unaligned_be32(nr_blocks, &cmd->cmnd[10]); cmd->allowed = sdkp->max_retries; cmd->transfersize = data_len; rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT; return scsi_alloc_sgtables(cmd); } static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap) { struct scsi_device *sdp = cmd->device; struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_disk *sdkp = scsi_disk(rq->q->disk); u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq)); u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); u32 data_len = sdp->sector_size; if (!sd_set_special_bvec(rq, data_len)) return BLK_STS_RESOURCE; cmd->cmd_len = 10; cmd->cmnd[0] = WRITE_SAME; if (unmap) cmd->cmnd[1] = 0x8; /* UNMAP */ put_unaligned_be32(lba, &cmd->cmnd[2]); put_unaligned_be16(nr_blocks, &cmd->cmnd[7]); cmd->allowed = sdkp->max_retries; cmd->transfersize = data_len; rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT; return scsi_alloc_sgtables(cmd); } static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_device *sdp = cmd->device; struct scsi_disk *sdkp = scsi_disk(rq->q->disk); u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq)); u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); if (!(rq->cmd_flags & REQ_NOUNMAP)) { switch (sdkp->zeroing_mode) { case SD_ZERO_WS16_UNMAP: return sd_setup_write_same16_cmnd(cmd, true); case SD_ZERO_WS10_UNMAP: return sd_setup_write_same10_cmnd(cmd, true); } } if (sdp->no_write_same) { rq->rq_flags |= RQF_QUIET; return BLK_STS_TARGET; } if (sdkp->ws16 || lba > 0xffffffff || nr_blocks > 0xffff) return sd_setup_write_same16_cmnd(cmd, false); return sd_setup_write_same10_cmnd(cmd, false); } static void sd_disable_write_same(struct scsi_disk *sdkp) { sdkp->device->no_write_same = 1; sdkp->max_ws_blocks = 0; blk_queue_disable_write_zeroes(sdkp->disk->queue); } static void sd_config_write_same(struct scsi_disk *sdkp, struct queue_limits *lim) { unsigned int logical_block_size = sdkp->device->sector_size; if (sdkp->device->no_write_same) { sdkp->max_ws_blocks = 0; goto out; } /* Some devices can not handle block counts above 0xffff despite * supporting WRITE SAME(16). Consequently we default to 64k * blocks per I/O unless the device explicitly advertises a * bigger limit. */ if (sdkp->max_ws_blocks > SD_MAX_WS10_BLOCKS) sdkp->max_ws_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)SD_MAX_WS16_BLOCKS); else if (sdkp->ws16 || sdkp->ws10 || sdkp->device->no_report_opcodes) sdkp->max_ws_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)SD_MAX_WS10_BLOCKS); else { sdkp->device->no_write_same = 1; sdkp->max_ws_blocks = 0; } if (sdkp->lbprz && sdkp->lbpws) sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP; else if (sdkp->lbprz && sdkp->lbpws10) sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP; else if (sdkp->max_ws_blocks) sdkp->zeroing_mode = SD_ZERO_WS; else sdkp->zeroing_mode = SD_ZERO_WRITE; if (sdkp->max_ws_blocks && sdkp->physical_block_size > logical_block_size) { /* * Reporting a maximum number of blocks that is not aligned * on the device physical size would cause a large write same * request to be split into physically unaligned chunks by * __blkdev_issue_write_zeroes() even if the caller of this * functions took care to align the large request. So make sure * the maximum reported is aligned to the device physical block * size. This is only an optional optimization for regular * disks, but this is mandatory to avoid failure of large write * same requests directed at sequential write required zones of * host-managed ZBC disks. */ sdkp->max_ws_blocks = round_down(sdkp->max_ws_blocks, bytes_to_logical(sdkp->device, sdkp->physical_block_size)); } out: lim->max_write_zeroes_sectors = sdkp->max_ws_blocks * (logical_block_size >> SECTOR_SHIFT); } static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_disk *sdkp = scsi_disk(rq->q->disk); /* flush requests don't perform I/O, zero the S/G table */ memset(&cmd->sdb, 0, sizeof(cmd->sdb)); if (cmd->device->use_16_for_sync) { cmd->cmnd[0] = SYNCHRONIZE_CACHE_16; cmd->cmd_len = 16; } else { cmd->cmnd[0] = SYNCHRONIZE_CACHE; cmd->cmd_len = 10; } cmd->transfersize = 0; cmd->allowed = sdkp->max_retries; rq->timeout = rq->q->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER; return BLK_STS_OK; } /** * sd_group_number() - Compute the GROUP NUMBER field * @cmd: SCSI command for which to compute the value of the six-bit GROUP NUMBER * field. * * From SBC-5 r05 (https://www.t10.org/cgi-bin/ac.pl?t=f&f=sbc5r05.pdf): * 0: no relative lifetime. * 1: shortest relative lifetime. * 2: second shortest relative lifetime. * 3 - 0x3d: intermediate relative lifetimes. * 0x3e: second longest relative lifetime. * 0x3f: longest relative lifetime. */ static u8 sd_group_number(struct scsi_cmnd *cmd) { const struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_disk *sdkp = scsi_disk(rq->q->disk); if (!sdkp->rscs) return 0; return min3((u32)rq->bio->bi_write_hint, (u32)sdkp->permanent_stream_count, 0x3fu); } static blk_status_t sd_setup_rw32_cmnd(struct scsi_cmnd *cmd, bool write, sector_t lba, unsigned int nr_blocks, unsigned char flags, unsigned int dld) { cmd->cmd_len = SD_EXT_CDB_SIZE; cmd->cmnd[0] = VARIABLE_LENGTH_CMD; cmd->cmnd[6] = sd_group_number(cmd); cmd->cmnd[7] = 0x18; /* Additional CDB len */ cmd->cmnd[9] = write ? WRITE_32 : READ_32; cmd->cmnd[10] = flags; cmd->cmnd[11] = dld & 0x07; put_unaligned_be64(lba, &cmd->cmnd[12]); put_unaligned_be32(lba, &cmd->cmnd[20]); /* Expected Indirect LBA */ put_unaligned_be32(nr_blocks, &cmd->cmnd[28]); return BLK_STS_OK; } static blk_status_t sd_setup_rw16_cmnd(struct scsi_cmnd *cmd, bool write, sector_t lba, unsigned int nr_blocks, unsigned char flags, unsigned int dld) { cmd->cmd_len = 16; cmd->cmnd[0] = write ? WRITE_16 : READ_16; cmd->cmnd[1] = flags | ((dld >> 2) & 0x01); cmd->cmnd[14] = ((dld & 0x03) << 6) | sd_group_number(cmd); cmd->cmnd[15] = 0; put_unaligned_be64(lba, &cmd->cmnd[2]); put_unaligned_be32(nr_blocks, &cmd->cmnd[10]); return BLK_STS_OK; } static blk_status_t sd_setup_rw10_cmnd(struct scsi_cmnd *cmd, bool write, sector_t lba, unsigned int nr_blocks, unsigned char flags) { cmd->cmd_len = 10; cmd->cmnd[0] = write ? WRITE_10 : READ_10; cmd->cmnd[1] = flags; cmd->cmnd[6] = sd_group_number(cmd); cmd->cmnd[9] = 0; put_unaligned_be32(lba, &cmd->cmnd[2]); put_unaligned_be16(nr_blocks, &cmd->cmnd[7]); return BLK_STS_OK; } static blk_status_t sd_setup_rw6_cmnd(struct scsi_cmnd *cmd, bool write, sector_t lba, unsigned int nr_blocks, unsigned char flags) { /* Avoid that 0 blocks gets translated into 256 blocks. */ if (WARN_ON_ONCE(nr_blocks == 0)) return BLK_STS_IOERR; if (unlikely(flags & 0x8)) { /* * This happens only if this drive failed 10byte rw * command with ILLEGAL_REQUEST during operation and * thus turned off use_10_for_rw. */ scmd_printk(KERN_ERR, cmd, "FUA write on READ/WRITE(6) drive\n"); return BLK_STS_IOERR; } cmd->cmd_len = 6; cmd->cmnd[0] = write ? WRITE_6 : READ_6; cmd->cmnd[1] = (lba >> 16) & 0x1f; cmd->cmnd[2] = (lba >> 8) & 0xff; cmd->cmnd[3] = lba & 0xff; cmd->cmnd[4] = nr_blocks; cmd->cmnd[5] = 0; return BLK_STS_OK; } /* * Check if a command has a duration limit set. If it does, and the target * device supports CDL and the feature is enabled, return the limit * descriptor index to use. Return 0 (no limit) otherwise. */ static int sd_cdl_dld(struct scsi_disk *sdkp, struct scsi_cmnd *scmd) { struct scsi_device *sdp = sdkp->device; int hint; if (!sdp->cdl_supported || !sdp->cdl_enable) return 0; /* * Use "no limit" if the request ioprio does not specify a duration * limit hint. */ hint = IOPRIO_PRIO_HINT(req_get_ioprio(scsi_cmd_to_rq(scmd))); if (hint < IOPRIO_HINT_DEV_DURATION_LIMIT_1 || hint > IOPRIO_HINT_DEV_DURATION_LIMIT_7) return 0; return (hint - IOPRIO_HINT_DEV_DURATION_LIMIT_1) + 1; } static blk_status_t sd_setup_atomic_cmnd(struct scsi_cmnd *cmd, sector_t lba, unsigned int nr_blocks, bool boundary, unsigned char flags) { cmd->cmd_len = 16; cmd->cmnd[0] = WRITE_ATOMIC_16; cmd->cmnd[1] = flags; put_unaligned_be64(lba, &cmd->cmnd[2]); put_unaligned_be16(nr_blocks, &cmd->cmnd[12]); if (boundary) put_unaligned_be16(nr_blocks, &cmd->cmnd[10]); else put_unaligned_be16(0, &cmd->cmnd[10]); put_unaligned_be16(nr_blocks, &cmd->cmnd[12]); cmd->cmnd[14] = 0; cmd->cmnd[15] = 0; return BLK_STS_OK; } static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_device *sdp = cmd->device; struct scsi_disk *sdkp = scsi_disk(rq->q->disk); sector_t lba = sectors_to_logical(sdp, blk_rq_pos(rq)); sector_t threshold; unsigned int nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); unsigned int mask = logical_to_sectors(sdp, 1) - 1; bool write = rq_data_dir(rq) == WRITE; unsigned char protect, fua; unsigned int dld; blk_status_t ret; unsigned int dif; bool dix; ret = scsi_alloc_sgtables(cmd); if (ret != BLK_STS_OK) return ret; ret = BLK_STS_IOERR; if (!scsi_device_online(sdp) || sdp->changed) { scmd_printk(KERN_ERR, cmd, "device offline or changed\n"); goto fail; } if (blk_rq_pos(rq) + blk_rq_sectors(rq) > get_capacity(rq->q->disk)) { scmd_printk(KERN_ERR, cmd, "access beyond end of device\n"); goto fail; } if ((blk_rq_pos(rq) & mask) || (blk_rq_sectors(rq) & mask)) { scmd_printk(KERN_ERR, cmd, "request not aligned to the logical block size\n"); goto fail; } /* * Some SD card readers can't handle accesses which touch the * last one or two logical blocks. Split accesses as needed. */ threshold = sdkp->capacity - SD_LAST_BUGGY_SECTORS; if (unlikely(sdp->last_sector_bug && lba + nr_blocks > threshold)) { if (lba < threshold) { /* Access up to the threshold but not beyond */ nr_blocks = threshold - lba; } else { /* Access only a single logical block */ nr_blocks = 1; } } fua = rq->cmd_flags & REQ_FUA ? 0x8 : 0; dix = scsi_prot_sg_count(cmd); dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type); dld = sd_cdl_dld(sdkp, cmd); if (dif || dix) protect = sd_setup_protect_cmnd(cmd, dix, dif); else protect = 0; if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) { ret = sd_setup_rw32_cmnd(cmd, write, lba, nr_blocks, protect | fua, dld); } else if (rq->cmd_flags & REQ_ATOMIC) { ret = sd_setup_atomic_cmnd(cmd, lba, nr_blocks, sdkp->use_atomic_write_boundary, protect | fua); } else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) { ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks, protect | fua, dld); } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) || sdp->use_10_for_rw || protect || rq->bio->bi_write_hint) { ret = sd_setup_rw10_cmnd(cmd, write, lba, nr_blocks, protect | fua); } else { ret = sd_setup_rw6_cmnd(cmd, write, lba, nr_blocks, protect | fua); } if (unlikely(ret != BLK_STS_OK)) goto fail; /* * We shouldn't disconnect in the middle of a sector, so with a dumb * host adapter, it's safe to assume that we can at least transfer * this many bytes between each connect / disconnect. */ cmd->transfersize = sdp->sector_size; cmd->underflow = nr_blocks << 9; cmd->allowed = sdkp->max_retries; cmd->sdb.length = nr_blocks * sdp->sector_size; SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, cmd, "%s: block=%llu, count=%d\n", __func__, (unsigned long long)blk_rq_pos(rq), blk_rq_sectors(rq))); SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, cmd, "%s %d/%u 512 byte blocks.\n", write ? "writing" : "reading", nr_blocks, blk_rq_sectors(rq))); /* * This indicates that the command is ready from our end to be queued. */ return BLK_STS_OK; fail: scsi_free_sgtables(cmd); return ret; } static blk_status_t sd_init_command(struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); switch (req_op(rq)) { case REQ_OP_DISCARD: switch (scsi_disk(rq->q->disk)->provisioning_mode) { case SD_LBP_UNMAP: return sd_setup_unmap_cmnd(cmd); case SD_LBP_WS16: return sd_setup_write_same16_cmnd(cmd, true); case SD_LBP_WS10: return sd_setup_write_same10_cmnd(cmd, true); case SD_LBP_ZERO: return sd_setup_write_same10_cmnd(cmd, false); default: return BLK_STS_TARGET; } case REQ_OP_WRITE_ZEROES: return sd_setup_write_zeroes_cmnd(cmd); case REQ_OP_FLUSH: return sd_setup_flush_cmnd(cmd); case REQ_OP_READ: case REQ_OP_WRITE: return sd_setup_read_write_cmnd(cmd); case REQ_OP_ZONE_RESET: return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER, false); case REQ_OP_ZONE_RESET_ALL: return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER, true); case REQ_OP_ZONE_OPEN: return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_OPEN_ZONE, false); case REQ_OP_ZONE_CLOSE: return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_CLOSE_ZONE, false); case REQ_OP_ZONE_FINISH: return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_FINISH_ZONE, false); default: WARN_ON_ONCE(1); return BLK_STS_NOTSUPP; } } static void sd_uninit_command(struct scsi_cmnd *SCpnt) { struct request *rq = scsi_cmd_to_rq(SCpnt); if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) mempool_free(rq->special_vec.bv_page, sd_page_pool); } static bool sd_need_revalidate(struct gendisk *disk, struct scsi_disk *sdkp) { if (sdkp->device->removable || sdkp->write_prot) { if (disk_check_media_change(disk)) return true; } /* * Force a full rescan after ioctl(BLKRRPART). While the disk state has * nothing to do with partitions, BLKRRPART is used to force a full * revalidate after things like a format for historical reasons. */ return test_bit(GD_NEED_PART_SCAN, &disk->state); } /** * sd_open - open a scsi disk device * @disk: disk to open * @mode: open mode * * Returns 0 if successful. Returns a negated errno value in case * of error. * * Note: This can be called from a user context (e.g. fsck(1) ) * or from within the kernel (e.g. as a result of a mount(1) ). * In the latter case @inode and @filp carry an abridged amount * of information as noted above. * * Locking: called with disk->open_mutex held. **/ static int sd_open(struct gendisk *disk, blk_mode_t mode) { struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdev = sdkp->device; int retval; if (scsi_device_get(sdev)) return -ENXIO; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_open\n")); /* * If the device is in error recovery, wait until it is done. * If the device is offline, then disallow any access to it. */ retval = -ENXIO; if (!scsi_block_when_processing_errors(sdev)) goto error_out; if (sd_need_revalidate(disk, sdkp)) sd_revalidate_disk(disk); /* * If the drive is empty, just let the open fail. */ retval = -ENOMEDIUM; if (sdev->removable && !sdkp->media_present && !(mode & BLK_OPEN_NDELAY)) goto error_out; /* * If the device has the write protect tab set, have the open fail * if the user expects to be able to write to the thing. */ retval = -EROFS; if (sdkp->write_prot && (mode & BLK_OPEN_WRITE)) goto error_out; /* * It is possible that the disk changing stuff resulted in * the device being taken offline. If this is the case, * report this to the user, and don't pretend that the * open actually succeeded. */ retval = -ENXIO; if (!scsi_device_online(sdev)) goto error_out; if ((atomic_inc_return(&sdkp->openers) == 1) && sdev->removable) { if (scsi_block_when_processing_errors(sdev)) scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT); } return 0; error_out: scsi_device_put(sdev); return retval; } /** * sd_release - invoked when the (last) close(2) is called on this * scsi disk. * @disk: disk to release * * Returns 0. * * Note: may block (uninterruptible) if error recovery is underway * on this disk. * * Locking: called with disk->open_mutex held. **/ static void sd_release(struct gendisk *disk) { struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdev = sdkp->device; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n")); if (atomic_dec_return(&sdkp->openers) == 0 && sdev->removable) { if (scsi_block_when_processing_errors(sdev)) scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); } scsi_device_put(sdev); } static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdp = sdkp->device; struct Scsi_Host *host = sdp->host; sector_t capacity = logical_to_sectors(sdp, sdkp->capacity); int diskinfo[4]; /* default to most commonly used values */ diskinfo[0] = 0x40; /* 1 << 6 */ diskinfo[1] = 0x20; /* 1 << 5 */ diskinfo[2] = capacity >> 11; /* override with calculated, extended default, or driver values */ if (host->hostt->bios_param) host->hostt->bios_param(sdp, bdev, capacity, diskinfo); else scsicam_bios_param(bdev, capacity, diskinfo); geo->heads = diskinfo[0]; geo->sectors = diskinfo[1]; geo->cylinders = diskinfo[2]; return 0; } /** * sd_ioctl - process an ioctl * @bdev: target block device * @mode: open mode * @cmd: ioctl command number * @arg: this is third argument given to ioctl(2) system call. * Often contains a pointer. * * Returns 0 if successful (some ioctls return positive numbers on * success as well). Returns a negated errno value in case of error. * * Note: most ioctls are forward onto the block subsystem or further * down in the scsi subsystem. **/ static int sd_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { struct gendisk *disk = bdev->bd_disk; struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; void __user *p = (void __user *)arg; int error; SCSI_LOG_IOCTL(1, sd_printk(KERN_INFO, sdkp, "sd_ioctl: disk=%s, " "cmd=0x%x\n", disk->disk_name, cmd)); if (bdev_is_partition(bdev) && !capable(CAP_SYS_RAWIO)) return -ENOIOCTLCMD; /* * If we are in the middle of error recovery, don't let anyone * else try and use this device. Also, if error recovery fails, it * may try and take the device offline, in which case all further * access to the device is prohibited. */ error = scsi_ioctl_block_when_processing_errors(sdp, cmd, (mode & BLK_OPEN_NDELAY)); if (error) return error; if (is_sed_ioctl(cmd)) return sed_ioctl(sdkp->opal_dev, cmd, p); return scsi_ioctl(sdp, mode & BLK_OPEN_WRITE, cmd, p); } static void set_media_not_present(struct scsi_disk *sdkp) { if (sdkp->media_present) sdkp->device->changed = 1; if (sdkp->device->removable) { sdkp->media_present = 0; sdkp->capacity = 0; } } static int media_not_present(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) { if (!scsi_sense_valid(sshdr)) return 0; /* not invoked for commands that could return deferred errors */ switch (sshdr->sense_key) { case UNIT_ATTENTION: case NOT_READY: /* medium not present */ if (sshdr->asc == 0x3A) { set_media_not_present(sdkp); return 1; } } return 0; } /** * sd_check_events - check media events * @disk: kernel device descriptor * @clearing: disk events currently being cleared * * Returns mask of DISK_EVENT_*. * * Note: this function is invoked from the block subsystem. **/ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing) { struct scsi_disk *sdkp = disk->private_data; struct scsi_device *sdp; int retval; bool disk_changed; if (!sdkp) return 0; sdp = sdkp->device; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_check_events\n")); /* * If the device is offline, don't send any commands - just pretend as * if the command failed. If the device ever comes back online, we * can deal with it then. It is only because of unrecoverable errors * that we would ever take a device offline in the first place. */ if (!scsi_device_online(sdp)) { set_media_not_present(sdkp); goto out; } /* * Using TEST_UNIT_READY enables differentiation between drive with * no cartridge loaded - NOT READY, drive with changed cartridge - * UNIT ATTENTION, or with same cartridge - GOOD STATUS. * * Drives that auto spin down. eg iomega jaz 1G, will be started * by sd_spinup_disk() from sd_revalidate_disk(), which happens whenever * sd_revalidate() is called. */ if (scsi_block_when_processing_errors(sdp)) { struct scsi_sense_hdr sshdr = { 0, }; retval = scsi_test_unit_ready(sdp, SD_TIMEOUT, sdkp->max_retries, &sshdr); /* failed to execute TUR, assume media not present */ if (retval < 0 || host_byte(retval)) { set_media_not_present(sdkp); goto out; } if (media_not_present(sdkp, &sshdr)) goto out; } /* * For removable scsi disk we have to recognise the presence * of a disk in the drive. */ if (!sdkp->media_present) sdp->changed = 1; sdkp->media_present = 1; out: /* * sdp->changed is set under the following conditions: * * Medium present state has changed in either direction. * Device has indicated UNIT_ATTENTION. */ disk_changed = sdp->changed; sdp->changed = 0; return disk_changed ? DISK_EVENT_MEDIA_CHANGE : 0; } static int sd_sync_cache(struct scsi_disk *sdkp) { int res; struct scsi_device *sdp = sdkp->device; const int timeout = sdp->request_queue->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER; /* Leave the rest of the command zero to indicate flush everything. */ const unsigned char cmd[16] = { sdp->use_16_for_sync ? SYNCHRONIZE_CACHE_16 : SYNCHRONIZE_CACHE }; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { { .allowed = 3, .result = SCMD_FAILURE_RESULT_ANY, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .req_flags = BLK_MQ_REQ_PM, .sshdr = &sshdr, .failures = &failures, }; if (!scsi_device_online(sdp)) return -ENODEV; res = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, timeout, sdkp->max_retries, &exec_args); if (res) { sd_print_result(sdkp, "Synchronize Cache(10) failed", res); if (res < 0) return res; if (scsi_status_is_check_condition(res) && scsi_sense_valid(&sshdr)) { sd_print_sense_hdr(sdkp, &sshdr); /* we need to evaluate the error return */ if (sshdr.asc == 0x3a || /* medium not present */ sshdr.asc == 0x20 || /* invalid command */ (sshdr.asc == 0x74 && sshdr.ascq == 0x71)) /* drive is password locked */ /* this is no error here */ return 0; /* * If a format is in progress or if the drive does not * support sync, there is not much we can do because * this is called during shutdown or suspend so just * return success so those operations can proceed. */ if ((sshdr.asc == 0x04 && sshdr.ascq == 0x04) || sshdr.sense_key == ILLEGAL_REQUEST) return 0; } switch (host_byte(res)) { /* ignore errors due to racing a disconnection */ case DID_BAD_TARGET: case DID_NO_CONNECT: return 0; /* signal the upper layer it might try again */ case DID_BUS_BUSY: case DID_IMM_RETRY: case DID_REQUEUE: case DID_SOFT_ERROR: return -EBUSY; default: return -EIO; } } return 0; } static void sd_rescan(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); sd_revalidate_disk(sdkp->disk); } static int sd_get_unique_id(struct gendisk *disk, u8 id[16], enum blk_unique_id type) { struct scsi_device *sdev = scsi_disk(disk)->device; const struct scsi_vpd *vpd; const unsigned char *d; int ret = -ENXIO, len; rcu_read_lock(); vpd = rcu_dereference(sdev->vpd_pg83); if (!vpd) goto out_unlock; ret = -EINVAL; for (d = vpd->data + 4; d < vpd->data + vpd->len; d += d[3] + 4) { /* we only care about designators with LU association */ if (((d[1] >> 4) & 0x3) != 0x00) continue; if ((d[1] & 0xf) != type) continue; /* * Only exit early if a 16-byte descriptor was found. Otherwise * keep looking as one with more entropy might still show up. */ len = d[3]; if (len != 8 && len != 12 && len != 16) continue; ret = len; memcpy(id, d + 4, len); if (len == 16) break; } out_unlock: rcu_read_unlock(); return ret; } static int sd_scsi_to_pr_err(struct scsi_sense_hdr *sshdr, int result) { switch (host_byte(result)) { case DID_TRANSPORT_MARGINAL: case DID_TRANSPORT_DISRUPTED: case DID_BUS_BUSY: return PR_STS_RETRY_PATH_FAILURE; case DID_NO_CONNECT: return PR_STS_PATH_FAILED; case DID_TRANSPORT_FAILFAST: return PR_STS_PATH_FAST_FAILED; } switch (status_byte(result)) { case SAM_STAT_RESERVATION_CONFLICT: return PR_STS_RESERVATION_CONFLICT; case SAM_STAT_CHECK_CONDITION: if (!scsi_sense_valid(sshdr)) return PR_STS_IOERR; if (sshdr->sense_key == ILLEGAL_REQUEST && (sshdr->asc == 0x26 || sshdr->asc == 0x24)) return -EINVAL; fallthrough; default: return PR_STS_IOERR; } } static int sd_pr_in_command(struct block_device *bdev, u8 sa, unsigned char *data, int data_len) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; u8 cmd[10] = { PERSISTENT_RESERVE_IN, sa }; struct scsi_failure failure_defs[] = { { .sense = UNIT_ATTENTION, .asc = SCMD_FAILURE_ASC_ANY, .ascq = SCMD_FAILURE_ASCQ_ANY, .allowed = 5, .result = SAM_STAT_CHECK_CONDITION, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .failures = &failures, }; int result; put_unaligned_be16(data_len, &cmd[7]); result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, data, data_len, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (scsi_status_is_check_condition(result) && scsi_sense_valid(&sshdr)) { sdev_printk(KERN_INFO, sdev, "PR command failed: %d\n", result); scsi_print_sense_hdr(sdev, NULL, &sshdr); } if (result <= 0) return result; return sd_scsi_to_pr_err(&sshdr, result); } static int sd_pr_read_keys(struct block_device *bdev, struct pr_keys *keys_info) { int result, i, data_offset, num_copy_keys; u32 num_keys = keys_info->num_keys; int data_len = num_keys * 8 + 8; u8 *data; data = kzalloc(data_len, GFP_KERNEL); if (!data) return -ENOMEM; result = sd_pr_in_command(bdev, READ_KEYS, data, data_len); if (result) goto free_data; keys_info->generation = get_unaligned_be32(&data[0]); keys_info->num_keys = get_unaligned_be32(&data[4]) / 8; data_offset = 8; num_copy_keys = min(num_keys, keys_info->num_keys); for (i = 0; i < num_copy_keys; i++) { keys_info->keys[i] = get_unaligned_be64(&data[data_offset]); data_offset += 8; } free_data: kfree(data); return result; } static int sd_pr_read_reservation(struct block_device *bdev, struct pr_held_reservation *rsv) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; u8 data[24] = { }; int result, len; result = sd_pr_in_command(bdev, READ_RESERVATION, data, sizeof(data)); if (result) return result; len = get_unaligned_be32(&data[4]); if (!len) return 0; /* Make sure we have at least the key and type */ if (len < 14) { sdev_printk(KERN_INFO, sdev, "READ RESERVATION failed due to short return buffer of %d bytes\n", len); return -EINVAL; } rsv->generation = get_unaligned_be32(&data[0]); rsv->key = get_unaligned_be64(&data[8]); rsv->type = scsi_pr_type_to_block(data[21] & 0x0f); return 0; } static int sd_pr_out_command(struct block_device *bdev, u8 sa, u64 key, u64 sa_key, enum scsi_pr_type type, u8 flags) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { { .sense = UNIT_ATTENTION, .asc = SCMD_FAILURE_ASC_ANY, .ascq = SCMD_FAILURE_ASCQ_ANY, .allowed = 5, .result = SAM_STAT_CHECK_CONDITION, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .failures = &failures, }; int result; u8 cmd[16] = { 0, }; u8 data[24] = { 0, }; cmd[0] = PERSISTENT_RESERVE_OUT; cmd[1] = sa; cmd[2] = type; put_unaligned_be32(sizeof(data), &cmd[5]); put_unaligned_be64(key, &data[0]); put_unaligned_be64(sa_key, &data[8]); data[20] = flags; result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_OUT, &data, sizeof(data), SD_TIMEOUT, sdkp->max_retries, &exec_args); if (scsi_status_is_check_condition(result) && scsi_sense_valid(&sshdr)) { sdev_printk(KERN_INFO, sdev, "PR command failed: %d\n", result); scsi_print_sense_hdr(sdev, NULL, &sshdr); } if (result <= 0) return result; return sd_scsi_to_pr_err(&sshdr, result); } static int sd_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, u32 flags) { if (flags & ~PR_FL_IGNORE_KEY) return -EOPNOTSUPP; return sd_pr_out_command(bdev, (flags & PR_FL_IGNORE_KEY) ? 0x06 : 0x00, old_key, new_key, 0, (1 << 0) /* APTPL */); } static int sd_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type, u32 flags) { if (flags) return -EOPNOTSUPP; return sd_pr_out_command(bdev, 0x01, key, 0, block_pr_type_to_scsi(type), 0); } static int sd_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { return sd_pr_out_command(bdev, 0x02, key, 0, block_pr_type_to_scsi(type), 0); } static int sd_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key, enum pr_type type, bool abort) { return sd_pr_out_command(bdev, abort ? 0x05 : 0x04, old_key, new_key, block_pr_type_to_scsi(type), 0); } static int sd_pr_clear(struct block_device *bdev, u64 key) { return sd_pr_out_command(bdev, 0x03, key, 0, 0, 0); } static const struct pr_ops sd_pr_ops = { .pr_register = sd_pr_register, .pr_reserve = sd_pr_reserve, .pr_release = sd_pr_release, .pr_preempt = sd_pr_preempt, .pr_clear = sd_pr_clear, .pr_read_keys = sd_pr_read_keys, .pr_read_reservation = sd_pr_read_reservation, }; static void scsi_disk_free_disk(struct gendisk *disk) { struct scsi_disk *sdkp = scsi_disk(disk); put_device(&sdkp->disk_dev); } static const struct block_device_operations sd_fops = { .owner = THIS_MODULE, .open = sd_open, .release = sd_release, .ioctl = sd_ioctl, .getgeo = sd_getgeo, .compat_ioctl = blkdev_compat_ptr_ioctl, .check_events = sd_check_events, .unlock_native_capacity = sd_unlock_native_capacity, .report_zones = sd_zbc_report_zones, .get_unique_id = sd_get_unique_id, .free_disk = scsi_disk_free_disk, .pr_ops = &sd_pr_ops, }; /** * sd_eh_reset - reset error handling callback * @scmd: sd-issued command that has failed * * This function is called by the SCSI midlayer before starting * SCSI EH. When counting medium access failures we have to be * careful to register it only only once per device and SCSI EH run; * there might be several timed out commands which will cause the * 'max_medium_access_timeouts' counter to trigger after the first * SCSI EH run already and set the device to offline. * So this function resets the internal counter before starting SCSI EH. **/ static void sd_eh_reset(struct scsi_cmnd *scmd) { struct scsi_disk *sdkp = scsi_disk(scsi_cmd_to_rq(scmd)->q->disk); /* New SCSI EH run, reset gate variable */ sdkp->ignore_medium_access_errors = false; } /** * sd_eh_action - error handling callback * @scmd: sd-issued command that has failed * @eh_disp: The recovery disposition suggested by the midlayer * * This function is called by the SCSI midlayer upon completion of an * error test command (currently TEST UNIT READY). The result of sending * the eh command is passed in eh_disp. We're looking for devices that * fail medium access commands but are OK with non access commands like * test unit ready (so wrongly see the device as having a successful * recovery) **/ static int sd_eh_action(struct scsi_cmnd *scmd, int eh_disp) { struct scsi_disk *sdkp = scsi_disk(scsi_cmd_to_rq(scmd)->q->disk); struct scsi_device *sdev = scmd->device; if (!scsi_device_online(sdev) || !scsi_medium_access_command(scmd) || host_byte(scmd->result) != DID_TIME_OUT || eh_disp != SUCCESS) return eh_disp; /* * The device has timed out executing a medium access command. * However, the TEST UNIT READY command sent during error * handling completed successfully. Either the device is in the * process of recovering or has it suffered an internal failure * that prevents access to the storage medium. */ if (!sdkp->ignore_medium_access_errors) { sdkp->medium_access_timed_out++; sdkp->ignore_medium_access_errors = true; } /* * If the device keeps failing read/write commands but TEST UNIT * READY always completes successfully we assume that medium * access is no longer possible and take the device offline. */ if (sdkp->medium_access_timed_out >= sdkp->max_medium_access_timeouts) { scmd_printk(KERN_ERR, scmd, "Medium access timeout failure. Offlining disk!\n"); mutex_lock(&sdev->state_mutex); scsi_device_set_state(sdev, SDEV_OFFLINE); mutex_unlock(&sdev->state_mutex); return SUCCESS; } return eh_disp; } static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd) { struct request *req = scsi_cmd_to_rq(scmd); struct scsi_device *sdev = scmd->device; unsigned int transferred, good_bytes; u64 start_lba, end_lba, bad_lba; /* * Some commands have a payload smaller than the device logical * block size (e.g. INQUIRY on a 4K disk). */ if (scsi_bufflen(scmd) <= sdev->sector_size) return 0; /* Check if we have a 'bad_lba' information */ if (!scsi_get_sense_info_fld(scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, &bad_lba)) return 0; /* * If the bad lba was reported incorrectly, we have no idea where * the error is. */ start_lba = sectors_to_logical(sdev, blk_rq_pos(req)); end_lba = start_lba + bytes_to_logical(sdev, scsi_bufflen(scmd)); if (bad_lba < start_lba || bad_lba >= end_lba) return 0; /* * resid is optional but mostly filled in. When it's unused, * its value is zero, so we assume the whole buffer transferred */ transferred = scsi_bufflen(scmd) - scsi_get_resid(scmd); /* This computation should always be done in terms of the * resolution of the device's medium. */ good_bytes = logical_to_bytes(sdev, bad_lba - start_lba); return min(good_bytes, transferred); } /** * sd_done - bottom half handler: called when the lower level * driver has completed (successfully or otherwise) a scsi command. * @SCpnt: mid-level's per command structure. * * Note: potentially run from within an ISR. Must not block. **/ static int sd_done(struct scsi_cmnd *SCpnt) { int result = SCpnt->result; unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt); unsigned int sector_size = SCpnt->device->sector_size; unsigned int resid; struct scsi_sense_hdr sshdr; struct request *req = scsi_cmd_to_rq(SCpnt); struct scsi_disk *sdkp = scsi_disk(req->q->disk); int sense_valid = 0; int sense_deferred = 0; switch (req_op(req)) { case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET_ALL: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: if (!result) { good_bytes = blk_rq_bytes(req); scsi_set_resid(SCpnt, 0); } else { good_bytes = 0; scsi_set_resid(SCpnt, blk_rq_bytes(req)); } break; default: /* * In case of bogus fw or device, we could end up having * an unaligned partial completion. Check this here and force * alignment. */ resid = scsi_get_resid(SCpnt); if (resid & (sector_size - 1)) { sd_printk(KERN_INFO, sdkp, "Unaligned partial completion (resid=%u, sector_sz=%u)\n", resid, sector_size); scsi_print_command(SCpnt); resid = min(scsi_bufflen(SCpnt), round_up(resid, sector_size)); scsi_set_resid(SCpnt, resid); } } if (result) { sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr); if (sense_valid) sense_deferred = scsi_sense_is_deferred(&sshdr); } sdkp->medium_access_timed_out = 0; if (!scsi_status_is_check_condition(result) && (!sense_valid || sense_deferred)) goto out; switch (sshdr.sense_key) { case HARDWARE_ERROR: case MEDIUM_ERROR: good_bytes = sd_completed_bytes(SCpnt); break; case RECOVERED_ERROR: good_bytes = scsi_bufflen(SCpnt); break; case NO_SENSE: /* This indicates a false check condition, so ignore it. An * unknown amount of data was transferred so treat it as an * error. */ SCpnt->result = 0; memset(SCpnt->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); break; case ABORTED_COMMAND: if (sshdr.asc == 0x10) /* DIF: Target detected corruption */ good_bytes = sd_completed_bytes(SCpnt); break; case ILLEGAL_REQUEST: switch (sshdr.asc) { case 0x10: /* DIX: Host detected corruption */ good_bytes = sd_completed_bytes(SCpnt); break; case 0x20: /* INVALID COMMAND OPCODE */ case 0x24: /* INVALID FIELD IN CDB */ switch (SCpnt->cmnd[0]) { case UNMAP: sd_disable_discard(sdkp); break; case WRITE_SAME_16: case WRITE_SAME: if (SCpnt->cmnd[1] & 8) { /* UNMAP */ sd_disable_discard(sdkp); } else { sd_disable_write_same(sdkp); req->rq_flags |= RQF_QUIET; } break; } } break; default: break; } out: if (sdkp->device->type == TYPE_ZBC) good_bytes = sd_zbc_complete(SCpnt, good_bytes, &sshdr); SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt, "sd_done: completed %d of %d bytes\n", good_bytes, scsi_bufflen(SCpnt))); return good_bytes; } /* * spinup disk - called only in sd_revalidate_disk() */ static void sd_spinup_disk(struct scsi_disk *sdkp) { static const u8 cmd[10] = { TEST_UNIT_READY }; unsigned long spintime_expire = 0; int spintime, sense_valid = 0; unsigned int the_result; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { /* Do not retry Medium Not Present */ { .sense = UNIT_ATTENTION, .asc = 0x3A, .ascq = SCMD_FAILURE_ASCQ_ANY, .result = SAM_STAT_CHECK_CONDITION, }, { .sense = NOT_READY, .asc = 0x3A, .ascq = SCMD_FAILURE_ASCQ_ANY, .result = SAM_STAT_CHECK_CONDITION, }, /* Retry when scsi_status_is_good would return false 3 times */ { .result = SCMD_FAILURE_STAT_ANY, .allowed = 3, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .failures = &failures, }; spintime = 0; /* Spin up drives, as required. Only do this at boot time */ /* Spinup needs to be done for module loads too. */ do { bool media_was_present = sdkp->media_present; scsi_failures_reset_retries(&failures); the_result = scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, NULL, 0, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (the_result > 0) { /* * If the drive has indicated to us that it doesn't * have any media in it, don't bother with any more * polling. */ if (media_not_present(sdkp, &sshdr)) { if (media_was_present) sd_printk(KERN_NOTICE, sdkp, "Media removed, stopped polling\n"); return; } sense_valid = scsi_sense_valid(&sshdr); } if (!scsi_status_is_check_condition(the_result)) { /* no sense, TUR either succeeded or failed * with a status error */ if(!spintime && !scsi_status_is_good(the_result)) { sd_print_result(sdkp, "Test Unit Ready failed", the_result); } break; } /* * The device does not want the automatic start to be issued. */ if (sdkp->device->no_start_on_add) break; if (sense_valid && sshdr.sense_key == NOT_READY) { if (sshdr.asc == 4 && sshdr.ascq == 3) break; /* manual intervention required */ if (sshdr.asc == 4 && sshdr.ascq == 0xb) break; /* standby */ if (sshdr.asc == 4 && sshdr.ascq == 0xc) break; /* unavailable */ if (sshdr.asc == 4 && sshdr.ascq == 0x1b) break; /* sanitize in progress */ if (sshdr.asc == 4 && sshdr.ascq == 0x24) break; /* depopulation in progress */ if (sshdr.asc == 4 && sshdr.ascq == 0x25) break; /* depopulation restoration in progress */ /* * Issue command to spin up drive when not ready */ if (!spintime) { /* Return immediately and start spin cycle */ const u8 start_cmd[10] = { [0] = START_STOP, [1] = 1, [4] = sdkp->device->start_stop_pwr_cond ? 0x11 : 1, }; sd_printk(KERN_NOTICE, sdkp, "Spinning up disk..."); scsi_execute_cmd(sdkp->device, start_cmd, REQ_OP_DRV_IN, NULL, 0, SD_TIMEOUT, sdkp->max_retries, &exec_args); spintime_expire = jiffies + 100 * HZ; spintime = 1; } /* Wait 1 second for next try */ msleep(1000); printk(KERN_CONT "."); /* * Wait for USB flash devices with slow firmware. * Yes, this sense key/ASC combination shouldn't * occur here. It's characteristic of these devices. */ } else if (sense_valid && sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x28) { if (!spintime) { spintime_expire = jiffies + 5 * HZ; spintime = 1; } /* Wait 1 second for next try */ msleep(1000); } else { /* we don't understand the sense code, so it's * probably pointless to loop */ if(!spintime) { sd_printk(KERN_NOTICE, sdkp, "Unit Not Ready\n"); sd_print_sense_hdr(sdkp, &sshdr); } break; } } while (spintime && time_before_eq(jiffies, spintime_expire)); if (spintime) { if (scsi_status_is_good(the_result)) printk(KERN_CONT "ready\n"); else printk(KERN_CONT "not responding...\n"); } } /* * Determine whether disk supports Data Integrity Field. */ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer) { struct scsi_device *sdp = sdkp->device; u8 type; if (scsi_device_protection(sdp) == 0 || (buffer[12] & 1) == 0) { sdkp->protection_type = 0; return 0; } type = ((buffer[12] >> 1) & 7) + 1; /* P_TYPE 0 = Type 1 */ if (type > T10_PI_TYPE3_PROTECTION) { sd_printk(KERN_ERR, sdkp, "formatted with unsupported" \ " protection type %u. Disabling disk!\n", type); sdkp->protection_type = 0; return -ENODEV; } sdkp->protection_type = type; return 0; } static void sd_config_protection(struct scsi_disk *sdkp, struct queue_limits *lim) { struct scsi_device *sdp = sdkp->device; if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) sd_dif_config_host(sdkp, lim); if (!sdkp->protection_type) return; if (!scsi_host_dif_capable(sdp->host, sdkp->protection_type)) { sd_first_printk(KERN_NOTICE, sdkp, "Disabling DIF Type %u protection\n", sdkp->protection_type); sdkp->protection_type = 0; } sd_first_printk(KERN_NOTICE, sdkp, "Enabling DIF Type %u protection\n", sdkp->protection_type); } static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, struct scsi_sense_hdr *sshdr, int sense_valid, int the_result) { if (sense_valid) sd_print_sense_hdr(sdkp, sshdr); else sd_printk(KERN_NOTICE, sdkp, "Sense not available.\n"); /* * Set dirty bit for removable devices if not ready - * sometimes drives will not report this properly. */ if (sdp->removable && sense_valid && sshdr->sense_key == NOT_READY) set_media_not_present(sdkp); /* * We used to set media_present to 0 here to indicate no media * in the drive, but some drives fail read capacity even with * media present, so we can't do that. */ sdkp->capacity = 0; /* unknown mapped to zero - as usual */ } #define RC16_LEN 32 #if RC16_LEN > SD_BUF_SIZE #error RC16_LEN must not be more than SD_BUF_SIZE #endif #define READ_CAPACITY_RETRIES_ON_RESET 10 static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, struct queue_limits *lim, unsigned char *buffer) { unsigned char cmd[16]; struct scsi_sense_hdr sshdr; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, }; int sense_valid = 0; int the_result; int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET; unsigned int alignment; unsigned long long lba; unsigned sector_size; if (sdp->no_read_capacity_16) return -EINVAL; do { memset(cmd, 0, 16); cmd[0] = SERVICE_ACTION_IN_16; cmd[1] = SAI_READ_CAPACITY_16; cmd[13] = RC16_LEN; memset(buffer, 0, RC16_LEN); the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer, RC16_LEN, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (the_result > 0) { if (media_not_present(sdkp, &sshdr)) return -ENODEV; sense_valid = scsi_sense_valid(&sshdr); if (sense_valid && sshdr.sense_key == ILLEGAL_REQUEST && (sshdr.asc == 0x20 || sshdr.asc == 0x24) && sshdr.ascq == 0x00) /* Invalid Command Operation Code or * Invalid Field in CDB, just retry * silently with RC10 */ return -EINVAL; if (sense_valid && sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29 && sshdr.ascq == 0x00) /* Device reset might occur several times, * give it one more chance */ if (--reset_retries > 0) continue; } retries--; } while (the_result && retries); if (the_result) { sd_print_result(sdkp, "Read Capacity(16) failed", the_result); read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result); return -EINVAL; } sector_size = get_unaligned_be32(&buffer[8]); lba = get_unaligned_be64(&buffer[0]); if (sd_read_protection_type(sdkp, buffer) < 0) { sdkp->capacity = 0; return -ENODEV; } /* Logical blocks per physical block exponent */ sdkp->physical_block_size = (1 << (buffer[13] & 0xf)) * sector_size; /* RC basis */ sdkp->rc_basis = (buffer[12] >> 4) & 0x3; /* Lowest aligned logical block */ alignment = ((buffer[14] & 0x3f) << 8 | buffer[15]) * sector_size; lim->alignment_offset = alignment; if (alignment && sdkp->first_scan) sd_printk(KERN_NOTICE, sdkp, "physical block alignment offset: %u\n", alignment); if (buffer[14] & 0x80) { /* LBPME */ sdkp->lbpme = 1; if (buffer[14] & 0x40) /* LBPRZ */ sdkp->lbprz = 1; } sdkp->capacity = lba + 1; return sector_size; } static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp, unsigned char *buffer) { static const u8 cmd[10] = { READ_CAPACITY }; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { /* Do not retry Medium Not Present */ { .sense = UNIT_ATTENTION, .asc = 0x3A, .result = SAM_STAT_CHECK_CONDITION, }, { .sense = NOT_READY, .asc = 0x3A, .result = SAM_STAT_CHECK_CONDITION, }, /* Device reset might occur several times so retry a lot */ { .sense = UNIT_ATTENTION, .asc = 0x29, .allowed = READ_CAPACITY_RETRIES_ON_RESET, .result = SAM_STAT_CHECK_CONDITION, }, /* Any other error not listed above retry 3 times */ { .result = SCMD_FAILURE_RESULT_ANY, .allowed = 3, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .failures = &failures, }; int sense_valid = 0; int the_result; sector_t lba; unsigned sector_size; memset(buffer, 0, 8); the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer, 8, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (the_result > 0) { sense_valid = scsi_sense_valid(&sshdr); if (media_not_present(sdkp, &sshdr)) return -ENODEV; } if (the_result) { sd_print_result(sdkp, "Read Capacity(10) failed", the_result); read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result); return -EINVAL; } sector_size = get_unaligned_be32(&buffer[4]); lba = get_unaligned_be32(&buffer[0]); if (sdp->no_read_capacity_16 && (lba == 0xffffffff)) { /* Some buggy (usb cardreader) devices return an lba of 0xffffffff when the want to report a size of 0 (with which they really mean no media is present) */ sdkp->capacity = 0; sdkp->physical_block_size = sector_size; return sector_size; } sdkp->capacity = lba + 1; sdkp->physical_block_size = sector_size; return sector_size; } static int sd_try_rc16_first(struct scsi_device *sdp) { if (sdp->host->max_cmd_len < 16) return 0; if (sdp->try_rc_10_first) return 0; if (sdp->scsi_level > SCSI_SPC_2) return 1; if (scsi_device_protection(sdp)) return 1; return 0; } /* * read disk capacity */ static void sd_read_capacity(struct scsi_disk *sdkp, struct queue_limits *lim, unsigned char *buffer) { int sector_size; struct scsi_device *sdp = sdkp->device; if (sd_try_rc16_first(sdp)) { sector_size = read_capacity_16(sdkp, sdp, lim, buffer); if (sector_size == -EOVERFLOW) goto got_data; if (sector_size == -ENODEV) return; if (sector_size < 0) sector_size = read_capacity_10(sdkp, sdp, buffer); if (sector_size < 0) return; } else { sector_size = read_capacity_10(sdkp, sdp, buffer); if (sector_size == -EOVERFLOW) goto got_data; if (sector_size < 0) return; if ((sizeof(sdkp->capacity) > 4) && (sdkp->capacity > 0xffffffffULL)) { int old_sector_size = sector_size; sd_printk(KERN_NOTICE, sdkp, "Very big device. " "Trying to use READ CAPACITY(16).\n"); sector_size = read_capacity_16(sdkp, sdp, lim, buffer); if (sector_size < 0) { sd_printk(KERN_NOTICE, sdkp, "Using 0xffffffff as device size\n"); sdkp->capacity = 1 + (sector_t) 0xffffffff; sector_size = old_sector_size; goto got_data; } /* Remember that READ CAPACITY(16) succeeded */ sdp->try_rc_10_first = 0; } } /* Some devices are known to return the total number of blocks, * not the highest block number. Some devices have versions * which do this and others which do not. Some devices we might * suspect of doing this but we don't know for certain. * * If we know the reported capacity is wrong, decrement it. If * we can only guess, then assume the number of blocks is even * (usually true but not always) and err on the side of lowering * the capacity. */ if (sdp->fix_capacity || (sdp->guess_capacity && (sdkp->capacity & 0x01))) { sd_printk(KERN_INFO, sdkp, "Adjusting the sector count " "from its reported value: %llu\n", (unsigned long long) sdkp->capacity); --sdkp->capacity; } got_data: if (sector_size == 0) { sector_size = 512; sd_printk(KERN_NOTICE, sdkp, "Sector size 0 reported, " "assuming 512.\n"); } if (sector_size != 512 && sector_size != 1024 && sector_size != 2048 && sector_size != 4096) { sd_printk(KERN_NOTICE, sdkp, "Unsupported sector size %d.\n", sector_size); /* * The user might want to re-format the drive with * a supported sectorsize. Once this happens, it * would be relatively trivial to set the thing up. * For this reason, we leave the thing in the table. */ sdkp->capacity = 0; /* * set a bogus sector size so the normal read/write * logic in the block layer will eventually refuse any * request on this device without tripping over power * of two sector size assumptions */ sector_size = 512; } lim->logical_block_size = sector_size; lim->physical_block_size = sdkp->physical_block_size; sdkp->device->sector_size = sector_size; if (sdkp->capacity > 0xffffffff) sdp->use_16_for_rw = 1; } /* * Print disk capacity */ static void sd_print_capacity(struct scsi_disk *sdkp, sector_t old_capacity) { int sector_size = sdkp->device->sector_size; char cap_str_2[10], cap_str_10[10]; if (!sdkp->first_scan && old_capacity == sdkp->capacity) return; string_get_size(sdkp->capacity, sector_size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); string_get_size(sdkp->capacity, sector_size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); sd_printk(KERN_NOTICE, sdkp, "%llu %d-byte logical blocks: (%s/%s)\n", (unsigned long long)sdkp->capacity, sector_size, cap_str_10, cap_str_2); if (sdkp->physical_block_size != sector_size) sd_printk(KERN_NOTICE, sdkp, "%u-byte physical blocks\n", sdkp->physical_block_size); } /* called with buffer of length 512 */ static inline int sd_do_mode_sense(struct scsi_disk *sdkp, int dbd, int modepage, unsigned char *buffer, int len, struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) { /* * If we must use MODE SENSE(10), make sure that the buffer length * is at least 8 bytes so that the mode sense header fits. */ if (sdkp->device->use_10_for_ms && len < 8) len = 8; return scsi_mode_sense(sdkp->device, dbd, modepage, 0, buffer, len, SD_TIMEOUT, sdkp->max_retries, data, sshdr); } /* * read write protect setting, if possible - called only in sd_revalidate_disk() * called with buffer of length SD_BUF_SIZE */ static void sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) { int res; struct scsi_device *sdp = sdkp->device; struct scsi_mode_data data; int old_wp = sdkp->write_prot; set_disk_ro(sdkp->disk, 0); if (sdp->skip_ms_page_3f) { sd_first_printk(KERN_NOTICE, sdkp, "Assuming Write Enabled\n"); return; } if (sdp->use_192_bytes_for_3f) { res = sd_do_mode_sense(sdkp, 0, 0x3F, buffer, 192, &data, NULL); } else { /* * First attempt: ask for all pages (0x3F), but only 4 bytes. * We have to start carefully: some devices hang if we ask * for more than is available. */ res = sd_do_mode_sense(sdkp, 0, 0x3F, buffer, 4, &data, NULL); /* * Second attempt: ask for page 0 When only page 0 is * implemented, a request for page 3F may return Sense Key * 5: Illegal Request, Sense Code 24: Invalid field in * CDB. */ if (res < 0) res = sd_do_mode_sense(sdkp, 0, 0, buffer, 4, &data, NULL); /* * Third attempt: ask 255 bytes, as we did earlier. */ if (res < 0) res = sd_do_mode_sense(sdkp, 0, 0x3F, buffer, 255, &data, NULL); } if (res < 0) { sd_first_printk(KERN_WARNING, sdkp, "Test WP failed, assume Write Enabled\n"); } else { sdkp->write_prot = ((data.device_specific & 0x80) != 0); set_disk_ro(sdkp->disk, sdkp->write_prot); if (sdkp->first_scan || old_wp != sdkp->write_prot) { sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n", sdkp->write_prot ? "on" : "off"); sd_printk(KERN_DEBUG, sdkp, "Mode Sense: %4ph\n", buffer); } } } /* * sd_read_cache_type - called only from sd_revalidate_disk() * called with buffer of length SD_BUF_SIZE */ static void sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer) { int len = 0, res; struct scsi_device *sdp = sdkp->device; int dbd; int modepage; int first_len; struct scsi_mode_data data; struct scsi_sense_hdr sshdr; int old_wce = sdkp->WCE; int old_rcd = sdkp->RCD; int old_dpofua = sdkp->DPOFUA; if (sdkp->cache_override) return; first_len = 4; if (sdp->skip_ms_page_8) { if (sdp->type == TYPE_RBC) goto defaults; else { if (sdp->skip_ms_page_3f) goto defaults; modepage = 0x3F; if (sdp->use_192_bytes_for_3f) first_len = 192; dbd = 0; } } else if (sdp->type == TYPE_RBC) { modepage = 6; dbd = 8; } else { modepage = 8; dbd = 0; } /* cautiously ask */ res = sd_do_mode_sense(sdkp, dbd, modepage, buffer, first_len, &data, &sshdr); if (res < 0) goto bad_sense; if (!data.header_length) { modepage = 6; first_len = 0; sd_first_printk(KERN_ERR, sdkp, "Missing header in MODE_SENSE response\n"); } /* that went OK, now ask for the proper length */ len = data.length; /* * We're only interested in the first three bytes, actually. * But the data cache page is defined for the first 20. */ if (len < 3) goto bad_sense; else if (len > SD_BUF_SIZE) { sd_first_printk(KERN_NOTICE, sdkp, "Truncating mode parameter " "data from %d to %d bytes\n", len, SD_BUF_SIZE); len = SD_BUF_SIZE; } if (modepage == 0x3F && sdp->use_192_bytes_for_3f) len = 192; /* Get the data */ if (len > first_len) res = sd_do_mode_sense(sdkp, dbd, modepage, buffer, len, &data, &sshdr); if (!res) { int offset = data.header_length + data.block_descriptor_length; while (offset < len) { u8 page_code = buffer[offset] & 0x3F; u8 spf = buffer[offset] & 0x40; if (page_code == 8 || page_code == 6) { /* We're interested only in the first 3 bytes. */ if (len - offset <= 2) { sd_first_printk(KERN_ERR, sdkp, "Incomplete mode parameter " "data\n"); goto defaults; } else { modepage = page_code; goto Page_found; } } else { /* Go to the next page */ if (spf && len - offset > 3) offset += 4 + (buffer[offset+2] << 8) + buffer[offset+3]; else if (!spf && len - offset > 1) offset += 2 + buffer[offset+1]; else { sd_first_printk(KERN_ERR, sdkp, "Incomplete mode " "parameter data\n"); goto defaults; } } } sd_first_printk(KERN_WARNING, sdkp, "No Caching mode page found\n"); goto defaults; Page_found: if (modepage == 8) { sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0); sdkp->RCD = ((buffer[offset + 2] & 0x01) != 0); } else { sdkp->WCE = ((buffer[offset + 2] & 0x01) == 0); sdkp->RCD = 0; } sdkp->DPOFUA = (data.device_specific & 0x10) != 0; if (sdp->broken_fua) { sd_first_printk(KERN_NOTICE, sdkp, "Disabling FUA\n"); sdkp->DPOFUA = 0; } else if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw && !sdkp->device->use_16_for_rw) { sd_first_printk(KERN_NOTICE, sdkp, "Uses READ/WRITE(6), disabling FUA\n"); sdkp->DPOFUA = 0; } /* No cache flush allowed for write protected devices */ if (sdkp->WCE && sdkp->write_prot) sdkp->WCE = 0; if (sdkp->first_scan || old_wce != sdkp->WCE || old_rcd != sdkp->RCD || old_dpofua != sdkp->DPOFUA) sd_printk(KERN_NOTICE, sdkp, "Write cache: %s, read cache: %s, %s\n", sdkp->WCE ? "enabled" : "disabled", sdkp->RCD ? "disabled" : "enabled", sdkp->DPOFUA ? "supports DPO and FUA" : "doesn't support DPO or FUA"); return; } bad_sense: if (res == -EIO && scsi_sense_valid(&sshdr) && sshdr.sense_key == ILLEGAL_REQUEST && sshdr.asc == 0x24 && sshdr.ascq == 0x0) /* Invalid field in CDB */ sd_first_printk(KERN_NOTICE, sdkp, "Cache data unavailable\n"); else sd_first_printk(KERN_ERR, sdkp, "Asking for cache data failed\n"); defaults: if (sdp->wce_default_on) { sd_first_printk(KERN_NOTICE, sdkp, "Assuming drive cache: write back\n"); sdkp->WCE = 1; } else { sd_first_printk(KERN_WARNING, sdkp, "Assuming drive cache: write through\n"); sdkp->WCE = 0; } sdkp->RCD = 0; sdkp->DPOFUA = 0; } static bool sd_is_perm_stream(struct scsi_disk *sdkp, unsigned int stream_id) { u8 cdb[16] = { SERVICE_ACTION_IN_16, SAI_GET_STREAM_STATUS }; struct { struct scsi_stream_status_header h; struct scsi_stream_status s; } buf; struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, }; int res; put_unaligned_be16(stream_id, &cdb[4]); put_unaligned_be32(sizeof(buf), &cdb[10]); res = scsi_execute_cmd(sdev, cdb, REQ_OP_DRV_IN, &buf, sizeof(buf), SD_TIMEOUT, sdkp->max_retries, &exec_args); if (res < 0) return false; if (scsi_status_is_check_condition(res) && scsi_sense_valid(&sshdr)) sd_print_sense_hdr(sdkp, &sshdr); if (res) return false; if (get_unaligned_be32(&buf.h.len) < sizeof(struct scsi_stream_status)) return false; return buf.h.stream_status[0].perm; } static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer) { struct scsi_device *sdp = sdkp->device; const struct scsi_io_group_descriptor *desc, *start, *end; u16 permanent_stream_count_old; struct scsi_sense_hdr sshdr; struct scsi_mode_data data; int res; if (sdp->sdev_bflags & BLIST_SKIP_IO_HINTS) return; res = scsi_mode_sense(sdp, /*dbd=*/0x8, /*modepage=*/0x0a, /*subpage=*/0x05, buffer, SD_BUF_SIZE, SD_TIMEOUT, sdkp->max_retries, &data, &sshdr); if (res < 0) return; start = (void *)buffer + data.header_length + 16; end = (void *)buffer + ALIGN_DOWN(data.header_length + data.length, sizeof(*end)); /* * From "SBC-5 Constrained Streams with Data Lifetimes": Device severs * should assign the lowest numbered stream identifiers to permanent * streams. */ for (desc = start; desc < end; desc++) if (!desc->st_enble || !sd_is_perm_stream(sdkp, desc - start)) break; permanent_stream_count_old = sdkp->permanent_stream_count; sdkp->permanent_stream_count = desc - start; if (sdkp->rscs && sdkp->permanent_stream_count < 2) sd_printk(KERN_INFO, sdkp, "Unexpected: RSCS has been set and the permanent stream count is %u\n", sdkp->permanent_stream_count); else if (sdkp->permanent_stream_count != permanent_stream_count_old) sd_printk(KERN_INFO, sdkp, "permanent stream count = %d\n", sdkp->permanent_stream_count); } /* * The ATO bit indicates whether the DIF application tag is available * for use by the operating system. */ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer) { int res, offset; struct scsi_device *sdp = sdkp->device; struct scsi_mode_data data; struct scsi_sense_hdr sshdr; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC) return; if (sdkp->protection_type == 0) return; res = scsi_mode_sense(sdp, 1, 0x0a, 0, buffer, 36, SD_TIMEOUT, sdkp->max_retries, &data, &sshdr); if (res < 0 || !data.header_length || data.length < 6) { sd_first_printk(KERN_WARNING, sdkp, "getting Control mode page failed, assume no ATO\n"); if (res == -EIO && scsi_sense_valid(&sshdr)) sd_print_sense_hdr(sdkp, &sshdr); return; } offset = data.header_length + data.block_descriptor_length; if ((buffer[offset] & 0x3f) != 0x0a) { sd_first_printk(KERN_ERR, sdkp, "ATO Got wrong page\n"); return; } if ((buffer[offset + 5] & 0x80) == 0) return; sdkp->ATO = 1; return; } static unsigned int sd_discard_mode(struct scsi_disk *sdkp) { if (!sdkp->lbpme) return SD_LBP_FULL; if (!sdkp->lbpvpd) { /* LBP VPD page not provided */ if (sdkp->max_unmap_blocks) return SD_LBP_UNMAP; return SD_LBP_WS16; } /* LBP VPD page tells us what to use */ if (sdkp->lbpu && sdkp->max_unmap_blocks) return SD_LBP_UNMAP; if (sdkp->lbpws) return SD_LBP_WS16; if (sdkp->lbpws10) return SD_LBP_WS10; return SD_LBP_DISABLE; } /* * Query disk device for preferred I/O sizes. */ static void sd_read_block_limits(struct scsi_disk *sdkp, struct queue_limits *lim) { struct scsi_vpd *vpd; rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb0); if (!vpd || vpd->len < 16) goto out; sdkp->min_xfer_blocks = get_unaligned_be16(&vpd->data[6]); sdkp->max_xfer_blocks = get_unaligned_be32(&vpd->data[8]); sdkp->opt_xfer_blocks = get_unaligned_be32(&vpd->data[12]); if (vpd->len >= 64) { unsigned int lba_count, desc_count; sdkp->max_ws_blocks = (u32)get_unaligned_be64(&vpd->data[36]); if (!sdkp->lbpme) goto config_atomic; lba_count = get_unaligned_be32(&vpd->data[20]); desc_count = get_unaligned_be32(&vpd->data[24]); if (lba_count && desc_count) sdkp->max_unmap_blocks = lba_count; sdkp->unmap_granularity = get_unaligned_be32(&vpd->data[28]); if (vpd->data[32] & 0x80) sdkp->unmap_alignment = get_unaligned_be32(&vpd->data[32]) & ~(1 << 31); config_atomic: sdkp->max_atomic = get_unaligned_be32(&vpd->data[44]); sdkp->atomic_alignment = get_unaligned_be32(&vpd->data[48]); sdkp->atomic_granularity = get_unaligned_be32(&vpd->data[52]); sdkp->max_atomic_with_boundary = get_unaligned_be32(&vpd->data[56]); sdkp->max_atomic_boundary = get_unaligned_be32(&vpd->data[60]); sd_config_atomic(sdkp, lim); } out: rcu_read_unlock(); } /* Parse the Block Limits Extension VPD page (0xb7) */ static void sd_read_block_limits_ext(struct scsi_disk *sdkp) { struct scsi_vpd *vpd; rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb7); if (vpd && vpd->len >= 2) sdkp->rscs = vpd->data[5] & 1; rcu_read_unlock(); } /* Query block device characteristics */ static void sd_read_block_characteristics(struct scsi_disk *sdkp, struct queue_limits *lim) { struct scsi_vpd *vpd; u16 rot; rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb1); if (!vpd || vpd->len <= 8) { rcu_read_unlock(); return; } rot = get_unaligned_be16(&vpd->data[4]); sdkp->zoned = (vpd->data[8] >> 4) & 3; rcu_read_unlock(); if (rot == 1) lim->features &= ~(BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); if (!sdkp->first_scan) return; if (sdkp->device->type == TYPE_ZBC) sd_printk(KERN_NOTICE, sdkp, "Host-managed zoned block device\n"); else if (sdkp->zoned == 1) sd_printk(KERN_NOTICE, sdkp, "Host-aware SMR disk used as regular disk\n"); else if (sdkp->zoned == 2) sd_printk(KERN_NOTICE, sdkp, "Drive-managed SMR disk\n"); } /** * sd_read_block_provisioning - Query provisioning VPD page * @sdkp: disk to query */ static void sd_read_block_provisioning(struct scsi_disk *sdkp) { struct scsi_vpd *vpd; if (sdkp->lbpme == 0) return; rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb2); if (!vpd || vpd->len < 8) { rcu_read_unlock(); return; } sdkp->lbpvpd = 1; sdkp->lbpu = (vpd->data[5] >> 7) & 1; /* UNMAP */ sdkp->lbpws = (vpd->data[5] >> 6) & 1; /* WRITE SAME(16) w/ UNMAP */ sdkp->lbpws10 = (vpd->data[5] >> 5) & 1; /* WRITE SAME(10) w/ UNMAP */ rcu_read_unlock(); } static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer) { struct scsi_device *sdev = sdkp->device; if (sdev->host->no_write_same) { sdev->no_write_same = 1; return; } if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, INQUIRY, 0) < 0) { struct scsi_vpd *vpd; sdev->no_report_opcodes = 1; /* Disable WRITE SAME if REPORT SUPPORTED OPERATION * CODES is unsupported and the device has an ATA * Information VPD page (SAT). */ rcu_read_lock(); vpd = rcu_dereference(sdev->vpd_pg89); if (vpd) sdev->no_write_same = 1; rcu_read_unlock(); } if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME_16, 0) == 1) sdkp->ws16 = 1; if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME, 0) == 1) sdkp->ws10 = 1; } static void sd_read_security(struct scsi_disk *sdkp, unsigned char *buffer) { struct scsi_device *sdev = sdkp->device; if (!sdev->security_supported) return; if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, SECURITY_PROTOCOL_IN, 0) == 1 && scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, SECURITY_PROTOCOL_OUT, 0) == 1) sdkp->security = 1; } static inline sector_t sd64_to_sectors(struct scsi_disk *sdkp, u8 *buf) { return logical_to_sectors(sdkp->device, get_unaligned_be64(buf)); } /** * sd_read_cpr - Query concurrent positioning ranges * @sdkp: disk to query */ static void sd_read_cpr(struct scsi_disk *sdkp) { struct blk_independent_access_ranges *iars = NULL; unsigned char *buffer = NULL; unsigned int nr_cpr = 0; int i, vpd_len, buf_len = SD_BUF_SIZE; u8 *desc; /* * We need to have the capacity set first for the block layer to be * able to check the ranges. */ if (sdkp->first_scan) return; if (!sdkp->capacity) goto out; /* * Concurrent Positioning Ranges VPD: there can be at most 256 ranges, * leading to a maximum page size of 64 + 256*32 bytes. */ buf_len = 64 + 256*32; buffer = kmalloc(buf_len, GFP_KERNEL); if (!buffer || scsi_get_vpd_page(sdkp->device, 0xb9, buffer, buf_len)) goto out; /* We must have at least a 64B header and one 32B range descriptor */ vpd_len = get_unaligned_be16(&buffer[2]) + 4; if (vpd_len > buf_len || vpd_len < 64 + 32 || (vpd_len & 31)) { sd_printk(KERN_ERR, sdkp, "Invalid Concurrent Positioning Ranges VPD page\n"); goto out; } nr_cpr = (vpd_len - 64) / 32; if (nr_cpr == 1) { nr_cpr = 0; goto out; } iars = disk_alloc_independent_access_ranges(sdkp->disk, nr_cpr); if (!iars) { nr_cpr = 0; goto out; } desc = &buffer[64]; for (i = 0; i < nr_cpr; i++, desc += 32) { if (desc[0] != i) { sd_printk(KERN_ERR, sdkp, "Invalid Concurrent Positioning Range number\n"); nr_cpr = 0; break; } iars->ia_range[i].sector = sd64_to_sectors(sdkp, desc + 8); iars->ia_range[i].nr_sectors = sd64_to_sectors(sdkp, desc + 16); } out: disk_set_independent_access_ranges(sdkp->disk, iars); if (nr_cpr && sdkp->nr_actuators != nr_cpr) { sd_printk(KERN_NOTICE, sdkp, "%u concurrent positioning ranges\n", nr_cpr); sdkp->nr_actuators = nr_cpr; } kfree(buffer); } static bool sd_validate_min_xfer_size(struct scsi_disk *sdkp) { struct scsi_device *sdp = sdkp->device; unsigned int min_xfer_bytes = logical_to_bytes(sdp, sdkp->min_xfer_blocks); if (sdkp->min_xfer_blocks == 0) return false; if (min_xfer_bytes & (sdkp->physical_block_size - 1)) { sd_first_printk(KERN_WARNING, sdkp, "Preferred minimum I/O size %u bytes not a " \ "multiple of physical block size (%u bytes)\n", min_xfer_bytes, sdkp->physical_block_size); sdkp->min_xfer_blocks = 0; return false; } sd_first_printk(KERN_INFO, sdkp, "Preferred minimum I/O size %u bytes\n", min_xfer_bytes); return true; } /* * Determine the device's preferred I/O size for reads and writes * unless the reported value is unreasonably small, large, not a * multiple of the physical block size, or simply garbage. */ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, unsigned int dev_max) { struct scsi_device *sdp = sdkp->device; unsigned int opt_xfer_bytes = logical_to_bytes(sdp, sdkp->opt_xfer_blocks); unsigned int min_xfer_bytes = logical_to_bytes(sdp, sdkp->min_xfer_blocks); if (sdkp->opt_xfer_blocks == 0) return false; if (sdkp->opt_xfer_blocks > dev_max) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u logical blocks " \ "> dev_max (%u logical blocks)\n", sdkp->opt_xfer_blocks, dev_max); return false; } if (sdkp->opt_xfer_blocks > SD_DEF_XFER_BLOCKS) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u logical blocks " \ "> sd driver limit (%u logical blocks)\n", sdkp->opt_xfer_blocks, SD_DEF_XFER_BLOCKS); return false; } if (opt_xfer_bytes < PAGE_SIZE) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u bytes < " \ "PAGE_SIZE (%u bytes)\n", opt_xfer_bytes, (unsigned int)PAGE_SIZE); return false; } if (min_xfer_bytes && opt_xfer_bytes % min_xfer_bytes) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u bytes not a " \ "multiple of preferred minimum block " \ "size (%u bytes)\n", opt_xfer_bytes, min_xfer_bytes); return false; } if (opt_xfer_bytes & (sdkp->physical_block_size - 1)) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u bytes not a " \ "multiple of physical block size (%u bytes)\n", opt_xfer_bytes, sdkp->physical_block_size); return false; } sd_first_printk(KERN_INFO, sdkp, "Optimal transfer size %u bytes\n", opt_xfer_bytes); return true; } static void sd_read_block_zero(struct scsi_disk *sdkp) { struct scsi_device *sdev = sdkp->device; unsigned int buf_len = sdev->sector_size; u8 *buffer, cmd[16] = { }; buffer = kmalloc(buf_len, GFP_KERNEL); if (!buffer) return; if (sdev->use_16_for_rw) { cmd[0] = READ_16; put_unaligned_be64(0, &cmd[2]); /* Logical block address 0 */ put_unaligned_be32(1, &cmd[10]);/* Transfer 1 logical block */ } else { cmd[0] = READ_10; put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ } scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, SD_TIMEOUT, sdkp->max_retries, NULL); kfree(buffer); } /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. * @disk: struct gendisk we care about **/ static int sd_revalidate_disk(struct gendisk *disk) { struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; sector_t old_capacity = sdkp->capacity; struct queue_limits lim; unsigned char *buffer; unsigned int dev_max; int err; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_revalidate_disk\n")); /* * If the device is offline, don't try and read capacity or any * of the other niceties. */ if (!scsi_device_online(sdp)) goto out; buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL); if (!buffer) { sd_printk(KERN_WARNING, sdkp, "sd_revalidate_disk: Memory " "allocation failure.\n"); goto out; } sd_spinup_disk(sdkp); lim = queue_limits_start_update(sdkp->disk->queue); /* * Without media there is no reason to ask; moreover, some devices * react badly if we do. */ if (sdkp->media_present) { sd_read_capacity(sdkp, &lim, buffer); /* * Some USB/UAS devices return generic values for mode pages * until the media has been accessed. Trigger a READ operation * to force the device to populate mode pages. */ if (sdp->read_before_ms) sd_read_block_zero(sdkp); /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will * cause this to be updated correctly and any device which * doesn't support it should be treated as rotational. */ lim.features |= (BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); if (scsi_device_supports_vpd(sdp)) { sd_read_block_provisioning(sdkp); sd_read_block_limits(sdkp, &lim); sd_read_block_limits_ext(sdkp); sd_read_block_characteristics(sdkp, &lim); sd_zbc_read_zones(sdkp, &lim, buffer); } sd_config_discard(sdkp, &lim, sd_discard_mode(sdkp)); sd_print_capacity(sdkp, old_capacity); sd_read_write_protect_flag(sdkp, buffer); sd_read_cache_type(sdkp, buffer); sd_read_io_hints(sdkp, buffer); sd_read_app_tag_own(sdkp, buffer); sd_read_write_same(sdkp, buffer); sd_read_security(sdkp, buffer); sd_config_protection(sdkp, &lim); } /* * We now have all cache related info, determine how we deal * with flush requests. */ sd_set_flush_flag(sdkp, &lim); /* Initial block count limit based on CDB TRANSFER LENGTH field size. */ dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS; /* Some devices report a maximum block count for READ/WRITE requests. */ dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks); lim.max_dev_sectors = logical_to_sectors(sdp, dev_max); if (sd_validate_min_xfer_size(sdkp)) lim.io_min = logical_to_bytes(sdp, sdkp->min_xfer_blocks); else lim.io_min = 0; /* * Limit default to SCSI host optimal sector limit if set. There may be * an impact on performance for when the size of a request exceeds this * host limit. */ lim.io_opt = sdp->host->opt_sectors << SECTOR_SHIFT; if (sd_validate_opt_xfer_size(sdkp, dev_max)) { lim.io_opt = min_not_zero(lim.io_opt, logical_to_bytes(sdp, sdkp->opt_xfer_blocks)); } sdkp->first_scan = 0; set_capacity_and_notify(disk, logical_to_sectors(sdp, sdkp->capacity)); sd_config_write_same(sdkp, &lim); kfree(buffer); err = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (err) return err; /* * Query concurrent positioning ranges after * queue_limits_commit_update() unlocked q->limits_lock to avoid * deadlock with q->sysfs_dir_lock and q->sysfs_lock. */ if (sdkp->media_present && scsi_device_supports_vpd(sdp)) sd_read_cpr(sdkp); /* * For a zoned drive, revalidating the zones can be done only once * the gendisk capacity is set. So if this fails, set back the gendisk * capacity to 0. */ if (sd_zbc_revalidate_zones(sdkp)) set_capacity_and_notify(disk, 0); out: return 0; } /** * sd_unlock_native_capacity - unlock native capacity * @disk: struct gendisk to set capacity for * * Block layer calls this function if it detects that partitions * on @disk reach beyond the end of the device. If the SCSI host * implements ->unlock_native_capacity() method, it's invoked to * give it a chance to adjust the device capacity. * * CONTEXT: * Defined by block layer. Might sleep. */ static void sd_unlock_native_capacity(struct gendisk *disk) { struct scsi_device *sdev = scsi_disk(disk)->device; if (sdev->host->hostt->unlock_native_capacity) sdev->host->hostt->unlock_native_capacity(sdev); } /** * sd_format_disk_name - format disk name * @prefix: name prefix - ie. "sd" for SCSI disks * @index: index of the disk to format name for * @buf: output buffer * @buflen: length of the output buffer * * SCSI disk names starts at sda. The 26th device is sdz and the * 27th is sdaa. The last one for two lettered suffix is sdzz * which is followed by sdaaa. * * This is basically 26 base counting with one extra 'nil' entry * at the beginning from the second digit on and can be * determined using similar method as 26 base conversion with the * index shifted -1 after each digit is computed. * * CONTEXT: * Don't care. * * RETURNS: * 0 on success, -errno on failure. */ static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen) { const int base = 'z' - 'a' + 1; char *begin = buf + strlen(prefix); char *end = buf + buflen; char *p; int unit; p = end - 1; *p = '\0'; unit = base; do { if (p == begin) return -EINVAL; *--p = 'a' + (index % unit); index = (index / unit) - 1; } while (index >= 0); memmove(begin, p, end - p); memcpy(buf, prefix, strlen(prefix)); return 0; } /** * sd_probe - called during driver initialization and whenever a * new scsi device is attached to the system. It is called once * for each scsi device (not just disks) present. * @dev: pointer to device object * * Returns 0 if successful (or not interested in this scsi device * (e.g. scanner)); 1 when there is an error. * * Note: this function is invoked from the scsi mid-level. * This function sets up the mapping between a given * <host,channel,id,lun> (found in sdp) and new device name * (e.g. /dev/sda). More precisely it is the block device major * and minor number that is chosen here. * * Assume sd_probe is not re-entrant (for time being) * Also think about sd_probe() and sd_remove() running coincidentally. **/ static int sd_probe(struct device *dev) { struct scsi_device *sdp = to_scsi_device(dev); struct scsi_disk *sdkp; struct gendisk *gd; int index; int error; scsi_autopm_get_device(sdp); error = -ENODEV; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC) goto out; if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) && sdp->type == TYPE_ZBC) { sdev_printk(KERN_WARNING, sdp, "Unsupported ZBC host-managed device.\n"); goto out; } SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp, "sd_probe\n")); error = -ENOMEM; sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL); if (!sdkp) goto out; gd = blk_mq_alloc_disk_for_queue(sdp->request_queue, &sd_bio_compl_lkclass); if (!gd) goto out_free; index = ida_alloc(&sd_index_ida, GFP_KERNEL); if (index < 0) { sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n"); goto out_put; } error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); if (error) { sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n"); goto out_free_index; } sdkp->device = sdp; sdkp->disk = gd; sdkp->index = index; sdkp->max_retries = SD_MAX_RETRIES; atomic_set(&sdkp->openers, 0); atomic_set(&sdkp->device->ioerr_cnt, 0); if (!sdp->request_queue->rq_timeout) { if (sdp->type != TYPE_MOD) blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT); else blk_queue_rq_timeout(sdp->request_queue, SD_MOD_TIMEOUT); } device_initialize(&sdkp->disk_dev); sdkp->disk_dev.parent = get_device(dev); sdkp->disk_dev.class = &sd_disk_class; dev_set_name(&sdkp->disk_dev, "%s", dev_name(dev)); error = device_add(&sdkp->disk_dev); if (error) { put_device(&sdkp->disk_dev); goto out; } dev_set_drvdata(dev, sdkp); gd->major = sd_major((index & 0xf0) >> 4); gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); gd->minors = SD_MINORS; gd->fops = &sd_fops; gd->private_data = sdkp; /* defaults, until the device tells us otherwise */ sdp->sector_size = 512; sdkp->capacity = 0; sdkp->media_present = 1; sdkp->write_prot = 0; sdkp->cache_override = 0; sdkp->WCE = 0; sdkp->RCD = 0; sdkp->ATO = 0; sdkp->first_scan = 1; sdkp->max_medium_access_timeouts = SD_MAX_MEDIUM_TIMEOUTS; sd_revalidate_disk(gd); if (sdp->removable) { gd->flags |= GENHD_FL_REMOVABLE; gd->events |= DISK_EVENT_MEDIA_CHANGE; gd->event_flags = DISK_EVENT_FLAG_POLL | DISK_EVENT_FLAG_UEVENT; } blk_pm_runtime_init(sdp->request_queue, dev); if (sdp->rpm_autosuspend) { pm_runtime_set_autosuspend_delay(dev, sdp->host->rpm_autosuspend_delay); } error = device_add_disk(dev, gd, NULL); if (error) { device_unregister(&sdkp->disk_dev); put_disk(gd); goto out; } if (sdkp->security) { sdkp->opal_dev = init_opal_dev(sdkp, &sd_sec_submit); if (sdkp->opal_dev) sd_printk(KERN_NOTICE, sdkp, "supports TCG Opal\n"); } sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n", sdp->removable ? "removable " : ""); scsi_autopm_put_device(sdp); return 0; out_free_index: ida_free(&sd_index_ida, index); out_put: put_disk(gd); out_free: kfree(sdkp); out: scsi_autopm_put_device(sdp); return error; } /** * sd_remove - called whenever a scsi disk (previously recognized by * sd_probe) is detached from the system. It is called (potentially * multiple times) during sd module unload. * @dev: pointer to device object * * Note: this function is invoked from the scsi mid-level. * This function potentially frees up a device name (e.g. /dev/sdc) * that could be re-used by a subsequent sd_probe(). * This function is not called when the built-in sd driver is "exit-ed". **/ static int sd_remove(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); scsi_autopm_get_device(sdkp->device); device_del(&sdkp->disk_dev); del_gendisk(sdkp->disk); if (!sdkp->suspended) sd_shutdown(dev); put_disk(sdkp->disk); return 0; } static void scsi_disk_release(struct device *dev) { struct scsi_disk *sdkp = to_scsi_disk(dev); ida_free(&sd_index_ida, sdkp->index); put_device(&sdkp->device->sdev_gendev); free_opal_dev(sdkp->opal_dev); kfree(sdkp); } static int sd_start_stop_device(struct scsi_disk *sdkp, int start) { unsigned char cmd[6] = { START_STOP }; /* START_VALID */ struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { { /* Power on, reset, or bus device reset occurred */ .sense = UNIT_ATTENTION, .asc = 0x29, .ascq = 0, .result = SAM_STAT_CHECK_CONDITION, }, { /* Power on occurred */ .sense = UNIT_ATTENTION, .asc = 0x29, .ascq = 1, .result = SAM_STAT_CHECK_CONDITION, }, { /* SCSI bus reset */ .sense = UNIT_ATTENTION, .asc = 0x29, .ascq = 2, .result = SAM_STAT_CHECK_CONDITION, }, {} }; struct scsi_failures failures = { .total_allowed = 3, .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .req_flags = BLK_MQ_REQ_PM, .failures = &failures, }; struct scsi_device *sdp = sdkp->device; int res; if (start) cmd[4] |= 1; /* START */ if (sdp->start_stop_pwr_cond) cmd[4] |= start ? 1 << 4 : 3 << 4; /* Active or Standby */ if (!scsi_device_online(sdp)) return -ENODEV; res = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (res) { sd_print_result(sdkp, "Start/Stop Unit failed", res); if (res > 0 && scsi_sense_valid(&sshdr)) { sd_print_sense_hdr(sdkp, &sshdr); /* 0x3a is medium not present */ if (sshdr.asc == 0x3a) res = 0; } } /* SCSI error codes must not go to the generic layer */ if (res) return -EIO; return 0; } /* * Send a SYNCHRONIZE CACHE instruction down to the device through * the normal SCSI command structure. Wait for the command to * complete. */ static void sd_shutdown(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); if (!sdkp) return; /* this can happen */ if (pm_runtime_suspended(dev)) return; if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); sd_sync_cache(sdkp); } if ((system_state != SYSTEM_RESTART && sdkp->device->manage_system_start_stop) || (system_state == SYSTEM_POWER_OFF && sdkp->device->manage_shutdown)) { sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); sd_start_stop_device(sdkp, 0); } } static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime) { return (sdev->manage_system_start_stop && !runtime) || (sdev->manage_runtime_start_stop && runtime); } static int sd_suspend_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret = 0; if (!sdkp) /* E.g.: runtime suspend following sd_remove() */ return 0; if (sdkp->WCE && sdkp->media_present) { if (!sdkp->device->silence_suspend) sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); ret = sd_sync_cache(sdkp); /* ignore OFFLINE device */ if (ret == -ENODEV) return 0; if (ret) return ret; } if (sd_do_start_stop(sdkp->device, runtime)) { if (!sdkp->device->silence_suspend) sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); /* an error is not worth aborting a system sleep */ ret = sd_start_stop_device(sdkp, 0); if (!runtime) ret = 0; } if (!ret) sdkp->suspended = true; return ret; } static int sd_suspend_system(struct device *dev) { if (pm_runtime_suspended(dev)) return 0; return sd_suspend_common(dev, false); } static int sd_suspend_runtime(struct device *dev) { return sd_suspend_common(dev, true); } static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); if (opal_unlock_from_suspend(sdkp->opal_dev)) { sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); return -EIO; } return 0; } static int sd_resume_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret; if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; if (!sd_do_start_stop(sdkp->device, runtime)) { sdkp->suspended = false; return 0; } sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { sd_resume(dev); sdkp->suspended = false; } return ret; } static int sd_resume_system(struct device *dev) { if (pm_runtime_suspended(dev)) { struct scsi_disk *sdkp = dev_get_drvdata(dev); struct scsi_device *sdp = sdkp ? sdkp->device : NULL; if (sdp && sdp->force_runtime_start_on_system_start) pm_request_resume(dev); return 0; } return sd_resume_common(dev, false); } static int sd_resume_runtime(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); struct scsi_device *sdp; if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; sdp = sdkp->device; if (sdp->ignore_media_change) { /* clear the device's sense data */ static const u8 cmd[10] = { REQUEST_SENSE }; const struct scsi_exec_args exec_args = { .req_flags = BLK_MQ_REQ_PM, }; if (scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, sdp->request_queue->rq_timeout, 1, &exec_args)) sd_printk(KERN_NOTICE, sdkp, "Failed to clear sense data\n"); } return sd_resume_common(dev, true); } static const struct dev_pm_ops sd_pm_ops = { .suspend = sd_suspend_system, .resume = sd_resume_system, .poweroff = sd_suspend_system, .restore = sd_resume_system, .runtime_suspend = sd_suspend_runtime, .runtime_resume = sd_resume_runtime, }; static struct scsi_driver sd_template = { .gendrv = { .name = "sd", .probe = sd_probe, .probe_type = PROBE_PREFER_ASYNCHRONOUS, .remove = sd_remove, .shutdown = sd_shutdown, .pm = &sd_pm_ops, }, .rescan = sd_rescan, .resume = sd_resume, .init_command = sd_init_command, .uninit_command = sd_uninit_command, .done = sd_done, .eh_action = sd_eh_action, .eh_reset = sd_eh_reset, }; /** * init_sd - entry point for this driver (both when built in or when * a module). * * Note: this function registers this driver with the scsi mid-level. **/ static int __init init_sd(void) { int majors = 0, i, err; SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); for (i = 0; i < SD_MAJORS; i++) { if (__register_blkdev(sd_major(i), "sd", sd_default_probe)) continue; majors++; } if (!majors) return -ENODEV; err = class_register(&sd_disk_class); if (err) goto err_out; sd_page_pool = mempool_create_page_pool(SD_MEMPOOL_SIZE, 0); if (!sd_page_pool) { printk(KERN_ERR "sd: can't init discard page pool\n"); err = -ENOMEM; goto err_out_class; } err = scsi_register_driver(&sd_template.gendrv); if (err) goto err_out_driver; return 0; err_out_driver: mempool_destroy(sd_page_pool); err_out_class: class_unregister(&sd_disk_class); err_out: for (i = 0; i < SD_MAJORS; i++) unregister_blkdev(sd_major(i), "sd"); return err; } /** * exit_sd - exit point for this driver (when it is a module). * * Note: this function unregisters this driver from the scsi mid-level. **/ static void __exit exit_sd(void) { int i; SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n")); scsi_unregister_driver(&sd_template.gendrv); mempool_destroy(sd_page_pool); class_unregister(&sd_disk_class); for (i = 0; i < SD_MAJORS; i++) unregister_blkdev(sd_major(i), "sd"); } module_init(init_sd); module_exit(exit_sd); void sd_print_sense_hdr(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) { scsi_print_sense_hdr(sdkp->device, sdkp->disk ? sdkp->disk->disk_name : NULL, sshdr); } void sd_print_result(const struct scsi_disk *sdkp, const char *msg, int result) { const char *hb_string = scsi_hostbyte_string(result); if (hb_string) sd_printk(KERN_INFO, sdkp, "%s: Result: hostbyte=%s driverbyte=%s\n", msg, hb_string ? hb_string : "invalid", "DRIVER_OK"); else sd_printk(KERN_INFO, sdkp, "%s: Result: hostbyte=0x%02x driverbyte=%s\n", msg, host_byte(result), "DRIVER_OK"); }
1 2 8 2 6 8 8 9 9 8 1 1 8 9 9 10 1 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 // SPDX-License-Identifier: GPL-2.0-only /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * Monitoring SMC transport protocol sockets * * Copyright IBM Corp. 2016 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> #include <linux/init.h> #include <linux/sock_diag.h> #include <linux/inet_diag.h> #include <linux/smc_diag.h> #include <net/netlink.h> #include <net/smc.h> #include "smc.h" #include "smc_core.h" #include "smc_ism.h" struct smc_diag_dump_ctx { int pos[2]; }; static struct smc_diag_dump_ctx *smc_dump_context(struct netlink_callback *cb) { return (struct smc_diag_dump_ctx *)cb->ctx; } static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) { struct smc_sock *smc = smc_sk(sk); memset(r, 0, sizeof(*r)); r->diag_family = sk->sk_family; sock_diag_save_cookie(sk, r->id.idiag_cookie); if (!smc->clcsock) return; r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); r->id.idiag_dport = smc->clcsock->sk->sk_dport; r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; if (sk->sk_protocol == SMCPROTO_SMC) { r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; #if IS_ENABLED(CONFIG_IPV6) } else if (sk->sk_protocol == SMCPROTO_SMC6) { memcpy(&r->id.idiag_src, &smc->clcsock->sk->sk_v6_rcv_saddr, sizeof(smc->clcsock->sk->sk_v6_rcv_saddr)); memcpy(&r->id.idiag_dst, &smc->clcsock->sk->sk_v6_daddr, sizeof(smc->clcsock->sk->sk_v6_daddr)); #endif } } static int smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct smc_diag_msg *r, struct user_namespace *user_ns) { if (nla_put_u8(skb, SMC_DIAG_SHUTDOWN, sk->sk_shutdown)) return 1; r->diag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->diag_inode = sock_i_ino(sk); return 0; } static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, const struct smc_diag_req *req, struct nlattr *bc) { struct smc_sock *smc = smc_sk(sk); struct smc_diag_fallback fallback; struct user_namespace *user_ns; struct smc_diag_msg *r; struct nlmsghdr *nlh; nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*r), NLM_F_MULTI); if (!nlh) return -EMSGSIZE; r = nlmsg_data(nlh); smc_diag_msg_common_fill(r, sk); r->diag_state = sk->sk_state; if (smc->use_fallback) r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP; else if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd) r->diag_mode = SMC_DIAG_MODE_SMCD; else r->diag_mode = SMC_DIAG_MODE_SMCR; user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk); if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) goto errout; fallback.reason = smc->fallback_rsn; fallback.peer_diagnosis = smc->peer_diagnosis; if (nla_put(skb, SMC_DIAG_FALLBACK, sizeof(fallback), &fallback) < 0) goto errout; if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.alert_token_local) { struct smc_connection *conn = &smc->conn; struct smc_diag_conninfo cinfo = { .token = conn->alert_token_local, .sndbuf_size = conn->sndbuf_desc ? conn->sndbuf_desc->len : 0, .rmbe_size = conn->rmb_desc ? conn->rmb_desc->len : 0, .peer_rmbe_size = conn->peer_rmbe_size, .rx_prod.wrap = conn->local_rx_ctrl.prod.wrap, .rx_prod.count = conn->local_rx_ctrl.prod.count, .rx_cons.wrap = conn->local_rx_ctrl.cons.wrap, .rx_cons.count = conn->local_rx_ctrl.cons.count, .tx_prod.wrap = conn->local_tx_ctrl.prod.wrap, .tx_prod.count = conn->local_tx_ctrl.prod.count, .tx_cons.wrap = conn->local_tx_ctrl.cons.wrap, .tx_cons.count = conn->local_tx_ctrl.cons.count, .tx_prod_flags = *(u8 *)&conn->local_tx_ctrl.prod_flags, .tx_conn_state_flags = *(u8 *)&conn->local_tx_ctrl.conn_state_flags, .rx_prod_flags = *(u8 *)&conn->local_rx_ctrl.prod_flags, .rx_conn_state_flags = *(u8 *)&conn->local_rx_ctrl.conn_state_flags, .tx_prep.wrap = conn->tx_curs_prep.wrap, .tx_prep.count = conn->tx_curs_prep.count, .tx_sent.wrap = conn->tx_curs_sent.wrap, .tx_sent.count = conn->tx_curs_sent.count, .tx_fin.wrap = conn->tx_curs_fin.wrap, .tx_fin.count = conn->tx_curs_fin.count, }; if (nla_put(skb, SMC_DIAG_CONNINFO, sizeof(cinfo), &cinfo) < 0) goto errout; } if (smc_conn_lgr_valid(&smc->conn) && !smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_link *link = smc->conn.lnk; struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, .lnk[0].ibport = link->ibport, .lnk[0].link_id = link->link_id, }; memcpy(linfo.lnk[0].ibname, link->smcibdev->ibdev->name, sizeof(link->smcibdev->ibdev->name)); smc_gid_be16_convert(linfo.lnk[0].gid, link->gid); smc_gid_be16_convert(linfo.lnk[0].peer_gid, link->peer_gid); if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0) goto errout; } if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) { struct smc_connection *conn = &smc->conn; struct smcd_diag_dmbinfo dinfo; struct smcd_dev *smcd = conn->lgr->smcd; struct smcd_gid smcd_gid; memset(&dinfo, 0, sizeof(dinfo)); dinfo.linkid = *((u32 *)conn->lgr->id); dinfo.peer_gid = conn->lgr->peer_gid.gid; dinfo.peer_gid_ext = conn->lgr->peer_gid.gid_ext; smcd->ops->get_local_gid(smcd, &smcd_gid); dinfo.my_gid = smcd_gid.gid; dinfo.my_gid_ext = smcd_gid.gid_ext; dinfo.token = conn->rmb_desc->token; dinfo.peer_token = conn->peer_token; if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0) goto errout; } nlmsg_end(skb, nlh); return 0; errout: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int smc_diag_dump_proto(struct proto *prot, struct sk_buff *skb, struct netlink_callback *cb, int p_type) { struct smc_diag_dump_ctx *cb_ctx = smc_dump_context(cb); struct net *net = sock_net(skb->sk); int snum = cb_ctx->pos[p_type]; struct nlattr *bc = NULL; struct hlist_head *head; int rc = 0, num = 0; struct sock *sk; read_lock(&prot->h.smc_hash->lock); head = &prot->h.smc_hash->ht; if (hlist_empty(head)) goto out; sk_for_each(sk, head) { if (!net_eq(sock_net(sk), net)) continue; if (num < snum) goto next; rc = __smc_diag_dump(sk, skb, cb, nlmsg_data(cb->nlh), bc); if (rc < 0) goto out; next: num++; } out: read_unlock(&prot->h.smc_hash->lock); cb_ctx->pos[p_type] = num; return rc; } static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { int rc = 0; rc = smc_diag_dump_proto(&smc_proto, skb, cb, SMCPROTO_SMC); if (!rc) smc_diag_dump_proto(&smc_proto6, skb, cb, SMCPROTO_SMC6); return skb->len; } static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { struct net *net = sock_net(skb->sk); if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && h->nlmsg_flags & NLM_F_DUMP) { { struct netlink_dump_control c = { .dump = smc_diag_dump, .min_dump_alloc = SKB_WITH_OVERHEAD(32768), }; return netlink_dump_start(net->diag_nlsk, skb, h, &c); } } return 0; } static const struct sock_diag_handler smc_diag_handler = { .owner = THIS_MODULE, .family = AF_SMC, .dump = smc_diag_handler_dump, }; static int __init smc_diag_init(void) { return sock_diag_register(&smc_diag_handler); } static void __exit smc_diag_exit(void) { sock_diag_unregister(&smc_diag_handler); } module_init(smc_diag_init); module_exit(smc_diag_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SMC socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 43 /* AF_SMC */); MODULE_ALIAS_GENL_FAMILY(SMCR_GENL_FAMILY_NAME);
3690 658 2856 1219 1219 1726 522 126 3653 478 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 /* SPDX-License-Identifier: GPL-2.0 */ /* * Operations on the network namespace */ #ifndef __NET_NET_NAMESPACE_H #define __NET_NET_NAMESPACE_H #include <linux/atomic.h> #include <linux/refcount.h> #include <linux/workqueue.h> #include <linux/list.h> #include <linux/sysctl.h> #include <linux/uidgid.h> #include <net/flow.h> #include <net/netns/core.h> #include <net/netns/mib.h> #include <net/netns/unix.h> #include <net/netns/packet.h> #include <net/netns/ipv4.h> #include <net/netns/ipv6.h> #include <net/netns/nexthop.h> #include <net/netns/ieee802154_6lowpan.h> #include <net/netns/sctp.h> #include <net/netns/netfilter.h> #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include <net/netns/conntrack.h> #endif #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) #include <net/netns/flow_table.h> #endif #include <net/netns/nftables.h> #include <net/netns/xfrm.h> #include <net/netns/mpls.h> #include <net/netns/can.h> #include <net/netns/xdp.h> #include <net/netns/smc.h> #include <net/netns/bpf.h> #include <net/netns/mctp.h> #include <net/net_trackers.h> #include <linux/ns_common.h> #include <linux/idr.h> #include <linux/skbuff.h> #include <linux/notifier.h> #include <linux/xarray.h> struct user_namespace; struct proc_dir_entry; struct net_device; struct sock; struct ctl_table_header; struct net_generic; struct uevent_sock; struct netns_ipvs; struct bpf_prog; #define NETDEV_HASHBITS 8 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { /* First cache line can be often dirtied. * Do not place here read-mostly fields. */ refcount_t passive; /* To decide when the network * namespace should be freed. */ spinlock_t rules_mod_lock; unsigned int dev_base_seq; /* protected by rtnl_mutex */ u32 ifindex; spinlock_t nsid_lock; atomic_t fnhe_genid; struct list_head list; /* list of network namespaces */ struct list_head exit_list; /* To linked to call pernet exit * methods on dead net ( * pernet_ops_rwsem read locked), * or to unregister pernet ops * (pernet_ops_rwsem write locked). */ struct llist_node defer_free_list; struct llist_node cleanup_list; /* namespaces on death row */ #ifdef CONFIG_KEYS struct key_tag *key_domain; /* Key domain of operation tag */ #endif struct user_namespace *user_ns; /* Owning user namespace */ struct ucounts *ucounts; struct idr netns_ids; struct ns_common ns; struct ref_tracker_dir refcnt_tracker; struct ref_tracker_dir notrefcnt_tracker; /* tracker for objects not * refcounted against netns */ struct list_head dev_base_head; struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; #ifdef CONFIG_SYSCTL struct ctl_table_set sysctls; #endif struct sock *rtnl; /* rtnetlink socket */ struct sock *genl_sock; struct uevent_sock *uevent_sock; /* uevent socket */ struct hlist_head *dev_name_head; struct hlist_head *dev_index_head; struct xarray dev_by_index; struct raw_notifier_head netdev_chain; /* Note that @hash_mix can be read millions times per second, * it is critical that it is on a read_mostly cache line. */ u32 hash_mix; struct net_device *loopback_dev; /* The loopback */ /* core fib_rules */ struct list_head rules_ops; struct netns_core core; struct netns_mib mib; struct netns_packet packet; #if IS_ENABLED(CONFIG_UNIX) struct netns_unix unx; #endif struct netns_nexthop nexthop; struct netns_ipv4 ipv4; #if IS_ENABLED(CONFIG_IPV6) struct netns_ipv6 ipv6; #endif #if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) struct netns_ieee802154_lowpan ieee802154_lowpan; #endif #if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE) struct netns_sctp sctp; #endif #ifdef CONFIG_NETFILTER struct netns_nf nf; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif #if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE) struct netns_nftables nft; #endif #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) struct netns_ft ft; #endif #endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; #endif struct net_generic __rcu *gen; /* Used to store attached BPF programs */ struct netns_bpf bpf; /* Note : following structs are cache line aligned */ #ifdef CONFIG_XFRM struct netns_xfrm xfrm; #endif u64 net_cookie; /* written once */ #if IS_ENABLED(CONFIG_IP_VS) struct netns_ipvs *ipvs; #endif #if IS_ENABLED(CONFIG_MPLS) struct netns_mpls mpls; #endif #if IS_ENABLED(CONFIG_CAN) struct netns_can can; #endif #ifdef CONFIG_XDP_SOCKETS struct netns_xdp xdp; #endif #if IS_ENABLED(CONFIG_MCTP) struct netns_mctp mctp; #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) struct sock *crypto_nlsk; #endif struct sock *diag_nlsk; #if IS_ENABLED(CONFIG_SMC) struct netns_smc smc; #endif #ifdef CONFIG_DEBUG_NET_SMALL_RTNL /* Move to a better place when the config guard is removed. */ struct mutex rtnl_mutex; #endif } __randomize_layout; #include <linux/seq_file_net.h> /* Init's network namespace */ extern struct net init_net; #ifdef CONFIG_NET_NS struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, struct net *old_net); void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid); void net_ns_barrier(void); struct ns_common *get_net_ns(struct ns_common *ns); struct net *get_net_ns_by_fd(int fd); extern struct task_struct *cleanup_net_task; #else /* CONFIG_NET_NS */ #include <linux/sched.h> #include <linux/nsproxy.h> static inline struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, struct net *old_net) { if (flags & CLONE_NEWNET) return ERR_PTR(-EINVAL); return old_net; } static inline void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid) { *uid = GLOBAL_ROOT_UID; *gid = GLOBAL_ROOT_GID; } static inline void net_ns_barrier(void) {} static inline struct ns_common *get_net_ns(struct ns_common *ns) { return ERR_PTR(-EINVAL); } static inline struct net *get_net_ns_by_fd(int fd) { return ERR_PTR(-EINVAL); } #endif /* CONFIG_NET_NS */ extern struct list_head net_namespace_list; struct net *get_net_ns_by_pid(pid_t pid); #ifdef CONFIG_SYSCTL void ipx_register_sysctl(void); void ipx_unregister_sysctl(void); #else #define ipx_register_sysctl() #define ipx_unregister_sysctl() #endif #ifdef CONFIG_NET_NS void __put_net(struct net *net); /* Try using get_net_track() instead */ static inline struct net *get_net(struct net *net) { refcount_inc(&net->ns.count); return net; } static inline struct net *maybe_get_net(struct net *net) { /* Used when we know struct net exists but we * aren't guaranteed a previous reference count * exists. If the reference count is zero this * function fails and returns NULL. */ if (!refcount_inc_not_zero(&net->ns.count)) net = NULL; return net; } /* Try using put_net_track() instead */ static inline void put_net(struct net *net) { if (refcount_dec_and_test(&net->ns.count)) __put_net(net); } static inline int net_eq(const struct net *net1, const struct net *net2) { return net1 == net2; } static inline int check_net(const struct net *net) { return refcount_read(&net->ns.count) != 0; } void net_drop_ns(void *); void net_passive_dec(struct net *net); #else static inline struct net *get_net(struct net *net) { return net; } static inline void put_net(struct net *net) { } static inline struct net *maybe_get_net(struct net *net) { return net; } static inline int net_eq(const struct net *net1, const struct net *net2) { return 1; } static inline int check_net(const struct net *net) { return 1; } #define net_drop_ns NULL static inline void net_passive_dec(struct net *net) { refcount_dec(&net->passive); } #endif static inline void net_passive_inc(struct net *net) { refcount_inc(&net->passive); } /* Returns true if the netns initialization is completed successfully */ static inline bool net_initialized(const struct net *net) { return READ_ONCE(net->list.next); } static inline void __netns_tracker_alloc(struct net *net, netns_tracker *tracker, bool refcounted, gfp_t gfp) { #ifdef CONFIG_NET_NS_REFCNT_TRACKER ref_tracker_alloc(refcounted ? &net->refcnt_tracker : &net->notrefcnt_tracker, tracker, gfp); #endif } static inline void netns_tracker_alloc(struct net *net, netns_tracker *tracker, gfp_t gfp) { __netns_tracker_alloc(net, tracker, true, gfp); } static inline void __netns_tracker_free(struct net *net, netns_tracker *tracker, bool refcounted) { #ifdef CONFIG_NET_NS_REFCNT_TRACKER ref_tracker_free(refcounted ? &net->refcnt_tracker : &net->notrefcnt_tracker, tracker); #endif } static inline struct net *get_net_track(struct net *net, netns_tracker *tracker, gfp_t gfp) { get_net(net); netns_tracker_alloc(net, tracker, gfp); return net; } static inline void put_net_track(struct net *net, netns_tracker *tracker) { __netns_tracker_free(net, tracker, true); put_net(net); } typedef struct { #ifdef CONFIG_NET_NS struct net __rcu *net; #endif } possible_net_t; static inline void write_pnet(possible_net_t *pnet, struct net *net) { #ifdef CONFIG_NET_NS rcu_assign_pointer(pnet->net, net); #endif } static inline struct net *read_pnet(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS return rcu_dereference_protected(pnet->net, true); #else return &init_net; #endif } static inline struct net *read_pnet_rcu(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS return rcu_dereference(pnet->net); #else return &init_net; #endif } /* Protected by net_rwsem */ #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) #define for_each_net_continue_reverse(VAR) \ list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list) #define for_each_net_rcu(VAR) \ list_for_each_entry_rcu(VAR, &net_namespace_list, list) #ifdef CONFIG_NET_NS #define __net_init #define __net_exit #define __net_initdata #define __net_initconst #else #define __net_init __init #define __net_exit __ref #define __net_initdata __initdata #define __net_initconst __initconst #endif int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp); int peernet2id(const struct net *net, struct net *peer); bool peernet_has_id(const struct net *net, struct net *peer); struct net *get_net_ns_by_id(const struct net *net, int id); struct pernet_operations { struct list_head list; /* * Below methods are called without any exclusive locks. * More than one net may be constructed and destructed * in parallel on several cpus. Every pernet_operations * have to keep in mind all other pernet_operations and * to introduce a locking, if they share common resources. * * The only time they are called with exclusive lock is * from register_pernet_subsys(), unregister_pernet_subsys() * register_pernet_device() and unregister_pernet_device(). * * Exit methods using blocking RCU primitives, such as * synchronize_rcu(), should be implemented via exit_batch. * Then, destruction of a group of net requires single * synchronize_rcu() related to these pernet_operations, * instead of separate synchronize_rcu() for every net. * Please, avoid synchronize_rcu() at all, where it's possible. * * Note that a combination of pre_exit() and exit() can * be used, since a synchronize_rcu() is guaranteed between * the calls. */ int (*init)(struct net *net); void (*pre_exit)(struct net *net); void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); /* Following method is called with RTNL held. */ void (*exit_batch_rtnl)(struct list_head *net_exit_list, struct list_head *dev_kill_list); unsigned int * const id; const size_t size; }; /* * Use these carefully. If you implement a network device and it * needs per network namespace operations use device pernet operations, * otherwise use pernet subsys operations. * * Network interfaces need to be removed from a dying netns _before_ * subsys notifiers can be called, as most of the network code cleanup * (which is done from subsys notifiers) runs with the assumption that * dev_remove_pack has been called so no new packets will arrive during * and after the cleanup functions have been called. dev_remove_pack * is not per namespace so instead the guarantee of no more packets * arriving in a network namespace is provided by ensuring that all * network devices and all sockets have left the network namespace * before the cleanup methods are called. * * For the longest time the ipv4 icmp code was registered as a pernet * device which caused kernel oops, and panics during network * namespace cleanup. So please don't get this wrong. */ int register_pernet_subsys(struct pernet_operations *); void unregister_pernet_subsys(struct pernet_operations *); int register_pernet_device(struct pernet_operations *); void unregister_pernet_device(struct pernet_operations *); struct ctl_table; #define register_net_sysctl(net, path, table) \ register_net_sysctl_sz(net, path, table, ARRAY_SIZE(table)) #ifdef CONFIG_SYSCTL int net_sysctl_init(void); struct ctl_table_header *register_net_sysctl_sz(struct net *net, const char *path, struct ctl_table *table, size_t table_size); void unregister_net_sysctl_table(struct ctl_table_header *header); #else static inline int net_sysctl_init(void) { return 0; } static inline struct ctl_table_header *register_net_sysctl_sz(struct net *net, const char *path, struct ctl_table *table, size_t table_size) { return NULL; } static inline void unregister_net_sysctl_table(struct ctl_table_header *header) { } #endif static inline int rt_genid_ipv4(const struct net *net) { return atomic_read(&net->ipv4.rt_genid); } #if IS_ENABLED(CONFIG_IPV6) static inline int rt_genid_ipv6(const struct net *net) { return atomic_read(&net->ipv6.fib6_sernum); } #endif static inline void rt_genid_bump_ipv4(struct net *net) { atomic_inc(&net->ipv4.rt_genid); } extern void (*__fib6_flush_trees)(struct net *net); static inline void rt_genid_bump_ipv6(struct net *net) { if (__fib6_flush_trees) __fib6_flush_trees(net); } #if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) static inline struct netns_ieee802154_lowpan * net_ieee802154_lowpan(struct net *net) { return &net->ieee802154_lowpan; } #endif /* For callers who don't really care about whether it's IPv4 or IPv6 */ static inline void rt_genid_bump_all(struct net *net) { rt_genid_bump_ipv4(net); rt_genid_bump_ipv6(net); } static inline int fnhe_genid(const struct net *net) { return atomic_read(&net->fnhe_genid); } static inline void fnhe_genid_bump(struct net *net) { atomic_inc(&net->fnhe_genid); } #ifdef CONFIG_NET void net_ns_init(void); #else static inline void net_ns_init(void) {} #endif #endif /* __NET_NET_NAMESPACE_H */
1 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 // SPDX-License-Identifier: GPL-2.0+ /* * Support for dynamic clock devices * * Copyright (C) 2010 OMICRON electronics GmbH */ #include <linux/device.h> #include <linux/export.h> #include <linux/file.h> #include <linux/posix-clock.h> #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/uaccess.h> #include "posix-timers.h" /* * Returns NULL if the posix_clock instance attached to 'fp' is old and stale. */ static struct posix_clock *get_posix_clock(struct file *fp) { struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk = pccontext->clk; down_read(&clk->rwsem); if (!clk->zombie) return clk; up_read(&clk->rwsem); return NULL; } static void put_posix_clock(struct posix_clock *clk) { up_read(&clk->rwsem); } static ssize_t posix_clock_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos) { struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk = get_posix_clock(fp); int err = -EINVAL; if (!clk) return -ENODEV; if (clk->ops.read) err = clk->ops.read(pccontext, fp->f_flags, buf, count); put_posix_clock(clk); return err; } static __poll_t posix_clock_poll(struct file *fp, poll_table *wait) { struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk = get_posix_clock(fp); __poll_t result = 0; if (!clk) return EPOLLERR; if (clk->ops.poll) result = clk->ops.poll(pccontext, fp, wait); put_posix_clock(clk); return result; } static long posix_clock_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) { struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk = get_posix_clock(fp); int err = -ENOTTY; if (!clk) return -ENODEV; if (clk->ops.ioctl) err = clk->ops.ioctl(pccontext, cmd, arg); put_posix_clock(clk); return err; } #ifdef CONFIG_COMPAT static long posix_clock_compat_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) { struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk = get_posix_clock(fp); int err = -ENOTTY; if (!clk) return -ENODEV; if (clk->ops.ioctl) err = clk->ops.ioctl(pccontext, cmd, arg); put_posix_clock(clk); return err; } #endif static int posix_clock_open(struct inode *inode, struct file *fp) { int err; struct posix_clock *clk = container_of(inode->i_cdev, struct posix_clock, cdev); struct posix_clock_context *pccontext; down_read(&clk->rwsem); if (clk->zombie) { err = -ENODEV; goto out; } pccontext = kzalloc(sizeof(*pccontext), GFP_KERNEL); if (!pccontext) { err = -ENOMEM; goto out; } pccontext->clk = clk; pccontext->fp = fp; if (clk->ops.open) { err = clk->ops.open(pccontext, fp->f_mode); if (err) { kfree(pccontext); goto out; } } fp->private_data = pccontext; get_device(clk->dev); err = 0; out: up_read(&clk->rwsem); return err; } static int posix_clock_release(struct inode *inode, struct file *fp) { struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk; int err = 0; if (!pccontext) return -ENODEV; clk = pccontext->clk; if (clk->ops.release) err = clk->ops.release(pccontext); put_device(clk->dev); kfree(pccontext); fp->private_data = NULL; return err; } static const struct file_operations posix_clock_file_operations = { .owner = THIS_MODULE, .read = posix_clock_read, .poll = posix_clock_poll, .unlocked_ioctl = posix_clock_ioctl, .open = posix_clock_open, .release = posix_clock_release, #ifdef CONFIG_COMPAT .compat_ioctl = posix_clock_compat_ioctl, #endif }; int posix_clock_register(struct posix_clock *clk, struct device *dev) { int err; init_rwsem(&clk->rwsem); cdev_init(&clk->cdev, &posix_clock_file_operations); err = cdev_device_add(&clk->cdev, dev); if (err) { pr_err("%s unable to add device %d:%d\n", dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt)); return err; } clk->cdev.owner = clk->ops.owner; clk->dev = dev; return 0; } EXPORT_SYMBOL_GPL(posix_clock_register); void posix_clock_unregister(struct posix_clock *clk) { cdev_device_del(&clk->cdev, clk->dev); down_write(&clk->rwsem); clk->zombie = true; up_write(&clk->rwsem); put_device(clk->dev); } EXPORT_SYMBOL_GPL(posix_clock_unregister); struct posix_clock_desc { struct file *fp; struct posix_clock *clk; }; static int get_clock_desc(const clockid_t id, struct posix_clock_desc *cd) { struct file *fp = fget(clockid_to_fd(id)); int err = -EINVAL; if (!fp) return err; if (fp->f_op->open != posix_clock_open || !fp->private_data) goto out; cd->fp = fp; cd->clk = get_posix_clock(fp); err = cd->clk ? 0 : -ENODEV; out: if (err) fput(fp); return err; } static void put_clock_desc(struct posix_clock_desc *cd) { put_posix_clock(cd->clk); fput(cd->fp); } static int pc_clock_adjtime(clockid_t id, struct __kernel_timex *tx) { struct posix_clock_desc cd; int err; err = get_clock_desc(id, &cd); if (err) return err; if (tx->modes && (cd.fp->f_mode & FMODE_WRITE) == 0) { err = -EACCES; goto out; } if (cd.clk->ops.clock_adjtime) err = cd.clk->ops.clock_adjtime(cd.clk, tx); else err = -EOPNOTSUPP; out: put_clock_desc(&cd); return err; } static int pc_clock_gettime(clockid_t id, struct timespec64 *ts) { struct posix_clock_desc cd; int err; err = get_clock_desc(id, &cd); if (err) return err; if (cd.clk->ops.clock_gettime) err = cd.clk->ops.clock_gettime(cd.clk, ts); else err = -EOPNOTSUPP; put_clock_desc(&cd); return err; } static int pc_clock_getres(clockid_t id, struct timespec64 *ts) { struct posix_clock_desc cd; int err; err = get_clock_desc(id, &cd); if (err) return err; if (cd.clk->ops.clock_getres) err = cd.clk->ops.clock_getres(cd.clk, ts); else err = -EOPNOTSUPP; put_clock_desc(&cd); return err; } static int pc_clock_settime(clockid_t id, const struct timespec64 *ts) { struct posix_clock_desc cd; int err; if (!timespec64_valid_strict(ts)) return -EINVAL; err = get_clock_desc(id, &cd); if (err) return err; if ((cd.fp->f_mode & FMODE_WRITE) == 0) { err = -EACCES; goto out; } if (cd.clk->ops.clock_settime) err = cd.clk->ops.clock_settime(cd.clk, ts); else err = -EOPNOTSUPP; out: put_clock_desc(&cd); return err; } const struct k_clock clock_posix_dynamic = { .clock_getres = pc_clock_getres, .clock_set = pc_clock_settime, .clock_get_timespec = pc_clock_gettime, .clock_adj = pc_clock_adjtime, };
3 45 109 109 109 109 24 84 122 3 116 3 1 2 3 3 69 67 23 39 1 6 2 2 1 1 20 20 20 20 20 20 796 778 15 16 16 16 162 156 3 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 // SPDX-License-Identifier: GPL-2.0-or-later /* * lwtunnel Infrastructure for light weight tunnels like mpls * * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> */ #include <linux/capability.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/lwtunnel.h> #include <linux/in.h> #include <linux/init.h> #include <linux/err.h> #include <net/lwtunnel.h> #include <net/rtnetlink.h> #include <net/ip6_fib.h> #include <net/rtnh.h> DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled); #ifdef CONFIG_MODULES static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) { /* Only lwt encaps implemented without using an interface for * the encap need to return a string here. */ switch (encap_type) { case LWTUNNEL_ENCAP_MPLS: return "MPLS"; case LWTUNNEL_ENCAP_ILA: return "ILA"; case LWTUNNEL_ENCAP_SEG6: return "SEG6"; case LWTUNNEL_ENCAP_BPF: return "BPF"; case LWTUNNEL_ENCAP_SEG6_LOCAL: return "SEG6LOCAL"; case LWTUNNEL_ENCAP_RPL: return "RPL"; case LWTUNNEL_ENCAP_IOAM6: return "IOAM6"; case LWTUNNEL_ENCAP_XFRM: /* module autoload not supported for encap type */ return NULL; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: case LWTUNNEL_ENCAP_NONE: case __LWTUNNEL_ENCAP_MAX: /* should not have got here */ WARN_ON(1); break; } return NULL; } #endif /* CONFIG_MODULES */ struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) { struct lwtunnel_state *lws; lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC); return lws; } EXPORT_SYMBOL_GPL(lwtunnel_state_alloc); static const struct lwtunnel_encap_ops __rcu * lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, unsigned int num) { if (num > LWTUNNEL_ENCAP_MAX) return -ERANGE; return !cmpxchg((const struct lwtunnel_encap_ops **) &lwtun_encaps[num], NULL, ops) ? 0 : -1; } EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, unsigned int encap_type) { int ret; if (encap_type == LWTUNNEL_ENCAP_NONE || encap_type > LWTUNNEL_ENCAP_MAX) return -ERANGE; ret = (cmpxchg((const struct lwtunnel_encap_ops **) &lwtun_encaps[encap_type], ops, NULL) == ops) ? 0 : -1; synchronize_net(); return ret; } EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops); int lwtunnel_build_state(struct net *net, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **lws, struct netlink_ext_ack *extack) { const struct lwtunnel_encap_ops *ops; bool found = false; int ret = -EINVAL; if (encap_type == LWTUNNEL_ENCAP_NONE || encap_type > LWTUNNEL_ENCAP_MAX) { NL_SET_ERR_MSG_ATTR(extack, encap, "Unknown LWT encapsulation type"); return ret; } ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); if (likely(ops && ops->build_state && try_module_get(ops->owner))) found = true; rcu_read_unlock(); if (found) { ret = ops->build_state(net, encap, family, cfg, lws, extack); if (ret) module_put(ops->owner); } else { /* don't rely on -EOPNOTSUPP to detect match as build_state * handlers could return it */ NL_SET_ERR_MSG_ATTR(extack, encap, "LWT encapsulation type not supported"); } return ret; } EXPORT_SYMBOL_GPL(lwtunnel_build_state); int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack, bool rtnl_is_held) { const struct lwtunnel_encap_ops *ops; int ret = -EINVAL; if (encap_type == LWTUNNEL_ENCAP_NONE || encap_type > LWTUNNEL_ENCAP_MAX) { NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type"); return ret; } ops = rcu_access_pointer(lwtun_encaps[encap_type]); #ifdef CONFIG_MODULES if (!ops) { const char *encap_type_str = lwtunnel_encap_str(encap_type); if (encap_type_str) { if (rtnl_is_held) __rtnl_unlock(); request_module("rtnl-lwt-%s", encap_type_str); if (rtnl_is_held) rtnl_lock(); ops = rcu_access_pointer(lwtun_encaps[encap_type]); } } #endif ret = ops ? 0 : -EOPNOTSUPP; if (ret < 0) NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported"); return ret; } EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type); int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, struct netlink_ext_ack *extack, bool rtnl_is_held) { struct rtnexthop *rtnh = (struct rtnexthop *)attr; struct nlattr *nla_entype; struct nlattr *attrs; u16 encap_type; int attrlen; while (rtnh_ok(rtnh, remaining)) { attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { attrs = rtnh_attrs(rtnh); nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla_entype) { if (nla_len(nla_entype) < sizeof(u16)) { NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE"); return -EINVAL; } encap_type = nla_get_u16(nla_entype); if (lwtunnel_valid_encap_type(encap_type, extack, rtnl_is_held) != 0) return -EOPNOTSUPP; } } rtnh = rtnh_next(rtnh, &remaining); } return 0; } EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr); void lwtstate_free(struct lwtunnel_state *lws) { const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; if (ops->destroy_state) { ops->destroy_state(lws); kfree_rcu(lws, rcu); } else { kfree(lws); } module_put(ops->owner); } EXPORT_SYMBOL_GPL(lwtstate_free); int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, int encap_attr, int encap_type_attr) { const struct lwtunnel_encap_ops *ops; struct nlattr *nest; int ret; if (!lwtstate) return 0; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || lwtstate->type > LWTUNNEL_ENCAP_MAX) return 0; nest = nla_nest_start_noflag(skb, encap_attr); if (!nest) return -EMSGSIZE; ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); if (likely(ops && ops->fill_encap)) ret = ops->fill_encap(skb, lwtstate); rcu_read_unlock(); if (ret) goto nla_put_failure; nla_nest_end(skb, nest); ret = nla_put_u16(skb, encap_type_attr, lwtstate->type); if (ret) goto nla_put_failure; return 0; nla_put_failure: nla_nest_cancel(skb, nest); return (ret == -EOPNOTSUPP ? 0 : ret); } EXPORT_SYMBOL_GPL(lwtunnel_fill_encap); int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) { const struct lwtunnel_encap_ops *ops; int ret = 0; if (!lwtstate) return 0; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || lwtstate->type > LWTUNNEL_ENCAP_MAX) return 0; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); if (likely(ops && ops->get_encap_size)) ret = nla_total_size(ops->get_encap_size(lwtstate)); rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size); int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) { const struct lwtunnel_encap_ops *ops; int ret = 0; if (!a && !b) return 0; if (!a || !b) return 1; if (a->type != b->type) return 1; if (a->type == LWTUNNEL_ENCAP_NONE || a->type > LWTUNNEL_ENCAP_MAX) return 0; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[a->type]); if (likely(ops && ops->cmp_encap)) ret = ops->cmp_encap(a, b); rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; struct lwtunnel_state *lwtstate; int ret = -EINVAL; if (!dst) goto drop; lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || lwtstate->type > LWTUNNEL_ENCAP_MAX) return 0; ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); if (likely(ops && ops->output)) ret = ops->output(net, sk, skb); rcu_read_unlock(); if (ret == -EOPNOTSUPP) goto drop; return ret; drop: kfree_skb(skb); return ret; } EXPORT_SYMBOL_GPL(lwtunnel_output); int lwtunnel_xmit(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; struct lwtunnel_state *lwtstate; int ret = -EINVAL; if (!dst) goto drop; lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || lwtstate->type > LWTUNNEL_ENCAP_MAX) return 0; ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); if (likely(ops && ops->xmit)) ret = ops->xmit(skb); rcu_read_unlock(); if (ret == -EOPNOTSUPP) goto drop; return ret; drop: kfree_skb(skb); return ret; } EXPORT_SYMBOL_GPL(lwtunnel_xmit); int lwtunnel_input(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; struct lwtunnel_state *lwtstate; int ret = -EINVAL; if (!dst) goto drop; lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || lwtstate->type > LWTUNNEL_ENCAP_MAX) return 0; ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); if (likely(ops && ops->input)) ret = ops->input(skb); rcu_read_unlock(); if (ret == -EOPNOTSUPP) goto drop; return ret; drop: kfree_skb(skb); return ret; } EXPORT_SYMBOL_GPL(lwtunnel_input);
1564 1570 1569 1342 1567 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 // SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/symlink.c - kernfs symlink implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> */ #include <linux/fs.h> #include <linux/gfp.h> #include <linux/namei.h> #include "kernfs-internal.h" /** * kernfs_create_link - create a symlink * @parent: directory to create the symlink in * @name: name of the symlink * @target: target node for the symlink to point to * * Return: the created node on success, ERR_PTR() value on error. * Ownership of the link matches ownership of the target. */ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target) { struct kernfs_node *kn; int error; kuid_t uid = GLOBAL_ROOT_UID; kgid_t gid = GLOBAL_ROOT_GID; if (target->iattr) { uid = target->iattr->ia_uid; gid = target->iattr->ia_gid; } kn = kernfs_new_node(parent, name, S_IFLNK|0777, uid, gid, KERNFS_LINK); if (!kn) return ERR_PTR(-ENOMEM); if (kernfs_ns_enabled(parent)) kn->ns = target->ns; kn->symlink.target_kn = target; kernfs_get(target); /* ref owned by symlink */ error = kernfs_add_one(kn); if (!error) return kn; kernfs_put(kn); return ERR_PTR(error); } static int kernfs_get_target_path(struct kernfs_node *parent, struct kernfs_node *target, char *path) { struct kernfs_node *base, *kn; char *s = path; int len = 0; /* go up to the root, stop at the base */ base = parent; while (base->parent) { kn = target->parent; while (kn->parent && base != kn) kn = kn->parent; if (base == kn) break; if ((s - path) + 3 >= PATH_MAX) return -ENAMETOOLONG; strcpy(s, "../"); s += 3; base = base->parent; } /* determine end of target string for reverse fillup */ kn = target; while (kn->parent && kn != base) { len += strlen(kn->name) + 1; kn = kn->parent; } /* check limits */ if (len < 2) return -EINVAL; len--; if ((s - path) + len >= PATH_MAX) return -ENAMETOOLONG; /* reverse fillup of target string from target to base */ kn = target; while (kn->parent && kn != base) { int slen = strlen(kn->name); len -= slen; memcpy(s + len, kn->name, slen); if (len) s[--len] = '/'; kn = kn->parent; } return 0; } static int kernfs_getlink(struct inode *inode, char *path) { struct kernfs_node *kn = inode->i_private; struct kernfs_node *parent = kn->parent; struct kernfs_node *target = kn->symlink.target_kn; struct kernfs_root *root = kernfs_root(parent); int error; down_read(&root->kernfs_rwsem); error = kernfs_get_target_path(parent, target, path); up_read(&root->kernfs_rwsem); return error; } static const char *kernfs_iop_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { char *body; int error; if (!dentry) return ERR_PTR(-ECHILD); body = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!body) return ERR_PTR(-ENOMEM); error = kernfs_getlink(inode, body); if (unlikely(error < 0)) { kfree(body); return ERR_PTR(error); } set_delayed_call(done, kfree_link, body); return body; } const struct inode_operations kernfs_symlink_iops = { .listxattr = kernfs_iop_listxattr, .get_link = kernfs_iop_get_link, .setattr = kernfs_iop_setattr, .getattr = kernfs_iop_getattr, .permission = kernfs_iop_permission, };
315 183 1020 186 38 38 28 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RCULIST_NULLS_H #define _LINUX_RCULIST_NULLS_H #ifdef __KERNEL__ /* * RCU-protected list version */ #include <linux/list_nulls.h> #include <linux/rcupdate.h> /** * hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization * @n: the element to delete from the hash list. * * Note: hlist_nulls_unhashed() on the node return true after this. It is * useful for RCU based read lockfree traversal if the writer side * must know if the list entry is still hashed or already unhashed. * * In particular, it means that we can not poison the forward pointers * that may still be used for walking the hash list and we can only * zero the pprev pointer so list_unhashed() will return true after * this. * * The caller must take whatever precautions are necessary (such as * holding appropriate locks) to avoid racing with another * list-mutation primitive, such as hlist_nulls_add_head_rcu() or * hlist_nulls_del_rcu(), running on this same list. However, it is * perfectly legal to run concurrently with the _rcu list-traversal * primitives, such as hlist_nulls_for_each_entry_rcu(). */ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) { if (!hlist_nulls_unhashed(n)) { __hlist_nulls_del(n); WRITE_ONCE(n->pprev, NULL); } } /** * hlist_nulls_first_rcu - returns the first element of the hash list. * @head: the head of the list. */ #define hlist_nulls_first_rcu(head) \ (*((struct hlist_nulls_node __rcu __force **)&(head)->first)) /** * hlist_nulls_next_rcu - returns the element of the list after @node. * @node: element of the list. */ #define hlist_nulls_next_rcu(node) \ (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) /** * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. * * Note: hlist_nulls_unhashed() on entry does not return true after this, * the entry is in an undefined state. It is useful for RCU based * lockfree traversal. * * In particular, it means that we can not poison the forward * pointers that may still be used for walking the hash list. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() * or hlist_nulls_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_nulls_for_each_entry(). */ static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) { __hlist_nulls_del(n); WRITE_ONCE(n->pprev, LIST_POISON2); } /** * hlist_nulls_add_head_rcu * @n: the element to add to the hash list. * @h: the list to add to. * * Description: * Adds the specified element to the specified hlist_nulls, * while permitting racing traversals. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() * or hlist_nulls_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. Regardless of the type of CPU, the * list-traversal primitive must be guarded by rcu_read_lock(). */ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, struct hlist_nulls_head *h) { struct hlist_nulls_node *first = h->first; WRITE_ONCE(n->next, first); WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) WRITE_ONCE(first->pprev, &n->next); } /** * hlist_nulls_add_tail_rcu * @n: the element to add to the hash list. * @h: the list to add to. * * Description: * Adds the specified element to the specified hlist_nulls, * while permitting racing traversals. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() * or hlist_nulls_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. Regardless of the type of CPU, the * list-traversal primitive must be guarded by rcu_read_lock(). */ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, struct hlist_nulls_head *h) { struct hlist_nulls_node *i, *last = NULL; /* Note: write side code, so rcu accessors are not needed. */ for (i = h->first; !is_a_nulls(i); i = i->next) last = i; if (last) { WRITE_ONCE(n->next, last->next); n->pprev = &last->next; rcu_assign_pointer(hlist_nulls_next_rcu(last), n); } else { hlist_nulls_add_head_rcu(n, h); } } /* after that hlist_nulls_del will work */ static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n) { n->pprev = &n->next; n->next = (struct hlist_nulls_node *)NULLS_MARKER(NULL); } /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_nulls_node to use as a loop cursor. * @head: the head of the list. * @member: the name of the hlist_nulls_node within the struct. * * The barrier() is needed to make sure compiler doesn't cache first element [1], * as this loop can be restarted [2] * [1] Documentation/memory-barriers.txt around line 1533 * [2] Documentation/RCU/rculist_nulls.rst around line 146 */ #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ for (({barrier();}), \ pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ (!is_a_nulls(pos)) && \ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) /** * hlist_nulls_for_each_entry_safe - * iterate over list of given type safe against removal of list entry * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_nulls_node to use as a loop cursor. * @head: the head of the list. * @member: the name of the hlist_nulls_node within the struct. */ #define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \ for (({barrier();}), \ pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ (!is_a_nulls(pos)) && \ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });) #endif #endif
19568 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __X86_KERNEL_FPU_INTERNAL_H #define __X86_KERNEL_FPU_INTERNAL_H extern struct fpstate init_fpstate; /* CPU feature check wrappers */ static __always_inline __pure bool use_xsave(void) { return cpu_feature_enabled(X86_FEATURE_XSAVE); } static __always_inline __pure bool use_fxsr(void) { return cpu_feature_enabled(X86_FEATURE_FXSR); } #ifdef CONFIG_X86_DEBUG_FPU # define WARN_ON_FPU(x) WARN_ON_ONCE(x) #else # define WARN_ON_FPU(x) ({ (void)(x); 0; }) #endif /* Used in init.c */ extern void fpstate_init_user(struct fpstate *fpstate); extern void fpstate_reset(struct fpu *fpu); #endif
111 111 111 111 111 111 111 111 111 111 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 // SPDX-License-Identifier: GPL-2.0-or-later /* Maintain an RxRPC server socket to do AFS communications through * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/slab.h> #include <linux/sched/signal.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include "internal.h" #include "afs_cm.h" #include "protocol_yfs.h" #define RXRPC_TRACE_ONLY_DEFINE_ENUMS #include <trace/events/rxrpc.h> struct workqueue_struct *afs_async_calls; static void afs_deferred_free_worker(struct work_struct *work); static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_process_async_call(struct work_struct *); static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long); static int afs_deliver_cm_op_id(struct afs_call *); /* asynchronous incoming call initial processing */ static const struct afs_call_type afs_RXCMxxxx = { .name = "CB.xxxx", .deliver = afs_deliver_cm_op_id, }; /* * open an RxRPC socket and bind it to be a server for callback notifications * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT */ int afs_open_socket(struct afs_net *net) { struct sockaddr_rxrpc srx; struct socket *socket; int ret; _enter(""); ret = sock_create_kern(net->net, AF_RXRPC, SOCK_DGRAM, PF_INET6, &socket); if (ret < 0) goto error_1; socket->sk->sk_allocation = GFP_NOFS; /* bind the callback manager's address to make this a server socket */ memset(&srx, 0, sizeof(srx)); srx.srx_family = AF_RXRPC; srx.srx_service = CM_SERVICE; srx.transport_type = SOCK_DGRAM; srx.transport_len = sizeof(srx.transport.sin6); srx.transport.sin6.sin6_family = AF_INET6; srx.transport.sin6.sin6_port = htons(AFS_CM_PORT); ret = rxrpc_sock_set_min_security_level(socket->sk, RXRPC_SECURITY_ENCRYPT); if (ret < 0) goto error_2; ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); if (ret == -EADDRINUSE) { srx.transport.sin6.sin6_port = 0; ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); } if (ret < 0) goto error_2; srx.srx_service = YFS_CM_SERVICE; ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); if (ret < 0) goto error_2; /* Ideally, we'd turn on service upgrade here, but we can't because * OpenAFS is buggy and leaks the userStatus field from packet to * packet and between FS packets and CB packets - so if we try to do an * upgrade on an FS packet, OpenAFS will leak that into the CB packet * it sends back to us. */ rxrpc_kernel_new_call_notification(socket, afs_rx_new_call, afs_rx_discard_new_call); ret = kernel_listen(socket, INT_MAX); if (ret < 0) goto error_2; net->socket = socket; afs_charge_preallocation(&net->charge_preallocation_work); _leave(" = 0"); return 0; error_2: sock_release(socket); error_1: _leave(" = %d", ret); return ret; } /* * close the RxRPC socket AFS was using */ void afs_close_socket(struct afs_net *net) { _enter(""); kernel_listen(net->socket, 0); flush_workqueue(afs_async_calls); if (net->spare_incoming_call) { afs_put_call(net->spare_incoming_call); net->spare_incoming_call = NULL; } _debug("outstanding %u", atomic_read(&net->nr_outstanding_calls)); wait_var_event(&net->nr_outstanding_calls, !atomic_read(&net->nr_outstanding_calls)); _debug("no outstanding calls"); kernel_sock_shutdown(net->socket, SHUT_RDWR); flush_workqueue(afs_async_calls); sock_release(net->socket); _debug("dework"); _leave(""); } /* * Allocate a call. */ static struct afs_call *afs_alloc_call(struct afs_net *net, const struct afs_call_type *type, gfp_t gfp) { struct afs_call *call; int o; call = kzalloc(sizeof(*call), gfp); if (!call) return NULL; call->type = type; call->net = net; call->debug_id = atomic_inc_return(&rxrpc_debug_id); refcount_set(&call->ref, 1); INIT_WORK(&call->async_work, type->async_rx ?: afs_process_async_call); INIT_WORK(&call->work, call->type->work); INIT_WORK(&call->free_work, afs_deferred_free_worker); init_waitqueue_head(&call->waitq); spin_lock_init(&call->state_lock); call->iter = &call->def_iter; o = atomic_inc_return(&net->nr_outstanding_calls); trace_afs_call(call->debug_id, afs_call_trace_alloc, 1, o, __builtin_return_address(0)); return call; } static void afs_free_call(struct afs_call *call) { struct afs_net *net = call->net; int o; ASSERT(!work_pending(&call->async_work)); rxrpc_kernel_put_peer(call->peer); if (call->rxcall) { rxrpc_kernel_shutdown_call(net->socket, call->rxcall); rxrpc_kernel_put_call(net->socket, call->rxcall); call->rxcall = NULL; } if (call->type->destructor) call->type->destructor(call); afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call); kfree(call->request); o = atomic_read(&net->nr_outstanding_calls); trace_afs_call(call->debug_id, afs_call_trace_free, 0, o, __builtin_return_address(0)); kfree(call); o = atomic_dec_return(&net->nr_outstanding_calls); if (o == 0) wake_up_var(&net->nr_outstanding_calls); } /* * Dispose of a reference on a call. */ void afs_put_call(struct afs_call *call) { struct afs_net *net = call->net; unsigned int debug_id = call->debug_id; bool zero; int r, o; zero = __refcount_dec_and_test(&call->ref, &r); o = atomic_read(&net->nr_outstanding_calls); trace_afs_call(debug_id, afs_call_trace_put, r - 1, o, __builtin_return_address(0)); if (zero) afs_free_call(call); } static void afs_deferred_free_worker(struct work_struct *work) { struct afs_call *call = container_of(work, struct afs_call, free_work); afs_free_call(call); } /* * Dispose of a reference on a call, deferring the cleanup to a workqueue * to avoid lock recursion. */ void afs_deferred_put_call(struct afs_call *call) { struct afs_net *net = call->net; unsigned int debug_id = call->debug_id; bool zero; int r, o; zero = __refcount_dec_and_test(&call->ref, &r); o = atomic_read(&net->nr_outstanding_calls); trace_afs_call(debug_id, afs_call_trace_put, r - 1, o, __builtin_return_address(0)); if (zero) schedule_work(&call->free_work); } /* * Queue the call for actual work. */ static void afs_queue_call_work(struct afs_call *call) { if (call->type->work) { afs_get_call(call, afs_call_trace_work); if (!queue_work(afs_wq, &call->work)) afs_put_call(call); } } /* * allocate a call with flat request and reply buffers */ struct afs_call *afs_alloc_flat_call(struct afs_net *net, const struct afs_call_type *type, size_t request_size, size_t reply_max) { struct afs_call *call; call = afs_alloc_call(net, type, GFP_NOFS); if (!call) goto nomem_call; if (request_size) { call->request_size = request_size; call->request = kmalloc(request_size, GFP_NOFS); if (!call->request) goto nomem_free; } if (reply_max) { call->reply_max = reply_max; call->buffer = kmalloc(reply_max, GFP_NOFS); if (!call->buffer) goto nomem_free; } afs_extract_to_buf(call, call->reply_max); call->operation_ID = type->op; init_waitqueue_head(&call->waitq); return call; nomem_free: afs_put_call(call); nomem_call: return NULL; } /* * clean up a call with flat buffer */ void afs_flat_call_destructor(struct afs_call *call) { _enter(""); kfree(call->request); call->request = NULL; kfree(call->buffer); call->buffer = NULL; } /* * Advance the AFS call state when the RxRPC call ends the transmit phase. */ static void afs_notify_end_request_tx(struct sock *sock, struct rxrpc_call *rxcall, unsigned long call_user_ID) { struct afs_call *call = (struct afs_call *)call_user_ID; afs_set_call_state(call, AFS_CALL_CL_REQUESTING, AFS_CALL_CL_AWAIT_REPLY); } /* * Initiate a call and synchronously queue up the parameters for dispatch. Any * error is stored into the call struct, which the caller must check for. */ void afs_make_call(struct afs_call *call, gfp_t gfp) { struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; size_t len; s64 tx_total_len; int ret; _enter(",{%pISp+%u},", rxrpc_kernel_remote_addr(call->peer), call->service_id); ASSERT(call->type != NULL); ASSERT(call->type->name != NULL); _debug("____MAKE %p{%s,%x} [%d]____", call, call->type->name, key_serial(call->key), atomic_read(&call->net->nr_outstanding_calls)); trace_afs_make_call(call); /* Work out the length we're going to transmit. This is awkward for * calls such as FS.StoreData where there's an extra injection of data * after the initial fixed part. */ tx_total_len = call->request_size; if (call->write_iter) tx_total_len += iov_iter_count(call->write_iter); /* If the call is going to be asynchronous, we need an extra ref for * the call to hold itself so the caller need not hang on to its ref. */ if (call->async) { afs_get_call(call, afs_call_trace_get); call->drop_ref = true; } /* create a call */ rxcall = rxrpc_kernel_begin_call(call->net->socket, call->peer, call->key, (unsigned long)call, tx_total_len, call->max_lifespan, gfp, (call->async ? afs_wake_up_async_call : afs_wake_up_call_waiter), call->service_id, call->upgrade, (call->intr ? RXRPC_PREINTERRUPTIBLE : RXRPC_UNINTERRUPTIBLE), call->debug_id); if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); call->error = ret; goto error_kill_call; } call->rxcall = rxcall; call->issue_time = ktime_get_real(); /* send the request */ iov[0].iov_base = call->request; iov[0].iov_len = call->request_size; msg.msg_name = NULL; msg.msg_namelen = 0; iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, iov, 1, call->request_size); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = MSG_WAITALL | (call->write_iter ? MSG_MORE : 0); ret = rxrpc_kernel_send_data(call->net->socket, rxcall, &msg, call->request_size, afs_notify_end_request_tx); if (ret < 0) goto error_do_abort; if (call->write_iter) { msg.msg_iter = *call->write_iter; msg.msg_flags &= ~MSG_MORE; trace_afs_send_data(call, &msg); ret = rxrpc_kernel_send_data(call->net->socket, call->rxcall, &msg, iov_iter_count(&msg.msg_iter), afs_notify_end_request_tx); *call->write_iter = msg.msg_iter; trace_afs_sent_data(call, &msg, ret); if (ret < 0) goto error_do_abort; } /* Note that at this point, we may have received the reply or an abort * - and an asynchronous call may already have completed. * * afs_wait_for_call_to_complete(call) * must be called to synchronously clean up. */ return; error_do_abort: if (ret != -ECONNABORTED) rxrpc_kernel_abort_call(call->net->socket, rxcall, RX_USER_ABORT, ret, afs_abort_send_data_error); if (call->async) { afs_see_call(call, afs_call_trace_async_abort); return; } if (ret == -ECONNABORTED) { len = 0; iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0); rxrpc_kernel_recv_data(call->net->socket, rxcall, &msg.msg_iter, &len, false, &call->abort_code, &call->service_id); call->responded = true; } call->error = ret; trace_afs_call_done(call); error_kill_call: if (call->async) afs_see_call(call, afs_call_trace_async_kill); if (call->type->immediate_cancel) call->type->immediate_cancel(call); /* We need to dispose of the extra ref we grabbed for an async call. * The call, however, might be queued on afs_async_calls and we need to * make sure we don't get any more notifications that might requeue it. */ if (call->rxcall) rxrpc_kernel_shutdown_call(call->net->socket, call->rxcall); if (call->async) { if (cancel_work_sync(&call->async_work)) afs_put_call(call); afs_set_call_complete(call, ret, 0); } call->error = ret; call->state = AFS_CALL_COMPLETE; _leave(" = %d", ret); } /* * Log remote abort codes that indicate that we have a protocol disagreement * with the server. */ static void afs_log_error(struct afs_call *call, s32 remote_abort) { static int max = 0; const char *msg; int m; switch (remote_abort) { case RX_EOF: msg = "unexpected EOF"; break; case RXGEN_CC_MARSHAL: msg = "client marshalling"; break; case RXGEN_CC_UNMARSHAL: msg = "client unmarshalling"; break; case RXGEN_SS_MARSHAL: msg = "server marshalling"; break; case RXGEN_SS_UNMARSHAL: msg = "server unmarshalling"; break; case RXGEN_DECODE: msg = "opcode decode"; break; case RXGEN_SS_XDRFREE: msg = "server XDR cleanup"; break; case RXGEN_CC_XDRFREE: msg = "client XDR cleanup"; break; case -32: msg = "insufficient data"; break; default: return; } m = max; if (m < 3) { max = m + 1; pr_notice("kAFS: Peer reported %s failure on %s [%pISp]\n", msg, call->type->name, rxrpc_kernel_remote_addr(call->peer)); } } /* * deliver messages to a call */ void afs_deliver_to_call(struct afs_call *call) { enum afs_call_state state; size_t len; u32 abort_code, remote_abort = 0; int ret; _enter("%s", call->type->name); while (state = READ_ONCE(call->state), state == AFS_CALL_CL_AWAIT_REPLY || state == AFS_CALL_SV_AWAIT_OP_ID || state == AFS_CALL_SV_AWAIT_REQUEST || state == AFS_CALL_SV_AWAIT_ACK ) { if (state == AFS_CALL_SV_AWAIT_ACK) { len = 0; iov_iter_kvec(&call->def_iter, ITER_DEST, NULL, 0, 0); ret = rxrpc_kernel_recv_data(call->net->socket, call->rxcall, &call->def_iter, &len, false, &remote_abort, &call->service_id); trace_afs_receive_data(call, &call->def_iter, false, ret); if (ret == -EINPROGRESS || ret == -EAGAIN) return; if (ret < 0 || ret == 1) { if (ret == 1) ret = 0; goto call_complete; } return; } ret = call->type->deliver(call); state = READ_ONCE(call->state); if (ret == 0 && call->unmarshalling_error) ret = -EBADMSG; switch (ret) { case 0: call->responded = true; afs_queue_call_work(call); if (state == AFS_CALL_CL_PROC_REPLY) { if (call->op) set_bit(AFS_SERVER_FL_MAY_HAVE_CB, &call->op->server->flags); goto call_complete; } ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY); goto done; case -EINPROGRESS: case -EAGAIN: goto out; case -ECONNABORTED: ASSERTCMP(state, ==, AFS_CALL_COMPLETE); call->responded = true; afs_log_error(call, call->abort_code); goto done; case -ENOTSUPP: call->responded = true; abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, afs_abort_op_not_supported); goto local_abort; case -EIO: pr_err("kAFS: Call %u in bad state %u\n", call->debug_id, state); fallthrough; case -ENODATA: case -EBADMSG: case -EMSGSIZE: case -ENOMEM: case -EFAULT: abort_code = RXGEN_CC_UNMARSHAL; if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, afs_abort_unmarshal_error); goto local_abort; default: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, afs_abort_general_error); goto local_abort; } } done: if (call->type->done) call->type->done(call); out: _leave(""); return; local_abort: abort_code = 0; call_complete: afs_set_call_complete(call, ret, remote_abort); goto done; } /* * Wait synchronously for a call to complete. */ void afs_wait_for_call_to_complete(struct afs_call *call) { bool rxrpc_complete = false; _enter(""); if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) { DECLARE_WAITQUEUE(myself, current); add_wait_queue(&call->waitq, &myself); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); /* deliver any messages that are in the queue */ if (!afs_check_call_state(call, AFS_CALL_COMPLETE) && call->need_attention) { call->need_attention = false; __set_current_state(TASK_RUNNING); afs_deliver_to_call(call); continue; } if (afs_check_call_state(call, AFS_CALL_COMPLETE)) break; if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) { /* rxrpc terminated the call. */ rxrpc_complete = true; break; } schedule(); } remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); } if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) { if (rxrpc_complete) { afs_set_call_complete(call, call->error, call->abort_code); } else { /* Kill off the call if it's still live. */ _debug("call interrupted"); if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, RX_USER_ABORT, -EINTR, afs_abort_interrupted)) afs_set_call_complete(call, -EINTR, 0); } } } /* * wake up a waiting call */ static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall, unsigned long call_user_ID) { struct afs_call *call = (struct afs_call *)call_user_ID; call->need_attention = true; wake_up(&call->waitq); } /* * Wake up an asynchronous call. The caller is holding the call notify * spinlock around this, so we can't call afs_put_call(). */ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, unsigned long call_user_ID) { struct afs_call *call = (struct afs_call *)call_user_ID; int r; trace_afs_notify_call(rxcall, call); call->need_attention = true; if (__refcount_inc_not_zero(&call->ref, &r)) { trace_afs_call(call->debug_id, afs_call_trace_wake, r + 1, atomic_read(&call->net->nr_outstanding_calls), __builtin_return_address(0)); if (!queue_work(afs_async_calls, &call->async_work)) afs_deferred_put_call(call); } } /* * Perform I/O processing on an asynchronous call. The work item carries a ref * to the call struct that we either need to release or to pass on. */ static void afs_process_async_call(struct work_struct *work) { struct afs_call *call = container_of(work, struct afs_call, async_work); _enter(""); if (call->state < AFS_CALL_COMPLETE && call->need_attention) { call->need_attention = false; afs_deliver_to_call(call); } afs_put_call(call); _leave(""); } static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID) { struct afs_call *call = (struct afs_call *)user_call_ID; call->rxcall = rxcall; } /* * Charge the incoming call preallocation. */ void afs_charge_preallocation(struct work_struct *work) { struct afs_net *net = container_of(work, struct afs_net, charge_preallocation_work); struct afs_call *call = net->spare_incoming_call; for (;;) { if (!call) { call = afs_alloc_call(net, &afs_RXCMxxxx, GFP_KERNEL); if (!call) break; call->drop_ref = true; call->async = true; call->state = AFS_CALL_SV_AWAIT_OP_ID; init_waitqueue_head(&call->waitq); afs_extract_to_tmp(call); } if (rxrpc_kernel_charge_accept(net->socket, afs_wake_up_async_call, afs_rx_attach, (unsigned long)call, GFP_KERNEL, call->debug_id) < 0) break; call = NULL; } net->spare_incoming_call = call; } /* * Discard a preallocated call when a socket is shut down. */ static void afs_rx_discard_new_call(struct rxrpc_call *rxcall, unsigned long user_call_ID) { struct afs_call *call = (struct afs_call *)user_call_ID; call->rxcall = NULL; afs_put_call(call); } /* * Notification of an incoming call. */ static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, unsigned long user_call_ID) { struct afs_net *net = afs_sock2net(sk); queue_work(afs_wq, &net->charge_preallocation_work); } /* * Grab the operation ID from an incoming cache manager call. The socket * buffer is discarded on error or if we don't yet have sufficient data. */ static int afs_deliver_cm_op_id(struct afs_call *call) { int ret; _enter("{%zu}", iov_iter_count(call->iter)); /* the operation ID forms the first four bytes of the request data */ ret = afs_extract_data(call, true); if (ret < 0) return ret; call->operation_ID = ntohl(call->tmp); afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST); /* ask the cache manager to route the call (it'll change the call type * if successful) */ if (!afs_cm_incoming_call(call)) return -ENOTSUPP; trace_afs_cb_call(call); call->work.func = call->type->work; /* pass responsibility for the remainer of this message off to the * cache manager op */ return call->type->deliver(call); } /* * Advance the AFS call state when an RxRPC service call ends the transmit * phase. */ static void afs_notify_end_reply_tx(struct sock *sock, struct rxrpc_call *rxcall, unsigned long call_user_ID) { struct afs_call *call = (struct afs_call *)call_user_ID; afs_set_call_state(call, AFS_CALL_SV_REPLYING, AFS_CALL_SV_AWAIT_ACK); } /* * send an empty reply */ void afs_send_empty_reply(struct afs_call *call) { struct afs_net *net = call->net; struct msghdr msg; _enter(""); rxrpc_kernel_set_tx_length(net->socket, call->rxcall, 0); msg.msg_name = NULL; msg.msg_namelen = 0; iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, NULL, 0, 0); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; switch (rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, 0, afs_notify_end_reply_tx)) { case 0: _leave(" [replied]"); return; case -ENOMEM: _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, RXGEN_SS_MARSHAL, -ENOMEM, afs_abort_oom); fallthrough; default: _leave(" [error]"); return; } } /* * send a simple reply */ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) { struct afs_net *net = call->net; struct msghdr msg; struct kvec iov[1]; int n; _enter(""); rxrpc_kernel_set_tx_length(net->socket, call->rxcall, len); iov[0].iov_base = (void *) buf; iov[0].iov_len = len; msg.msg_name = NULL; msg.msg_namelen = 0; iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, iov, 1, len); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; n = rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, len, afs_notify_end_reply_tx); if (n >= 0) { /* Success */ _leave(" [replied]"); return; } if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, RXGEN_SS_MARSHAL, -ENOMEM, afs_abort_oom); } _leave(" [error]"); } /* * Extract a piece of data from the received data socket buffers. */ int afs_extract_data(struct afs_call *call, bool want_more) { struct afs_net *net = call->net; struct iov_iter *iter = call->iter; enum afs_call_state state; u32 remote_abort = 0; int ret; _enter("{%s,%zu,%zu},%d", call->type->name, call->iov_len, iov_iter_count(iter), want_more); ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter, &call->iov_len, want_more, &remote_abort, &call->service_id); trace_afs_receive_data(call, call->iter, want_more, ret); if (ret == 0 || ret == -EAGAIN) return ret; state = READ_ONCE(call->state); if (ret == 1) { switch (state) { case AFS_CALL_CL_AWAIT_REPLY: afs_set_call_state(call, state, AFS_CALL_CL_PROC_REPLY); break; case AFS_CALL_SV_AWAIT_REQUEST: afs_set_call_state(call, state, AFS_CALL_SV_REPLYING); break; case AFS_CALL_COMPLETE: kdebug("prem complete %d", call->error); return afs_io_error(call, afs_io_error_extract); default: break; } return 0; } afs_set_call_complete(call, ret, remote_abort); return ret; } /* * Log protocol error production. */ noinline int afs_protocol_error(struct afs_call *call, enum afs_eproto_cause cause) { trace_afs_protocol_error(call, cause); if (call) call->unmarshalling_error = true; return -EBADMSG; }
5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 9 9 9 9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 // SPDX-License-Identifier: GPL-2.0 /* * blk-mq scheduling framework * * Copyright (C) 2016 Jens Axboe */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/list_sort.h> #include <trace/events/block.h> #include "blk.h" #include "blk-mq.h" #include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-wbt.h" /* * Mark a hardware queue as needing a restart. */ void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) { if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) return; set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); } EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx); void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx) { clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); /* * Order clearing SCHED_RESTART and list_empty_careful(&hctx->dispatch) * in blk_mq_run_hw_queue(). Its pair is the barrier in * blk_mq_dispatch_rq_list(). So dispatch code won't see SCHED_RESTART, * meantime new request added to hctx->dispatch is missed to check in * blk_mq_run_hw_queue(). */ smp_mb(); blk_mq_run_hw_queue(hctx, true); } static int sched_rq_cmp(void *priv, const struct list_head *a, const struct list_head *b) { struct request *rqa = container_of(a, struct request, queuelist); struct request *rqb = container_of(b, struct request, queuelist); return rqa->mq_hctx > rqb->mq_hctx; } static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list) { struct blk_mq_hw_ctx *hctx = list_first_entry(rq_list, struct request, queuelist)->mq_hctx; struct request *rq; LIST_HEAD(hctx_list); unsigned int count = 0; list_for_each_entry(rq, rq_list, queuelist) { if (rq->mq_hctx != hctx) { list_cut_before(&hctx_list, rq_list, &rq->queuelist); goto dispatch; } count++; } list_splice_tail_init(rq_list, &hctx_list); dispatch: return blk_mq_dispatch_rq_list(hctx, &hctx_list, count); } #define BLK_MQ_BUDGET_DELAY 3 /* ms units */ /* * Only SCSI implements .get_budget and .put_budget, and SCSI restarts * its queue by itself in its completion handler, so we don't need to * restart queue if .get_budget() fails to get the budget. * * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to * be run again. This is necessary to avoid starving flushes. */ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; struct elevator_queue *e = q->elevator; bool multi_hctxs = false, run_queue = false; bool dispatched = false, busy = false; unsigned int max_dispatch; LIST_HEAD(rq_list); int count = 0; if (hctx->dispatch_busy) max_dispatch = 1; else max_dispatch = hctx->queue->nr_requests; do { struct request *rq; int budget_token; if (e->type->ops.has_work && !e->type->ops.has_work(hctx)) break; if (!list_empty_careful(&hctx->dispatch)) { busy = true; break; } budget_token = blk_mq_get_dispatch_budget(q); if (budget_token < 0) break; rq = e->type->ops.dispatch_request(hctx); if (!rq) { blk_mq_put_dispatch_budget(q, budget_token); /* * We're releasing without dispatching. Holding the * budget could have blocked any "hctx"s with the * same queue and if we didn't dispatch then there's * no guarantee anyone will kick the queue. Kick it * ourselves. */ run_queue = true; break; } blk_mq_set_rq_budget_token(rq, budget_token); /* * Now this rq owns the budget which has to be released * if this rq won't be queued to driver via .queue_rq() * in blk_mq_dispatch_rq_list(). */ list_add_tail(&rq->queuelist, &rq_list); count++; if (rq->mq_hctx != hctx) multi_hctxs = true; /* * If we cannot get tag for the request, stop dequeueing * requests from the IO scheduler. We are unlikely to be able * to submit them anyway and it creates false impression for * scheduling heuristics that the device can take more IO. */ if (!blk_mq_get_driver_tag(rq)) break; } while (count < max_dispatch); if (!count) { if (run_queue) blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY); } else if (multi_hctxs) { /* * Requests from different hctx may be dequeued from some * schedulers, such as bfq and deadline. * * Sort the requests in the list according to their hctx, * dispatch batching requests from same hctx at a time. */ list_sort(NULL, &rq_list, sched_rq_cmp); do { dispatched |= blk_mq_dispatch_hctx_list(&rq_list); } while (!list_empty(&rq_list)); } else { dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count); } if (busy) return -EAGAIN; return !!dispatched; } static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) { unsigned long end = jiffies + HZ; int ret; do { ret = __blk_mq_do_dispatch_sched(hctx); if (ret != 1) break; if (need_resched() || time_is_before_jiffies(end)) { blk_mq_delay_run_hw_queue(hctx, 0); break; } } while (1); return ret; } static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { unsigned short idx = ctx->index_hw[hctx->type]; if (++idx == hctx->nr_ctx) idx = 0; return hctx->ctxs[idx]; } /* * Only SCSI implements .get_budget and .put_budget, and SCSI restarts * its queue by itself in its completion handler, so we don't need to * restart queue if .get_budget() fails to get the budget. * * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to * be run again. This is necessary to avoid starving flushes. */ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; LIST_HEAD(rq_list); struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from); int ret = 0; struct request *rq; do { int budget_token; if (!list_empty_careful(&hctx->dispatch)) { ret = -EAGAIN; break; } if (!sbitmap_any_bit_set(&hctx->ctx_map)) break; budget_token = blk_mq_get_dispatch_budget(q); if (budget_token < 0) break; rq = blk_mq_dequeue_from_ctx(hctx, ctx); if (!rq) { blk_mq_put_dispatch_budget(q, budget_token); /* * We're releasing without dispatching. Holding the * budget could have blocked any "hctx"s with the * same queue and if we didn't dispatch then there's * no guarantee anyone will kick the queue. Kick it * ourselves. */ blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY); break; } blk_mq_set_rq_budget_token(rq, budget_token); /* * Now this rq owns the budget which has to be released * if this rq won't be queued to driver via .queue_rq() * in blk_mq_dispatch_rq_list(). */ list_add(&rq->queuelist, &rq_list); /* round robin for fair dispatch */ ctx = blk_mq_next_ctx(hctx, rq->mq_ctx); } while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1)); WRITE_ONCE(hctx->dispatch_from, ctx); return ret; } static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) { bool need_dispatch = false; LIST_HEAD(rq_list); /* * If we have previous entries on our dispatch list, grab them first for * more fair dispatch. */ if (!list_empty_careful(&hctx->dispatch)) { spin_lock(&hctx->lock); if (!list_empty(&hctx->dispatch)) list_splice_init(&hctx->dispatch, &rq_list); spin_unlock(&hctx->lock); } /* * Only ask the scheduler for requests, if we didn't have residual * requests from the dispatch list. This is to avoid the case where * we only ever dispatch a fraction of the requests available because * of low device queue depth. Once we pull requests out of the IO * scheduler, we can no longer merge or sort them. So it's best to * leave them there for as long as we can. Mark the hw queue as * needing a restart in that case. * * We want to dispatch from the scheduler if there was nothing * on the dispatch list or we were able to dispatch from the * dispatch list. */ if (!list_empty(&rq_list)) { blk_mq_sched_mark_restart_hctx(hctx); if (!blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) return 0; need_dispatch = true; } else { need_dispatch = hctx->dispatch_busy; } if (hctx->queue->elevator) return blk_mq_do_dispatch_sched(hctx); /* dequeue request one by one from sw queue if queue is busy */ if (need_dispatch) return blk_mq_do_dispatch_ctx(hctx); blk_mq_flush_busy_ctxs(hctx, &rq_list); blk_mq_dispatch_rq_list(hctx, &rq_list, 0); return 0; } void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; /* RCU or SRCU read lock is needed before checking quiesced flag */ if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) return; /* * A return of -EAGAIN is an indication that hctx->dispatch is not * empty and we must run again in order to avoid starving flushes. */ if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) { if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) blk_mq_run_hw_queue(hctx, true); } } bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { struct elevator_queue *e = q->elevator; struct blk_mq_ctx *ctx; struct blk_mq_hw_ctx *hctx; bool ret = false; enum hctx_type type; if (e && e->type->ops.bio_merge) { ret = e->type->ops.bio_merge(q, bio, nr_segs); goto out_put; } ctx = blk_mq_get_ctx(q); hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); type = hctx->type; if (list_empty_careful(&ctx->rq_lists[type])) goto out_put; /* default per sw-queue merge */ spin_lock(&ctx->lock); /* * Reverse check our software queue for entries that we could * potentially merge with. Currently includes a hand-wavy stop * count of 8, to not spend too much time checking for merges. */ if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) ret = true; spin_unlock(&ctx->lock); out_put: return ret; } bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq, struct list_head *free) { return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq, free); } EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge); static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { if (blk_mq_is_shared_tags(q->tag_set->flags)) { hctx->sched_tags = q->sched_shared_tags; return 0; } hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx, q->nr_requests); if (!hctx->sched_tags) return -ENOMEM; return 0; } static void blk_mq_exit_sched_shared_tags(struct request_queue *queue) { blk_mq_free_rq_map(queue->sched_shared_tags); queue->sched_shared_tags = NULL; } /* called in queue's release handler, tagset has gone away */ static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags) { struct blk_mq_hw_ctx *hctx; unsigned long i; queue_for_each_hw_ctx(q, hctx, i) { if (hctx->sched_tags) { if (!blk_mq_is_shared_tags(flags)) blk_mq_free_rq_map(hctx->sched_tags); hctx->sched_tags = NULL; } } if (blk_mq_is_shared_tags(flags)) blk_mq_exit_sched_shared_tags(q); } static int blk_mq_init_sched_shared_tags(struct request_queue *queue) { struct blk_mq_tag_set *set = queue->tag_set; /* * Set initial depth at max so that we don't need to reallocate for * updating nr_requests. */ queue->sched_shared_tags = blk_mq_alloc_map_and_rqs(set, BLK_MQ_NO_HCTX_IDX, MAX_SCHED_RQ); if (!queue->sched_shared_tags) return -ENOMEM; blk_mq_tag_update_sched_shared_tags(queue); return 0; } /* caller must have a reference to @e, will grab another one if successful */ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) { unsigned int flags = q->tag_set->flags; struct blk_mq_hw_ctx *hctx; struct elevator_queue *eq; unsigned long i; int ret; /* * Default to double of smaller one between hw queue_depth and 128, * since we don't split into sync/async like the old code did. * Additionally, this is a per-hw queue depth. */ q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth, BLKDEV_DEFAULT_RQ); if (blk_mq_is_shared_tags(flags)) { ret = blk_mq_init_sched_shared_tags(q); if (ret) return ret; } queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i); if (ret) goto err_free_map_and_rqs; } ret = e->ops.init_sched(q, e); if (ret) goto err_free_map_and_rqs; mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched(q); mutex_unlock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) { if (e->ops.init_hctx) { ret = e->ops.init_hctx(hctx, i); if (ret) { eq = q->elevator; blk_mq_sched_free_rqs(q); blk_mq_exit_sched(q, eq); kobject_put(&eq->kobj); return ret; } } mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched_hctx(q, hctx); mutex_unlock(&q->debugfs_mutex); } return 0; err_free_map_and_rqs: blk_mq_sched_free_rqs(q); blk_mq_sched_tags_teardown(q, flags); q->elevator = NULL; return ret; } /* * called in either blk_queue_cleanup or elevator_switch, tagset * is required for freeing requests */ void blk_mq_sched_free_rqs(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; unsigned long i; if (blk_mq_is_shared_tags(q->tag_set->flags)) { blk_mq_free_rqs(q->tag_set, q->sched_shared_tags, BLK_MQ_NO_HCTX_IDX); } else { queue_for_each_hw_ctx(q, hctx, i) { if (hctx->sched_tags) blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i); } } } void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) { struct blk_mq_hw_ctx *hctx; unsigned long i; unsigned int flags = 0; queue_for_each_hw_ctx(q, hctx, i) { mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched_hctx(hctx); mutex_unlock(&q->debugfs_mutex); if (e->type->ops.exit_hctx && hctx->sched_data) { e->type->ops.exit_hctx(hctx, i); hctx->sched_data = NULL; } flags = hctx->flags; } mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched(q); mutex_unlock(&q->debugfs_mutex); if (e->type->ops.exit_sched) e->type->ops.exit_sched(e); blk_mq_sched_tags_teardown(q, flags); q->elevator = NULL; }
3 1 3 1 8 1 2 4 1 5 2 4 4 3 3 3 1 9 8 9 9 4 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 // SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/rhashtable.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/netdevice.h> #include <linux/if_ether.h> #include <net/gso.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/neighbour.h> #include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_conntrack_acct.h> /* For layer 4 checksum field offset. */ #include <linux/tcp.h> #include <linux/udp.h> static int nf_flow_state_check(struct flow_offload *flow, int proto, struct sk_buff *skb, unsigned int thoff) { struct tcphdr *tcph; if (proto != IPPROTO_TCP) return 0; tcph = (void *)(skb_network_header(skb) + thoff); if (tcph->syn && test_bit(NF_FLOW_CLOSING, &flow->flags)) { flow_offload_teardown(flow); return -1; } if ((tcph->fin || tcph->rst) && !test_bit(NF_FLOW_CLOSING, &flow->flags)) set_bit(NF_FLOW_CLOSING, &flow->flags); return 0; } static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, __be32 addr, __be32 new_addr) { struct tcphdr *tcph; tcph = (void *)(skb_network_header(skb) + thoff); inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); } static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, __be32 addr, __be32 new_addr) { struct udphdr *udph; udph = (void *)(skb_network_header(skb) + thoff); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace4(&udph->check, skb, addr, new_addr, true); if (!udph->check) udph->check = CSUM_MANGLED_0; } } static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, unsigned int thoff, __be32 addr, __be32 new_addr) { switch (iph->protocol) { case IPPROTO_TCP: nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr); break; case IPPROTO_UDP: nf_flow_nat_ip_udp(skb, thoff, addr, new_addr); break; } } static void nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, struct iphdr *iph, unsigned int thoff, enum flow_offload_tuple_dir dir) { __be32 addr, new_addr; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = iph->saddr; new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; iph->saddr = new_addr; break; case FLOW_OFFLOAD_DIR_REPLY: addr = iph->daddr; new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; iph->daddr = new_addr; break; } csum_replace4(&iph->check, addr, new_addr); nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); } static void nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, struct iphdr *iph, unsigned int thoff, enum flow_offload_tuple_dir dir) { __be32 addr, new_addr; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = iph->daddr; new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; iph->daddr = new_addr; break; case FLOW_OFFLOAD_DIR_REPLY: addr = iph->saddr; new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; iph->saddr = new_addr; break; } csum_replace4(&iph->check, addr, new_addr); nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); } static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, unsigned int thoff, enum flow_offload_tuple_dir dir, struct iphdr *iph) { if (test_bit(NF_FLOW_SNAT, &flow->flags)) { nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir); nf_flow_snat_ip(flow, skb, iph, thoff, dir); } if (test_bit(NF_FLOW_DNAT, &flow->flags)) { nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir); nf_flow_dnat_ip(flow, skb, iph, thoff, dir); } } static bool ip_has_options(unsigned int thoff) { return thoff != sizeof(struct iphdr); } static void nf_flow_tuple_encap(struct sk_buff *skb, struct flow_offload_tuple *tuple) { struct vlan_ethhdr *veth; struct pppoe_hdr *phdr; int i = 0; if (skb_vlan_tag_present(skb)) { tuple->encap[i].id = skb_vlan_tag_get(skb); tuple->encap[i].proto = skb->vlan_proto; i++; } switch (skb->protocol) { case htons(ETH_P_8021Q): veth = (struct vlan_ethhdr *)skb_mac_header(skb); tuple->encap[i].id = ntohs(veth->h_vlan_TCI); tuple->encap[i].proto = skb->protocol; break; case htons(ETH_P_PPP_SES): phdr = (struct pppoe_hdr *)skb_network_header(skb); tuple->encap[i].id = ntohs(phdr->sid); tuple->encap[i].proto = skb->protocol; break; } } struct nf_flowtable_ctx { const struct net_device *in; u32 offset; u32 hdrsize; }; static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb, struct flow_offload_tuple *tuple) { struct flow_ports *ports; unsigned int thoff; struct iphdr *iph; u8 ipproto; if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset)) return -1; iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); thoff = (iph->ihl * 4); if (ip_is_fragment(iph) || unlikely(ip_has_options(thoff))) return -1; thoff += ctx->offset; ipproto = iph->protocol; switch (ipproto) { case IPPROTO_TCP: ctx->hdrsize = sizeof(struct tcphdr); break; case IPPROTO_UDP: ctx->hdrsize = sizeof(struct udphdr); break; #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: ctx->hdrsize = sizeof(struct gre_base_hdr); break; #endif default: return -1; } if (iph->ttl <= 1) return -1; if (!pskb_may_pull(skb, thoff + ctx->hdrsize)) return -1; switch (ipproto) { case IPPROTO_TCP: case IPPROTO_UDP: ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_port = ports->source; tuple->dst_port = ports->dest; break; case IPPROTO_GRE: { struct gre_base_hdr *greh; greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) return -1; break; } } iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); tuple->src_v4.s_addr = iph->saddr; tuple->dst_v4.s_addr = iph->daddr; tuple->l3proto = AF_INET; tuple->l4proto = ipproto; tuple->iifidx = ctx->in->ifindex; nf_flow_tuple_encap(skb, tuple); return 0; } /* Based on ip_exceeds_mtu(). */ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) { if (skb->len <= mtu) return false; if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) return false; return true; } static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple) { if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH && tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM) return true; return dst_check(tuple->dst_cache, tuple->dst_cookie); } static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, const struct nf_hook_state *state, struct dst_entry *dst) { skb_orphan(skb); skb_dst_set_noref(skb, dst); dst_output(state->net, state->sk, skb); return NF_STOLEN; } static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, u32 *offset) { struct vlan_ethhdr *veth; __be16 inner_proto; switch (skb->protocol) { case htons(ETH_P_8021Q): if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) return false; veth = (struct vlan_ethhdr *)skb_mac_header(skb); if (veth->h_vlan_encapsulated_proto == proto) { *offset += VLAN_HLEN; return true; } break; case htons(ETH_P_PPP_SES): if (nf_flow_pppoe_proto(skb, &inner_proto) && inner_proto == proto) { *offset += PPPOE_SES_HLEN; return true; } break; } return false; } static void nf_flow_encap_pop(struct sk_buff *skb, struct flow_offload_tuple_rhash *tuplehash) { struct vlan_hdr *vlan_hdr; int i; for (i = 0; i < tuplehash->tuple.encap_num; i++) { if (skb_vlan_tag_present(skb)) { __vlan_hwaccel_clear_tag(skb); continue; } switch (skb->protocol) { case htons(ETH_P_8021Q): vlan_hdr = (struct vlan_hdr *)skb->data; __skb_pull(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vlan_hdr); skb_reset_network_header(skb); break; case htons(ETH_P_PPP_SES): skb->protocol = __nf_flow_pppoe_proto(skb); skb_pull(skb, PPPOE_SES_HLEN); skb_reset_network_header(skb); break; } } } static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, const struct flow_offload_tuple_rhash *tuplehash, unsigned short type) { struct net_device *outdev; outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx); if (!outdev) return NF_DROP; skb->dev = outdev; dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest, tuplehash->tuple.out.h_source, skb->len); dev_queue_xmit(skb); return NF_STOLEN; } static struct flow_offload_tuple_rhash * nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx, struct nf_flowtable *flow_table, struct sk_buff *skb) { struct flow_offload_tuple tuple = {}; if (skb->protocol != htons(ETH_P_IP) && !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset)) return NULL; if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0) return NULL; return flow_offload_lookup(flow_table, &tuple); } static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, struct nf_flowtable *flow_table, struct flow_offload_tuple_rhash *tuplehash, struct sk_buff *skb) { enum flow_offload_tuple_dir dir; struct flow_offload *flow; unsigned int thoff, mtu; struct iphdr *iph; dir = tuplehash->tuple.dir; flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) return 0; iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); thoff = (iph->ihl * 4) + ctx->offset; if (nf_flow_state_check(flow, iph->protocol, skb, thoff)) return 0; if (!nf_flow_dst_check(&tuplehash->tuple)) { flow_offload_teardown(flow); return 0; } if (skb_try_make_writable(skb, thoff + ctx->hdrsize)) return -1; flow_offload_refresh(flow_table, flow, false); nf_flow_encap_pop(skb, tuplehash); thoff -= ctx->offset; iph = ip_hdr(skb); nf_flow_nat_ip(flow, skb, thoff, dir, iph); ip_decrease_ttl(iph); skb_clear_tstamp(skb); if (flow_table->flags & NF_FLOWTABLE_COUNTER) nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); return 1; } unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct flow_offload_tuple_rhash *tuplehash; struct nf_flowtable *flow_table = priv; enum flow_offload_tuple_dir dir; struct nf_flowtable_ctx ctx = { .in = state->in, }; struct flow_offload *flow; struct net_device *outdev; struct rtable *rt; __be32 nexthop; int ret; tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb); if (!tuplehash) return NF_ACCEPT; ret = nf_flow_offload_forward(&ctx, flow_table, tuplehash, skb); if (ret < 0) return NF_DROP; else if (ret == 0) return NF_ACCEPT; if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { rt = dst_rtable(tuplehash->tuple.dst_cache); memset(skb->cb, 0, sizeof(struct inet_skb_parm)); IPCB(skb)->iif = skb->dev->ifindex; IPCB(skb)->flags = IPSKB_FORWARDED; return nf_flow_xmit_xfrm(skb, state, &rt->dst); } dir = tuplehash->tuple.dir; flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: rt = dst_rtable(tuplehash->tuple.dst_cache); outdev = rt->dst.dev; skb->dev = outdev; nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); skb_dst_set_noref(skb, &rt->dst); neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); ret = NF_STOLEN; break; case FLOW_OFFLOAD_XMIT_DIRECT: ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP); if (ret == NF_DROP) flow_offload_teardown(flow); break; default: WARN_ON_ONCE(1); ret = NF_DROP; break; } return ret; } EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, struct in6_addr *addr, struct in6_addr *new_addr, struct ipv6hdr *ip6h) { struct tcphdr *tcph; tcph = (void *)(skb_network_header(skb) + thoff); inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32, new_addr->s6_addr32, true); } static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, struct in6_addr *addr, struct in6_addr *new_addr) { struct udphdr *udph; udph = (void *)(skb_network_header(skb) + thoff); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32, new_addr->s6_addr32, true); if (!udph->check) udph->check = CSUM_MANGLED_0; } } static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, unsigned int thoff, struct in6_addr *addr, struct in6_addr *new_addr) { switch (ip6h->nexthdr) { case IPPROTO_TCP: nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h); break; case IPPROTO_UDP: nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr); break; } } static void nf_flow_snat_ipv6(const struct flow_offload *flow, struct sk_buff *skb, struct ipv6hdr *ip6h, unsigned int thoff, enum flow_offload_tuple_dir dir) { struct in6_addr addr, new_addr; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = ip6h->saddr; new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6; ip6h->saddr = new_addr; break; case FLOW_OFFLOAD_DIR_REPLY: addr = ip6h->daddr; new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6; ip6h->daddr = new_addr; break; } nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); } static void nf_flow_dnat_ipv6(const struct flow_offload *flow, struct sk_buff *skb, struct ipv6hdr *ip6h, unsigned int thoff, enum flow_offload_tuple_dir dir) { struct in6_addr addr, new_addr; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = ip6h->daddr; new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6; ip6h->daddr = new_addr; break; case FLOW_OFFLOAD_DIR_REPLY: addr = ip6h->saddr; new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6; ip6h->saddr = new_addr; break; } nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); } static void nf_flow_nat_ipv6(const struct flow_offload *flow, struct sk_buff *skb, enum flow_offload_tuple_dir dir, struct ipv6hdr *ip6h) { unsigned int thoff = sizeof(*ip6h); if (test_bit(NF_FLOW_SNAT, &flow->flags)) { nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir); nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir); } if (test_bit(NF_FLOW_DNAT, &flow->flags)) { nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir); nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir); } } static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb, struct flow_offload_tuple *tuple) { struct flow_ports *ports; struct ipv6hdr *ip6h; unsigned int thoff; u8 nexthdr; thoff = sizeof(*ip6h) + ctx->offset; if (!pskb_may_pull(skb, thoff)) return -1; ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset); nexthdr = ip6h->nexthdr; switch (nexthdr) { case IPPROTO_TCP: ctx->hdrsize = sizeof(struct tcphdr); break; case IPPROTO_UDP: ctx->hdrsize = sizeof(struct udphdr); break; #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: ctx->hdrsize = sizeof(struct gre_base_hdr); break; #endif default: return -1; } if (ip6h->hop_limit <= 1) return -1; if (!pskb_may_pull(skb, thoff + ctx->hdrsize)) return -1; switch (nexthdr) { case IPPROTO_TCP: case IPPROTO_UDP: ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_port = ports->source; tuple->dst_port = ports->dest; break; case IPPROTO_GRE: { struct gre_base_hdr *greh; greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) return -1; break; } } ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset); tuple->src_v6 = ip6h->saddr; tuple->dst_v6 = ip6h->daddr; tuple->l3proto = AF_INET6; tuple->l4proto = nexthdr; tuple->iifidx = ctx->in->ifindex; nf_flow_tuple_encap(skb, tuple); return 0; } static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx, struct nf_flowtable *flow_table, struct flow_offload_tuple_rhash *tuplehash, struct sk_buff *skb) { enum flow_offload_tuple_dir dir; struct flow_offload *flow; unsigned int thoff, mtu; struct ipv6hdr *ip6h; dir = tuplehash->tuple.dir; flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) return 0; ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset); thoff = sizeof(*ip6h) + ctx->offset; if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff)) return 0; if (!nf_flow_dst_check(&tuplehash->tuple)) { flow_offload_teardown(flow); return 0; } if (skb_try_make_writable(skb, thoff + ctx->hdrsize)) return -1; flow_offload_refresh(flow_table, flow, false); nf_flow_encap_pop(skb, tuplehash); ip6h = ipv6_hdr(skb); nf_flow_nat_ipv6(flow, skb, dir, ip6h); ip6h->hop_limit--; skb_clear_tstamp(skb); if (flow_table->flags & NF_FLOWTABLE_COUNTER) nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); return 1; } static struct flow_offload_tuple_rhash * nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx, struct nf_flowtable *flow_table, struct sk_buff *skb) { struct flow_offload_tuple tuple = {}; if (skb->protocol != htons(ETH_P_IPV6) && !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset)) return NULL; if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0) return NULL; return flow_offload_lookup(flow_table, &tuple); } unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct flow_offload_tuple_rhash *tuplehash; struct nf_flowtable *flow_table = priv; enum flow_offload_tuple_dir dir; struct nf_flowtable_ctx ctx = { .in = state->in, }; const struct in6_addr *nexthop; struct flow_offload *flow; struct net_device *outdev; struct rt6_info *rt; int ret; tuplehash = nf_flow_offload_ipv6_lookup(&ctx, flow_table, skb); if (tuplehash == NULL) return NF_ACCEPT; ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb); if (ret < 0) return NF_DROP; else if (ret == 0) return NF_ACCEPT; if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { rt = dst_rt6_info(tuplehash->tuple.dst_cache); memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); IP6CB(skb)->iif = skb->dev->ifindex; IP6CB(skb)->flags = IP6SKB_FORWARDED; return nf_flow_xmit_xfrm(skb, state, &rt->dst); } dir = tuplehash->tuple.dir; flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: rt = dst_rt6_info(tuplehash->tuple.dst_cache); outdev = rt->dst.dev; skb->dev = outdev; nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); skb_dst_set_noref(skb, &rt->dst); neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); ret = NF_STOLEN; break; case FLOW_OFFLOAD_XMIT_DIRECT: ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6); if (ret == NF_DROP) flow_offload_teardown(flow); break; default: WARN_ON_ONCE(1); ret = NF_DROP; break; } return ret; } EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
17 17 17 17 17 17 17 17 17 17 17 17 17 17 24 24 24 15 1 1 1 1 24 26 24 24 1 14 3 2 3 5 25 25 25 25 25 25 25 25 26 26 25 25 24 24 24 24 32 32 32 17 3 20 14 24 2 2 2 1 1 3 3 3 3 3 2 2 2 1 1 1 1 1 1 1 1 1 6 2 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ #include <linux/device.h> #include <net/genetlink.h> #include <net/sock.h> #include "devl_internal.h" struct devlink_info_req { struct sk_buff *msg; void (*version_cb)(const char *version_name, enum devlink_info_version_type version_type, void *version_cb_priv); void *version_cb_priv; }; struct devlink_reload_combination { enum devlink_reload_action action; enum devlink_reload_limit limit; }; static const struct devlink_reload_combination devlink_reload_invalid_combinations[] = { { /* can't reinitialize driver with no down time */ .action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT, .limit = DEVLINK_RELOAD_LIMIT_NO_RESET, }, }; static bool devlink_reload_combination_is_invalid(enum devlink_reload_action action, enum devlink_reload_limit limit) { int i; for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) if (devlink_reload_invalid_combinations[i].action == action && devlink_reload_invalid_combinations[i].limit == limit) return true; return false; } static bool devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action) { return test_bit(action, &devlink->ops->reload_actions); } static bool devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_limit limit) { return test_bit(limit, &devlink->ops->reload_limits); } static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_limit limit, u32 value) { struct nlattr *reload_stats_entry; reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY); if (!reload_stats_entry) return -EMSGSIZE; if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) || nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value)) goto nla_put_failure; nla_nest_end(msg, reload_stats_entry); return 0; nla_put_failure: nla_nest_cancel(msg, reload_stats_entry); return -EMSGSIZE; } static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote) { struct nlattr *reload_stats_attr, *act_info, *act_stats; int i, j, stat_idx; u32 value; if (!is_remote) reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS); else reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_REMOTE_RELOAD_STATS); if (!reload_stats_attr) return -EMSGSIZE; for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) { if ((!is_remote && !devlink_reload_action_is_supported(devlink, i)) || i == DEVLINK_RELOAD_ACTION_UNSPEC) continue; act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO); if (!act_info) goto nla_put_failure; if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i)) goto action_info_nest_cancel; act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS); if (!act_stats) goto action_info_nest_cancel; for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) { /* Remote stats are shown even if not locally supported. * Stats of actions with unspecified limit are shown * though drivers don't need to register unspecified * limit. */ if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC && !devlink_reload_limit_is_supported(devlink, j)) || devlink_reload_combination_is_invalid(i, j)) continue; stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i; if (!is_remote) value = devlink->stats.reload_stats[stat_idx]; else value = devlink->stats.remote_reload_stats[stat_idx]; if (devlink_reload_stat_put(msg, j, value)) goto action_stats_nest_cancel; } nla_nest_end(msg, act_stats); nla_nest_end(msg, act_info); } nla_nest_end(msg, reload_stats_attr); return 0; action_stats_nest_cancel: nla_nest_cancel(msg, act_stats); action_info_nest_cancel: nla_nest_cancel(msg, act_info); nla_put_failure: nla_nest_cancel(msg, reload_stats_attr); return -EMSGSIZE; } static int devlink_nl_nested_fill(struct sk_buff *msg, struct devlink *devlink) { unsigned long rel_index; void *unused; int err; xa_for_each(&devlink->nested_rels, rel_index, unused) { err = devlink_rel_devlink_handle_put(msg, devlink, rel_index, DEVLINK_ATTR_NESTED_DEVLINK, NULL); if (err) return err; } return 0; } static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags) { struct nlattr *dev_stats; void *hdr; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed)) goto nla_put_failure; dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS); if (!dev_stats) goto nla_put_failure; if (devlink_reload_stats_put(msg, devlink, false)) goto dev_stats_nest_cancel; if (devlink_reload_stats_put(msg, devlink, true)) goto dev_stats_nest_cancel; nla_nest_end(msg, dev_stats); if (devlink_nl_nested_fill(msg, devlink)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; dev_stats_nest_cancel: nla_nest_cancel(msg, dev_stats); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static void devlink_notify(struct devlink *devlink, enum devlink_command cmd) { struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL); WARN_ON(!devl_is_registered(devlink)); if (!devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0); if (err) { nlmsg_free(msg); return; } devlink_nl_notify_send(devlink, msg); } int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW, info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int devlink_nl_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { return devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); } int devlink_nl_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { return devlink_nl_dumpit(msg, cb, devlink_nl_get_dump_one); } static void devlink_rel_notify_cb(struct devlink *devlink, u32 obj_index) { devlink_notify(devlink, DEVLINK_CMD_NEW); } static void devlink_rel_cleanup_cb(struct devlink *devlink, u32 obj_index, u32 rel_index) { xa_erase(&devlink->nested_rels, rel_index); } int devl_nested_devlink_set(struct devlink *devlink, struct devlink *nested_devlink) { u32 rel_index; int err; err = devlink_rel_nested_in_add(&rel_index, devlink->index, 0, devlink_rel_notify_cb, devlink_rel_cleanup_cb, nested_devlink); if (err) return err; return xa_insert(&devlink->nested_rels, rel_index, xa_mk_value(0), GFP_KERNEL); } EXPORT_SYMBOL_GPL(devl_nested_devlink_set); void devlink_notify_register(struct devlink *devlink) { devlink_notify(devlink, DEVLINK_CMD_NEW); devlink_linecards_notify_register(devlink); devlink_ports_notify_register(devlink); devlink_trap_policers_notify_register(devlink); devlink_trap_groups_notify_register(devlink); devlink_traps_notify_register(devlink); devlink_rates_notify_register(devlink); devlink_regions_notify_register(devlink); devlink_params_notify_register(devlink); } void devlink_notify_unregister(struct devlink *devlink) { devlink_params_notify_unregister(devlink); devlink_regions_notify_unregister(devlink); devlink_rates_notify_unregister(devlink); devlink_traps_notify_unregister(devlink); devlink_trap_groups_notify_unregister(devlink); devlink_trap_policers_notify_unregister(devlink); devlink_ports_notify_unregister(devlink); devlink_linecards_notify_unregister(devlink); devlink_notify(devlink, DEVLINK_CMD_DEL); } static void devlink_reload_failed_set(struct devlink *devlink, bool reload_failed) { if (devlink->reload_failed == reload_failed) return; devlink->reload_failed = reload_failed; devlink_notify(devlink, DEVLINK_CMD_NEW); } bool devlink_is_reload_failed(const struct devlink *devlink) { return devlink->reload_failed; } EXPORT_SYMBOL_GPL(devlink_is_reload_failed); static void __devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats, enum devlink_reload_limit limit, u32 actions_performed) { unsigned long actions = actions_performed; int stat_idx; int action; for_each_set_bit(action, &actions, __DEVLINK_RELOAD_ACTION_MAX) { stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action; reload_stats[stat_idx]++; } devlink_notify(devlink, DEVLINK_CMD_NEW); } static void devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit, u32 actions_performed) { __devlink_reload_stats_update(devlink, devlink->stats.reload_stats, limit, actions_performed); } /** * devlink_remote_reload_actions_performed - Update devlink on reload actions * performed which are not a direct result of devlink reload call. * * This should be called by a driver after performing reload actions in case it was not * a result of devlink reload call. For example fw_activate was performed as a result * of devlink reload triggered fw_activate on another host. * The motivation for this function is to keep data on reload actions performed on this * function whether it was done due to direct devlink reload call or not. * * @devlink: devlink * @limit: reload limit * @actions_performed: bitmask of actions performed */ void devlink_remote_reload_actions_performed(struct devlink *devlink, enum devlink_reload_limit limit, u32 actions_performed) { if (WARN_ON(!actions_performed || actions_performed & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) || actions_performed >= BIT(__DEVLINK_RELOAD_ACTION_MAX) || limit > DEVLINK_RELOAD_LIMIT_MAX)) return; __devlink_reload_stats_update(devlink, devlink->stats.remote_reload_stats, limit, actions_performed); } EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed); static struct net *devlink_netns_get(struct sk_buff *skb, struct genl_info *info) { struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID]; struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD]; struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID]; struct net *net; if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) { NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified"); return ERR_PTR(-EINVAL); } if (netns_pid_attr) { net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr)); } else if (netns_fd_attr) { net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr)); } else if (netns_id_attr) { net = get_net_ns_by_id(sock_net(skb->sk), nla_get_u32(netns_id_attr)); if (!net) net = ERR_PTR(-EINVAL); } else { WARN_ON(1); net = ERR_PTR(-EINVAL); } if (IS_ERR(net)) { NL_SET_ERR_MSG(info->extack, "Unknown network namespace"); return ERR_PTR(-EINVAL); } if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { put_net(net); return ERR_PTR(-EPERM); } return net; } static void devlink_reload_netns_change(struct devlink *devlink, struct net *curr_net, struct net *dest_net) { /* Userspace needs to be notified about devlink objects * removed from original and entering new network namespace. * The rest of the devlink objects are re-created during * reload process so the notifications are generated separatelly. */ devlink_notify_unregister(devlink); write_pnet(&devlink->_net, dest_net); devlink_notify_register(devlink); devlink_rel_nested_in_notify(devlink); } static void devlink_reload_reinit_sanity_check(struct devlink *devlink) { WARN_ON(!list_empty(&devlink->trap_policer_list)); WARN_ON(!list_empty(&devlink->trap_group_list)); WARN_ON(!list_empty(&devlink->trap_list)); WARN_ON(!list_empty(&devlink->dpipe_table_list)); WARN_ON(!list_empty(&devlink->sb_list)); WARN_ON(!list_empty(&devlink->rate_list)); WARN_ON(!list_empty(&devlink->linecard_list)); WARN_ON(!xa_empty(&devlink->ports)); } int devlink_reload(struct devlink *devlink, struct net *dest_net, enum devlink_reload_action action, enum devlink_reload_limit limit, u32 *actions_performed, struct netlink_ext_ack *extack) { u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; struct net *curr_net; int err; /* Make sure the reload operations are invoked with the device lock * held to allow drivers to trigger functionality that expects it * (e.g., PCI reset) and to close possible races between these * operations and probe/remove. */ device_lock_assert(devlink->dev); memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats)); err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack); if (err) return err; curr_net = devlink_net(devlink); if (dest_net && !net_eq(dest_net, curr_net)) devlink_reload_netns_change(devlink, curr_net, dest_net); if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT) { devlink_params_driverinit_load_new(devlink); devlink_reload_reinit_sanity_check(devlink); } err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack); devlink_reload_failed_set(devlink, !!err); if (err) return err; WARN_ON(!(*actions_performed & BIT(action))); /* Catch driver on updating the remote action within devlink reload */ WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats))); devlink_reload_stats_update(devlink, limit, *actions_performed); return 0; } static int devlink_nl_reload_actions_performed_snd(struct devlink *devlink, u32 actions_performed, enum devlink_command cmd, struct genl_info *info) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, cmd); if (!hdr) goto free_msg; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (nla_put_bitfield32(msg, DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, actions_performed, actions_performed)) goto nla_put_failure; genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: genlmsg_cancel(msg, hdr); free_msg: nlmsg_free(msg); return -EMSGSIZE; } int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; enum devlink_reload_action action; enum devlink_reload_limit limit; struct net *dest_net = NULL; u32 actions_performed; int err; err = devlink_resources_validate(devlink, NULL, info); if (err) { NL_SET_ERR_MSG(info->extack, "resources size validation failed"); return err; } action = nla_get_u8_default(info->attrs[DEVLINK_ATTR_RELOAD_ACTION], DEVLINK_RELOAD_ACTION_DRIVER_REINIT); if (!devlink_reload_action_is_supported(devlink, action)) { NL_SET_ERR_MSG(info->extack, "Requested reload action is not supported by the driver"); return -EOPNOTSUPP; } limit = DEVLINK_RELOAD_LIMIT_UNSPEC; if (info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) { struct nla_bitfield32 limits; u32 limits_selected; limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]); limits_selected = limits.value & limits.selector; if (!limits_selected) { NL_SET_ERR_MSG(info->extack, "Invalid limit selected"); return -EINVAL; } for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++) if (limits_selected & BIT(limit)) break; /* UAPI enables multiselection, but currently it is not used */ if (limits_selected != BIT(limit)) { NL_SET_ERR_MSG(info->extack, "Multiselection of limit is not supported"); return -EOPNOTSUPP; } if (!devlink_reload_limit_is_supported(devlink, limit)) { NL_SET_ERR_MSG(info->extack, "Requested limit is not supported by the driver"); return -EOPNOTSUPP; } if (devlink_reload_combination_is_invalid(action, limit)) { NL_SET_ERR_MSG(info->extack, "Requested limit is invalid for this action"); return -EINVAL; } } if (info->attrs[DEVLINK_ATTR_NETNS_PID] || info->attrs[DEVLINK_ATTR_NETNS_FD] || info->attrs[DEVLINK_ATTR_NETNS_ID]) { dest_net = devlink_netns_get(skb, info); if (IS_ERR(dest_net)) return PTR_ERR(dest_net); if (!net_eq(dest_net, devlink_net(devlink)) && action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) { NL_SET_ERR_MSG_MOD(info->extack, "Changing namespace is only supported for reinit action"); return -EOPNOTSUPP; } } err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack); if (dest_net) put_net(dest_net); if (err) return err; /* For backward compatibility generate reply only if attributes used by user */ if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION] && !info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) return 0; return devlink_nl_reload_actions_performed_snd(devlink, actions_performed, DEVLINK_CMD_RELOAD, info); } bool devlink_reload_actions_valid(const struct devlink_ops *ops) { const struct devlink_reload_combination *comb; int i; if (!devlink_reload_supported(ops)) { if (WARN_ON(ops->reload_actions)) return false; return true; } if (WARN_ON(!ops->reload_actions || ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) || ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX))) return false; if (WARN_ON(ops->reload_limits & BIT(DEVLINK_RELOAD_LIMIT_UNSPEC) || ops->reload_limits >= BIT(__DEVLINK_RELOAD_LIMIT_MAX))) return false; for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) { comb = &devlink_reload_invalid_combinations[i]; if (ops->reload_actions == BIT(comb->action) && ops->reload_limits == BIT(comb->limit)) return false; } return true; } static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags) { const struct devlink_ops *ops = devlink->ops; enum devlink_eswitch_encap_mode encap_mode; u8 inline_mode; void *hdr; int err = 0; u16 mode; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; err = devlink_nl_put_handle(msg, devlink); if (err) goto nla_put_failure; if (ops->eswitch_mode_get) { err = ops->eswitch_mode_get(devlink, &mode); if (err) goto nla_put_failure; err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode); if (err) goto nla_put_failure; } if (ops->eswitch_inline_mode_get) { err = ops->eswitch_inline_mode_get(devlink, &inline_mode); if (err) goto nla_put_failure; err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE, inline_mode); if (err) goto nla_put_failure; } if (ops->eswitch_encap_mode_get) { err = ops->eswitch_encap_mode_get(devlink, &encap_mode); if (err) goto nla_put_failure; err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode); if (err) goto nla_put_failure; } genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return err; } int devlink_nl_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_GET, info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; enum devlink_eswitch_encap_mode encap_mode; u8 inline_mode; int err = 0; u16 mode; if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) { if (!ops->eswitch_mode_set) return -EOPNOTSUPP; mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); err = devlink_rate_nodes_check(devlink, mode, info->extack); if (err) return err; err = ops->eswitch_mode_set(devlink, mode, info->extack); if (err) return err; } if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) { if (!ops->eswitch_inline_mode_set) return -EOPNOTSUPP; inline_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]); err = ops->eswitch_inline_mode_set(devlink, inline_mode, info->extack); if (err) return err; } if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) { if (!ops->eswitch_encap_mode_set) return -EOPNOTSUPP; encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]); err = ops->eswitch_encap_mode_set(devlink, encap_mode, info->extack); if (err) return err; } return 0; } int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) { if (!req->msg) return 0; return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn); } EXPORT_SYMBOL_GPL(devlink_info_serial_number_put); int devlink_info_board_serial_number_put(struct devlink_info_req *req, const char *bsn) { if (!req->msg) return 0; return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, bsn); } EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put); static int devlink_info_version_put(struct devlink_info_req *req, int attr, const char *version_name, const char *version_value, enum devlink_info_version_type version_type) { struct nlattr *nest; int err; if (req->version_cb) req->version_cb(version_name, version_type, req->version_cb_priv); if (!req->msg) return 0; nest = nla_nest_start_noflag(req->msg, attr); if (!nest) return -EMSGSIZE; err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME, version_name); if (err) goto nla_put_failure; err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE, version_value); if (err) goto nla_put_failure; nla_nest_end(req->msg, nest); return 0; nla_put_failure: nla_nest_cancel(req->msg, nest); return err; } int devlink_info_version_fixed_put(struct devlink_info_req *req, const char *version_name, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED, version_name, version_value, DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put); int devlink_info_version_stored_put(struct devlink_info_req *req, const char *version_name, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED, version_name, version_value, DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_stored_put); int devlink_info_version_stored_put_ext(struct devlink_info_req *req, const char *version_name, const char *version_value, enum devlink_info_version_type version_type) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED, version_name, version_value, version_type); } EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext); int devlink_info_version_running_put(struct devlink_info_req *req, const char *version_name, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING, version_name, version_value, DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_running_put); int devlink_info_version_running_put_ext(struct devlink_info_req *req, const char *version_name, const char *version_value, enum devlink_info_version_type version_type) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING, version_name, version_value, version_type); } EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext); static int devlink_nl_driver_info_get(struct device_driver *drv, struct devlink_info_req *req) { if (!drv) return 0; if (drv->name[0]) return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, drv->name); return 0; } static int devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { struct device *dev = devlink_to_dev(devlink); struct devlink_info_req req = {}; void *hdr; int err; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; err = -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto err_cancel_msg; req.msg = msg; if (devlink->ops->info_get) { err = devlink->ops->info_get(devlink, &req, extack); if (err) goto err_cancel_msg; } err = devlink_nl_driver_info_get(dev->driver, &req); if (err) goto err_cancel_msg; genlmsg_end(msg, hdr); return 0; err_cancel_msg: genlmsg_cancel(msg, hdr); return err; } int devlink_nl_info_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, info->snd_portid, info->snd_seq, 0, info->extack); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int devlink_nl_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { int err; err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, cb->extack); if (err == -EOPNOTSUPP) err = 0; return err; } int devlink_nl_info_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { return devlink_nl_dumpit(msg, cb, devlink_nl_info_get_dump_one); } static int devlink_nl_flash_update_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, struct devlink_flash_notify *params) { void *hdr; hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd); if (!hdr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS) goto out; if (params->status_msg && nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG, params->status_msg)) goto nla_put_failure; if (params->component && nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT, params->component)) goto nla_put_failure; if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE, params->done)) goto nla_put_failure; if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL, params->total)) goto nla_put_failure; if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT, params->timeout)) goto nla_put_failure; out: genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static void __devlink_flash_update_notify(struct devlink *devlink, enum devlink_command cmd, struct devlink_flash_notify *params) { struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE && cmd != DEVLINK_CMD_FLASH_UPDATE_END && cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS); if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; err = devlink_nl_flash_update_fill(msg, devlink, cmd, params); if (err) goto out_free_msg; devlink_nl_notify_send(devlink, msg); return; out_free_msg: nlmsg_free(msg); } static void devlink_flash_update_begin_notify(struct devlink *devlink) { struct devlink_flash_notify params = {}; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE, &params); } static void devlink_flash_update_end_notify(struct devlink *devlink) { struct devlink_flash_notify params = {}; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE_END, &params); } void devlink_flash_update_status_notify(struct devlink *devlink, const char *status_msg, const char *component, unsigned long done, unsigned long total) { struct devlink_flash_notify params = { .status_msg = status_msg, .component = component, .done = done, .total = total, }; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE_STATUS, &params); } EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify); void devlink_flash_update_timeout_notify(struct devlink *devlink, const char *status_msg, const char *component, unsigned long timeout) { struct devlink_flash_notify params = { .status_msg = status_msg, .component = component, .timeout = timeout, }; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE_STATUS, &params); } EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify); struct devlink_flash_component_lookup_ctx { const char *lookup_name; bool lookup_name_found; }; static void devlink_flash_component_lookup_cb(const char *version_name, enum devlink_info_version_type version_type, void *version_cb_priv) { struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv; if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT || lookup_ctx->lookup_name_found) return; lookup_ctx->lookup_name_found = !strcmp(lookup_ctx->lookup_name, version_name); } static int devlink_flash_component_get(struct devlink *devlink, struct nlattr *nla_component, const char **p_component, struct netlink_ext_ack *extack) { struct devlink_flash_component_lookup_ctx lookup_ctx = {}; struct devlink_info_req req = {}; const char *component; int ret; if (!nla_component) return 0; component = nla_data(nla_component); if (!devlink->ops->info_get) { NL_SET_ERR_MSG_ATTR(extack, nla_component, "component update is not supported by this device"); return -EOPNOTSUPP; } lookup_ctx.lookup_name = component; req.version_cb = devlink_flash_component_lookup_cb; req.version_cb_priv = &lookup_ctx; ret = devlink->ops->info_get(devlink, &req, NULL); if (ret) return ret; if (!lookup_ctx.lookup_name_found) { NL_SET_ERR_MSG_ATTR(extack, nla_component, "selected component is not supported by this device"); return -EINVAL; } *p_component = component; return 0; } int devlink_nl_flash_update_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *nla_overwrite_mask, *nla_file_name; struct devlink_flash_update_params params = {}; struct devlink *devlink = info->user_ptr[0]; const char *file_name; u32 supported_params; int ret; if (!devlink->ops->flash_update) return -EOPNOTSUPP; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME)) return -EINVAL; ret = devlink_flash_component_get(devlink, info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT], &params.component, info->extack); if (ret) return ret; supported_params = devlink->ops->supported_flash_update_params; nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK]; if (nla_overwrite_mask) { struct nla_bitfield32 sections; if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK)) { NL_SET_ERR_MSG_ATTR(info->extack, nla_overwrite_mask, "overwrite settings are not supported by this device"); return -EOPNOTSUPP; } sections = nla_get_bitfield32(nla_overwrite_mask); params.overwrite_mask = sections.value & sections.selector; } nla_file_name = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]; file_name = nla_data(nla_file_name); ret = request_firmware(&params.fw, file_name, devlink->dev); if (ret) { NL_SET_ERR_MSG_ATTR(info->extack, nla_file_name, "failed to locate the requested firmware file"); return ret; } devlink_flash_update_begin_notify(devlink); ret = devlink->ops->flash_update(devlink, &params, info->extack); devlink_flash_update_end_notify(devlink); release_firmware(params.fw); return ret; } static void __devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) { struct devlink_info_req req = {}; const struct nlattr *nlattr; struct sk_buff *msg; int rem, err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; req.msg = msg; err = devlink->ops->info_get(devlink, &req, NULL); if (err) goto free_msg; nla_for_each_attr_type(nlattr, DEVLINK_ATTR_INFO_VERSION_RUNNING, (void *)msg->data, msg->len, rem) { const struct nlattr *kv; int rem_kv; nla_for_each_nested_type(kv, DEVLINK_ATTR_INFO_VERSION_VALUE, nlattr, rem_kv) { strlcat(buf, nla_data(kv), len); strlcat(buf, " ", len); } } free_msg: nlmsg_consume(msg); } void devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) { if (!devlink->ops->info_get) return; devl_lock(devlink); if (devl_is_registered(devlink)) __devlink_compat_running_version(devlink, buf, len); devl_unlock(devlink); } int devlink_compat_flash_update(struct devlink *devlink, const char *file_name) { struct devlink_flash_update_params params = {}; int ret; devl_lock(devlink); if (!devl_is_registered(devlink)) { ret = -ENODEV; goto out_unlock; } if (!devlink->ops->flash_update) { ret = -EOPNOTSUPP; goto out_unlock; } ret = request_firmware(&params.fw, file_name, devlink->dev); if (ret) goto out_unlock; devlink_flash_update_begin_notify(devlink); ret = devlink->ops->flash_update(devlink, &params, NULL); devlink_flash_update_end_notify(devlink); release_firmware(params.fw); out_unlock: devl_unlock(devlink); return ret; } static int devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { struct nlattr *selftests; void *hdr; int err; int i; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, DEVLINK_CMD_SELFTESTS_GET); if (!hdr) return -EMSGSIZE; err = -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto err_cancel_msg; selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS); if (!selftests) goto err_cancel_msg; for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1; i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) { if (devlink->ops->selftest_check(devlink, i, extack)) { err = nla_put_flag(msg, i); if (err) goto err_cancel_msg; } } nla_nest_end(msg, selftests); genlmsg_end(msg, hdr); return 0; err_cancel_msg: genlmsg_cancel(msg, hdr); return err; } int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; int err; if (!devlink->ops->selftest_check) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid, info->snd_seq, 0, info->extack); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int devlink_nl_selftests_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { if (!devlink->ops->selftest_check) return 0; return devlink_nl_selftests_fill(msg, devlink, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, cb->extack); } int devlink_nl_selftests_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return devlink_nl_dumpit(skb, cb, devlink_nl_selftests_get_dump_one); } static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id, enum devlink_selftest_status test_status) { struct nlattr *result_attr; result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT); if (!result_attr) return -EMSGSIZE; if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) || nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS, test_status)) goto nla_put_failure; nla_nest_end(skb, result_attr); return 0; nla_put_failure: nla_nest_cancel(skb, result_attr); return -EMSGSIZE; } static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = { [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG }, }; int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1]; struct devlink *devlink = info->user_ptr[0]; struct nlattr *attrs, *selftests; struct sk_buff *msg; void *hdr; int err; int i; if (!devlink->ops->selftest_run || !devlink->ops->selftest_check) return -EOPNOTSUPP; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SELFTESTS)) return -EINVAL; attrs = info->attrs[DEVLINK_ATTR_SELFTESTS]; err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs, devlink_selftest_nl_policy, info->extack); if (err < 0) return err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = -EMSGSIZE; hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN); if (!hdr) goto free_msg; if (devlink_nl_put_handle(msg, devlink)) goto genlmsg_cancel; selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS); if (!selftests) goto genlmsg_cancel; for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1; i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) { enum devlink_selftest_status test_status; if (nla_get_flag(tb[i])) { if (!devlink->ops->selftest_check(devlink, i, info->extack)) { if (devlink_selftest_result_put(msg, i, DEVLINK_SELFTEST_STATUS_SKIP)) goto selftests_nest_cancel; continue; } test_status = devlink->ops->selftest_run(devlink, i, info->extack); if (devlink_selftest_result_put(msg, i, test_status)) goto selftests_nest_cancel; } } nla_nest_end(msg, selftests); genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); selftests_nest_cancel: nla_nest_cancel(msg, selftests); genlmsg_cancel: genlmsg_cancel(msg, hdr); free_msg: nlmsg_free(msg); return err; }
377 103 8 103 65 29 35 33 60 59 1 2 1 35 22 7 13 13 13 13 13 13 33 32 4 48 25 25 71 11 2 8 29 10 52 1 157 157 153 154 154 27 2 2 133 133 105 1 116 98 116 1 36 18 7 6 8 8 78 38 21 13 21 21 21 21 44 22 76 56 44 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 /* SPDX-License-Identifier: GPL-2.0 */ /* Multipath TCP * * Copyright (c) 2017 - 2019, Intel Corporation. */ #ifndef __MPTCP_PROTOCOL_H #define __MPTCP_PROTOCOL_H #include <linux/random.h> #include <net/tcp.h> #include <net/inet_connection_sock.h> #include <uapi/linux/mptcp.h> #include <net/genetlink.h> #include <net/rstreason.h> #define MPTCP_SUPPORTED_VERSION 1 /* MPTCP option bits */ #define OPTION_MPTCP_MPC_SYN BIT(0) #define OPTION_MPTCP_MPC_SYNACK BIT(1) #define OPTION_MPTCP_MPC_ACK BIT(2) #define OPTION_MPTCP_MPJ_SYN BIT(3) #define OPTION_MPTCP_MPJ_SYNACK BIT(4) #define OPTION_MPTCP_MPJ_ACK BIT(5) #define OPTION_MPTCP_ADD_ADDR BIT(6) #define OPTION_MPTCP_RM_ADDR BIT(7) #define OPTION_MPTCP_FASTCLOSE BIT(8) #define OPTION_MPTCP_PRIO BIT(9) #define OPTION_MPTCP_RST BIT(10) #define OPTION_MPTCP_DSS BIT(11) #define OPTION_MPTCP_FAIL BIT(12) #define OPTION_MPTCP_CSUMREQD BIT(13) #define OPTIONS_MPTCP_MPC (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \ OPTION_MPTCP_MPC_ACK) #define OPTIONS_MPTCP_MPJ (OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \ OPTION_MPTCP_MPJ_ACK) /* MPTCP option subtypes */ #define MPTCPOPT_MP_CAPABLE 0 #define MPTCPOPT_MP_JOIN 1 #define MPTCPOPT_DSS 2 #define MPTCPOPT_ADD_ADDR 3 #define MPTCPOPT_RM_ADDR 4 #define MPTCPOPT_MP_PRIO 5 #define MPTCPOPT_MP_FAIL 6 #define MPTCPOPT_MP_FASTCLOSE 7 #define MPTCPOPT_RST 8 /* MPTCP suboption lengths */ #define TCPOLEN_MPTCP_MPC_SYN 4 #define TCPOLEN_MPTCP_MPC_SYNACK 12 #define TCPOLEN_MPTCP_MPC_ACK 20 #define TCPOLEN_MPTCP_MPC_ACK_DATA 22 #define TCPOLEN_MPTCP_MPJ_SYN 12 #define TCPOLEN_MPTCP_MPJ_SYNACK 16 #define TCPOLEN_MPTCP_MPJ_ACK 24 #define TCPOLEN_MPTCP_DSS_BASE 4 #define TCPOLEN_MPTCP_DSS_ACK32 4 #define TCPOLEN_MPTCP_DSS_ACK64 8 #define TCPOLEN_MPTCP_DSS_MAP32 10 #define TCPOLEN_MPTCP_DSS_MAP64 14 #define TCPOLEN_MPTCP_DSS_CHECKSUM 2 #define TCPOLEN_MPTCP_ADD_ADDR 16 #define TCPOLEN_MPTCP_ADD_ADDR_PORT 18 #define TCPOLEN_MPTCP_ADD_ADDR_BASE 8 #define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 10 #define TCPOLEN_MPTCP_ADD_ADDR6 28 #define TCPOLEN_MPTCP_ADD_ADDR6_PORT 30 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE 20 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22 #define TCPOLEN_MPTCP_PORT_LEN 2 #define TCPOLEN_MPTCP_PORT_ALIGN 2 #define TCPOLEN_MPTCP_RM_ADDR_BASE 3 #define TCPOLEN_MPTCP_PRIO 3 #define TCPOLEN_MPTCP_PRIO_ALIGN 4 #define TCPOLEN_MPTCP_FASTCLOSE 12 #define TCPOLEN_MPTCP_RST 4 #define TCPOLEN_MPTCP_FAIL 12 #define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA) /* MPTCP MP_JOIN flags */ #define MPTCPOPT_BACKUP BIT(0) #define MPTCPOPT_THMAC_LEN 8 /* MPTCP MP_CAPABLE flags */ #define MPTCP_VERSION_MASK (0x0F) #define MPTCP_CAP_CHECKSUM_REQD BIT(7) #define MPTCP_CAP_EXTENSIBILITY BIT(6) #define MPTCP_CAP_DENY_JOIN_ID0 BIT(5) #define MPTCP_CAP_HMAC_SHA256 BIT(0) #define MPTCP_CAP_FLAG_MASK (0x1F) /* MPTCP DSS flags */ #define MPTCP_DSS_DATA_FIN BIT(4) #define MPTCP_DSS_DSN64 BIT(3) #define MPTCP_DSS_HAS_MAP BIT(2) #define MPTCP_DSS_ACK64 BIT(1) #define MPTCP_DSS_HAS_ACK BIT(0) #define MPTCP_DSS_FLAG_MASK (0x1F) /* MPTCP ADD_ADDR flags */ #define MPTCP_ADDR_ECHO BIT(0) /* MPTCP MP_PRIO flags */ #define MPTCP_PRIO_BKUP BIT(0) /* MPTCP TCPRST flags */ #define MPTCP_RST_TRANSIENT BIT(0) /* MPTCP socket atomic flags */ #define MPTCP_WORK_RTX 1 #define MPTCP_FALLBACK_DONE 2 #define MPTCP_WORK_CLOSE_SUBFLOW 3 /* MPTCP socket release cb flags */ #define MPTCP_PUSH_PENDING 1 #define MPTCP_CLEAN_UNA 2 #define MPTCP_ERROR_REPORT 3 #define MPTCP_RETRANSMIT 4 #define MPTCP_FLUSH_JOIN_LIST 5 #define MPTCP_SYNC_STATE 6 #define MPTCP_SYNC_SNDBUF 7 #define MPTCP_DEQUEUE 8 struct mptcp_skb_cb { u64 map_seq; u64 end_seq; u32 offset; u8 has_rxtstamp; u8 cant_coalesce; }; #define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0])) static inline bool before64(__u64 seq1, __u64 seq2) { return (__s64)(seq1 - seq2) < 0; } #define after64(seq2, seq1) before64(seq1, seq2) struct mptcp_options_received { u64 sndr_key; u64 rcvr_key; u64 data_ack; u64 data_seq; u32 subflow_seq; u16 data_len; __sum16 csum; struct_group(status, u16 suboptions; u16 use_map:1, dsn64:1, data_fin:1, use_ack:1, ack64:1, mpc_map:1, reset_reason:4, reset_transient:1, echo:1, backup:1, deny_join_id0:1, __unused:2; ); u8 join_id; u32 token; u32 nonce; u64 thmac; u8 hmac[MPTCPOPT_HMAC_LEN]; struct mptcp_addr_info addr; struct mptcp_rm_list rm_list; u64 ahmac; u64 fail_seq; }; static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) { return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) | ((nib & 0xF) << 8) | field); } enum mptcp_pm_status { MPTCP_PM_ADD_ADDR_RECEIVED, MPTCP_PM_ADD_ADDR_SEND_ACK, MPTCP_PM_RM_ADDR_RECEIVED, MPTCP_PM_ESTABLISHED, MPTCP_PM_SUBFLOW_ESTABLISHED, MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */ MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is * accounted int id_avail_bitmap */ }; enum mptcp_pm_type { MPTCP_PM_TYPE_KERNEL = 0, MPTCP_PM_TYPE_USERSPACE, __MPTCP_PM_TYPE_NR, __MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1, }; /* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */ #define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1) enum mptcp_addr_signal_status { MPTCP_ADD_ADDR_SIGNAL, MPTCP_ADD_ADDR_ECHO, MPTCP_RM_ADDR_SIGNAL, }; /* max value of mptcp_addr_info.id */ #define MPTCP_PM_MAX_ADDR_ID U8_MAX struct mptcp_pm_data { struct mptcp_addr_info local; struct mptcp_addr_info remote; struct list_head anno_list; struct list_head userspace_pm_local_addr_list; spinlock_t lock; /*protects the whole PM data */ u8 addr_signal; bool server_side; bool work_pending; bool accept_addr; bool accept_subflow; bool remote_deny_join_id0; u8 add_addr_signaled; u8 add_addr_accepted; u8 local_addr_used; u8 pm_type; u8 subflows; u8 status; DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_rm_list rm_list_tx; struct mptcp_rm_list rm_list_rx; }; struct mptcp_pm_local { struct mptcp_addr_info addr; u8 flags; int ifindex; }; struct mptcp_pm_addr_entry { struct list_head list; struct mptcp_addr_info addr; u8 flags; int ifindex; struct socket *lsk; }; struct mptcp_data_frag { struct list_head list; u64 data_seq; u16 data_len; u16 offset; u16 overhead; u16 already_sent; struct page *page; }; /* MPTCP connection sock */ struct mptcp_sock { /* inet_connection_sock must be the first member */ struct inet_connection_sock sk; u64 local_key; /* protected by the first subflow socket lock * lockless access read */ u64 remote_key; /* same as above */ u64 write_seq; u64 bytes_sent; u64 snd_nxt; u64 bytes_received; u64 ack_seq; atomic64_t rcv_wnd_sent; u64 rcv_data_fin_seq; u64 bytes_retrans; u64 bytes_consumed; int snd_burst; int old_wspace; u64 recovery_snd_nxt; /* in recovery mode accept up to this seq; * recovery related fields are under data_lock * protection */ u64 bytes_acked; u64 snd_una; u64 wnd_end; u32 last_data_sent; u32 last_data_recv; u32 last_ack_recv; unsigned long timer_ival; u32 token; unsigned long flags; unsigned long cb_flags; bool recovery; /* closing subflow write queue reinjected */ bool can_ack; bool fully_established; bool rcv_data_fin; bool snd_data_fin_enable; bool rcv_fastclose; bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; u8 pending_state; /* A subflow asked to set this sk_state, * protected by the msk data lock */ u8 mpc_endpoint_id; u8 recvmsg_inq:1, cork:1, nodelay:1, fastopening:1, in_accept_queue:1, free_first:1, rcvspace_init:1; u32 notsent_lowat; int keepalive_cnt; int keepalive_idle; int keepalive_intvl; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; struct list_head conn_list; struct list_head rtx_queue; struct mptcp_data_frag *first_pending; struct list_head join_list; struct sock *first; /* The mptcp ops can safely dereference, using suitable * ONCE annotation, the subflow outside the socket * lock as such sock is freed after close(). */ struct mptcp_pm_data pm; struct mptcp_sched_ops *sched; struct { u32 space; /* bytes copied in last measurement window */ u32 copied; /* bytes copied in this measurement window */ u64 time; /* start time of measurement window */ u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; u8 scaling_ratio; u32 subflow_id; u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; }; #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) #define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock) #define mptcp_for_each_subflow(__msk, __subflow) \ list_for_each_entry(__subflow, &((__msk)->conn_list), node) #define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \ list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node) #define mptcp_next_subflow(__msk, __subflow) \ list_next_entry_circular(__subflow, &((__msk)->conn_list), node) extern struct genl_family mptcp_genl_family; static inline void msk_owned_by_me(const struct mptcp_sock *msk) { sock_owned_by_me((const struct sock *)msk); } #ifdef CONFIG_DEBUG_NET /* MPTCP-specific: we might (indirectly) call this helper with the wrong sk */ #undef tcp_sk #define tcp_sk(ptr) ({ \ typeof(ptr) _ptr = (ptr); \ WARN_ON(_ptr->sk_protocol != IPPROTO_TCP); \ container_of_const(_ptr, struct tcp_sock, inet_conn.icsk_inet.sk); \ }) #define mptcp_sk(ptr) ({ \ typeof(ptr) _ptr = (ptr); \ WARN_ON(_ptr->sk_protocol != IPPROTO_MPTCP); \ container_of_const(_ptr, struct mptcp_sock, sk.icsk_inet.sk); \ }) #else /* !CONFIG_DEBUG_NET */ #define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk) #endif static inline int mptcp_win_from_space(const struct sock *sk, int space) { return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space); } static inline int mptcp_space_from_win(const struct sock *sk, int win) { return __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, win); } static inline int __mptcp_space(const struct sock *sk) { return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - sk_rmem_alloc_get(sk)); } static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) { const struct mptcp_sock *msk = mptcp_sk(sk); return READ_ONCE(msk->first_pending); } static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *cur; cur = msk->first_pending; return list_is_last(&cur->list, &msk->rtx_queue) ? NULL : list_next_entry(cur, list); } static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk) { const struct mptcp_sock *msk = mptcp_sk(sk); if (!msk->first_pending) return NULL; if (WARN_ON_ONCE(list_empty(&msk->rtx_queue))) return NULL; return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); } static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); if (msk->snd_una == msk->snd_nxt) return NULL; return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); } struct csum_pseudo_header { __be64 data_seq; __be32 subflow_seq; __be16 data_len; __sum16 csum; }; struct mptcp_subflow_request_sock { struct tcp_request_sock sk; u16 mp_capable : 1, mp_join : 1, backup : 1, request_bkup : 1, csum_reqd : 1, allow_join_id0 : 1; u8 local_id; u8 remote_id; u64 local_key; u64 idsn; u32 token; u32 ssn_offset; u64 thmac; u32 local_nonce; u32 remote_nonce; struct mptcp_sock *msk; struct hlist_nulls_node token_node; }; static inline struct mptcp_subflow_request_sock * mptcp_subflow_rsk(const struct request_sock *rsk) { return (struct mptcp_subflow_request_sock *)rsk; } struct mptcp_delegated_action { struct napi_struct napi; struct list_head head; }; DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); #define MPTCP_DELEGATE_SCHEDULED 0 #define MPTCP_DELEGATE_SEND 1 #define MPTCP_DELEGATE_ACK 2 #define MPTCP_DELEGATE_SNDBUF 3 #define MPTCP_DELEGATE_ACTIONS_MASK (~BIT(MPTCP_DELEGATE_SCHEDULED)) /* MPTCP subflow context */ struct mptcp_subflow_context { struct list_head node;/* conn_list of subflows */ struct_group(reset, unsigned long avg_pacing_rate; /* protected by msk socket lock */ u64 local_key; u64 remote_key; u64 idsn; u64 map_seq; u32 snd_isn; u32 token; u32 rel_write_seq; u32 map_subflow_seq; u32 ssn_offset; u32 map_data_len; __wsum map_data_csum; u32 map_csum_len; u32 request_mptcp : 1, /* send MP_CAPABLE */ request_join : 1, /* send MP_JOIN */ request_bkup : 1, mp_capable : 1, /* remote is MPTCP capable */ mp_join : 1, /* remote is JOINing */ pm_notified : 1, /* PM hook called for established status */ conn_finished : 1, map_valid : 1, map_csum_reqd : 1, map_data_fin : 1, mpc_map : 1, backup : 1, send_mp_prio : 1, send_mp_fail : 1, send_fastclose : 1, send_infinite_map : 1, remote_key_valid : 1, /* received the peer key from */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ close_event_done : 1, /* has done the post-closed part */ mpc_drop : 1, /* the MPC option has been dropped in a rtx */ __unused : 9; bool data_avail; bool scheduled; bool pm_listener; /* a listener managed by the kernel PM? */ bool fully_established; /* path validated */ u32 remote_nonce; u64 thmac; u32 local_nonce; u32 remote_token; union { u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */ u64 iasn; /* initial ack sequence number, MPC subflows only */ }; s16 local_id; /* if negative not initialized yet */ u8 remote_id; u8 reset_seen:1; u8 reset_transient:1; u8 reset_reason:4; u8 stale_count; u32 subflow_id; long delegated_status; unsigned long fail_tout; ); struct list_head delegated_node; /* link into delegated_action, protected by local BH */ u32 setsockopt_seq; u32 stale_rcv_tstamp; int cached_sndbuf; /* sndbuf size when last synced with the msk sndbuf, * protected by the msk socket lock */ struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *conn; /* parent mptcp_sock */ const struct inet_connection_sock_af_ops *icsk_af_ops; void (*tcp_state_change)(struct sock *sk); void (*tcp_error_report)(struct sock *sk); struct rcu_head rcu; }; static inline struct mptcp_subflow_context * mptcp_subflow_ctx(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); /* Use RCU on icsk_ulp_data only for sock diag code */ return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data; } static inline struct sock * mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) { return subflow->tcp_sock; } static inline void mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; WRITE_ONCE(subflow->local_id, -1); } /* Convert reset reasons in MPTCP to enum sk_rst_reason type */ static inline enum sk_rst_reason sk_rst_convert_mptcp_reason(u32 reason) { switch (reason) { case MPTCP_RST_EUNSPEC: return SK_RST_REASON_MPTCP_RST_EUNSPEC; case MPTCP_RST_EMPTCP: return SK_RST_REASON_MPTCP_RST_EMPTCP; case MPTCP_RST_ERESOURCE: return SK_RST_REASON_MPTCP_RST_ERESOURCE; case MPTCP_RST_EPROHIBIT: return SK_RST_REASON_MPTCP_RST_EPROHIBIT; case MPTCP_RST_EWQ2BIG: return SK_RST_REASON_MPTCP_RST_EWQ2BIG; case MPTCP_RST_EBADPERF: return SK_RST_REASON_MPTCP_RST_EBADPERF; case MPTCP_RST_EMIDDLEBOX: return SK_RST_REASON_MPTCP_RST_EMIDDLEBOX; default: /* It should not happen, or else errors may occur * in MPTCP layer */ return SK_RST_REASON_ERROR; } } static inline void mptcp_send_active_reset_reason(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); enum sk_rst_reason reason; reason = sk_rst_convert_mptcp_reason(subflow->reset_reason); tcp_send_active_reset(sk, GFP_ATOMIC, reason); } static inline u64 mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow) { return tcp_sk(mptcp_subflow_tcp_sock(subflow))->copied_seq - subflow->ssn_offset - subflow->map_subflow_seq; } static inline u64 mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow) { return subflow->map_seq + mptcp_subflow_get_map_offset(subflow); } void mptcp_subflow_process_delegated(struct sock *ssk, long actions); static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action) { long old, set_bits = BIT(MPTCP_DELEGATE_SCHEDULED) | BIT(action); struct mptcp_delegated_action *delegated; bool schedule; /* the caller held the subflow bh socket lock */ lockdep_assert_in_softirq(); /* The implied barrier pairs with tcp_release_cb_override() * mptcp_napi_poll(), and ensures the below list check sees list * updates done prior to delegated status bits changes */ old = set_mask_bits(&subflow->delegated_status, 0, set_bits); if (!(old & BIT(MPTCP_DELEGATE_SCHEDULED))) { if (WARN_ON_ONCE(!list_empty(&subflow->delegated_node))) return; delegated = this_cpu_ptr(&mptcp_delegated_actions); schedule = list_empty(&delegated->head); list_add_tail(&subflow->delegated_node, &delegated->head); sock_hold(mptcp_subflow_tcp_sock(subflow)); if (schedule) napi_schedule(&delegated->napi); } } static inline struct mptcp_subflow_context * mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated) { struct mptcp_subflow_context *ret; if (list_empty(&delegated->head)) return NULL; ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node); list_del_init(&ret->delegated_node); return ret; } int mptcp_is_enabled(const struct net *net); unsigned int mptcp_get_add_addr_timeout(const struct net *net); int mptcp_is_checksum_enabled(const struct net *net); int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); unsigned int mptcp_close_timeout(const struct sock *sk); int mptcp_get_pm_type(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); void mptcp_active_disable(struct sock *sk); bool mptcp_active_should_disable(struct sock *ssk); void mptcp_active_enable(struct sock *sk); void mptcp_get_available_schedulers(char *buf, size_t maxlen); void __mptcp_subflow_fully_established(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); void mptcp_check_and_set_pending(struct sock *sk); void __mptcp_push_pending(struct sock *sk, unsigned int flags); bool mptcp_subflow_data_available(struct sock *sk); void __init mptcp_subflow_init(void); void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); void __mptcp_unaccepted_force_close(struct sock *sk); void mptcp_set_state(struct sock *sk, int state); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr); void mptcp_remote_address(const struct sock_common *skc, struct mptcp_addr_info *addr); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, const struct mptcp_addr_info *remote); int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, struct socket **new_sock); void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, unsigned short family); struct mptcp_sched_ops *mptcp_sched_find(const char *name); int mptcp_register_scheduler(struct mptcp_sched_ops *sched); void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched); void mptcp_sched_init(void); int mptcp_init_sched(struct mptcp_sock *msk, struct mptcp_sched_ops *sched); void mptcp_release_sched(struct mptcp_sock *msk); void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, bool scheduled); struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk); struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk); int mptcp_sched_get_send(struct mptcp_sock *msk); int mptcp_sched_get_retrans(struct mptcp_sock *msk); static inline u64 mptcp_data_avail(const struct mptcp_sock *msk) { return READ_ONCE(msk->bytes_received) - READ_ONCE(msk->bytes_consumed); } static inline bool mptcp_epollin_ready(const struct sock *sk) { u64 data_avail = mptcp_data_avail(mptcp_sk(sk)); if (!data_avail) return false; /* mptcp doesn't have to deal with small skbs in the receive queue, * as it can always coalesce them */ return (data_avail >= sk->sk_rcvlowat) || (mem_cgroup_sockets_enabled && sk->sk_memcg && mem_cgroup_under_socket_pressure(sk->sk_memcg)) || READ_ONCE(tcp_memory_pressure); } int mptcp_set_rcvlowat(struct sock *sk, int val); static inline bool __tcp_can_send(const struct sock *ssk) { /* only send if our side has not closed yet */ return ((1 << inet_sk_state_load(ssk)) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)); } static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow) { /* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */ if (subflow->request_join && !READ_ONCE(subflow->fully_established)) return false; return __tcp_can_send(mptcp_subflow_tcp_sock(subflow)); } void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow); bool mptcp_subflow_active(struct mptcp_subflow_context *subflow); void mptcp_subflow_drop_ctx(struct sock *ssk); static inline void mptcp_subflow_tcp_fallback(struct sock *sk, struct mptcp_subflow_context *ctx) { sk->sk_data_ready = sock_def_readable; sk->sk_state_change = ctx->tcp_state_change; sk->sk_write_space = sk_stream_write_space; sk->sk_error_report = ctx->tcp_error_report; inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; } void __init mptcp_proto_init(void); #if IS_ENABLED(CONFIG_MPTCP_IPV6) int __init mptcp_proto_v6_init(void); #endif struct sock *mptcp_sk_clone_init(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct sock *ssk, struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); void __mptcp_sync_state(struct sock *sk, int state); void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); static inline void mptcp_stop_tout_timer(struct sock *sk) { if (!inet_csk(sk)->icsk_mtup.probe_timestamp) return; sk_stop_timer(sk, &sk->sk_timer); inet_csk(sk)->icsk_mtup.probe_timestamp = 0; } static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout) { /* avoid 0 timestamp, as that means no close timeout */ inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1; } static inline void mptcp_start_tout_timer(struct sock *sk) { mptcp_set_close_tout(sk, tcp_jiffies32); mptcp_reset_tout_timer(mptcp_sk(sk), 0); } static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && READ_ONCE(mptcp_sk(sk)->fully_established); } void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); bool mptcp_schedule_work(struct sock *sk); int mptcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int mptcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *option); u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq); static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit) { if (use_64bit) return cur_seq; return __mptcp_expand_seq(old_seq, cur_seq); } void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); void __mptcp_error_report(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) { return READ_ONCE(msk->snd_data_fin_enable) && READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); } static inline u32 mptcp_notsent_lowat(const struct sock *sk) { struct net *net = sock_net(sk); u32 val; val = READ_ONCE(mptcp_sk(sk)->notsent_lowat); return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); } static inline bool mptcp_stream_memory_free(const struct sock *sk, int wake) { const struct mptcp_sock *msk = mptcp_sk(sk); u32 notsent_bytes; notsent_bytes = READ_ONCE(msk->write_seq) - READ_ONCE(msk->snd_nxt); return (notsent_bytes << wake) < mptcp_notsent_lowat(sk); } static inline bool __mptcp_stream_is_writeable(const struct sock *sk, int wake) { return mptcp_stream_memory_free(sk, wake) && __sk_stream_is_writeable(sk, wake); } static inline void mptcp_write_space(struct sock *sk) { /* pairs with memory barrier in mptcp_poll */ smp_mb(); if (mptcp_stream_memory_free(sk, 1)) sk_stream_write_space(sk); } static inline void __mptcp_sync_sndbuf(struct sock *sk) { struct mptcp_subflow_context *subflow; int ssk_sndbuf, new_sndbuf; if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) return; new_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[0]); mptcp_for_each_subflow(mptcp_sk(sk), subflow) { ssk_sndbuf = READ_ONCE(mptcp_subflow_tcp_sock(subflow)->sk_sndbuf); subflow->cached_sndbuf = ssk_sndbuf; new_sndbuf += ssk_sndbuf; } /* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */ WRITE_ONCE(sk->sk_sndbuf, new_sndbuf); mptcp_write_space(sk); } /* The called held both the msk socket and the subflow socket locks, * possibly under BH */ static inline void __mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); if (READ_ONCE(ssk->sk_sndbuf) != subflow->cached_sndbuf) __mptcp_sync_sndbuf(sk); } /* the caller held only the subflow socket lock, either in process or * BH context. Additionally this can be called under the msk data lock, * so we can't acquire such lock here: let the delegate action acquires * the needed locks in suitable order. */ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); if (likely(READ_ONCE(ssk->sk_sndbuf) == subflow->cached_sndbuf)) return; local_bh_disable(); mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_SNDBUF); local_bh_enable(); } void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags); #define MPTCP_TOKEN_MAX_RETRIES 4 void __init mptcp_token_init(void); static inline void mptcp_token_init_request(struct request_sock *req) { mptcp_subflow_rsk(req)->token_node.pprev = NULL; } int mptcp_token_new_request(struct request_sock *req); void mptcp_token_destroy_request(struct request_sock *req); int mptcp_token_new_connect(struct sock *ssk); void mptcp_token_accept(struct mptcp_subflow_request_sock *r, struct mptcp_sock *msk); bool mptcp_token_exists(u32 token); struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token); struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot, long *s_num); void mptcp_token_destroy(struct mptcp_sock *msk); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); void mptcp_pm_data_reset(struct mptcp_sock *msk); void mptcp_pm_destroy(struct mptcp_sock *msk); int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, struct mptcp_addr_info *addr); int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, bool require_family, struct mptcp_pm_addr_entry *entry); bool mptcp_pm_addr_families_match(const struct sock *sk, const struct mptcp_addr_info *loc, const struct mptcp_addr_info *rem); void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); void mptcp_pm_subflow_established(struct mptcp_sock *msk); bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk); void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct mptcp_subflow_context *subflow); void mptcp_pm_add_addr_received(const struct sock *ssk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, bool prio, bool backup); void mptcp_pm_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id); void mptcp_pm_rm_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, struct mptcp_addr_info *addr, struct mptcp_addr_info *rem, u8 bkup); bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool check_id); bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, const struct mptcp_addr_info *saddr); bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, struct genl_info *info); int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local, struct genl_info *info); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry); void mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info); void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); void mptcp_event_pm_listener(const struct sock *ssk, enum mptcp_event_type event); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req); int mptcp_pm_genl_fill_addr(struct sk_buff *msg, struct netlink_callback *cb, struct mptcp_pm_addr_entry *entry); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & (BIT(MPTCP_ADD_ADDR_SIGNAL) | BIT(MPTCP_ADD_ADDR_ECHO)); } static inline bool mptcp_pm_should_add_signal_addr(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL); } static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO); } static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL); } static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk) { return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE; } static inline bool mptcp_pm_is_kernel(const struct mptcp_sock *msk) { return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_KERNEL; } static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) { u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; if (family == AF_INET6) len = TCPOLEN_MPTCP_ADD_ADDR6_BASE; if (!echo) len += MPTCPOPT_THMAC_LEN; /* account for 2 trailing 'nop' options */ if (port) len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN; return len; } static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list) { if (rm_list->nr == 0 || rm_list->nr > MPTCP_RM_IDS_MAX) return -EINVAL; return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1; } bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, unsigned int opt_size, unsigned int remaining, struct mptcp_addr_info *addr, bool *echo, bool *drop_other_suboptions); bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *skc); bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc); bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, struct genl_info *info); int mptcp_userspace_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, struct genl_info *info); static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) { int local_id = READ_ONCE(subflow->local_id); if (local_id < 0) return 0; return local_id; } void __init mptcp_pm_nl_init(void); void mptcp_pm_worker(struct mptcp_sock *msk); void __mptcp_pm_kernel_worker(struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk); /* called under PM lock */ static inline void __mptcp_pm_close_subflow(struct mptcp_sock *msk) { if (--msk->pm.subflows < mptcp_pm_get_subflows_max(msk)) WRITE_ONCE(msk->pm.accept_subflow, true); } static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk) { spin_lock_bh(&msk->pm.lock); __mptcp_pm_close_subflow(msk); spin_unlock_bh(&msk->pm.lock); } void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk); static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb) { return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP); } void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops); static inline bool __mptcp_check_fallback(const struct mptcp_sock *msk) { return test_bit(MPTCP_FALLBACK_DONE, &msk->flags); } static inline bool mptcp_check_fallback(const struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); return __mptcp_check_fallback(msk); } static inline void __mptcp_do_fallback(struct mptcp_sock *msk) { if (__mptcp_check_fallback(msk)) { pr_debug("TCP fallback already done (msk=%p)\n", msk); return; } if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback))) return; set_bit(MPTCP_FALLBACK_DONE, &msk->flags); } static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk) { struct sock *ssk = READ_ONCE(msk->first); return ssk && ((1 << inet_sk_state_load(ssk)) & (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_LISTEN)); } static inline void mptcp_do_fallback(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = subflow->conn; struct mptcp_sock *msk; msk = mptcp_sk(sk); __mptcp_do_fallback(msk); if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) { gfp_t saved_allocation = ssk->sk_allocation; /* we are in a atomic (BH) scope, override ssk default for data * fin allocation */ ssk->sk_allocation = GFP_ATOMIC; ssk->sk_shutdown |= SEND_SHUTDOWN; tcp_shutdown(ssk, SEND_SHUTDOWN); ssk->sk_allocation = saved_allocation; } } #define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a) static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow) { pr_fallback(msk); subflow->request_mptcp = 0; __mptcp_do_fallback(msk); } static inline bool mptcp_check_infinite_map(struct sk_buff *skb) { struct mptcp_ext *mpext; mpext = skb ? mptcp_get_ext(skb) : NULL; if (mpext && mpext->infinite_map) return true; return false; } static inline bool is_active_ssk(struct mptcp_subflow_context *subflow) { return (subflow->request_mptcp || subflow->request_join); } static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) && is_active_ssk(subflow) && !subflow->conn_finished; } #ifdef CONFIG_SYN_COOKIES void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, struct sk_buff *skb); bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, struct sk_buff *skb); void __init mptcp_join_cookie_init(void); #else static inline void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, struct sk_buff *skb) {} static inline bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, struct sk_buff *skb) { return false; } static inline void mptcp_join_cookie_init(void) {} #endif #endif /* __MPTCP_PROTOCOL_H */
10 392 28 264 289 205 204 187 288 460 252 289 285 14 218 299 190 3 299 190 270 10 12 10 231 1 194 2 194 6 6 120 315 195 123 23 5 21 314 189 123 10 10 293 33 33 32 32 210 208 210 44 1 107 108 14 93 1 93 32 31 32 32 32 32 4 32 32 29 4 29 1 29 93 93 90 15 15 15 15 446 402 12 404 448 450 83 83 160 162 162 162 36 133 117 18 161 266 266 61 64 65 65 64 65 64 61 5 61 61 61 5 232 206 207 205 3 3 208 2 205 207 207 206 3 206 207 206 204 205 3 253 253 249 253 248 3 1 9 4 6 245 374 376 371 111 143 143 123 28 28 28 28 271 8 263 261 264 263 263 265 264 263 263 3 288 287 289 46 269 272 8 263 25 4 266 60 16 7 39 30 37 75 48 6 4 11 75 19 8 37 316 316 13 310 29 37 292 199 280 50 50 235 14 315 233 218 41 38 221 232 5 5 7 8 8 1 4 3 1 1 2 2 2 1 13 13 107 107 107 107 107 107 103 4 107 107 4 209 193 107 131 131 4 4 6 6 6 8 2 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 // SPDX-License-Identifier: GPL-2.0-only /* Connection state tracking for netfilter. This is separated from, but required by, the NAT layer; it can also be used by an iptables extension. */ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> * (C) 2005-2012 Patrick McHardy <kaber@trash.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/netfilter.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <linux/vmalloc.h> #include <linux/stddef.h> #include <linux/slab.h> #include <linux/random.h> #include <linux/siphash.h> #include <linux/err.h> #include <linux/percpu.h> #include <linux/moduleparam.h> #include <linux/notifier.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/socket.h> #include <linux/mm.h> #include <linux/nsproxy.h> #include <linux/rculist_nulls.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_bpf.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_conntrack_synproxy.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> #include <net/netns/hash.h> #include <net/ip.h> #include "nf_internals.h" __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; EXPORT_SYMBOL_GPL(nf_conntrack_locks); __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); struct hlist_nulls_head *nf_conntrack_hash __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_hash); struct conntrack_gc_work { struct delayed_work dwork; u32 next_bucket; u32 avg_timeout; u32 count; u32 start_time; bool exiting; bool early_drop; }; static __read_mostly struct kmem_cache *nf_conntrack_cachep; static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; /* serialize hash resizes and nf_ct_iterate_cleanup */ static DEFINE_MUTEX(nf_conntrack_mutex); #define GC_SCAN_INTERVAL_MAX (60ul * HZ) #define GC_SCAN_INTERVAL_MIN (1ul * HZ) /* clamp timeouts to this value (TCP unacked) */ #define GC_SCAN_INTERVAL_CLAMP (300ul * HZ) /* Initial bias pretending we have 100 entries at the upper bound so we don't * wakeup often just because we have three entries with a 1s timeout while still * allowing non-idle machines to wakeup more often when needed. */ #define GC_SCAN_INITIAL_COUNT 100 #define GC_SCAN_INTERVAL_INIT GC_SCAN_INTERVAL_MAX #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) #define GC_SCAN_EXPIRED_MAX (64000u / HZ) #define MIN_CHAINLEN 50u #define MAX_CHAINLEN (80u - MIN_CHAINLEN) static struct conntrack_gc_work conntrack_gc_work; void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) { /* 1) Acquire the lock */ spin_lock(lock); /* 2) read nf_conntrack_locks_all, with ACQUIRE semantics * It pairs with the smp_store_release() in nf_conntrack_all_unlock() */ if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false)) return; /* fast path failed, unlock */ spin_unlock(lock); /* Slow path 1) get global lock */ spin_lock(&nf_conntrack_locks_all_lock); /* Slow path 2) get the lock we want */ spin_lock(lock); /* Slow path 3) release the global lock */ spin_unlock(&nf_conntrack_locks_all_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_lock); static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2) { h1 %= CONNTRACK_LOCKS; h2 %= CONNTRACK_LOCKS; spin_unlock(&nf_conntrack_locks[h1]); if (h1 != h2) spin_unlock(&nf_conntrack_locks[h2]); } /* return true if we need to recompute hashes (in case hash table was resized) */ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1, unsigned int h2, unsigned int sequence) { h1 %= CONNTRACK_LOCKS; h2 %= CONNTRACK_LOCKS; if (h1 <= h2) { nf_conntrack_lock(&nf_conntrack_locks[h1]); if (h1 != h2) spin_lock_nested(&nf_conntrack_locks[h2], SINGLE_DEPTH_NESTING); } else { nf_conntrack_lock(&nf_conntrack_locks[h2]); spin_lock_nested(&nf_conntrack_locks[h1], SINGLE_DEPTH_NESTING); } if (read_seqcount_retry(&nf_conntrack_generation, sequence)) { nf_conntrack_double_unlock(h1, h2); return true; } return false; } static void nf_conntrack_all_lock(void) __acquires(&nf_conntrack_locks_all_lock) { int i; spin_lock(&nf_conntrack_locks_all_lock); /* For nf_contrack_locks_all, only the latest time when another * CPU will see an update is controlled, by the "release" of the * spin_lock below. * The earliest time is not controlled, an thus KCSAN could detect * a race when nf_conntract_lock() reads the variable. * WRITE_ONCE() is used to ensure the compiler will not * optimize the write. */ WRITE_ONCE(nf_conntrack_locks_all, true); for (i = 0; i < CONNTRACK_LOCKS; i++) { spin_lock(&nf_conntrack_locks[i]); /* This spin_unlock provides the "release" to ensure that * nf_conntrack_locks_all==true is visible to everyone that * acquired spin_lock(&nf_conntrack_locks[]). */ spin_unlock(&nf_conntrack_locks[i]); } } static void nf_conntrack_all_unlock(void) __releases(&nf_conntrack_locks_all_lock) { /* All prior stores must be complete before we clear * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock() * might observe the false value but not the entire * critical section. * It pairs with the smp_load_acquire() in nf_conntrack_lock() */ smp_store_release(&nf_conntrack_locks_all, false); spin_unlock(&nf_conntrack_locks_all_lock); } unsigned int nf_conntrack_htable_size __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); seqcount_spinlock_t nf_conntrack_generation __read_mostly; static siphash_aligned_key_t nf_conntrack_hash_rnd; static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, unsigned int zoneid, const struct net *net) { siphash_key_t key; get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd)); key = nf_conntrack_hash_rnd; key.key[0] ^= zoneid; key.key[1] ^= net_hash_mix(net); return siphash((void *)tuple, offsetofend(struct nf_conntrack_tuple, dst.__nfct_hash_offsetend), &key); } static u32 scale_hash(u32 hash) { return reciprocal_scale(hash, nf_conntrack_htable_size); } static u32 __hash_conntrack(const struct net *net, const struct nf_conntrack_tuple *tuple, unsigned int zoneid, unsigned int size) { return reciprocal_scale(hash_conntrack_raw(tuple, zoneid, net), size); } static u32 hash_conntrack(const struct net *net, const struct nf_conntrack_tuple *tuple, unsigned int zoneid) { return scale_hash(hash_conntrack_raw(tuple, zoneid, net)); } static bool nf_ct_get_tuple_ports(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { struct { __be16 sport; __be16 dport; } _inet_hdr, *inet_hdr; /* Actually only need first 4 bytes to get ports. */ inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); if (!inet_hdr) return false; tuple->src.u.udp.port = inet_hdr->sport; tuple->dst.u.udp.port = inet_hdr->dport; return true; } static bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, struct net *net, struct nf_conntrack_tuple *tuple) { unsigned int size; const __be32 *ap; __be32 _addrs[8]; memset(tuple, 0, sizeof(*tuple)); tuple->src.l3num = l3num; switch (l3num) { case NFPROTO_IPV4: nhoff += offsetof(struct iphdr, saddr); size = 2 * sizeof(__be32); break; case NFPROTO_IPV6: nhoff += offsetof(struct ipv6hdr, saddr); size = sizeof(_addrs); break; default: return true; } ap = skb_header_pointer(skb, nhoff, size, _addrs); if (!ap) return false; switch (l3num) { case NFPROTO_IPV4: tuple->src.u3.ip = ap[0]; tuple->dst.u3.ip = ap[1]; break; case NFPROTO_IPV6: memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); break; } tuple->dst.protonum = protonum; tuple->dst.dir = IP_CT_DIR_ORIGINAL; switch (protonum) { #if IS_ENABLED(CONFIG_IPV6) case IPPROTO_ICMPV6: return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple); #endif case IPPROTO_ICMP: return icmp_pkt_to_tuple(skb, dataoff, net, tuple); #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: return gre_pkt_to_tuple(skb, dataoff, net, tuple); #endif case IPPROTO_TCP: case IPPROTO_UDP: #ifdef CONFIG_NF_CT_PROTO_UDPLITE case IPPROTO_UDPLITE: #endif #ifdef CONFIG_NF_CT_PROTO_SCTP case IPPROTO_SCTP: #endif #ifdef CONFIG_NF_CT_PROTO_DCCP case IPPROTO_DCCP: #endif /* fallthrough */ return nf_ct_get_tuple_ports(skb, dataoff, tuple); default: break; } return true; } static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, u_int8_t *protonum) { int dataoff = -1; const struct iphdr *iph; struct iphdr _iph; iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); if (!iph) return -1; /* Conntrack defragments packets, we might still see fragments * inside ICMP packets though. */ if (iph->frag_off & htons(IP_OFFSET)) return -1; dataoff = nhoff + (iph->ihl << 2); *protonum = iph->protocol; /* Check bogus IP headers */ if (dataoff > skb->len) { pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n", nhoff, iph->ihl << 2, skb->len); return -1; } return dataoff; } #if IS_ENABLED(CONFIG_IPV6) static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, u8 *protonum) { int protoff = -1; unsigned int extoff = nhoff + sizeof(struct ipv6hdr); __be16 frag_off; u8 nexthdr; if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), &nexthdr, sizeof(nexthdr)) != 0) { pr_debug("can't get nexthdr\n"); return -1; } protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); /* * (protoff == skb->len) means the packet has not data, just * IPv6 and possibly extensions headers, but it is tracked anyway */ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("can't find proto in pkt\n"); return -1; } *protonum = nexthdr; return protoff; } #endif static int get_l4proto(const struct sk_buff *skb, unsigned int nhoff, u8 pf, u8 *l4num) { switch (pf) { case NFPROTO_IPV4: return ipv4_get_l4proto(skb, nhoff, l4num); #if IS_ENABLED(CONFIG_IPV6) case NFPROTO_IPV6: return ipv6_get_l4proto(skb, nhoff, l4num); #endif default: *l4num = 0; break; } return -1; } bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, struct net *net, struct nf_conntrack_tuple *tuple) { u8 protonum; int protoff; protoff = get_l4proto(skb, nhoff, l3num, &protonum); if (protoff <= 0) return false; return nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple); } EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr); bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig) { memset(inverse, 0, sizeof(*inverse)); inverse->src.l3num = orig->src.l3num; switch (orig->src.l3num) { case NFPROTO_IPV4: inverse->src.u3.ip = orig->dst.u3.ip; inverse->dst.u3.ip = orig->src.u3.ip; break; case NFPROTO_IPV6: inverse->src.u3.in6 = orig->dst.u3.in6; inverse->dst.u3.in6 = orig->src.u3.in6; break; default: break; } inverse->dst.dir = !orig->dst.dir; inverse->dst.protonum = orig->dst.protonum; switch (orig->dst.protonum) { case IPPROTO_ICMP: return nf_conntrack_invert_icmp_tuple(inverse, orig); #if IS_ENABLED(CONFIG_IPV6) case IPPROTO_ICMPV6: return nf_conntrack_invert_icmpv6_tuple(inverse, orig); #endif } inverse->src.u.all = orig->dst.u.all; inverse->dst.u.all = orig->src.u.all; return true; } EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); /* Generate a almost-unique pseudo-id for a given conntrack. * * intentionally doesn't re-use any of the seeds used for hash * table location, we assume id gets exposed to userspace. * * Following nf_conn items do not change throughout lifetime * of the nf_conn: * * 1. nf_conn address * 2. nf_conn->master address (normally NULL) * 3. the associated net namespace * 4. the original direction tuple */ u32 nf_ct_get_id(const struct nf_conn *ct) { static siphash_aligned_key_t ct_id_seed; unsigned long a, b, c, d; net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); a = (unsigned long)ct; b = (unsigned long)ct->master; c = (unsigned long)nf_ct_net(ct); d = (unsigned long)siphash(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple), &ct_id_seed); #ifdef CONFIG_64BIT return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); #else return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed); #endif } EXPORT_SYMBOL_GPL(nf_ct_get_id); static void clean_from_lists(struct nf_conn *ct) { hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode); /* Destroy all pending expectations */ nf_ct_remove_expectations(ct); } #define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK) /* Released via nf_ct_destroy() */ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags) { struct nf_conn *tmpl, *p; if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) { tmpl = kzalloc(sizeof(*tmpl) + NFCT_INFOMASK, flags); if (!tmpl) return NULL; p = tmpl; tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); if (tmpl != p) { tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); tmpl->proto.tmpl_padto = (char *)tmpl - (char *)p; } } else { tmpl = kzalloc(sizeof(*tmpl), flags); if (!tmpl) return NULL; } tmpl->status = IPS_TEMPLATE; write_pnet(&tmpl->ct_net, net); nf_ct_zone_add(tmpl, zone); refcount_set(&tmpl->ct_general.use, 1); return tmpl; } EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); void nf_ct_tmpl_free(struct nf_conn *tmpl) { kfree(tmpl->ext); if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) kfree((char *)tmpl - tmpl->proto.tmpl_padto); else kfree(tmpl); } EXPORT_SYMBOL_GPL(nf_ct_tmpl_free); static void destroy_gre_conntrack(struct nf_conn *ct) { #ifdef CONFIG_NF_CT_PROTO_GRE struct nf_conn *master = ct->master; if (master) nf_ct_gre_keymap_destroy(master); #endif } void nf_ct_destroy(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; WARN_ON(refcount_read(&nfct->use) != 0); if (unlikely(nf_ct_is_template(ct))) { nf_ct_tmpl_free(ct); return; } if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE)) destroy_gre_conntrack(ct); /* Expectations will have been removed in clean_from_lists, * except TFTP can create an expectation on the first packet, * before connection is in the list, so we need to clean here, * too. */ nf_ct_remove_expectations(ct); if (ct->master) nf_ct_put(ct->master); nf_conntrack_free(ct); } EXPORT_SYMBOL(nf_ct_destroy); static void __nf_ct_delete_from_lists(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, reply_hash; unsigned int sequence; do { sequence = read_seqcount_begin(&nf_conntrack_generation); hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL)); reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY)); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); clean_from_lists(ct); nf_conntrack_double_unlock(hash, reply_hash); } static void nf_ct_delete_from_lists(struct nf_conn *ct) { nf_ct_helper_destroy(ct); local_bh_disable(); __nf_ct_delete_from_lists(ct); local_bh_enable(); } static void nf_ct_add_to_ecache_list(struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_net *cnet = nf_ct_pernet(nf_ct_net(ct)); spin_lock(&cnet->ecache.dying_lock); hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, &cnet->ecache.dying_list); spin_unlock(&cnet->ecache.dying_lock); #endif } bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) { struct nf_conn_tstamp *tstamp; struct net *net; if (test_and_set_bit(IPS_DYING_BIT, &ct->status)) return false; tstamp = nf_conn_tstamp_find(ct); if (tstamp) { s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; tstamp->stop = ktime_get_real_ns(); if (timeout < 0) tstamp->stop -= jiffies_to_nsecs(-timeout); } if (nf_conntrack_event_report(IPCT_DESTROY, ct, portid, report) < 0) { /* destroy event was not delivered. nf_ct_put will * be done by event cache worker on redelivery. */ nf_ct_helper_destroy(ct); local_bh_disable(); __nf_ct_delete_from_lists(ct); nf_ct_add_to_ecache_list(ct); local_bh_enable(); nf_conntrack_ecache_work(nf_ct_net(ct), NFCT_ECACHE_DESTROY_FAIL); return false; } net = nf_ct_net(ct); if (nf_conntrack_ecache_dwork_pending(net)) nf_conntrack_ecache_work(net, NFCT_ECACHE_DESTROY_SENT); nf_ct_delete_from_lists(ct); nf_ct_put(ct); return true; } EXPORT_SYMBOL_GPL(nf_ct_delete); static inline bool nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone, const struct net *net) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); /* A conntrack can be recreated with the equal tuple, * so we need to check that the conntrack is confirmed */ return nf_ct_tuple_equal(tuple, &h->tuple) && nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) && nf_ct_is_confirmed(ct) && net_eq(net, nf_ct_net(ct)); } static inline bool nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2) { return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) && nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple, &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) && nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) && nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) && net_eq(nf_ct_net(ct1), nf_ct_net(ct2)); } /* caller must hold rcu readlock and none of the nf_conntrack_locks */ static void nf_ct_gc_expired(struct nf_conn *ct) { if (!refcount_inc_not_zero(&ct->ct_general.use)) return; /* load ->status after refcount increase */ smp_acquire__after_ctrl_dep(); if (nf_ct_should_gc(ct)) nf_ct_kill(ct); nf_ct_put(ct); } /* * Warning : * - Caller must take a reference on returned object * and recheck nf_ct_tuple_equal(tuple, &h->tuple) */ static struct nf_conntrack_tuple_hash * ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple, u32 hash) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; struct hlist_nulls_node *n; unsigned int bucket, hsize; begin: nf_conntrack_get_ht(&ct_hash, &hsize); bucket = reciprocal_scale(hash, hsize); hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) { struct nf_conn *ct; ct = nf_ct_tuplehash_to_ctrack(h); if (nf_ct_is_expired(ct)) { nf_ct_gc_expired(ct); continue; } if (nf_ct_key_equal(h, tuple, zone, net)) return h; } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(n) != bucket) { NF_CT_STAT_INC_ATOMIC(net, search_restart); goto begin; } return NULL; } /* Find a connection corresponding to a tuple. */ static struct nf_conntrack_tuple_hash * __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple, u32 hash) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; h = ____nf_conntrack_find(net, zone, tuple, hash); if (h) { /* We have a candidate that matches the tuple we're interested * in, try to obtain a reference and re-check tuple */ ct = nf_ct_tuplehash_to_ctrack(h); if (likely(refcount_inc_not_zero(&ct->ct_general.use))) { /* re-check key after refcount */ smp_acquire__after_ctrl_dep(); if (likely(nf_ct_key_equal(h, tuple, zone, net))) return h; /* TYPESAFE_BY_RCU recycled the candidate */ nf_ct_put(ct); } h = NULL; } return h; } struct nf_conntrack_tuple_hash * nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL); struct nf_conntrack_tuple_hash *thash; rcu_read_lock(); thash = __nf_conntrack_find_get(net, zone, tuple, hash_conntrack_raw(tuple, zone_id, net)); if (thash) goto out_unlock; rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY); if (rid != zone_id) thash = __nf_conntrack_find_get(net, zone, tuple, hash_conntrack_raw(tuple, rid, net)); out_unlock: rcu_read_unlock(); return thash; } EXPORT_SYMBOL_GPL(nf_conntrack_find_get); static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash, unsigned int reply_hash) { hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, &nf_conntrack_hash[hash]); hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &nf_conntrack_hash[reply_hash]); } static bool nf_ct_ext_valid_pre(const struct nf_ct_ext *ext) { /* if ext->gen_id is not equal to nf_conntrack_ext_genid, some extensions * may contain stale pointers to e.g. helper that has been removed. * * The helper can't clear this because the nf_conn object isn't in * any hash and synchronize_rcu() isn't enough because associated skb * might sit in a queue. */ return !ext || ext->gen_id == atomic_read(&nf_conntrack_ext_genid); } static bool nf_ct_ext_valid_post(struct nf_ct_ext *ext) { if (!ext) return true; if (ext->gen_id != atomic_read(&nf_conntrack_ext_genid)) return false; /* inserted into conntrack table, nf_ct_iterate_cleanup() * will find it. Disable nf_ct_ext_find() id check. */ WRITE_ONCE(ext->gen_id, 0); return true; } int nf_conntrack_hash_check_insert(struct nf_conn *ct) { const struct nf_conntrack_zone *zone; struct net *net = nf_ct_net(ct); unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; unsigned int max_chainlen; unsigned int chainlen = 0; unsigned int sequence; int err = -EEXIST; zone = nf_ct_zone(ct); if (!nf_ct_ext_valid_pre(ct->ext)) return -EAGAIN; local_bh_disable(); do { sequence = read_seqcount_begin(&nf_conntrack_generation); hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL)); reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY)); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); max_chainlen = MIN_CHAINLEN + get_random_u32_below(MAX_CHAINLEN); /* See if there's one in the list already, including reverse */ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) { if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, zone, net)) goto out; if (chainlen++ > max_chainlen) goto chaintoolong; } chainlen = 0; hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) { if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, zone, net)) goto out; if (chainlen++ > max_chainlen) goto chaintoolong; } /* If genid has changed, we can't insert anymore because ct * extensions could have stale pointers and nf_ct_iterate_destroy * might have completed its table scan already. * * Increment of the ext genid right after this check is fine: * nf_ct_iterate_destroy blocks until locks are released. */ if (!nf_ct_ext_valid_post(ct->ext)) { err = -EAGAIN; goto out; } smp_wmb(); /* The caller holds a reference to this object */ refcount_set(&ct->ct_general.use, 2); __nf_conntrack_hash_insert(ct, hash, reply_hash); nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert); local_bh_enable(); return 0; chaintoolong: NF_CT_STAT_INC(net, chaintoolong); err = -ENOSPC; out: nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); return err; } EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets, unsigned int bytes) { struct nf_conn_acct *acct; acct = nf_conn_acct_find(ct); if (acct) { struct nf_conn_counter *counter = acct->counter; atomic64_add(packets, &counter[dir].packets); atomic64_add(bytes, &counter[dir].bytes); } } EXPORT_SYMBOL_GPL(nf_ct_acct_add); static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct nf_conn *loser_ct) { struct nf_conn_acct *acct; acct = nf_conn_acct_find(loser_ct); if (acct) { struct nf_conn_counter *counter = acct->counter; unsigned int bytes; /* u32 should be fine since we must have seen one packet. */ bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes); nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes); } } static void __nf_conntrack_insert_prepare(struct nf_conn *ct) { struct nf_conn_tstamp *tstamp; refcount_inc(&ct->ct_general.use); /* set conntrack timestamp, if enabled. */ tstamp = nf_conn_tstamp_find(ct); if (tstamp) tstamp->start = ktime_get_real_ns(); } /** * nf_ct_match_reverse - check if ct1 and ct2 refer to identical flow * @ct1: conntrack in hash table to check against * @ct2: merge candidate * * returns true if ct1 and ct2 happen to refer to the same flow, but * in opposing directions, i.e. * ct1: a:b -> c:d * ct2: c:d -> a:b * for both directions. If so, @ct2 should not have been created * as the skb should have been picked up as ESTABLISHED flow. * But ct1 was not yet committed to hash table before skb that created * ct2 had arrived. * * Note we don't compare netns because ct entries in different net * namespace cannot clash to begin with. * * @return: true if ct1 and ct2 are identical when swapping origin/reply. */ static bool nf_ct_match_reverse(const struct nf_conn *ct1, const struct nf_conn *ct2) { u16 id1, id2; if (!nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &ct2->tuplehash[IP_CT_DIR_REPLY].tuple)) return false; if (!nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple, &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) return false; id1 = nf_ct_zone_id(nf_ct_zone(ct1), IP_CT_DIR_ORIGINAL); id2 = nf_ct_zone_id(nf_ct_zone(ct2), IP_CT_DIR_REPLY); if (id1 != id2) return false; id1 = nf_ct_zone_id(nf_ct_zone(ct1), IP_CT_DIR_REPLY); id2 = nf_ct_zone_id(nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL); return id1 == id2; } static int nf_ct_can_merge(const struct nf_conn *ct, const struct nf_conn *loser_ct) { return nf_ct_match(ct, loser_ct) || nf_ct_match_reverse(ct, loser_ct); } /* caller must hold locks to prevent concurrent changes */ static int __nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h) { /* This is the conntrack entry already in hashes that won race. */ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); enum ip_conntrack_info ctinfo; struct nf_conn *loser_ct; loser_ct = nf_ct_get(skb, &ctinfo); if (nf_ct_can_merge(ct, loser_ct)) { struct net *net = nf_ct_net(ct); nf_conntrack_get(&ct->ct_general); nf_ct_acct_merge(ct, ctinfo, loser_ct); nf_ct_put(loser_ct); nf_ct_set(skb, ct, ctinfo); NF_CT_STAT_INC(net, clash_resolve); return NF_ACCEPT; } return NF_DROP; } /** * nf_ct_resolve_clash_harder - attempt to insert clashing conntrack entry * * @skb: skb that causes the collision * @repl_idx: hash slot for reply direction * * Called when origin or reply direction had a clash. * The skb can be handled without packet drop provided the reply direction * is unique or there the existing entry has the identical tuple in both * directions. * * Caller must hold conntrack table locks to prevent concurrent updates. * * Returns NF_DROP if the clash could not be handled. */ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) { struct nf_conn *loser_ct = (struct nf_conn *)skb_nfct(skb); const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; struct net *net; zone = nf_ct_zone(loser_ct); net = nf_ct_net(loser_ct); /* Reply direction must never result in a clash, unless both origin * and reply tuples are identical. */ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[repl_idx], hnnode) { if (nf_ct_key_equal(h, &loser_ct->tuplehash[IP_CT_DIR_REPLY].tuple, zone, net)) return __nf_ct_resolve_clash(skb, h); } /* We want the clashing entry to go away real soon: 1 second timeout. */ WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ); /* IPS_NAT_CLASH removes the entry automatically on the first * reply. Also prevents UDP tracker from moving the entry to * ASSURED state, i.e. the entry can always be evicted under * pressure. */ loser_ct->status |= IPS_FIXED_TIMEOUT | IPS_NAT_CLASH; __nf_conntrack_insert_prepare(loser_ct); /* fake add for ORIGINAL dir: we want lookups to only find the entry * already in the table. This also hides the clashing entry from * ctnetlink iteration, i.e. conntrack -L won't show them. */ hlist_nulls_add_fake(&loser_ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &nf_conntrack_hash[repl_idx]); NF_CT_STAT_INC(net, clash_resolve); return NF_ACCEPT; } /** * nf_ct_resolve_clash - attempt to handle clash without packet drop * * @skb: skb that causes the clash * @h: tuplehash of the clashing entry already in table * @reply_hash: hash slot for reply direction * * A conntrack entry can be inserted to the connection tracking table * if there is no existing entry with an identical tuple. * * If there is one, @skb (and the associated, unconfirmed conntrack) has * to be dropped. In case @skb is retransmitted, next conntrack lookup * will find the already-existing entry. * * The major problem with such packet drop is the extra delay added by * the packet loss -- it will take some time for a retransmit to occur * (or the sender to time out when waiting for a reply). * * This function attempts to handle the situation without packet drop. * * If @skb has no NAT transformation or if the colliding entries are * exactly the same, only the to-be-confirmed conntrack entry is discarded * and @skb is associated with the conntrack entry already in the table. * * Failing that, the new, unconfirmed conntrack is still added to the table * provided that the collision only occurs in the ORIGINAL direction. * The new entry will be added only in the non-clashing REPLY direction, * so packets in the ORIGINAL direction will continue to match the existing * entry. The new entry will also have a fixed timeout so it expires -- * due to the collision, it will only see reply traffic. * * Returns NF_DROP if the clash could not be resolved. */ static __cold noinline int nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h, u32 reply_hash) { /* This is the conntrack entry already in hashes that won race. */ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); const struct nf_conntrack_l4proto *l4proto; enum ip_conntrack_info ctinfo; struct nf_conn *loser_ct; struct net *net; int ret; loser_ct = nf_ct_get(skb, &ctinfo); net = nf_ct_net(loser_ct); l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (!l4proto->allow_clash) goto drop; ret = __nf_ct_resolve_clash(skb, h); if (ret == NF_ACCEPT) return ret; ret = nf_ct_resolve_clash_harder(skb, reply_hash); if (ret == NF_ACCEPT) return ret; drop: NF_CT_STAT_INC(net, drop); NF_CT_STAT_INC(net, insert_failed); return NF_DROP; } /* Confirm a connection given skb; places it in hash table */ int __nf_conntrack_confirm(struct sk_buff *skb) { unsigned int chainlen = 0, sequence, max_chainlen; const struct nf_conntrack_zone *zone; unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; struct nf_conn_help *help; struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; int ret = NF_DROP; ct = nf_ct_get(skb, &ctinfo); net = nf_ct_net(ct); /* ipt_REJECT uses nf_conntrack_attach to attach related ICMP/TCP RST packets in other direction. Actual packet which created connection will be IP_CT_NEW or for an expected connection, IP_CT_RELATED. */ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; zone = nf_ct_zone(ct); local_bh_disable(); do { sequence = read_seqcount_begin(&nf_conntrack_generation); /* reuse the hash saved before */ hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; hash = scale_hash(hash); reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY)); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); /* We're not in hash table, and we refuse to set up related * connections for unconfirmed conns. But packet copies and * REJECT will give spurious warnings here. */ /* Another skb with the same unconfirmed conntrack may * win the race. This may happen for bridge(br_flood) * or broadcast/multicast packets do skb_clone with * unconfirmed conntrack. */ if (unlikely(nf_ct_is_confirmed(ct))) { WARN_ON_ONCE(1); nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); return NF_DROP; } if (!nf_ct_ext_valid_pre(ct->ext)) { NF_CT_STAT_INC(net, insert_failed); goto dying; } /* We have to check the DYING flag after unlink to prevent * a race against nf_ct_get_next_corpse() possibly called from * user context, else we insert an already 'dead' hash, blocking * further use of that particular connection -JM. */ ct->status |= IPS_CONFIRMED; if (unlikely(nf_ct_is_dying(ct))) { NF_CT_STAT_INC(net, insert_failed); goto dying; } max_chainlen = MIN_CHAINLEN + get_random_u32_below(MAX_CHAINLEN); /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) { if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, zone, net)) goto out; if (chainlen++ > max_chainlen) goto chaintoolong; } chainlen = 0; hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) { if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, zone, net)) goto out; if (chainlen++ > max_chainlen) { chaintoolong: NF_CT_STAT_INC(net, chaintoolong); NF_CT_STAT_INC(net, insert_failed); ret = NF_DROP; goto dying; } } /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ ct->timeout += nfct_time_stamp; __nf_conntrack_insert_prepare(ct); /* Since the lookup is lockless, hash insertion must be done after * starting the timer and setting the CONFIRMED bit. The RCU barriers * guarantee that no other CPU can find the conntrack before the above * stores are visible. */ __nf_conntrack_hash_insert(ct, hash, reply_hash); nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); /* ext area is still valid (rcu read lock is held, * but will go out of scope soon, we need to remove * this conntrack again. */ if (!nf_ct_ext_valid_post(ct->ext)) { nf_ct_kill(ct); NF_CT_STAT_INC_ATOMIC(net, drop); return NF_DROP; } help = nfct_help(ct); if (help && help->helper) nf_conntrack_event_cache(IPCT_HELPER, ct); nf_conntrack_event_cache(master_ct(ct) ? IPCT_RELATED : IPCT_NEW, ct); return NF_ACCEPT; out: ret = nf_ct_resolve_clash(skb, h, reply_hash); dying: nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); return ret; } EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); /* Returns true if a connection corresponds to the tuple (required for NAT). */ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { struct net *net = nf_ct_net(ignored_conntrack); const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; unsigned int hash, hsize; struct hlist_nulls_node *n; struct nf_conn *ct; zone = nf_ct_zone(ignored_conntrack); rcu_read_lock(); begin: nf_conntrack_get_ht(&ct_hash, &hsize); hash = __hash_conntrack(net, tuple, nf_ct_zone_id(zone, IP_CT_DIR_REPLY), hsize); hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (ct == ignored_conntrack) continue; if (nf_ct_is_expired(ct)) { nf_ct_gc_expired(ct); continue; } if (nf_ct_key_equal(h, tuple, zone, net)) { /* Tuple is taken already, so caller will need to find * a new source port to use. * * Only exception: * If the *original tuples* are identical, then both * conntracks refer to the same flow. * This is a rare situation, it can occur e.g. when * more than one UDP packet is sent from same socket * in different threads. * * Let nf_ct_resolve_clash() deal with this later. */ if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) && nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) continue; NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; } } if (get_nulls_value(n) != hash) { NF_CT_STAT_INC_ATOMIC(net, search_restart); goto begin; } rcu_read_unlock(); return 0; } EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); #define NF_CT_EVICTION_RANGE 8 /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ static unsigned int early_drop_list(struct net *net, struct hlist_nulls_head *head) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; unsigned int drops = 0; struct nf_conn *tmp; hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); continue; } if (test_bit(IPS_ASSURED_BIT, &tmp->status) || !net_eq(nf_ct_net(tmp), net) || nf_ct_is_dying(tmp)) continue; if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; /* load ->ct_net and ->status after refcount increase */ smp_acquire__after_ctrl_dep(); /* kill only if still in same netns -- might have moved due to * SLAB_TYPESAFE_BY_RCU rules. * * We steal the timer reference. If that fails timer has * already fired or someone else deleted it. Just drop ref * and move to next entry. */ if (net_eq(nf_ct_net(tmp), net) && nf_ct_is_confirmed(tmp) && nf_ct_delete(tmp, 0, 0)) drops++; nf_ct_put(tmp); } return drops; } static noinline int early_drop(struct net *net, unsigned int hash) { unsigned int i, bucket; for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { struct hlist_nulls_head *ct_hash; unsigned int hsize, drops; rcu_read_lock(); nf_conntrack_get_ht(&ct_hash, &hsize); if (!i) bucket = reciprocal_scale(hash, hsize); else bucket = (bucket + 1) % hsize; drops = early_drop_list(net, &ct_hash[bucket]); rcu_read_unlock(); if (drops) { NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops); return true; } } return false; } static bool gc_worker_skip_ct(const struct nf_conn *ct) { return !nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct); } static bool gc_worker_can_early_drop(const struct nf_conn *ct) { const struct nf_conntrack_l4proto *l4proto; u8 protonum = nf_ct_protonum(ct); if (!test_bit(IPS_ASSURED_BIT, &ct->status)) return true; l4proto = nf_ct_l4proto_find(protonum); if (l4proto->can_early_drop && l4proto->can_early_drop(ct)) return true; return false; } static void gc_worker(struct work_struct *work) { unsigned int i, hashsz, nf_conntrack_max95 = 0; u32 end_time, start_time = nfct_time_stamp; struct conntrack_gc_work *gc_work; unsigned int expired_count = 0; unsigned long next_run; s32 delta_time; long count; gc_work = container_of(work, struct conntrack_gc_work, dwork.work); i = gc_work->next_bucket; if (gc_work->early_drop) nf_conntrack_max95 = nf_conntrack_max / 100u * 95u; if (i == 0) { gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT; gc_work->count = GC_SCAN_INITIAL_COUNT; gc_work->start_time = start_time; } next_run = gc_work->avg_timeout; count = gc_work->count; end_time = start_time + GC_SCAN_MAX_DURATION; do { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; struct hlist_nulls_node *n; struct nf_conn *tmp; rcu_read_lock(); nf_conntrack_get_ht(&ct_hash, &hashsz); if (i >= hashsz) { rcu_read_unlock(); break; } hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { struct nf_conntrack_net *cnet; struct net *net; long expires; tmp = nf_ct_tuplehash_to_ctrack(h); if (expired_count > GC_SCAN_EXPIRED_MAX) { rcu_read_unlock(); gc_work->next_bucket = i; gc_work->avg_timeout = next_run; gc_work->count = count; delta_time = nfct_time_stamp - gc_work->start_time; /* re-sched immediately if total cycle time is exceeded */ next_run = delta_time < (s32)GC_SCAN_INTERVAL_MAX; goto early_exit; } if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); expired_count++; continue; } expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP); expires = (expires - (long)next_run) / ++count; next_run += expires; if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp)) continue; net = nf_ct_net(tmp); cnet = nf_ct_pernet(net); if (atomic_read(&cnet->count) < nf_conntrack_max95) continue; /* need to take reference to avoid possible races */ if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; /* load ->status after refcount increase */ smp_acquire__after_ctrl_dep(); if (gc_worker_skip_ct(tmp)) { nf_ct_put(tmp); continue; } if (gc_worker_can_early_drop(tmp)) { nf_ct_kill(tmp); expired_count++; } nf_ct_put(tmp); } /* could check get_nulls_value() here and restart if ct * was moved to another chain. But given gc is best-effort * we will just continue with next hash slot. */ rcu_read_unlock(); cond_resched(); i++; delta_time = nfct_time_stamp - end_time; if (delta_time > 0 && i < hashsz) { gc_work->avg_timeout = next_run; gc_work->count = count; gc_work->next_bucket = i; next_run = 0; goto early_exit; } } while (i < hashsz); gc_work->next_bucket = 0; next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX); delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1); if (next_run > (unsigned long)delta_time) next_run -= delta_time; else next_run = 1; early_exit: if (gc_work->exiting) return; if (next_run) gc_work->early_drop = false; queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); gc_work->exiting = false; } static struct nf_conn * __nf_conntrack_alloc(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp, u32 hash) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); unsigned int ct_count; struct nf_conn *ct; /* We don't want any race condition at early drop stage */ ct_count = atomic_inc_return(&cnet->count); if (nf_conntrack_max && unlikely(ct_count > nf_conntrack_max)) { if (!early_drop(net, hash)) { if (!conntrack_gc_work.early_drop) conntrack_gc_work.early_drop = true; atomic_dec(&cnet->count); net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); return ERR_PTR(-ENOMEM); } } /* * Do not use kmem_cache_zalloc(), as this cache uses * SLAB_TYPESAFE_BY_RCU. */ ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); if (ct == NULL) goto out; spin_lock_init(&ct->lock); ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; ct->status = 0; WRITE_ONCE(ct->timeout, 0); write_pnet(&ct->ct_net, net); memset_after(ct, 0, __nfct_init_offset); nf_ct_zone_add(ct, zone); /* Because we use RCU lookups, we set ct_general.use to zero before * this is inserted in any list. */ refcount_set(&ct->ct_general.use, 0); return ct; out: atomic_dec(&cnet->count); return ERR_PTR(-ENOMEM); } struct nf_conn *nf_conntrack_alloc(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp) { return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0); } EXPORT_SYMBOL_GPL(nf_conntrack_alloc); void nf_conntrack_free(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); struct nf_conntrack_net *cnet; /* A freed object has refcnt == 0, that's * the golden rule for SLAB_TYPESAFE_BY_RCU */ WARN_ON(refcount_read(&ct->ct_general.use) != 0); if (ct->status & IPS_SRC_NAT_DONE) { const struct nf_nat_hook *nat_hook; rcu_read_lock(); nat_hook = rcu_dereference(nf_nat_hook); if (nat_hook) nat_hook->remove_nat_bysrc(ct); rcu_read_unlock(); } kfree(ct->ext); kmem_cache_free(nf_conntrack_cachep, ct); cnet = nf_ct_pernet(net); smp_mb__before_atomic(); atomic_dec(&cnet->count); } EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static noinline struct nf_conntrack_tuple_hash * init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, struct sk_buff *skb, unsigned int dataoff, u32 hash) { struct nf_conn *ct; struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; #endif struct nf_conntrack_expect *exp = NULL; const struct nf_conntrack_zone *zone; struct nf_conn_timeout *timeout_ext; struct nf_conntrack_zone tmp; struct nf_conntrack_net *cnet; if (!nf_ct_invert_tuple(&repl_tuple, tuple)) return NULL; zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, hash); if (IS_ERR(ct)) return ERR_CAST(ct); if (!nf_ct_add_synproxy(ct, tmpl)) { nf_conntrack_free(ct); return ERR_PTR(-ENOMEM); } timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; if (timeout_ext) nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout), GFP_ATOMIC); nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_labels_ext_add(ct); #ifdef CONFIG_NF_CONNTRACK_EVENTS ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; if ((ecache || net->ct.sysctl_events) && !nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, ecache ? ecache->expmask : 0, GFP_ATOMIC)) { nf_conntrack_free(ct); return ERR_PTR(-ENOMEM); } #endif cnet = nf_ct_pernet(net); if (cnet->expect_count) { spin_lock_bh(&nf_conntrack_expect_lock); exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl)); if (exp) { /* Welcome, Mr. Bond. We've been expecting you... */ __set_bit(IPS_EXPECTED_BIT, &ct->status); /* exp->master safe, refcnt bumped in nf_ct_find_expectation */ ct->master = exp->master; if (exp->helper) { help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help) rcu_assign_pointer(help->helper, exp->helper); } #ifdef CONFIG_NF_CONNTRACK_MARK ct->mark = READ_ONCE(exp->master->mark); #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK ct->secmark = exp->master->secmark; #endif NF_CT_STAT_INC(net, expect_new); } spin_unlock_bh(&nf_conntrack_expect_lock); } if (!exp && tmpl) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); /* Other CPU might have obtained a pointer to this object before it was * released. Because refcount is 0, refcount_inc_not_zero() will fail. * * After refcount_set(1) it will succeed; ensure that zeroing of * ct->status and the correct ct->net pointer are visible; else other * core might observe CONFIRMED bit which means the entry is valid and * in the hash table, but its not (anymore). */ smp_wmb(); /* Now it is going to be associated with an sk_buff, set refcount to 1. */ refcount_set(&ct->ct_general.use, 1); if (exp) { if (exp->expectfn) exp->expectfn(ct, exp); nf_ct_expect_put(exp); } return &ct->tuplehash[IP_CT_DIR_ORIGINAL]; } /* On success, returns 0, sets skb->_nfct | ctinfo */ static int resolve_normal_ct(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int8_t protonum, const struct nf_hook_state *state) { const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; u32 hash, zone_id, rid; struct nf_conn *ct; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, state->pf, protonum, state->net, &tuple)) return 0; /* look for tuple match */ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL); hash = hash_conntrack_raw(&tuple, zone_id, state->net); h = __nf_conntrack_find_get(state->net, zone, &tuple, hash); if (!h) { rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY); if (zone_id != rid) { u32 tmp = hash_conntrack_raw(&tuple, rid, state->net); h = __nf_conntrack_find_get(state->net, zone, &tuple, tmp); } } if (!h) { h = init_conntrack(state->net, tmpl, &tuple, skb, dataoff, hash); if (!h) return 0; if (IS_ERR(h)) return PTR_ERR(h); } ct = nf_ct_tuplehash_to_ctrack(h); /* It exists; we have (non-exclusive) reference. */ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { ctinfo = IP_CT_ESTABLISHED_REPLY; } else { unsigned long status = READ_ONCE(ct->status); /* Once we've had two way comms, always ESTABLISHED. */ if (likely(status & IPS_SEEN_REPLY)) ctinfo = IP_CT_ESTABLISHED; else if (status & IPS_EXPECTED) ctinfo = IP_CT_RELATED; else ctinfo = IP_CT_NEW; } nf_ct_set(skb, ct, ctinfo); return 0; } /* * icmp packets need special treatment to handle error messages that are * related to a connection. * * Callers need to check if skb has a conntrack assigned when this * helper returns; in such case skb belongs to an already known connection. */ static unsigned int __cold nf_conntrack_handle_icmp(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u8 protonum, const struct nf_hook_state *state) { int ret; if (state->pf == NFPROTO_IPV4 && protonum == IPPROTO_ICMP) ret = nf_conntrack_icmpv4_error(tmpl, skb, dataoff, state); #if IS_ENABLED(CONFIG_IPV6) else if (state->pf == NFPROTO_IPV6 && protonum == IPPROTO_ICMPV6) ret = nf_conntrack_icmpv6_error(tmpl, skb, dataoff, state); #endif else return NF_ACCEPT; if (ret <= 0) NF_CT_STAT_INC_ATOMIC(state->net, error); return ret; } static int generic_packet(struct nf_conn *ct, struct sk_buff *skb, enum ip_conntrack_info ctinfo) { const unsigned int *timeout = nf_ct_timeout_lookup(ct); if (!timeout) timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout; nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; } /* Returns verdict for packet, or -1 for invalid. */ static int nf_conntrack_handle_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { switch (nf_ct_protonum(ct)) { case IPPROTO_TCP: return nf_conntrack_tcp_packet(ct, skb, dataoff, ctinfo, state); case IPPROTO_UDP: return nf_conntrack_udp_packet(ct, skb, dataoff, ctinfo, state); case IPPROTO_ICMP: return nf_conntrack_icmp_packet(ct, skb, ctinfo, state); #if IS_ENABLED(CONFIG_IPV6) case IPPROTO_ICMPV6: return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state); #endif #ifdef CONFIG_NF_CT_PROTO_UDPLITE case IPPROTO_UDPLITE: return nf_conntrack_udplite_packet(ct, skb, dataoff, ctinfo, state); #endif #ifdef CONFIG_NF_CT_PROTO_SCTP case IPPROTO_SCTP: return nf_conntrack_sctp_packet(ct, skb, dataoff, ctinfo, state); #endif #ifdef CONFIG_NF_CT_PROTO_DCCP case IPPROTO_DCCP: return nf_conntrack_dccp_packet(ct, skb, dataoff, ctinfo, state); #endif #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: return nf_conntrack_gre_packet(ct, skb, dataoff, ctinfo, state); #endif } return generic_packet(ct, skb, ctinfo); } unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) { enum ip_conntrack_info ctinfo; struct nf_conn *ct, *tmpl; u_int8_t protonum; int dataoff, ret; tmpl = nf_ct_get(skb, &ctinfo); if (tmpl || ctinfo == IP_CT_UNTRACKED) { /* Previously seen (loopback or untracked)? Ignore. */ if ((tmpl && !nf_ct_is_template(tmpl)) || ctinfo == IP_CT_UNTRACKED) return NF_ACCEPT; skb->_nfct = 0; } /* rcu_read_lock()ed by nf_hook_thresh */ dataoff = get_l4proto(skb, skb_network_offset(skb), state->pf, &protonum); if (dataoff <= 0) { NF_CT_STAT_INC_ATOMIC(state->net, invalid); ret = NF_ACCEPT; goto out; } if (protonum == IPPROTO_ICMP || protonum == IPPROTO_ICMPV6) { ret = nf_conntrack_handle_icmp(tmpl, skb, dataoff, protonum, state); if (ret <= 0) { ret = -ret; goto out; } /* ICMP[v6] protocol trackers may assign one conntrack. */ if (skb->_nfct) goto out; } repeat: ret = resolve_normal_ct(tmpl, skb, dataoff, protonum, state); if (ret < 0) { /* Too stressed to deal. */ NF_CT_STAT_INC_ATOMIC(state->net, drop); ret = NF_DROP; goto out; } ct = nf_ct_get(skb, &ctinfo); if (!ct) { /* Not valid part of a connection */ NF_CT_STAT_INC_ATOMIC(state->net, invalid); ret = NF_ACCEPT; goto out; } ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state); if (ret <= 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ nf_ct_put(ct); skb->_nfct = 0; /* Special case: TCP tracker reports an attempt to reopen a * closed/aborted connection. We have to go back and create a * fresh conntrack. */ if (ret == -NF_REPEAT) goto repeat; NF_CT_STAT_INC_ATOMIC(state->net, invalid); if (ret == NF_DROP) NF_CT_STAT_INC_ATOMIC(state->net, drop); ret = -ret; goto out; } if (ctinfo == IP_CT_ESTABLISHED_REPLY && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_REPLY, ct); out: if (tmpl) nf_ct_put(tmpl); return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_in); /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, u32 extra_jiffies, unsigned int bytes) { /* Only update if this is not a fixed timeout */ if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) goto acct; /* If not in hash table, timer will not be active yet */ if (nf_ct_is_confirmed(ct)) extra_jiffies += nfct_time_stamp; if (READ_ONCE(ct->timeout) != extra_jiffies) WRITE_ONCE(ct->timeout, extra_jiffies); acct: if (bytes) nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes); } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); bool nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb) { nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len); return nf_ct_delete(ct, 0, 0); } EXPORT_SYMBOL_GPL(nf_ct_kill_acct); #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> #include <linux/mutex.h> /* Generic function for tcp/udp/sctp/dccp and alike. */ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) || nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port)) goto nla_put_failure; return 0; nla_put_failure: return -1; } EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr); const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = { [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 }, [CTA_PROTO_DST_PORT] = { .type = NLA_U16 }, }; EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy); int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t, u_int32_t flags) { if (flags & CTA_FILTER_FLAG(CTA_PROTO_SRC_PORT)) { if (!tb[CTA_PROTO_SRC_PORT]) return -EINVAL; t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]); } if (flags & CTA_FILTER_FLAG(CTA_PROTO_DST_PORT)) { if (!tb[CTA_PROTO_DST_PORT]) return -EINVAL; t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]); } return 0; } EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); unsigned int nf_ct_port_nlattr_tuple_size(void) { static unsigned int size __read_mostly; if (!size) size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); return size; } EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); #endif /* Used by ipt_REJECT and ip6t_REJECT. */ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; /* This ICMP is in reverse direction to the packet which caused it */ ct = nf_ct_get(skb, &ctinfo); if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ctinfo = IP_CT_RELATED_REPLY; else ctinfo = IP_CT_RELATED; /* Attach to new skbuff, and increment count */ nf_ct_set(nskb, ct, ctinfo); nf_conntrack_get(skb_nfct(nskb)); } /* This packet is coming from userspace via nf_queue, complete the packet * processing after the helper invocation in nf_confirm(). */ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { const struct nf_conntrack_helper *helper; const struct nf_conn_help *help; int protoff; help = nfct_help(ct); if (!help) return NF_ACCEPT; helper = rcu_dereference(help->helper); if (!helper) return NF_ACCEPT; if (!(helper->flags & NF_CT_HELPER_F_USERSPACE)) return NF_ACCEPT; switch (nf_ct_l3num(ct)) { case NFPROTO_IPV4: protoff = skb_network_offset(skb) + ip_hdrlen(skb); break; #if IS_ENABLED(CONFIG_IPV6) case NFPROTO_IPV6: { __be16 frag_off; u8 pnum; pnum = ipv6_hdr(skb)->nexthdr; protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off); if (protoff < 0 || (frag_off & htons(~0x7)) != 0) return NF_ACCEPT; break; } #endif default: return NF_ACCEPT; } if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && !nf_is_loopback_packet(skb)) { if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); return NF_DROP; } } /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(skb); } static int nf_conntrack_update(struct net *net, struct sk_buff *skb) { enum ip_conntrack_info ctinfo; struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); if (!ct) return NF_ACCEPT; return nf_confirm_cthelper(skb, ct, ctinfo); } static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) { const struct nf_conntrack_tuple *src_tuple; const struct nf_conntrack_tuple_hash *hash; struct nf_conntrack_tuple srctuple; enum ip_conntrack_info ctinfo; struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); if (ct) { src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo)); memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple)); return true; } if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), NFPROTO_IPV4, dev_net(skb->dev), &srctuple)) return false; hash = nf_conntrack_find_get(dev_net(skb->dev), &nf_ct_zone_dflt, &srctuple); if (!hash) return false; ct = nf_ct_tuplehash_to_ctrack(hash); src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir); memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple)); nf_ct_put(ct); return true; } /* Bring out ya dead! */ static struct nf_conn * get_next_corpse(int (*iter)(struct nf_conn *i, void *data), const struct nf_ct_iter_data *iter_data, unsigned int *bucket) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; struct hlist_nulls_node *n; spinlock_t *lockp; for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket]; if (hlist_nulls_empty(hslot)) continue; lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS]; local_bh_disable(); nf_conntrack_lock(lockp); hlist_nulls_for_each_entry(h, n, hslot, hnnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY) continue; /* All nf_conn objects are added to hash table twice, one * for original direction tuple, once for the reply tuple. * * Exception: In the IPS_NAT_CLASH case, only the reply * tuple is added (the original tuple already existed for * a different object). * * We only need to call the iterator once for each * conntrack, so we just use the 'reply' direction * tuple while iterating. */ ct = nf_ct_tuplehash_to_ctrack(h); if (iter_data->net && !net_eq(iter_data->net, nf_ct_net(ct))) continue; if (iter(ct, iter_data->data)) goto found; } spin_unlock(lockp); local_bh_enable(); cond_resched(); } return NULL; found: refcount_inc(&ct->ct_general.use); spin_unlock(lockp); local_bh_enable(); return ct; } static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), const struct nf_ct_iter_data *iter_data) { unsigned int bucket = 0; struct nf_conn *ct; might_sleep(); mutex_lock(&nf_conntrack_mutex); while ((ct = get_next_corpse(iter, iter_data, &bucket)) != NULL) { /* Time to push up daises... */ nf_ct_delete(ct, iter_data->portid, iter_data->report); nf_ct_put(ct); cond_resched(); } mutex_unlock(&nf_conntrack_mutex); } void nf_ct_iterate_cleanup_net(int (*iter)(struct nf_conn *i, void *data), const struct nf_ct_iter_data *iter_data) { struct net *net = iter_data->net; struct nf_conntrack_net *cnet = nf_ct_pernet(net); might_sleep(); if (atomic_read(&cnet->count) == 0) return; nf_ct_iterate_cleanup(iter, iter_data); } EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net); /** * nf_ct_iterate_destroy - destroy unconfirmed conntracks and iterate table * @iter: callback to invoke for each conntrack * @data: data to pass to @iter * * Like nf_ct_iterate_cleanup, but first marks conntracks on the * unconfirmed list as dying (so they will not be inserted into * main table). * * Can only be called in module exit path. */ void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) { struct nf_ct_iter_data iter_data = {}; struct net *net; down_read(&net_rwsem); for_each_net(net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); if (atomic_read(&cnet->count) == 0) continue; nf_queue_nf_hook_drop(net); } up_read(&net_rwsem); /* Need to wait for netns cleanup worker to finish, if its * running -- it might have deleted a net namespace from * the global list, so hook drop above might not have * affected all namespaces. */ net_ns_barrier(); /* a skb w. unconfirmed conntrack could have been reinjected just * before we called nf_queue_nf_hook_drop(). * * This makes sure its inserted into conntrack table. */ synchronize_net(); nf_ct_ext_bump_genid(); iter_data.data = data; nf_ct_iterate_cleanup(iter, &iter_data); /* Another cpu might be in a rcu read section with * rcu protected pointer cleared in iter callback * or hidden via nf_ct_ext_bump_genid() above. * * Wait until those are done. */ synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy); static int kill_all(struct nf_conn *i, void *data) { return 1; } void nf_conntrack_cleanup_start(void) { cleanup_nf_conntrack_bpf(); conntrack_gc_work.exiting = true; } void nf_conntrack_cleanup_end(void) { RCU_INIT_POINTER(nf_ct_hook, NULL); cancel_delayed_work_sync(&conntrack_gc_work.dwork); kvfree(nf_conntrack_hash); nf_conntrack_proto_fini(); nf_conntrack_helper_fini(); nf_conntrack_expect_fini(); kmem_cache_destroy(nf_conntrack_cachep); } /* * Mishearing the voices in his head, our hero wonders how he's * supposed to kill the mall. */ void nf_conntrack_cleanup_net(struct net *net) { LIST_HEAD(single); list_add(&net->exit_list, &single); nf_conntrack_cleanup_net_list(&single); } void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) { struct nf_ct_iter_data iter_data = {}; struct net *net; int busy; /* * This makes sure all current packets have passed through * netfilter framework. Roll on, two-stage module * delete... */ synchronize_rcu_expedited(); i_see_dead_people: busy = 0; list_for_each_entry(net, net_exit_list, exit_list) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); iter_data.net = net; nf_ct_iterate_cleanup_net(kill_all, &iter_data); if (atomic_read(&cnet->count) != 0) busy = 1; } if (busy) { schedule(); goto i_see_dead_people; } list_for_each_entry(net, net_exit_list, exit_list) { nf_conntrack_ecache_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); free_percpu(net->ct.stat); } } void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) { struct hlist_nulls_head *hash; unsigned int nr_slots, i; if (*sizep > (INT_MAX / sizeof(struct hlist_nulls_head))) return NULL; BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); if (nr_slots > (INT_MAX / sizeof(struct hlist_nulls_head))) return NULL; hash = kvcalloc(nr_slots, sizeof(struct hlist_nulls_head), GFP_KERNEL); if (hash && nulls) for (i = 0; i < nr_slots; i++) INIT_HLIST_NULLS_HEAD(&hash[i], i); return hash; } EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); int nf_conntrack_hash_resize(unsigned int hashsize) { int i, bucket; unsigned int old_size; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; if (!hashsize) return -EINVAL; hash = nf_ct_alloc_hashtable(&hashsize, 1); if (!hash) return -ENOMEM; mutex_lock(&nf_conntrack_mutex); old_size = nf_conntrack_htable_size; if (old_size == hashsize) { mutex_unlock(&nf_conntrack_mutex); kvfree(hash); return 0; } local_bh_disable(); nf_conntrack_all_lock(); write_seqcount_begin(&nf_conntrack_generation); /* Lookups in the old hash might happen in parallel, which means we * might get false negatives during connection lookup. New connections * created because of a false negative won't make it into the hash * though since that required taking the locks. */ for (i = 0; i < nf_conntrack_htable_size; i++) { while (!hlist_nulls_empty(&nf_conntrack_hash[i])) { unsigned int zone_id; h = hlist_nulls_entry(nf_conntrack_hash[i].first, struct nf_conntrack_tuple_hash, hnnode); ct = nf_ct_tuplehash_to_ctrack(h); hlist_nulls_del_rcu(&h->hnnode); zone_id = nf_ct_zone_id(nf_ct_zone(ct), NF_CT_DIRECTION(h)); bucket = __hash_conntrack(nf_ct_net(ct), &h->tuple, zone_id, hashsize); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } old_hash = nf_conntrack_hash; nf_conntrack_hash = hash; nf_conntrack_htable_size = hashsize; write_seqcount_end(&nf_conntrack_generation); nf_conntrack_all_unlock(); local_bh_enable(); mutex_unlock(&nf_conntrack_mutex); synchronize_net(); kvfree(old_hash); return 0; } int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp) { unsigned int hashsize; int rc; if (current->nsproxy->net_ns != &init_net) return -EOPNOTSUPP; /* On boot, we can set this without any fancy locking. */ if (!nf_conntrack_hash) return param_set_uint(val, kp); rc = kstrtouint(val, 0, &hashsize); if (rc) return rc; return nf_conntrack_hash_resize(hashsize); } int nf_conntrack_init_start(void) { unsigned long nr_pages = totalram_pages(); int max_factor = 8; int ret = -ENOMEM; int i; seqcount_spinlock_init(&nf_conntrack_generation, &nf_conntrack_locks_all_lock); for (i = 0; i < CONNTRACK_LOCKS; i++) spin_lock_init(&nf_conntrack_locks[i]); if (!nf_conntrack_htable_size) { nf_conntrack_htable_size = (((nr_pages << PAGE_SHIFT) / 16384) / sizeof(struct hlist_head)); if (BITS_PER_LONG >= 64 && nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) nf_conntrack_htable_size = 262144; else if (nr_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) nf_conntrack_htable_size = 65536; if (nf_conntrack_htable_size < 1024) nf_conntrack_htable_size = 1024; /* Use a max. factor of one by default to keep the average * hash chain length at 2 entries. Each entry has to be added * twice (once for original direction, once for reply). * When a table size is given we use the old value of 8 to * avoid implicit reduction of the max entries setting. */ max_factor = 1; } nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1); if (!nf_conntrack_hash) return -ENOMEM; nf_conntrack_max = max_factor * nf_conntrack_htable_size; nf_conntrack_cachep = kmem_cache_create("nf_conntrack", sizeof(struct nf_conn), NFCT_INFOMASK + 1, SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); if (!nf_conntrack_cachep) goto err_cachep; ret = nf_conntrack_expect_init(); if (ret < 0) goto err_expect; ret = nf_conntrack_helper_init(); if (ret < 0) goto err_helper; ret = nf_conntrack_proto_init(); if (ret < 0) goto err_proto; conntrack_gc_work_init(&conntrack_gc_work); queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ); ret = register_nf_conntrack_bpf(); if (ret < 0) goto err_kfunc; return 0; err_kfunc: cancel_delayed_work_sync(&conntrack_gc_work.dwork); nf_conntrack_proto_fini(); err_proto: nf_conntrack_helper_fini(); err_helper: nf_conntrack_expect_fini(); err_expect: kmem_cache_destroy(nf_conntrack_cachep); err_cachep: kvfree(nf_conntrack_hash); return ret; } static void nf_conntrack_set_closing(struct nf_conntrack *nfct) { struct nf_conn *ct = nf_ct_to_nf_conn(nfct); switch (nf_ct_protonum(ct)) { case IPPROTO_TCP: nf_conntrack_tcp_set_closing(ct); break; } } static const struct nf_ct_hook nf_conntrack_hook = { .update = nf_conntrack_update, .destroy = nf_ct_destroy, .get_tuple_skb = nf_conntrack_get_tuple_skb, .attach = nf_conntrack_attach, .set_closing = nf_conntrack_set_closing, .confirm = __nf_conntrack_confirm, }; void nf_conntrack_init_end(void) { RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook); } /* * We need to use special "null" values, not used in hash table */ #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) int nf_conntrack_init_net(struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); int ret = -ENOMEM; BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER); BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS); atomic_set(&cnet->count, 0); net->ct.stat = alloc_percpu(struct ip_conntrack_stat); if (!net->ct.stat) return ret; ret = nf_conntrack_expect_pernet_init(net); if (ret < 0) goto err_expect; nf_conntrack_acct_pernet_init(net); nf_conntrack_tstamp_pernet_init(net); nf_conntrack_ecache_pernet_init(net); nf_conntrack_proto_pernet_init(net); return 0; err_expect: free_percpu(net->ct.stat); return ret; } /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ int __nf_ct_change_timeout(struct nf_conn *ct, u64 timeout) { if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) return -EPERM; __nf_ct_set_timeout(ct, timeout); if (test_bit(IPS_DYING_BIT, &ct->status)) return -ETIME; return 0; } EXPORT_SYMBOL_GPL(__nf_ct_change_timeout); void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off) { unsigned int bit; /* Ignore these unchangable bits */ on &= ~IPS_UNCHANGEABLE_MASK; off &= ~IPS_UNCHANGEABLE_MASK; for (bit = 0; bit < __IPS_MAX_BIT; bit++) { if (on & (1 << bit)) set_bit(bit, &ct->status); else if (off & (1 << bit)) clear_bit(bit, &ct->status); } } EXPORT_SYMBOL_GPL(__nf_ct_change_status); int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status) { unsigned long d; d = ct->status ^ status; if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) /* unchangeable */ return -EBUSY; if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) /* SEEN_REPLY bit can only be set */ return -EBUSY; if (d & IPS_ASSURED && !(status & IPS_ASSURED)) /* ASSURED bit can only be set */ return -EBUSY; __nf_ct_change_status(ct, status, 0); return 0; } EXPORT_SYMBOL_GPL(nf_ct_change_status_common);
215 64 64 53 10 15 15 15 4 2 2 2 1 1 2 1 13 14 5 2 2 19 14 1 5 3 1 5 2 3 2 1 4 3 3 1 1 2 63 6 1 1 1 1 22 9 5 1 32 2 5 1 2 2 40 33 57 1 11 1 10 10 4 6 7 6 3 3 4 4 4 4 2 2 5 5 5 2 3 3 1 5 7 3 2 17 1 3 3 2 34 20 4 1 11 9 12 2 9 10 3 1 1 2 4 5 2 2 2 195 195 196 1 69 130 125 4 38 32 55 1 15 16 3 13 13 13 1 1 1 12 1 10 28 2 2 2 4 2 16 7 6 2 2 7 1 2 4 4 1 8 6 2 1 3 1 1 1 1 1 2 1 7 7 1 12 8 1 5 3 2 3 8 18 2 1 11 1 10 18 16 1 3 2 1 2 1 1 1 1 1 1 1 1 1 1 1 3 20 13 15 90 91 91 92 1 2 4 33 52 24 19 2 3 3 2 4 20 22 2 24 24 24 24 24 162 160 162 138 24 2 1 2 3 1 2 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 // SPDX-License-Identifier: GPL-2.0 /* Multipath TCP * * Copyright (c) 2021, Red Hat. */ #define pr_fmt(fmt) "MPTCP: " fmt #include <linux/kernel.h> #include <linux/module.h> #include <net/sock.h> #include <net/protocol.h> #include <net/tcp.h> #include <net/mptcp.h> #include "protocol.h" #define MIN_INFO_OPTLEN_SIZE 16 #define MIN_FULL_INFO_OPTLEN_SIZE 40 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) { msk_owned_by_me(msk); if (likely(!__mptcp_check_fallback(msk))) return NULL; return msk->first; } static u32 sockopt_seq_reset(const struct sock *sk) { sock_owned_by_me(sk); /* Highbits contain state. Allows to distinguish sockopt_seq * of listener and established: * s0 = new_listener() * sockopt(s0) - seq is 1 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) * sockopt(s0) - seq increments to 2 on s0 * sockopt(s1) // seq increments to 2 on s1 (different option) * new ssk completes join, inherits options from s0 // seq 2 * Needs sync from mptcp join logic, but ssk->seq == msk->seq * * Set High order bits to sk_state so ssk->seq == msk->seq test * will fail. */ return (u32)sk->sk_state << 24u; } static void sockopt_seq_inc(struct mptcp_sock *msk) { u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; } static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, unsigned int optlen, int *val) { if (optlen < sizeof(int)) return -EINVAL; if (copy_from_sockptr(val, optval, sizeof(*val))) return -EFAULT; return 0; } static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; lock_sock(sk); sockopt_seq_inc(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); switch (optname) { case SO_DEBUG: sock_valbool_flag(ssk, SOCK_DBG, !!val); break; case SO_KEEPALIVE: if (ssk->sk_prot->keepalive) ssk->sk_prot->keepalive(ssk, !!val); sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); break; case SO_PRIORITY: WRITE_ONCE(ssk->sk_priority, val); break; case SO_SNDBUF: case SO_SNDBUFFORCE: ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; break; case SO_RCVBUF: case SO_RCVBUFFORCE: ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); break; case SO_MARK: if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { WRITE_ONCE(ssk->sk_mark, sk->sk_mark); sk_dst_reset(ssk); } break; case SO_INCOMING_CPU: WRITE_ONCE(ssk->sk_incoming_cpu, val); break; } subflow->setsockopt_seq = msk->setsockopt_seq; unlock_sock_fast(ssk, slow); } release_sock(sk); } static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) { sockptr_t optval = KERNEL_SOCKPTR(&val); struct sock *sk = (struct sock *)msk; int ret; ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, sizeof(val)); if (ret) return ret; mptcp_sol_socket_sync_intval(msk, optname, val); return 0; } static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) { struct sock *sk = (struct sock *)msk; WRITE_ONCE(sk->sk_incoming_cpu, val); mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); } static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) { sockptr_t optval = KERNEL_SOCKPTR(&val); struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; int ret; ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, sizeof(val)); if (ret) return ret; lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); sock_set_timestamp(sk, optname, !!val); unlock_sock_fast(ssk, slow); } release_sock(sk); return 0; } static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { int val, ret; ret = mptcp_get_int_option(msk, optval, optlen, &val); if (ret) return ret; switch (optname) { case SO_KEEPALIVE: case SO_DEBUG: case SO_MARK: case SO_PRIORITY: case SO_SNDBUF: case SO_SNDBUFFORCE: case SO_RCVBUF: case SO_RCVBUFFORCE: return mptcp_sol_socket_intval(msk, optname, val); case SO_INCOMING_CPU: mptcp_so_incoming_cpu(msk, val); return 0; case SO_TIMESTAMP_OLD: case SO_TIMESTAMP_NEW: case SO_TIMESTAMPNS_OLD: case SO_TIMESTAMPNS_NEW: return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); } return -ENOPROTOOPT; } static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; struct so_timestamping timestamping; int ret; if (optlen == sizeof(timestamping)) { if (copy_from_sockptr(&timestamping, optval, sizeof(timestamping))) return -EFAULT; } else if (optlen == sizeof(int)) { memset(&timestamping, 0, sizeof(timestamping)); if (copy_from_sockptr(&timestamping.flags, optval, sizeof(int))) return -EFAULT; } else { return -EINVAL; } ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, KERNEL_SOCKPTR(&timestamping), sizeof(timestamping)); if (ret) return ret; lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); sock_set_timestamping(sk, optname, timestamping); unlock_sock_fast(ssk, slow); } release_sock(sk); return 0; } static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, unsigned int optlen) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; struct linger ling; sockptr_t kopt; int ret; if (optlen < sizeof(ling)) return -EINVAL; if (copy_from_sockptr(&ling, optval, sizeof(ling))) return -EFAULT; kopt = KERNEL_SOCKPTR(&ling); ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); if (ret) return ret; lock_sock(sk); sockopt_seq_inc(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); if (!ling.l_onoff) { sock_reset_flag(ssk, SOCK_LINGER); } else { ssk->sk_lingertime = sk->sk_lingertime; sock_set_flag(ssk, SOCK_LINGER); } subflow->setsockopt_seq = msk->setsockopt_seq; unlock_sock_fast(ssk, slow); } release_sock(sk); return 0; } static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = (struct sock *)msk; struct sock *ssk; int ret; switch (optname) { case SO_REUSEPORT: case SO_REUSEADDR: case SO_BINDTODEVICE: case SO_BINDTOIFINDEX: lock_sock(sk); ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { release_sock(sk); return PTR_ERR(ssk); } ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen); if (ret == 0) { if (optname == SO_REUSEPORT) sk->sk_reuseport = ssk->sk_reuseport; else if (optname == SO_REUSEADDR) sk->sk_reuse = ssk->sk_reuse; else if (optname == SO_BINDTODEVICE) sk->sk_bound_dev_if = ssk->sk_bound_dev_if; else if (optname == SO_BINDTOIFINDEX) sk->sk_bound_dev_if = ssk->sk_bound_dev_if; } release_sock(sk); return ret; case SO_KEEPALIVE: case SO_PRIORITY: case SO_SNDBUF: case SO_SNDBUFFORCE: case SO_RCVBUF: case SO_RCVBUFFORCE: case SO_MARK: case SO_INCOMING_CPU: case SO_DEBUG: case SO_TIMESTAMP_OLD: case SO_TIMESTAMP_NEW: case SO_TIMESTAMPNS_OLD: case SO_TIMESTAMPNS_NEW: return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen); case SO_TIMESTAMPING_OLD: case SO_TIMESTAMPING_NEW: return mptcp_setsockopt_sol_socket_timestamping(msk, optname, optval, optlen); case SO_LINGER: return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); case SO_RCVLOWAT: case SO_RCVTIMEO_OLD: case SO_RCVTIMEO_NEW: case SO_SNDTIMEO_OLD: case SO_SNDTIMEO_NEW: case SO_BUSY_POLL: case SO_PREFER_BUSY_POLL: case SO_BUSY_POLL_BUDGET: /* No need to copy: only relevant for msk */ return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); case SO_NO_CHECK: case SO_DONTROUTE: case SO_BROADCAST: case SO_BSDCOMPAT: case SO_PASSCRED: case SO_PASSPIDFD: case SO_PASSSEC: case SO_RXQ_OVFL: case SO_WIFI_STATUS: case SO_NOFCS: case SO_SELECT_ERR_QUEUE: return 0; } /* SO_OOBINLINE is not supported, let's avoid the related mess * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, * we must be careful with subflows * * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks * explicitly the sk_protocol field * * SO_PEEK_OFF is unsupported, as it is for plain TCP * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, * but likely needs careful design * * SO_ZEROCOPY is currently unsupported, TODO in sndmsg * SO_TXTIME is currently unsupported */ return -EOPNOTSUPP; } static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = (struct sock *)msk; int ret = -EOPNOTSUPP; struct sock *ssk; switch (optname) { case IPV6_V6ONLY: case IPV6_TRANSPARENT: case IPV6_FREEBIND: lock_sock(sk); ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { release_sock(sk); return PTR_ERR(ssk); } ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); if (ret != 0) { release_sock(sk); return ret; } sockopt_seq_inc(msk); switch (optname) { case IPV6_V6ONLY: sk->sk_ipv6only = ssk->sk_ipv6only; break; case IPV6_TRANSPARENT: inet_assign_bit(TRANSPARENT, sk, inet_test_bit(TRANSPARENT, ssk)); break; case IPV6_FREEBIND: inet_assign_bit(FREEBIND, sk, inet_test_bit(FREEBIND, ssk)); break; } release_sock(sk); break; } return ret; } static bool mptcp_supported_sockopt(int level, int optname) { if (level == SOL_IP) { switch (optname) { /* should work fine */ case IP_FREEBIND: case IP_TRANSPARENT: case IP_BIND_ADDRESS_NO_PORT: case IP_LOCAL_PORT_RANGE: /* the following are control cmsg related */ case IP_PKTINFO: case IP_RECVTTL: case IP_RECVTOS: case IP_RECVOPTS: case IP_RETOPTS: case IP_PASSSEC: case IP_RECVORIGDSTADDR: case IP_CHECKSUM: case IP_RECVFRAGSIZE: /* common stuff that need some love */ case IP_TOS: case IP_TTL: case IP_MTU_DISCOVER: case IP_RECVERR: /* possibly less common may deserve some love */ case IP_MINTTL: /* the following is apparently a no-op for plain TCP */ case IP_RECVERR_RFC4884: return true; } /* IP_OPTIONS is not supported, needs subflow care */ /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal * with mcast stuff */ /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ return false; } if (level == SOL_IPV6) { switch (optname) { case IPV6_V6ONLY: /* the following are control cmsg related */ case IPV6_RECVPKTINFO: case IPV6_2292PKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_2292HOPLIMIT: case IPV6_RECVRTHDR: case IPV6_2292RTHDR: case IPV6_RECVHOPOPTS: case IPV6_2292HOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_2292DSTOPTS: case IPV6_RECVTCLASS: case IPV6_FLOWINFO: case IPV6_RECVPATHMTU: case IPV6_RECVORIGDSTADDR: case IPV6_RECVFRAGSIZE: /* the following ones need some love but are quite common */ case IPV6_TCLASS: case IPV6_TRANSPARENT: case IPV6_FREEBIND: case IPV6_PKTINFO: case IPV6_2292PKTOPTIONS: case IPV6_UNICAST_HOPS: case IPV6_MTU_DISCOVER: case IPV6_MTU: case IPV6_RECVERR: case IPV6_FLOWINFO_SEND: case IPV6_FLOWLABEL_MGR: case IPV6_MINHOPCOUNT: case IPV6_DONTFRAG: case IPV6_AUTOFLOWLABEL: /* the following one is a no-op for plain TCP */ case IPV6_RECVERR_RFC4884: return true; } /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are * not supported */ /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, * IPV6_MULTICAST_IF, IPV6_ADDRFORM, * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER * are not supported better not deal with mcast */ /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ return false; } if (level == SOL_TCP) { switch (optname) { /* the following are no-op or should work just fine */ case TCP_THIN_DUPACK: case TCP_DEFER_ACCEPT: /* the following need some love */ case TCP_MAXSEG: case TCP_NODELAY: case TCP_THIN_LINEAR_TIMEOUTS: case TCP_CONGESTION: case TCP_CORK: case TCP_KEEPIDLE: case TCP_KEEPINTVL: case TCP_KEEPCNT: case TCP_SYNCNT: case TCP_SAVE_SYN: case TCP_LINGER2: case TCP_WINDOW_CLAMP: case TCP_QUICKACK: case TCP_USER_TIMEOUT: case TCP_TIMESTAMP: case TCP_NOTSENT_LOWAT: case TCP_TX_DELAY: case TCP_INQ: case TCP_FASTOPEN: case TCP_FASTOPEN_CONNECT: case TCP_FASTOPEN_KEY: case TCP_FASTOPEN_NO_COOKIE: return true; } /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, * TCP_REPAIR_WINDOW are not supported, better avoid this mess */ } return false; } static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, unsigned int optlen) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; char name[TCP_CA_NAME_MAX]; bool cap_net_admin; int ret; if (optlen < 1) return -EINVAL; ret = strncpy_from_sockptr(name, optval, min_t(long, TCP_CA_NAME_MAX - 1, optlen)); if (ret < 0) return -EFAULT; name[ret] = 0; cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); ret = 0; lock_sock(sk); sockopt_seq_inc(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int err; lock_sock(ssk); err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); if (err < 0 && ret == 0) ret = err; subflow->setsockopt_seq = msk->setsockopt_seq; release_sock(ssk); } if (ret == 0) strscpy(msk->ca_name, name, sizeof(msk->ca_name)); release_sock(sk); return ret; } static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max, int (*set_val)(struct sock *, int), int *msk_val, int val) { struct mptcp_subflow_context *subflow; int err = 0; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int ret; lock_sock(ssk); ret = set_val(ssk, val); err = err ? : ret; release_sock(ssk); } if (!err) { *msk_val = val; sockopt_seq_inc(msk); } return err; } static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; sockopt_seq_inc(msk); msk->cork = !!val; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); lock_sock(ssk); __tcp_sock_set_cork(ssk, !!val); release_sock(ssk); } if (!val) mptcp_check_and_set_pending(sk); return 0; } static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; sockopt_seq_inc(msk); msk->nodelay = !!val; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); lock_sock(ssk); __tcp_sock_set_nodelay(ssk, !!val); release_sock(ssk); } if (val) mptcp_check_and_set_pending(sk); return 0; } static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = (struct sock *)msk; struct sock *ssk; int err; err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); if (err != 0) return err; lock_sock(sk); ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { release_sock(sk); return PTR_ERR(ssk); } switch (optname) { case IP_FREEBIND: inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); break; case IP_TRANSPARENT: inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); break; case IP_BIND_ADDRESS_NO_PORT: inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); break; case IP_LOCAL_PORT_RANGE: WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range)); break; default: release_sock(sk); WARN_ON_ONCE(1); return -EOPNOTSUPP; } sockopt_seq_inc(msk); release_sock(sk); return 0; } static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; int err, val; err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); if (err != 0) return err; lock_sock(sk); sockopt_seq_inc(msk); val = READ_ONCE(inet_sk(sk)->tos); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow; slow = lock_sock_fast(ssk); __ip_sock_set_tos(ssk, val); unlock_sock_fast(ssk, slow); } release_sock(sk); return 0; } static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { switch (optname) { case IP_FREEBIND: case IP_TRANSPARENT: case IP_BIND_ADDRESS_NO_PORT: case IP_LOCAL_PORT_RANGE: return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen); case IP_TOS: return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); } return -EOPNOTSUPP; } static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = (struct sock *)msk; struct sock *ssk; int ret; /* Limit to first subflow, before the connection establishment */ lock_sock(sk); ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { ret = PTR_ERR(ssk); goto unlock; } ret = tcp_setsockopt(ssk, level, optname, optval, optlen); unlock: release_sock(sk); return ret; } static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = (void *)msk; int ret, val; switch (optname) { case TCP_ULP: return -EOPNOTSUPP; case TCP_CONGESTION: return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); case TCP_DEFER_ACCEPT: /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); return 0; case TCP_FASTOPEN: case TCP_FASTOPEN_CONNECT: case TCP_FASTOPEN_KEY: case TCP_FASTOPEN_NO_COOKIE: return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); } ret = mptcp_get_int_option(msk, optval, optlen, &val); if (ret) return ret; lock_sock(sk); switch (optname) { case TCP_INQ: if (val < 0 || val > 1) ret = -EINVAL; else msk->recvmsg_inq = !!val; break; case TCP_NOTSENT_LOWAT: WRITE_ONCE(msk->notsent_lowat, val); mptcp_write_space(sk); break; case TCP_CORK: ret = __mptcp_setsockopt_sol_tcp_cork(msk, val); break; case TCP_NODELAY: ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val); break; case TCP_KEEPIDLE: ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE, &tcp_sock_set_keepidle_locked, &msk->keepalive_idle, val); break; case TCP_KEEPINTVL: ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL, &tcp_sock_set_keepintvl, &msk->keepalive_intvl, val); break; case TCP_KEEPCNT: ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT, &tcp_sock_set_keepcnt, &msk->keepalive_cnt, val); break; default: ret = -ENOPROTOOPT; } release_sock(sk); return ret; } int mptcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { struct mptcp_sock *msk = mptcp_sk(sk); struct sock *ssk; pr_debug("msk=%p\n", msk); if (level == SOL_SOCKET) return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); if (!mptcp_supported_sockopt(level, optname)) return -ENOPROTOOPT; /* @@ the meaning of setsockopt() when the socket is connected and * there are multiple subflows is not yet defined. It is up to the * MPTCP-level socket to configure the subflows until the subflow * is in TCP fallback, when TCP socket options are passed through * to the one remaining subflow. */ lock_sock(sk); ssk = __mptcp_tcp_fallback(msk); release_sock(sk); if (ssk) return tcp_setsockopt(ssk, level, optname, optval, optlen); if (level == SOL_IP) return mptcp_setsockopt_v4(msk, optname, optval, optlen); if (level == SOL_IPV6) return mptcp_setsockopt_v6(msk, optname, optval, optlen); if (level == SOL_TCP) return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); return -EOPNOTSUPP; } static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = (struct sock *)msk; struct sock *ssk; int ret; lock_sock(sk); ssk = msk->first; if (ssk) { ret = tcp_getsockopt(ssk, level, optname, optval, optlen); goto out; } ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { ret = PTR_ERR(ssk); goto out; } ret = tcp_getsockopt(ssk, level, optname, optval, optlen); out: release_sock(sk); return ret; } void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) { struct sock *sk = (struct sock *)msk; u32 flags = 0; bool slow; u32 now; memset(info, 0, sizeof(*info)); info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); if (inet_sk_state_load(sk) == TCP_LISTEN) return; /* The following limits only make sense for the in-kernel PM */ if (mptcp_pm_is_kernel(msk)) { info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk); info->mptcpi_add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); info->mptcpi_add_addr_accepted_max = mptcp_pm_get_add_addr_accept_max(msk); info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk); } if (__mptcp_check_fallback(msk)) flags |= MPTCP_INFO_FLAG_FALLBACK; if (READ_ONCE(msk->can_ack)) flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; info->mptcpi_flags = flags; slow = lock_sock_fast(sk); info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); info->mptcpi_token = msk->token; info->mptcpi_write_seq = msk->write_seq; info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; info->mptcpi_bytes_sent = msk->bytes_sent; info->mptcpi_bytes_received = msk->bytes_received; info->mptcpi_bytes_retrans = msk->bytes_retrans; info->mptcpi_subflows_total = info->mptcpi_subflows + __mptcp_has_initial_subflow(msk); now = tcp_jiffies32; info->mptcpi_last_data_sent = jiffies_to_msecs(now - msk->last_data_sent); info->mptcpi_last_data_recv = jiffies_to_msecs(now - msk->last_data_recv); unlock_sock_fast(sk, slow); mptcp_data_lock(sk); info->mptcpi_last_ack_recv = jiffies_to_msecs(now - msk->last_ack_recv); info->mptcpi_snd_una = msk->snd_una; info->mptcpi_rcv_nxt = msk->ack_seq; info->mptcpi_bytes_acked = msk->bytes_acked; mptcp_data_unlock(sk); } EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) { struct mptcp_info m_info; int len; if (get_user(len, optlen)) return -EFAULT; /* When used only to check if a fallback to TCP happened. */ if (len == 0) return 0; len = min_t(unsigned int, len, sizeof(struct mptcp_info)); mptcp_diag_fill_info(msk, &m_info); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &m_info, len)) return -EFAULT; return 0; } static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, char __user *optval, u32 copied, int __user *optlen) { u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); if (copied) copied += sfd->size_subflow_data; else copied = copylen; if (put_user(copied, optlen)) return -EFAULT; if (copy_to_user(optval, sfd, copylen)) return -EFAULT; return 0; } static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, char __user *optval, int __user *optlen) { int len, copylen; if (get_user(len, optlen)) return -EFAULT; /* if mptcp_subflow_data size is changed, need to adjust * this function to deal with programs using old version. */ BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); if (len < MIN_INFO_OPTLEN_SIZE) return -EINVAL; memset(sfd, 0, sizeof(*sfd)); copylen = min_t(unsigned int, len, sizeof(*sfd)); if (copy_from_user(sfd, optval, copylen)) return -EFAULT; /* size_subflow_data is u32, but len is signed */ if (sfd->size_subflow_data > INT_MAX || sfd->size_user > INT_MAX) return -EINVAL; if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || sfd->size_subflow_data > len) return -EINVAL; if (sfd->num_subflows || sfd->size_kernel) return -EINVAL; return len - sfd->size_subflow_data; } static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, int __user *optlen) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; unsigned int sfcount = 0, copied = 0; struct mptcp_subflow_data sfd; char __user *infoptr; int len; len = mptcp_get_subflow_data(&sfd, optval, optlen); if (len < 0) return len; sfd.size_kernel = sizeof(struct tcp_info); sfd.size_user = min_t(unsigned int, sfd.size_user, sizeof(struct tcp_info)); infoptr = optval + sfd.size_subflow_data; lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); ++sfcount; if (len && len >= sfd.size_user) { struct tcp_info info; tcp_get_info(ssk, &info); if (copy_to_user(infoptr, &info, sfd.size_user)) { release_sock(sk); return -EFAULT; } infoptr += sfd.size_user; copied += sfd.size_user; len -= sfd.size_user; } } release_sock(sk); sfd.num_subflows = sfcount; if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) return -EFAULT; return 0; } static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) { const struct inet_sock *inet = inet_sk(sk); memset(a, 0, sizeof(*a)); if (sk->sk_family == AF_INET) { a->sin_local.sin_family = AF_INET; a->sin_local.sin_port = inet->inet_sport; a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; if (!a->sin_local.sin_addr.s_addr) a->sin_local.sin_addr.s_addr = inet->inet_saddr; a->sin_remote.sin_family = AF_INET; a->sin_remote.sin_port = inet->inet_dport; a->sin_remote.sin_addr.s_addr = inet->inet_daddr; #if IS_ENABLED(CONFIG_IPV6) } else if (sk->sk_family == AF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); if (WARN_ON_ONCE(!np)) return; a->sin6_local.sin6_family = AF_INET6; a->sin6_local.sin6_port = inet->inet_sport; if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) a->sin6_local.sin6_addr = np->saddr; else a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; a->sin6_remote.sin6_family = AF_INET6; a->sin6_remote.sin6_port = inet->inet_dport; a->sin6_remote.sin6_addr = sk->sk_v6_daddr; #endif } } static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, int __user *optlen) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; unsigned int sfcount = 0, copied = 0; struct mptcp_subflow_data sfd; char __user *addrptr; int len; len = mptcp_get_subflow_data(&sfd, optval, optlen); if (len < 0) return len; sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); sfd.size_user = min_t(unsigned int, sfd.size_user, sizeof(struct mptcp_subflow_addrs)); addrptr = optval + sfd.size_subflow_data; lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); ++sfcount; if (len && len >= sfd.size_user) { struct mptcp_subflow_addrs a; mptcp_get_sub_addrs(ssk, &a); if (copy_to_user(addrptr, &a, sfd.size_user)) { release_sock(sk); return -EFAULT; } addrptr += sfd.size_user; copied += sfd.size_user; len -= sfd.size_user; } } release_sock(sk); sfd.num_subflows = sfcount; if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) return -EFAULT; return 0; } static int mptcp_get_full_info(struct mptcp_full_info *mfi, char __user *optval, int __user *optlen) { int len; BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != MIN_FULL_INFO_OPTLEN_SIZE); if (get_user(len, optlen)) return -EFAULT; if (len < MIN_FULL_INFO_OPTLEN_SIZE) return -EINVAL; memset(mfi, 0, sizeof(*mfi)); if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) return -EFAULT; if (mfi->size_tcpinfo_kernel || mfi->size_sfinfo_kernel || mfi->num_subflows) return -EINVAL; if (mfi->size_sfinfo_user > INT_MAX || mfi->size_tcpinfo_user > INT_MAX) return -EINVAL; return len - MIN_FULL_INFO_OPTLEN_SIZE; } static int mptcp_put_full_info(struct mptcp_full_info *mfi, char __user *optval, u32 copylen, int __user *optlen) { copylen += MIN_FULL_INFO_OPTLEN_SIZE; if (put_user(copylen, optlen)) return -EFAULT; if (copy_to_user(optval, mfi, copylen)) return -EFAULT; return 0; } static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) { unsigned int sfcount = 0, copylen = 0; struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; void __user *tcpinfoptr, *sfinfoptr; struct mptcp_full_info mfi; int len; len = mptcp_get_full_info(&mfi, optval, optlen); if (len < 0) return len; /* don't bother filling the mptcp info if there is not enough * user-space-provided storage */ if (len > 0) { mptcp_diag_fill_info(msk, &mfi.mptcp_info); copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); } mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, sizeof(struct tcp_info)); sfinfoptr = u64_to_user_ptr(mfi.subflow_info); mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, sizeof(struct mptcp_subflow_info)); tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_subflow_info sfinfo; struct tcp_info tcp_info; if (sfcount++ >= mfi.size_arrays_user) continue; /* fetch addr/tcp_info only if the user space buffers * are wide enough */ memset(&sfinfo, 0, sizeof(sfinfo)); sfinfo.id = subflow->subflow_id; if (mfi.size_sfinfo_user > offsetof(struct mptcp_subflow_info, addrs)) mptcp_get_sub_addrs(ssk, &sfinfo.addrs); if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) goto fail_release; if (mfi.size_tcpinfo_user) { tcp_get_info(ssk, &tcp_info); if (copy_to_user(tcpinfoptr, &tcp_info, mfi.size_tcpinfo_user)) goto fail_release; } tcpinfoptr += mfi.size_tcpinfo_user; sfinfoptr += mfi.size_sfinfo_user; } release_sock(sk); mfi.num_subflows = sfcount; if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) return -EFAULT; return 0; fail_release: release_sock(sk); return -EFAULT; } static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, int __user *optlen, int val) { int len; if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { unsigned char ucval = (unsigned char)val; len = 1; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &ucval, 1)) return -EFAULT; } else { len = min_t(unsigned int, len, sizeof(int)); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; } return 0; } static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, char __user *optval, int __user *optlen) { struct sock *sk = (void *)msk; switch (optname) { case TCP_ULP: case TCP_CONGESTION: case TCP_INFO: case TCP_CC_INFO: case TCP_DEFER_ACCEPT: case TCP_FASTOPEN: case TCP_FASTOPEN_CONNECT: case TCP_FASTOPEN_KEY: case TCP_FASTOPEN_NO_COOKIE: return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); case TCP_INQ: return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); case TCP_CORK: return mptcp_put_int_option(msk, optval, optlen, msk->cork); case TCP_NODELAY: return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); case TCP_KEEPIDLE: return mptcp_put_int_option(msk, optval, optlen, msk->keepalive_idle ? : READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ); case TCP_KEEPINTVL: return mptcp_put_int_option(msk, optval, optlen, msk->keepalive_intvl ? : READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ); case TCP_KEEPCNT: return mptcp_put_int_option(msk, optval, optlen, msk->keepalive_cnt ? : READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes)); case TCP_NOTSENT_LOWAT: return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); case TCP_IS_MPTCP: return mptcp_put_int_option(msk, optval, optlen, 1); } return -EOPNOTSUPP; } static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, char __user *optval, int __user *optlen) { struct sock *sk = (void *)msk; switch (optname) { case IP_TOS: return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); case IP_BIND_ADDRESS_NO_PORT: return mptcp_put_int_option(msk, optval, optlen, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); case IP_LOCAL_PORT_RANGE: return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->local_port_range)); } return -EOPNOTSUPP; } static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, char __user *optval, int __user *optlen) { switch (optname) { case MPTCP_INFO: return mptcp_getsockopt_info(msk, optval, optlen); case MPTCP_FULL_INFO: return mptcp_getsockopt_full_info(msk, optval, optlen); case MPTCP_TCPINFO: return mptcp_getsockopt_tcpinfo(msk, optval, optlen); case MPTCP_SUBFLOW_ADDRS: return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); } return -EOPNOTSUPP; } int mptcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *option) { struct mptcp_sock *msk = mptcp_sk(sk); struct sock *ssk; pr_debug("msk=%p\n", msk); /* @@ the meaning of setsockopt() when the socket is connected and * there are multiple subflows is not yet defined. It is up to the * MPTCP-level socket to configure the subflows until the subflow * is in TCP fallback, when socket options are passed through * to the one remaining subflow. */ lock_sock(sk); ssk = __mptcp_tcp_fallback(msk); release_sock(sk); if (ssk) return tcp_getsockopt(ssk, level, optname, optval, option); if (level == SOL_IP) return mptcp_getsockopt_v4(msk, optname, optval, option); if (level == SOL_TCP) return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); if (level == SOL_MPTCP) return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); return -EOPNOTSUPP; } static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) { static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; struct sock *sk = (struct sock *)msk; if (ssk->sk_prot->keepalive) { if (sock_flag(sk, SOCK_KEEPOPEN)) ssk->sk_prot->keepalive(ssk, 1); else ssk->sk_prot->keepalive(ssk, 0); } ssk->sk_priority = sk->sk_priority; ssk->sk_bound_dev_if = sk->sk_bound_dev_if; ssk->sk_incoming_cpu = sk->sk_incoming_cpu; ssk->sk_ipv6only = sk->sk_ipv6only; __ip_sock_set_tos(ssk, inet_sk(sk)->tos); if (sk->sk_userlocks & tx_rx_locks) { ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) { WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; } if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); } if (sock_flag(sk, SOCK_LINGER)) { ssk->sk_lingertime = sk->sk_lingertime; sock_set_flag(ssk, SOCK_LINGER); } else { sock_reset_flag(ssk, SOCK_LINGER); } if (sk->sk_mark != ssk->sk_mark) { ssk->sk_mark = sk->sk_mark; sk_dst_reset(ssk); } sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) tcp_set_congestion_control(ssk, msk->ca_name, false, true); __tcp_sock_set_cork(ssk, !!msk->cork); __tcp_sock_set_nodelay(ssk, !!msk->nodelay); tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle); tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl); tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt); inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range)); } void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); msk_owned_by_me(msk); ssk->sk_rcvlowat = 0; /* subflows must ignore any latency-related settings: will not affect * the user-space - only the msk is relevant - but will foul the * mptcp scheduler */ tcp_sk(ssk)->notsent_lowat = UINT_MAX; if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { sync_socket_options(msk, ssk); subflow->setsockopt_seq = msk->setsockopt_seq; } } /* unfortunately this is different enough from the tcp version so * that we can't factor it out */ int mptcp_set_rcvlowat(struct sock *sk, int val) { struct mptcp_subflow_context *subflow; int space, cap; /* bpf can land here with a wrong sk type */ if (sk->sk_protocol == IPPROTO_TCP) return -EINVAL; if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) cap = sk->sk_rcvbuf >> 1; else cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; val = min(val, cap); WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); /* Check if we need to signal EPOLLIN right now */ if (mptcp_epollin_ready(sk)) sk->sk_data_ready(sk); if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) return 0; space = mptcp_space_from_win(sk, val); if (space <= sk->sk_rcvbuf) return 0; /* propagate the rcvbuf changes to all the subflows */ WRITE_ONCE(sk->sk_rcvbuf, space); mptcp_for_each_subflow(mptcp_sk(sk), subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow; slow = lock_sock_fast(ssk); WRITE_ONCE(ssk->sk_rcvbuf, space); WRITE_ONCE(tcp_sk(ssk)->window_clamp, val); unlock_sock_fast(ssk, slow); } return 0; }
4 1 588 4 25 25 25 25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/mount.h> #include <linux/seq_file.h> #include <linux/poll.h> #include <linux/ns_common.h> #include <linux/fs_pin.h> struct mnt_namespace { struct ns_common ns; struct mount * root; struct { struct rb_root mounts; /* Protected by namespace_sem */ struct rb_node *mnt_last_node; /* last (rightmost) mount in the rbtree */ struct rb_node *mnt_first_node; /* first (leftmost) mount in the rbtree */ }; struct user_namespace *user_ns; struct ucounts *ucounts; u64 seq; /* Sequence number to prevent loops */ union { wait_queue_head_t poll; struct rcu_head mnt_ns_rcu; }; u64 event; unsigned int nr_mounts; /* # of mounts in the namespace */ unsigned int pending_mounts; struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */ struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */ refcount_t passive; /* number references not pinning @mounts */ } __randomize_layout; struct mnt_pcp { int mnt_count; int mnt_writers; }; struct mountpoint { struct hlist_node m_hash; struct dentry *m_dentry; struct hlist_head m_list; int m_count; }; struct mount { struct hlist_node mnt_hash; struct mount *mnt_parent; struct dentry *mnt_mountpoint; struct vfsmount mnt; union { struct rb_node mnt_node; /* node in the ns->mounts rbtree */ struct rcu_head mnt_rcu; struct llist_node mnt_llist; }; #ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp; #else int mnt_count; int mnt_writers; #endif struct list_head mnt_mounts; /* list of children, anchored here */ struct list_head mnt_child; /* and going through their mnt_child */ struct list_head mnt_instance; /* mount instance on sb->s_mounts */ const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ struct list_head mnt_list; struct list_head mnt_expire; /* link in fs-specific expiry list */ struct list_head mnt_share; /* circular list of shared mounts */ struct list_head mnt_slave_list;/* list of slave mounts */ struct list_head mnt_slave; /* slave list entry */ struct mount *mnt_master; /* slave is on master->mnt_slave_list */ struct mnt_namespace *mnt_ns; /* containing namespace */ struct mountpoint *mnt_mp; /* where is it mounted */ union { struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ struct hlist_node mnt_umount; }; struct list_head mnt_umounting; /* list entry for umount propagation */ #ifdef CONFIG_FSNOTIFY struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; #endif int mnt_id; /* mount identifier, reused */ u64 mnt_id_unique; /* mount ID unique until reboot */ int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ struct hlist_head mnt_pins; struct hlist_head mnt_stuck_children; } __randomize_layout; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ static inline struct mount *real_mount(struct vfsmount *mnt) { return container_of(mnt, struct mount, mnt); } static inline int mnt_has_parent(struct mount *mnt) { return mnt != mnt->mnt_parent; } static inline int is_mounted(struct vfsmount *mnt) { /* neither detached nor internal? */ return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns); } extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); extern int __legitimize_mnt(struct vfsmount *, unsigned); static inline bool __path_is_mountpoint(const struct path *path) { struct mount *m = __lookup_mnt(path->mnt, path->dentry); return m && likely(!(m->mnt.mnt_flags & MNT_SYNC_UMOUNT)); } extern void __detach_mounts(struct dentry *dentry); static inline void detach_mounts(struct dentry *dentry) { if (!d_mountpoint(dentry)) return; __detach_mounts(dentry); } static inline void get_mnt_ns(struct mnt_namespace *ns) { refcount_inc(&ns->ns.count); } extern seqlock_t mount_lock; struct proc_mounts { struct mnt_namespace *ns; struct path root; int (*show)(struct seq_file *, struct vfsmount *); }; extern const struct seq_operations mounts_op; extern bool __is_local_mountpoint(struct dentry *dentry); static inline bool is_local_mountpoint(struct dentry *dentry) { if (!d_mountpoint(dentry)) return false; return __is_local_mountpoint(dentry); } static inline bool is_anon_ns(struct mnt_namespace *ns) { return ns->seq == 0; } static inline bool mnt_ns_attached(const struct mount *mnt) { return !RB_EMPTY_NODE(&mnt->mnt_node); } static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list) { struct mnt_namespace *ns = mnt->mnt_ns; WARN_ON(!mnt_ns_attached(mnt)); if (ns->mnt_last_node == &mnt->mnt_node) ns->mnt_last_node = rb_prev(&mnt->mnt_node); if (ns->mnt_first_node == &mnt->mnt_node) ns->mnt_first_node = rb_next(&mnt->mnt_node); rb_erase(&mnt->mnt_node, &ns->mounts); RB_CLEAR_NODE(&mnt->mnt_node); list_add_tail(&mnt->mnt_list, dt_list); } bool has_locked_children(struct mount *mnt, struct dentry *dentry); struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mnt_ns, bool previous); static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns) { return container_of(ns, struct mnt_namespace, ns); }
18 184 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __CGROUP_INTERNAL_H #define __CGROUP_INTERNAL_H #include <linux/cgroup.h> #include <linux/kernfs.h> #include <linux/workqueue.h> #include <linux/list.h> #include <linux/refcount.h> #include <linux/fs_parser.h> #define TRACE_CGROUP_PATH_LEN 1024 extern spinlock_t trace_cgroup_path_lock; extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN]; extern void __init enable_debug_cgroup(void); /* * cgroup_path() takes a spin lock. It is good practice not to take * spin locks within trace point handlers, as they are mostly hidden * from normal view. As cgroup_path() can take the kernfs_rename_lock * spin lock, it is best to not call that function from the trace event * handler. * * Note: trace_cgroup_##type##_enabled() is a static branch that will only * be set when the trace event is enabled. */ #define TRACE_CGROUP_PATH(type, cgrp, ...) \ do { \ if (trace_cgroup_##type##_enabled()) { \ unsigned long flags; \ spin_lock_irqsave(&trace_cgroup_path_lock, \ flags); \ cgroup_path(cgrp, trace_cgroup_path, \ TRACE_CGROUP_PATH_LEN); \ trace_cgroup_##type(cgrp, trace_cgroup_path, \ ##__VA_ARGS__); \ spin_unlock_irqrestore(&trace_cgroup_path_lock, \ flags); \ } \ } while (0) /* * The cgroup filesystem superblock creation/mount context. */ struct cgroup_fs_context { struct kernfs_fs_context kfc; struct cgroup_root *root; struct cgroup_namespace *ns; unsigned int flags; /* CGRP_ROOT_* flags */ /* cgroup1 bits */ bool cpuset_clone_children; bool none; /* User explicitly requested empty subsystem */ bool all_ss; /* Seen 'all' option */ u16 subsys_mask; /* Selected subsystems */ char *name; /* Hierarchy name */ char *release_agent; /* Path for release notifications */ }; static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) { struct kernfs_fs_context *kfc = fc->fs_private; return container_of(kfc, struct cgroup_fs_context, kfc); } struct cgroup_pidlist; struct cgroup_file_ctx { struct cgroup_namespace *ns; struct { void *trigger; } psi; struct { bool started; struct css_task_iter iter; } procs; struct { struct cgroup_pidlist *pidlist; } procs1; struct cgroup_of_peak peak; }; /* * A cgroup can be associated with multiple css_sets as different tasks may * belong to different cgroups on different hierarchies. In the other * direction, a css_set is naturally associated with multiple cgroups. * This M:N relationship is represented by the following link structure * which exists for each association and allows traversing the associations * from both sides. */ struct cgrp_cset_link { /* the cgroup and css_set this link associates */ struct cgroup *cgrp; struct css_set *cset; /* list of cgrp_cset_links anchored at cgrp->cset_links */ struct list_head cset_link; /* list of cgrp_cset_links anchored at css_set->cgrp_links */ struct list_head cgrp_link; }; /* used to track tasks and csets during migration */ struct cgroup_taskset { /* the src and dst cset list running through cset->mg_node */ struct list_head src_csets; struct list_head dst_csets; /* the number of tasks in the set */ int nr_tasks; /* the subsys currently being processed */ int ssid; /* * Fields for cgroup_taskset_*() iteration. * * Before migration is committed, the target migration tasks are on * ->mg_tasks of the csets on ->src_csets. After, on ->mg_tasks of * the csets on ->dst_csets. ->csets point to either ->src_csets * or ->dst_csets depending on whether migration is committed. * * ->cur_csets and ->cur_task point to the current task position * during iteration. */ struct list_head *csets; struct css_set *cur_cset; struct task_struct *cur_task; }; /* migration context also tracks preloading */ struct cgroup_mgctx { /* * Preloaded source and destination csets. Used to guarantee * atomic success or failure on actual migration. */ struct list_head preloaded_src_csets; struct list_head preloaded_dst_csets; /* tasks and csets to migrate */ struct cgroup_taskset tset; /* subsystems affected by migration */ u16 ss_mask; }; #define CGROUP_TASKSET_INIT(tset) \ { \ .src_csets = LIST_HEAD_INIT(tset.src_csets), \ .dst_csets = LIST_HEAD_INIT(tset.dst_csets), \ .csets = &tset.src_csets, \ } #define CGROUP_MGCTX_INIT(name) \ { \ LIST_HEAD_INIT(name.preloaded_src_csets), \ LIST_HEAD_INIT(name.preloaded_dst_csets), \ CGROUP_TASKSET_INIT(name.tset), \ } #define DEFINE_CGROUP_MGCTX(name) \ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; /* iterate across the hierarchies */ #define for_each_root(root) \ list_for_each_entry_rcu((root), &cgroup_roots, root_list, \ lockdep_is_held(&cgroup_mutex)) /** * for_each_subsys - iterate all enabled cgroup subsystems * @ss: the iteration cursor * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end */ #define for_each_subsys(ss, ssid) \ for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT && \ (((ss) = cgroup_subsys[ssid]) || true); (ssid)++) static inline bool cgroup_is_dead(const struct cgroup *cgrp) { return !(cgrp->self.flags & CSS_ONLINE); } static inline bool notify_on_release(const struct cgroup *cgrp) { return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); } void put_css_set_locked(struct css_set *cset); static inline void put_css_set(struct css_set *cset) { unsigned long flags; /* * Ensure that the refcount doesn't hit zero while any readers * can see it. Similar to atomic_dec_and_lock(), but for an * rwlock */ if (refcount_dec_not_one(&cset->refcount)) return; spin_lock_irqsave(&css_set_lock, flags); put_css_set_locked(cset); spin_unlock_irqrestore(&css_set_lock, flags); } /* * refcounted get/put for css_set objects */ static inline void get_css_set(struct css_set *cset) { refcount_inc(&cset->refcount); } bool cgroup_ssid_enabled(int ssid); bool cgroup_on_dfl(const struct cgroup *cgrp); struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root); struct cgroup *task_cgroup_from_root(struct task_struct *task, struct cgroup_root *root); struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline); void cgroup_kn_unlock(struct kernfs_node *kn); int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); void cgroup_favor_dynmods(struct cgroup_root *root, bool favor); void cgroup_free_root(struct cgroup_root *root); void init_cgroup_root(struct cgroup_fs_context *ctx); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); int cgroup_do_get_tree(struct fs_context *fc); int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp); void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp, struct cgroup_mgctx *mgctx); int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx); int cgroup_migrate(struct task_struct *leader, bool threadgroup, struct cgroup_mgctx *mgctx); int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, bool threadgroup); void cgroup_attach_lock(bool lock_threadgroup); void cgroup_attach_unlock(bool lock_threadgroup); struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, bool *locked) __acquires(&cgroup_threadgroup_rwsem); void cgroup_procs_write_finish(struct task_struct *task, bool locked) __releases(&cgroup_threadgroup_rwsem); void cgroup_lock_and_drain_offline(struct cgroup *cgrp); int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode); int cgroup_rmdir(struct kernfs_node *kn); int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, struct kernfs_root *kf_root); int __cgroup_task_count(const struct cgroup *cgrp); int cgroup_task_count(const struct cgroup *cgrp); /* * rstat.c */ int cgroup_rstat_init(struct cgroup *cgrp); void cgroup_rstat_exit(struct cgroup *cgrp); void cgroup_rstat_boot(void); void cgroup_base_stat_cputime_show(struct seq_file *seq); /* * namespace.c */ extern const struct proc_ns_operations cgroupns_operations; /* * cgroup-v1.c */ extern struct cftype cgroup1_base_files[]; extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops; extern const struct fs_parameter_spec cgroup1_fs_parameters[]; int proc_cgroupstats_show(struct seq_file *m, void *v); bool cgroup1_ssid_disabled(int ssid); void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); void cgroup1_release_agent(struct work_struct *work); void cgroup1_check_for_release(struct cgroup *cgrp); int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param); int cgroup1_get_tree(struct fs_context *fc); int cgroup1_reconfigure(struct fs_context *ctx); #endif /* __CGROUP_INTERNAL_H */
64 63 184 185 106 106 134 2 2 3 1 6 6 3 3 3 2 2 273 322 274 4 274 147 203 463 118 274 204 463 463 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/kernel.h> #include <linux/slab.h> #include <net/act_api.h> #include <net/flow_offload.h> #include <linux/rtnetlink.h> #include <linux/mutex.h> #include <linux/rhashtable.h> struct flow_rule *flow_rule_alloc(unsigned int num_actions) { struct flow_rule *rule; int i; rule = kzalloc(struct_size(rule, action.entries, num_actions), GFP_KERNEL); if (!rule) return NULL; rule->action.num_entries = num_actions; /* Pre-fill each action hw_stats with DONT_CARE. * Caller can override this if it wants stats for a given action. */ for (i = 0; i < num_actions; i++) rule->action.entries[i].hw_stats = FLOW_ACTION_HW_STATS_DONT_CARE; return rule; } EXPORT_SYMBOL(flow_rule_alloc); struct flow_offload_action *offload_action_alloc(unsigned int num_actions) { struct flow_offload_action *fl_action; int i; fl_action = kzalloc(struct_size(fl_action, action.entries, num_actions), GFP_KERNEL); if (!fl_action) return NULL; fl_action->action.num_entries = num_actions; /* Pre-fill each action hw_stats with DONT_CARE. * Caller can override this if it wants stats for a given action. */ for (i = 0; i < num_actions; i++) fl_action->action.entries[i].hw_stats = FLOW_ACTION_HW_STATS_DONT_CARE; return fl_action; } #define FLOW_DISSECTOR_MATCH(__rule, __type, __out) \ const struct flow_match *__m = &(__rule)->match; \ struct flow_dissector *__d = (__m)->dissector; \ \ (__out)->key = skb_flow_dissector_target(__d, __type, (__m)->key); \ (__out)->mask = skb_flow_dissector_target(__d, __type, (__m)->mask); \ void flow_rule_match_meta(const struct flow_rule *rule, struct flow_match_meta *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_META, out); } EXPORT_SYMBOL(flow_rule_match_meta); void flow_rule_match_basic(const struct flow_rule *rule, struct flow_match_basic *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_BASIC, out); } EXPORT_SYMBOL(flow_rule_match_basic); void flow_rule_match_control(const struct flow_rule *rule, struct flow_match_control *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CONTROL, out); } EXPORT_SYMBOL(flow_rule_match_control); void flow_rule_match_eth_addrs(const struct flow_rule *rule, struct flow_match_eth_addrs *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS, out); } EXPORT_SYMBOL(flow_rule_match_eth_addrs); void flow_rule_match_vlan(const struct flow_rule *rule, struct flow_match_vlan *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_VLAN, out); } EXPORT_SYMBOL(flow_rule_match_vlan); void flow_rule_match_cvlan(const struct flow_rule *rule, struct flow_match_vlan *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CVLAN, out); } EXPORT_SYMBOL(flow_rule_match_cvlan); void flow_rule_match_arp(const struct flow_rule *rule, struct flow_match_arp *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ARP, out); } EXPORT_SYMBOL(flow_rule_match_arp); void flow_rule_match_ipv4_addrs(const struct flow_rule *rule, struct flow_match_ipv4_addrs *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS, out); } EXPORT_SYMBOL(flow_rule_match_ipv4_addrs); void flow_rule_match_ipv6_addrs(const struct flow_rule *rule, struct flow_match_ipv6_addrs *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS, out); } EXPORT_SYMBOL(flow_rule_match_ipv6_addrs); void flow_rule_match_ip(const struct flow_rule *rule, struct flow_match_ip *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IP, out); } EXPORT_SYMBOL(flow_rule_match_ip); void flow_rule_match_ports(const struct flow_rule *rule, struct flow_match_ports *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PORTS, out); } EXPORT_SYMBOL(flow_rule_match_ports); void flow_rule_match_ports_range(const struct flow_rule *rule, struct flow_match_ports_range *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PORTS_RANGE, out); } EXPORT_SYMBOL(flow_rule_match_ports_range); void flow_rule_match_tcp(const struct flow_rule *rule, struct flow_match_tcp *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_TCP, out); } EXPORT_SYMBOL(flow_rule_match_tcp); void flow_rule_match_ipsec(const struct flow_rule *rule, struct flow_match_ipsec *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPSEC, out); } EXPORT_SYMBOL(flow_rule_match_ipsec); void flow_rule_match_icmp(const struct flow_rule *rule, struct flow_match_icmp *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ICMP, out); } EXPORT_SYMBOL(flow_rule_match_icmp); void flow_rule_match_mpls(const struct flow_rule *rule, struct flow_match_mpls *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_MPLS, out); } EXPORT_SYMBOL(flow_rule_match_mpls); void flow_rule_match_enc_control(const struct flow_rule *rule, struct flow_match_control *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, out); } EXPORT_SYMBOL(flow_rule_match_enc_control); void flow_rule_match_enc_ipv4_addrs(const struct flow_rule *rule, struct flow_match_ipv4_addrs *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, out); } EXPORT_SYMBOL(flow_rule_match_enc_ipv4_addrs); void flow_rule_match_enc_ipv6_addrs(const struct flow_rule *rule, struct flow_match_ipv6_addrs *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, out); } EXPORT_SYMBOL(flow_rule_match_enc_ipv6_addrs); void flow_rule_match_enc_ip(const struct flow_rule *rule, struct flow_match_ip *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IP, out); } EXPORT_SYMBOL(flow_rule_match_enc_ip); void flow_rule_match_enc_ports(const struct flow_rule *rule, struct flow_match_ports *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, out); } EXPORT_SYMBOL(flow_rule_match_enc_ports); void flow_rule_match_enc_keyid(const struct flow_rule *rule, struct flow_match_enc_keyid *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, out); } EXPORT_SYMBOL(flow_rule_match_enc_keyid); void flow_rule_match_enc_opts(const struct flow_rule *rule, struct flow_match_enc_opts *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_OPTS, out); } EXPORT_SYMBOL(flow_rule_match_enc_opts); struct flow_action_cookie *flow_action_cookie_create(void *data, unsigned int len, gfp_t gfp) { struct flow_action_cookie *cookie; cookie = kmalloc(sizeof(*cookie) + len, gfp); if (!cookie) return NULL; cookie->cookie_len = len; memcpy(cookie->cookie, data, len); return cookie; } EXPORT_SYMBOL(flow_action_cookie_create); void flow_action_cookie_destroy(struct flow_action_cookie *cookie) { kfree(cookie); } EXPORT_SYMBOL(flow_action_cookie_destroy); void flow_rule_match_ct(const struct flow_rule *rule, struct flow_match_ct *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CT, out); } EXPORT_SYMBOL(flow_rule_match_ct); void flow_rule_match_pppoe(const struct flow_rule *rule, struct flow_match_pppoe *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PPPOE, out); } EXPORT_SYMBOL(flow_rule_match_pppoe); void flow_rule_match_l2tpv3(const struct flow_rule *rule, struct flow_match_l2tpv3 *out) { FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_L2TPV3, out); } EXPORT_SYMBOL(flow_rule_match_l2tpv3); struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)) { struct flow_block_cb *block_cb; block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL); if (!block_cb) return ERR_PTR(-ENOMEM); block_cb->cb = cb; block_cb->cb_ident = cb_ident; block_cb->cb_priv = cb_priv; block_cb->release = release; return block_cb; } EXPORT_SYMBOL(flow_block_cb_alloc); void flow_block_cb_free(struct flow_block_cb *block_cb) { if (block_cb->release) block_cb->release(block_cb->cb_priv); kfree(block_cb); } EXPORT_SYMBOL(flow_block_cb_free); struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block, flow_setup_cb_t *cb, void *cb_ident) { struct flow_block_cb *block_cb; list_for_each_entry(block_cb, &block->cb_list, list) { if (block_cb->cb == cb && block_cb->cb_ident == cb_ident) return block_cb; } return NULL; } EXPORT_SYMBOL(flow_block_cb_lookup); void *flow_block_cb_priv(struct flow_block_cb *block_cb) { return block_cb->cb_priv; } EXPORT_SYMBOL(flow_block_cb_priv); void flow_block_cb_incref(struct flow_block_cb *block_cb) { block_cb->refcnt++; } EXPORT_SYMBOL(flow_block_cb_incref); unsigned int flow_block_cb_decref(struct flow_block_cb *block_cb) { return --block_cb->refcnt; } EXPORT_SYMBOL(flow_block_cb_decref); bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident, struct list_head *driver_block_list) { struct flow_block_cb *block_cb; list_for_each_entry(block_cb, driver_block_list, driver_list) { if (block_cb->cb == cb && block_cb->cb_ident == cb_ident) return true; } return false; } EXPORT_SYMBOL(flow_block_cb_is_busy); int flow_block_cb_setup_simple(struct flow_block_offload *f, struct list_head *driver_block_list, flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, bool ingress_only) { struct flow_block_cb *block_cb; if (ingress_only && f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; f->driver_block_list = driver_block_list; switch (f->command) { case FLOW_BLOCK_BIND: if (flow_block_cb_is_busy(cb, cb_ident, driver_block_list)) return -EBUSY; block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, NULL); if (IS_ERR(block_cb)) return PTR_ERR(block_cb); flow_block_cb_add(block_cb, f); list_add_tail(&block_cb->driver_list, driver_block_list); return 0; case FLOW_BLOCK_UNBIND: block_cb = flow_block_cb_lookup(f->block, cb, cb_ident); if (!block_cb) return -ENOENT; flow_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); return 0; default: return -EOPNOTSUPP; } } EXPORT_SYMBOL(flow_block_cb_setup_simple); static DEFINE_MUTEX(flow_indr_block_lock); static LIST_HEAD(flow_block_indr_list); static LIST_HEAD(flow_block_indr_dev_list); static LIST_HEAD(flow_indir_dev_list); struct flow_indr_dev { struct list_head list; flow_indr_block_bind_cb_t *cb; void *cb_priv; refcount_t refcnt; }; static struct flow_indr_dev *flow_indr_dev_alloc(flow_indr_block_bind_cb_t *cb, void *cb_priv) { struct flow_indr_dev *indr_dev; indr_dev = kmalloc(sizeof(*indr_dev), GFP_KERNEL); if (!indr_dev) return NULL; indr_dev->cb = cb; indr_dev->cb_priv = cb_priv; refcount_set(&indr_dev->refcnt, 1); return indr_dev; } struct flow_indir_dev_info { void *data; struct net_device *dev; struct Qdisc *sch; enum tc_setup_type type; void (*cleanup)(struct flow_block_cb *block_cb); struct list_head list; enum flow_block_command command; enum flow_block_binder_type binder_type; struct list_head *cb_list; }; static void existing_qdiscs_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) { struct flow_block_offload bo; struct flow_indir_dev_info *cur; list_for_each_entry(cur, &flow_indir_dev_list, list) { memset(&bo, 0, sizeof(bo)); bo.command = cur->command; bo.binder_type = cur->binder_type; INIT_LIST_HEAD(&bo.cb_list); cb(cur->dev, cur->sch, cb_priv, cur->type, &bo, cur->data, cur->cleanup); list_splice(&bo.cb_list, cur->cb_list); } } int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) { struct flow_indr_dev *indr_dev; mutex_lock(&flow_indr_block_lock); list_for_each_entry(indr_dev, &flow_block_indr_dev_list, list) { if (indr_dev->cb == cb && indr_dev->cb_priv == cb_priv) { refcount_inc(&indr_dev->refcnt); mutex_unlock(&flow_indr_block_lock); return 0; } } indr_dev = flow_indr_dev_alloc(cb, cb_priv); if (!indr_dev) { mutex_unlock(&flow_indr_block_lock); return -ENOMEM; } list_add(&indr_dev->list, &flow_block_indr_dev_list); existing_qdiscs_register(cb, cb_priv); mutex_unlock(&flow_indr_block_lock); tcf_action_reoffload_cb(cb, cb_priv, true); return 0; } EXPORT_SYMBOL(flow_indr_dev_register); static void __flow_block_indr_cleanup(void (*release)(void *cb_priv), void *cb_priv, struct list_head *cleanup_list) { struct flow_block_cb *this, *next; list_for_each_entry_safe(this, next, &flow_block_indr_list, indr.list) { if (this->release == release && this->indr.cb_priv == cb_priv) list_move(&this->indr.list, cleanup_list); } } static void flow_block_indr_notify(struct list_head *cleanup_list) { struct flow_block_cb *this, *next; list_for_each_entry_safe(this, next, cleanup_list, indr.list) { list_del(&this->indr.list); this->indr.cleanup(this); } } void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, void (*release)(void *cb_priv)) { struct flow_indr_dev *this, *next, *indr_dev = NULL; LIST_HEAD(cleanup_list); mutex_lock(&flow_indr_block_lock); list_for_each_entry_safe(this, next, &flow_block_indr_dev_list, list) { if (this->cb == cb && this->cb_priv == cb_priv && refcount_dec_and_test(&this->refcnt)) { indr_dev = this; list_del(&indr_dev->list); break; } } if (!indr_dev) { mutex_unlock(&flow_indr_block_lock); return; } __flow_block_indr_cleanup(release, cb_priv, &cleanup_list); mutex_unlock(&flow_indr_block_lock); tcf_action_reoffload_cb(cb, cb_priv, false); flow_block_indr_notify(&cleanup_list); kfree(indr_dev); } EXPORT_SYMBOL(flow_indr_dev_unregister); static void flow_block_indr_init(struct flow_block_cb *flow_block, struct flow_block_offload *bo, struct net_device *dev, struct Qdisc *sch, void *data, void *cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { flow_block->indr.binder_type = bo->binder_type; flow_block->indr.data = data; flow_block->indr.cb_priv = cb_priv; flow_block->indr.dev = dev; flow_block->indr.sch = sch; flow_block->indr.cleanup = cleanup; } struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv), struct flow_block_offload *bo, struct net_device *dev, struct Qdisc *sch, void *data, void *indr_cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { struct flow_block_cb *block_cb; block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, release); if (IS_ERR(block_cb)) goto out; flow_block_indr_init(block_cb, bo, dev, sch, data, indr_cb_priv, cleanup); list_add(&block_cb->indr.list, &flow_block_indr_list); out: return block_cb; } EXPORT_SYMBOL(flow_indr_block_cb_alloc); static struct flow_indir_dev_info *find_indir_dev(void *data) { struct flow_indir_dev_info *cur; list_for_each_entry(cur, &flow_indir_dev_list, list) { if (cur->data == data) return cur; } return NULL; } static int indir_dev_add(void *data, struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void (*cleanup)(struct flow_block_cb *block_cb), struct flow_block_offload *bo) { struct flow_indir_dev_info *info; info = find_indir_dev(data); if (info) return -EEXIST; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; info->data = data; info->dev = dev; info->sch = sch; info->type = type; info->cleanup = cleanup; info->command = bo->command; info->binder_type = bo->binder_type; info->cb_list = bo->cb_list_head; list_add(&info->list, &flow_indir_dev_list); return 0; } static int indir_dev_remove(void *data) { struct flow_indir_dev_info *info; info = find_indir_dev(data); if (!info) return -ENOENT; list_del(&info->list); kfree(info); return 0; } int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void *data, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)) { struct flow_indr_dev *this; u32 count = 0; int err; mutex_lock(&flow_indr_block_lock); if (bo) { if (bo->command == FLOW_BLOCK_BIND) indir_dev_add(data, dev, sch, type, cleanup, bo); else if (bo->command == FLOW_BLOCK_UNBIND) indir_dev_remove(data); } list_for_each_entry(this, &flow_block_indr_dev_list, list) { err = this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup); if (!err) count++; } mutex_unlock(&flow_indr_block_lock); return (bo && list_empty(&bo->cb_list)) ? -EOPNOTSUPP : count; } EXPORT_SYMBOL(flow_indr_dev_setup_offload); bool flow_indr_dev_exists(void) { return !list_empty(&flow_block_indr_dev_list); } EXPORT_SYMBOL(flow_indr_dev_exists);
135 4 8 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright 2006, Johannes Berg <johannes@sipsolutions.net> */ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/leds.h> #include "ieee80211_i.h" #define MAC80211_BLINK_DELAY 50 /* ms */ static inline void ieee80211_led_rx(struct ieee80211_local *local) { #ifdef CONFIG_MAC80211_LEDS if (!atomic_read(&local->rx_led_active)) return; led_trigger_blink_oneshot(&local->rx_led, MAC80211_BLINK_DELAY, MAC80211_BLINK_DELAY, 0); #endif } static inline void ieee80211_led_tx(struct ieee80211_local *local) { #ifdef CONFIG_MAC80211_LEDS if (!atomic_read(&local->tx_led_active)) return; led_trigger_blink_oneshot(&local->tx_led, MAC80211_BLINK_DELAY, MAC80211_BLINK_DELAY, 0); #endif } #ifdef CONFIG_MAC80211_LEDS void ieee80211_led_assoc(struct ieee80211_local *local, bool associated); void ieee80211_led_radio(struct ieee80211_local *local, bool enabled); void ieee80211_alloc_led_names(struct ieee80211_local *local); void ieee80211_free_led_names(struct ieee80211_local *local); void ieee80211_led_init(struct ieee80211_local *local); void ieee80211_led_exit(struct ieee80211_local *local); void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, unsigned int types_on, unsigned int types_off); #else static inline void ieee80211_led_assoc(struct ieee80211_local *local, bool associated) { } static inline void ieee80211_led_radio(struct ieee80211_local *local, bool enabled) { } static inline void ieee80211_alloc_led_names(struct ieee80211_local *local) { } static inline void ieee80211_free_led_names(struct ieee80211_local *local) { } static inline void ieee80211_led_init(struct ieee80211_local *local) { } static inline void ieee80211_led_exit(struct ieee80211_local *local) { } static inline void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, unsigned int types_on, unsigned int types_off) { } #endif static inline void ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, int bytes) { #ifdef CONFIG_MAC80211_LEDS if (atomic_read(&local->tpt_led_active)) local->tpt_led_trigger->tx_bytes += bytes; #endif } static inline void ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, int bytes) { #ifdef CONFIG_MAC80211_LEDS if (atomic_read(&local->tpt_led_active)) local->tpt_led_trigger->rx_bytes += bytes; #endif }
1 4 1 3 33 36 31 4 1 35 43 5 36 2 5 25 11 24 5 2 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C)2002 USAGI/WIDE Project * * Authors * * Mitsuru KANDA @USAGI : IPv6 Support * Kazunori MIYAZAWA @USAGI : * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * * This file is derived from net/ipv4/esp.c */ #define pr_fmt(fmt) "IPv6: " fmt #include <crypto/aead.h> #include <crypto/authenc.h> #include <linux/err.h> #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/esp.h> #include <linux/scatterlist.h> #include <linux/kernel.h> #include <linux/pfkeyv2.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <net/ip6_checksum.h> #include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/udp.h> #include <linux/icmpv6.h> #include <net/tcp.h> #include <net/espintcp.h> #include <net/inet6_hashtables.h> #include <linux/skbuff_ref.h> #include <linux/highmem.h> struct esp_skb_cb { struct xfrm_skb_cb xfrm; void *tmp; }; struct esp_output_extra { __be32 seqhi; u32 esphoff; }; #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) /* * Allocate an AEAD request structure with extra space for SG and IV. * * For alignment considerations the upper 32 bits of the sequence number are * placed at the front, if present. Followed by the IV, the request and finally * the SG list. * * TODO: Use spare space in skb for this where possible. */ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen) { unsigned int len; len = seqihlen; len += crypto_aead_ivsize(aead); if (len) { len += crypto_aead_alignmask(aead) & ~(crypto_tfm_ctx_alignment() - 1); len = ALIGN(len, crypto_tfm_ctx_alignment()); } len += sizeof(struct aead_request) + crypto_aead_reqsize(aead); len = ALIGN(len, __alignof__(struct scatterlist)); len += sizeof(struct scatterlist) * nfrags; return kmalloc(len, GFP_ATOMIC); } static inline void *esp_tmp_extra(void *tmp) { return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra)); } static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) { return crypto_aead_ivsize(aead) ? PTR_ALIGN((u8 *)tmp + seqhilen, crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; } static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) { struct aead_request *req; req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead), crypto_tfm_ctx_alignment()); aead_request_set_tfm(req, aead); return req; } static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, struct aead_request *req) { return (void *)ALIGN((unsigned long)(req + 1) + crypto_aead_reqsize(aead), __alignof__(struct scatterlist)); } static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) { struct crypto_aead *aead = x->data; int extralen = 0; u8 *iv; struct aead_request *req; struct scatterlist *sg; if (x->props.flags & XFRM_STATE_ESN) extralen += sizeof(struct esp_output_extra); iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); /* Unref skb_frag_pages in the src scatterlist if necessary. * Skip the first sg which comes from skb->data. */ if (req->src != req->dst) for (sg = sg_next(req->src); sg; sg = sg_next(sg)) skb_page_unref(page_to_netmem(sg_page(sg)), skb->pp_recycle); } #ifdef CONFIG_INET6_ESPINTCP struct esp_tcp_sk { struct sock *sk; struct rcu_head rcu; }; static void esp_free_tcp_sk(struct rcu_head *head) { struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); sock_put(esk->sk); kfree(esk); } static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) { struct xfrm_encap_tmpl *encap = x->encap; struct net *net = xs_net(x); struct esp_tcp_sk *esk; __be16 sport, dport; struct sock *nsk; struct sock *sk; sk = rcu_dereference(x->encap_sk); if (sk && sk->sk_state == TCP_ESTABLISHED) return sk; spin_lock_bh(&x->lock); sport = encap->encap_sport; dport = encap->encap_dport; nsk = rcu_dereference_protected(x->encap_sk, lockdep_is_held(&x->lock)); if (sk && sk == nsk) { esk = kmalloc(sizeof(*esk), GFP_ATOMIC); if (!esk) { spin_unlock_bh(&x->lock); return ERR_PTR(-ENOMEM); } RCU_INIT_POINTER(x->encap_sk, NULL); esk->sk = sk; call_rcu(&esk->rcu, esp_free_tcp_sk); } spin_unlock_bh(&x->lock); sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6, dport, &x->props.saddr.in6, ntohs(sport), 0, 0); if (!sk) return ERR_PTR(-ENOENT); if (!tcp_is_ulp_esp(sk)) { sock_put(sk); return ERR_PTR(-EINVAL); } spin_lock_bh(&x->lock); nsk = rcu_dereference_protected(x->encap_sk, lockdep_is_held(&x->lock)); if (encap->encap_sport != sport || encap->encap_dport != dport) { sock_put(sk); sk = nsk ?: ERR_PTR(-EREMCHG); } else if (sk == nsk) { sock_put(sk); } else { rcu_assign_pointer(x->encap_sk, sk); } spin_unlock_bh(&x->lock); return sk; } static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) { struct sock *sk; int err; rcu_read_lock(); sk = esp6_find_tcp_sk(x); err = PTR_ERR_OR_ZERO(sk); if (err) goto out; bh_lock_sock(sk); if (sock_owned_by_user(sk)) err = espintcp_queue_out(sk, skb); else err = espintcp_push_skb(sk, skb); bh_unlock_sock(sk); out: rcu_read_unlock(); return err; } static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; return esp_output_tcp_finish(x, skb); } static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) { int err; local_bh_disable(); err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb); local_bh_enable(); /* EINPROGRESS just happens to do the right thing. It * actually means that the skb has been consumed and * isn't coming back. */ return err ?: -EINPROGRESS; } #else static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) { WARN_ON(1); return -EOPNOTSUPP; } #endif static void esp_output_encap_csum(struct sk_buff *skb) { /* UDP encap with IPv6 requires a valid checksum */ if (*skb_mac_header(skb) == IPPROTO_UDP) { struct udphdr *uh = udp_hdr(skb); struct ipv6hdr *ip6h = ipv6_hdr(skb); int len = ntohs(uh->len); unsigned int offset = skb_transport_offset(skb); __wsum csum = skb_checksum(skb, offset, skb->len - offset, 0); uh->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, len, IPPROTO_UDP, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; } } static void esp_output_done(void *data, int err) { struct sk_buff *skb = data; struct xfrm_offload *xo = xfrm_offload(skb); void *tmp; struct xfrm_state *x; if (xo && (xo->flags & XFRM_DEV_RESUME)) { struct sec_path *sp = skb_sec_path(skb); x = sp->xvec[sp->len - 1]; } else { x = skb_dst(skb)->xfrm; } tmp = ESP_SKB_CB(skb)->tmp; esp_ssg_unref(x, tmp, skb); kfree(tmp); esp_output_encap_csum(skb); if (xo && (xo->flags & XFRM_DEV_RESUME)) { if (err) { XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); kfree_skb(skb); return; } skb_push(skb, skb->data - skb_mac_header(skb)); secpath_reset(skb); xfrm_dev_resume(skb); } else { if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) esp_output_tail_tcp(x, skb); else xfrm_output_resume(skb_to_full_sk(skb), skb, err); } } /* Move ESP header back into place. */ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) { struct ip_esp_hdr *esph = (void *)(skb->data + offset); void *tmp = ESP_SKB_CB(skb)->tmp; __be32 *seqhi = esp_tmp_extra(tmp); esph->seq_no = esph->spi; esph->spi = *seqhi; } static void esp_output_restore_header(struct sk_buff *skb) { void *tmp = ESP_SKB_CB(skb)->tmp; struct esp_output_extra *extra = esp_tmp_extra(tmp); esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff - sizeof(__be32)); } static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb, struct xfrm_state *x, struct ip_esp_hdr *esph, struct esp_output_extra *extra) { /* For ESN we move the header forward by 4 bytes to * accommodate the high bits. We will move it back after * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { __u32 seqhi; struct xfrm_offload *xo = xfrm_offload(skb); if (xo) seqhi = xo->seq.hi; else seqhi = XFRM_SKB_CB(skb)->seq.output.hi; extra->esphoff = (unsigned char *)esph - skb_transport_header(skb); esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); extra->seqhi = esph->spi; esph->seq_no = htonl(seqhi); } esph->spi = x->id.spi; return esph; } static void esp_output_done_esn(void *data, int err) { struct sk_buff *skb = data; esp_output_restore_header(skb); esp_output_done(data, err); } static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb, int encap_type, struct esp_info *esp, __be16 sport, __be16 dport) { struct udphdr *uh; unsigned int len; len = skb->len + esp->tailen - skb_transport_offset(skb); if (len > U16_MAX) return ERR_PTR(-EMSGSIZE); uh = (struct udphdr *)esp->esph; uh->source = sport; uh->dest = dport; uh->len = htons(len); uh->check = 0; *skb_mac_header(skb) = IPPROTO_UDP; return (struct ip_esp_hdr *)(uh + 1); } #ifdef CONFIG_INET6_ESPINTCP static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { __be16 *lenp = (void *)esp->esph; struct ip_esp_hdr *esph; unsigned int len; struct sock *sk; len = skb->len + esp->tailen - skb_transport_offset(skb); if (len > IP_MAX_MTU) return ERR_PTR(-EMSGSIZE); rcu_read_lock(); sk = esp6_find_tcp_sk(x); rcu_read_unlock(); if (IS_ERR(sk)) return ERR_CAST(sk); *lenp = htons(len); esph = (struct ip_esp_hdr *)(lenp + 1); return esph; } #else static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { return ERR_PTR(-EOPNOTSUPP); } #endif static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { struct xfrm_encap_tmpl *encap = x->encap; struct ip_esp_hdr *esph; __be16 sport, dport; int encap_type; spin_lock_bh(&x->lock); sport = encap->encap_sport; dport = encap->encap_dport; encap_type = encap->encap_type; spin_unlock_bh(&x->lock); switch (encap_type) { default: case UDP_ENCAP_ESPINUDP: esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport); break; case TCP_ENCAP_ESPINTCP: esph = esp6_output_tcp_encap(x, skb, esp); break; } if (IS_ERR(esph)) return PTR_ERR(esph); esp->esph = esph; return 0; } int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; int nfrags; int esph_offset; struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; if (x->encap) { int err = esp6_output_encap(x, skb, esp); if (err < 0) return err; } if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE || ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE) goto cow; if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { nfrags = 1; trailer = skb; tail = skb_tail_pointer(trailer); goto skip_cow; } else if ((skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS) && !skb_has_frag_list(skb)) { int allocsize; struct sock *sk = skb->sk; struct page_frag *pfrag = &x->xfrag; esp->inplace = false; allocsize = ALIGN(tailen, L1_CACHE_BYTES); spin_lock_bh(&x->lock); if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { spin_unlock_bh(&x->lock); goto cow; } page = pfrag->page; get_page(page); tail = page_address(page) + pfrag->offset; esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, tailen); skb_shinfo(skb)->nr_frags = ++nfrags; pfrag->offset = pfrag->offset + allocsize; spin_unlock_bh(&x->lock); nfrags++; skb->len += tailen; skb->data_len += tailen; skb->truesize += tailen; if (sk && sk_fullsock(sk)) refcount_add(tailen, &sk->sk_wmem_alloc); goto out; } } cow: esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb); nfrags = skb_cow_data(skb, tailen, &trailer); if (nfrags < 0) goto out; tail = skb_tail_pointer(trailer); esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset); skip_cow: esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); pskb_put(skb, trailer, tailen); out: return nfrags; } EXPORT_SYMBOL_GPL(esp6_output_head); int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *iv; int alen; void *tmp; int ivlen; int assoclen; int extralen; struct page *page; struct ip_esp_hdr *esph; struct aead_request *req; struct crypto_aead *aead; struct scatterlist *sg, *dsg; struct esp_output_extra *extra; int err = -ENOMEM; assoclen = sizeof(struct ip_esp_hdr); extralen = 0; if (x->props.flags & XFRM_STATE_ESN) { extralen += sizeof(*extra); assoclen += sizeof(__be32); } aead = x->data; alen = crypto_aead_authsize(aead); ivlen = crypto_aead_ivsize(aead); tmp = esp_alloc_tmp(aead, esp->nfrags + 2, extralen); if (!tmp) goto error; extra = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); if (esp->inplace) dsg = sg; else dsg = &sg[esp->nfrags]; esph = esp_output_set_esn(skb, x, esp->esph, extra); esp->esph = esph; sg_init_table(sg, esp->nfrags); err = skb_to_sgvec(skb, sg, (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); if (unlikely(err < 0)) goto error_free; if (!esp->inplace) { int allocsize; struct page_frag *pfrag = &x->xfrag; allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES); spin_lock_bh(&x->lock); if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { spin_unlock_bh(&x->lock); goto error_free; } skb_shinfo(skb)->nr_frags = 1; page = pfrag->page; get_page(page); /* replace page frags in skb with new page */ __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len); pfrag->offset = pfrag->offset + allocsize; spin_unlock_bh(&x->lock); sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1); err = skb_to_sgvec(skb, dsg, (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); if (unlikely(err < 0)) goto error_free; } if ((x->props.flags & XFRM_STATE_ESN)) aead_request_set_callback(req, 0, esp_output_done_esn, skb); else aead_request_set_callback(req, 0, esp_output_done, skb); aead_request_set_crypt(req, sg, dsg, ivlen + esp->clen, iv); aead_request_set_ad(req, assoclen); memset(iv, 0, ivlen); memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&esp->seqno + 8 - min(ivlen, 8), min(ivlen, 8)); ESP_SKB_CB(skb)->tmp = tmp; err = crypto_aead_encrypt(req); switch (err) { case -EINPROGRESS: goto error; case -ENOSPC: err = NET_XMIT_DROP; break; case 0: if ((x->props.flags & XFRM_STATE_ESN)) esp_output_restore_header(skb); esp_output_encap_csum(skb); } if (sg != dsg) esp_ssg_unref(x, tmp, skb); if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) err = esp_output_tail_tcp(x, skb); error_free: kfree(tmp); error: return err; } EXPORT_SYMBOL_GPL(esp6_output_tail); static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) { int alen; int blksize; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct esp_info esp; esp.inplace = true; esp.proto = *skb_mac_header(skb); *skb_mac_header(skb) = IPPROTO_ESP; /* skb is pure payload to encrypt */ aead = x->data; alen = crypto_aead_authsize(aead); esp.tfclen = 0; if (x->tfcpad) { struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); u32 padto; padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached)); if (skb->len < padto) esp.tfclen = padto - skb->len; } blksize = ALIGN(crypto_aead_blocksize(aead), 4); esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize); esp.plen = esp.clen - skb->len - esp.tfclen; esp.tailen = esp.tfclen + esp.plen + alen; esp.esph = ip_esp_hdr(skb); esp.nfrags = esp6_output_head(x, skb, &esp); if (esp.nfrags < 0) return esp.nfrags; esph = esp.esph; esph->spi = x->id.spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); esp.seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); skb_push(skb, -skb_network_offset(skb)); return esp6_output_tail(x, skb, &esp); } static inline int esp_remove_trailer(struct sk_buff *skb) { struct xfrm_state *x = xfrm_input_state(skb); struct crypto_aead *aead = x->data; int alen, hlen, elen; int padlen, trimlen; __wsum csumdiff; u8 nexthdr[2]; int ret; alen = crypto_aead_authsize(aead); hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); elen = skb->len - hlen; ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2); BUG_ON(ret); ret = -EINVAL; padlen = nexthdr[0]; if (padlen + 2 + alen >= elen) { net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen + 2, elen - alen); goto out; } trimlen = alen + padlen + 2; if (skb->ip_summed == CHECKSUM_COMPLETE) { csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0); skb->csum = csum_block_sub(skb->csum, csumdiff, skb->len - trimlen); } ret = pskb_trim(skb, skb->len - trimlen); if (unlikely(ret)) return ret; ret = nexthdr[1]; out: return ret; } int esp6_input_done2(struct sk_buff *skb, int err) { struct xfrm_state *x = xfrm_input_state(skb); struct xfrm_offload *xo = xfrm_offload(skb); struct crypto_aead *aead = x->data; int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); int hdr_len = skb_network_header_len(skb); if (!xo || !(xo->flags & CRYPTO_DONE)) kfree(ESP_SKB_CB(skb)->tmp); if (unlikely(err)) goto out; err = esp_remove_trailer(skb); if (unlikely(err < 0)) goto out; if (x->encap) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); int offset = skb_network_offset(skb) + sizeof(*ip6h); struct xfrm_encap_tmpl *encap = x->encap; u8 nexthdr = ip6h->nexthdr; __be16 frag_off, source; struct udphdr *uh; struct tcphdr *th; offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); if (offset == -1) { err = -EINVAL; goto out; } uh = (void *)(skb->data + offset); th = (void *)(skb->data + offset); hdr_len += offset; switch (x->encap->encap_type) { case TCP_ENCAP_ESPINTCP: source = th->source; break; case UDP_ENCAP_ESPINUDP: source = uh->source; break; default: WARN_ON_ONCE(1); err = -EINVAL; goto out; } /* * 1) if the NAT-T peer's IP or port changed then * advertise the change to the keying daemon. * This is an inbound SA, so just compare * SRC ports. */ if (!ipv6_addr_equal(&ip6h->saddr, &x->props.saddr.in6) || source != encap->encap_sport) { xfrm_address_t ipaddr; memcpy(&ipaddr.a6, &ip6h->saddr.s6_addr, sizeof(ipaddr.a6)); km_new_mapping(x, &ipaddr, source); /* XXX: perhaps add an extra * policy check here, to see * if we should allow or * reject a packet from a * different source * address/port. */ } /* * 2) ignore UDP/TCP checksums in case * of NAT-T in Transport Mode, or * perform other post-processing fixes * as per draft-ietf-ipsec-udp-encaps-06, * section 3.1.2 */ if (x->props.mode == XFRM_MODE_TRANSPORT) skb->ip_summed = CHECKSUM_UNNECESSARY; } skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); skb_pull_rcsum(skb, hlen); if (x->props.mode == XFRM_MODE_TUNNEL || x->props.mode == XFRM_MODE_IPTFS) skb_reset_transport_header(skb); else skb_set_transport_header(skb, -hdr_len); /* RFC4303: Drop dummy packets without any error */ if (err == IPPROTO_NONE) err = -EINVAL; out: return err; } EXPORT_SYMBOL_GPL(esp6_input_done2); static void esp_input_done(void *data, int err) { struct sk_buff *skb = data; xfrm_input_resume(skb, esp6_input_done2(skb, err)); } static void esp_input_restore_header(struct sk_buff *skb) { esp_restore_header(skb, 0); __skb_pull(skb, 4); } static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) { struct xfrm_state *x = xfrm_input_state(skb); /* For ESN we move the header forward by 4 bytes to * accommodate the high bits. We will move it back after * decryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { struct ip_esp_hdr *esph = skb_push(skb, 4); *seqhi = esph->spi; esph->spi = esph->seq_no; esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; } } static void esp_input_done_esn(void *data, int err) { struct sk_buff *skb = data; esp_input_restore_header(skb); esp_input_done(data, err); } static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) { struct crypto_aead *aead = x->data; struct aead_request *req; struct sk_buff *trailer; int ivlen = crypto_aead_ivsize(aead); int elen = skb->len - sizeof(struct ip_esp_hdr) - ivlen; int nfrags; int assoclen; int seqhilen; int ret = 0; void *tmp; __be32 *seqhi; u8 *iv; struct scatterlist *sg; if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + ivlen)) { ret = -EINVAL; goto out; } if (elen <= 0) { ret = -EINVAL; goto out; } assoclen = sizeof(struct ip_esp_hdr); seqhilen = 0; if (x->props.flags & XFRM_STATE_ESN) { seqhilen += sizeof(__be32); assoclen += seqhilen; } if (!skb_cloned(skb)) { if (!skb_is_nonlinear(skb)) { nfrags = 1; goto skip_cow; } else if (!skb_has_frag_list(skb)) { nfrags = skb_shinfo(skb)->nr_frags; nfrags++; goto skip_cow; } } nfrags = skb_cow_data(skb, 0, &trailer); if (nfrags < 0) { ret = -EINVAL; goto out; } skip_cow: ret = -ENOMEM; tmp = esp_alloc_tmp(aead, nfrags, seqhilen); if (!tmp) goto out; ESP_SKB_CB(skb)->tmp = tmp; seqhi = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); esp_input_set_header(skb, seqhi); sg_init_table(sg, nfrags); ret = skb_to_sgvec(skb, sg, 0, skb->len); if (unlikely(ret < 0)) { kfree(tmp); goto out; } skb->ip_summed = CHECKSUM_NONE; if ((x->props.flags & XFRM_STATE_ESN)) aead_request_set_callback(req, 0, esp_input_done_esn, skb); else aead_request_set_callback(req, 0, esp_input_done, skb); aead_request_set_crypt(req, sg, sg, elen + ivlen, iv); aead_request_set_ad(req, assoclen); ret = crypto_aead_decrypt(req); if (ret == -EINPROGRESS) goto out; if ((x->props.flags & XFRM_STATE_ESN)) esp_input_restore_header(skb); ret = esp6_input_done2(skb, ret); out: return ret; } static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset); struct xfrm_state *x; if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return 0; x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); else ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; } static void esp6_destroy(struct xfrm_state *x) { struct crypto_aead *aead = x->data; if (!aead) return; crypto_free_aead(aead); } static int esp_init_aead(struct xfrm_state *x, struct netlink_ext_ack *extack) { char aead_name[CRYPTO_MAX_ALG_NAME]; struct crypto_aead *aead; int err; if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) { NL_SET_ERR_MSG(extack, "Algorithm name is too long"); return -ENAMETOOLONG; } aead = crypto_alloc_aead(aead_name, 0, 0); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; x->data = aead; err = crypto_aead_setkey(aead, x->aead->alg_key, (x->aead->alg_key_len + 7) / 8); if (err) goto error; err = crypto_aead_setauthsize(aead, x->aead->alg_icv_len / 8); if (err) goto error; return 0; error: NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); return err; } static int esp_init_authenc(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct crypto_aead *aead; struct crypto_authenc_key_param *param; struct rtattr *rta; char *key; char *p; char authenc_name[CRYPTO_MAX_ALG_NAME]; unsigned int keylen; int err; err = -ENAMETOOLONG; if ((x->props.flags & XFRM_STATE_ESN)) { if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "%s%sauthencesn(%s,%s)%s", x->geniv ?: "", x->geniv ? "(" : "", x->aalg ? x->aalg->alg_name : "digest_null", x->ealg->alg_name, x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) { NL_SET_ERR_MSG(extack, "Algorithm name is too long"); goto error; } } else { if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "%s%sauthenc(%s,%s)%s", x->geniv ?: "", x->geniv ? "(" : "", x->aalg ? x->aalg->alg_name : "digest_null", x->ealg->alg_name, x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) { NL_SET_ERR_MSG(extack, "Algorithm name is too long"); goto error; } } aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); if (IS_ERR(aead)) { NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto error; } x->data = aead; keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) + (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param)); err = -ENOMEM; key = kmalloc(keylen, GFP_KERNEL); if (!key) goto error; p = key; rta = (void *)p; rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM; rta->rta_len = RTA_LENGTH(sizeof(*param)); param = RTA_DATA(rta); p += RTA_SPACE(sizeof(*param)); if (x->aalg) { struct xfrm_algo_desc *aalg_desc; memcpy(p, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8); p += (x->aalg->alg_key_len + 7) / 8; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); err = -EINVAL; if (aalg_desc->uinfo.auth.icv_fullbits / 8 != crypto_aead_authsize(aead)) { NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto free_key; } err = crypto_aead_setauthsize( aead, x->aalg->alg_trunc_len / 8); if (err) { NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto free_key; } } param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8); memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8); err = crypto_aead_setkey(aead, key, keylen); free_key: kfree(key); error: return err; } static int esp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct crypto_aead *aead; u32 align; int err; x->data = NULL; if (x->aead) { err = esp_init_aead(x, extack); } else if (x->ealg) { err = esp_init_authenc(x, extack); } else { NL_SET_ERR_MSG(extack, "ESP: AEAD or CRYPT must be provided"); err = -EINVAL; } if (err) goto error; aead = x->data; x->props.header_len = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); switch (x->props.mode) { case XFRM_MODE_BEET: if (x->sel.family != AF_INET6) x->props.header_len += IPV4_BEET_PHMAXLEN + (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); break; default: case XFRM_MODE_TRANSPORT: break; case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); break; } if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; switch (encap->encap_type) { default: NL_SET_ERR_MSG(extack, "Unsupported encapsulation type for ESP"); err = -EINVAL; goto error; case UDP_ENCAP_ESPINUDP: x->props.header_len += sizeof(struct udphdr); break; #ifdef CONFIG_INET6_ESPINTCP case TCP_ENCAP_ESPINTCP: /* only the length field, TCP encap is done by * the socket */ x->props.header_len += 2; break; #endif } } align = ALIGN(crypto_aead_blocksize(aead), 4); x->props.trailer_len = align + 1 + crypto_aead_authsize(aead); error: return err; } static int esp6_rcv_cb(struct sk_buff *skb, int err) { return 0; } static const struct xfrm_type esp6_type = { .owner = THIS_MODULE, .proto = IPPROTO_ESP, .flags = XFRM_TYPE_REPLAY_PROT, .init_state = esp6_init_state, .destructor = esp6_destroy, .input = esp6_input, .output = esp6_output, }; static struct xfrm6_protocol esp6_protocol = { .handler = xfrm6_rcv, .input_handler = xfrm_input, .cb_handler = esp6_rcv_cb, .err_handler = esp6_err, .priority = 0, }; static int __init esp6_init(void) { if (xfrm_register_type(&esp6_type, AF_INET6) < 0) { pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (xfrm6_protocol_register(&esp6_protocol, IPPROTO_ESP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&esp6_type, AF_INET6); return -EAGAIN; } return 0; } static void __exit esp6_fini(void) { if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0) pr_info("%s: can't remove protocol\n", __func__); xfrm_unregister_type(&esp6_type, AF_INET6); } module_init(esp6_init); module_exit(esp6_fini); MODULE_DESCRIPTION("IPv6 ESP transformation helpers"); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP);
10 8 3 8 3 8 5 4 8 10 10 6 1 5 5 8 8 7 12 8 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 /* * Poly1305 authenticator algorithm, RFC7539 * * Copyright (C) 2015 Martin Willi * * Based on public domain code by Andrew Moon and Daniel J. Bernstein. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ #include <crypto/algapi.h> #include <crypto/internal/hash.h> #include <crypto/internal/poly1305.h> #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/unaligned.h> static int crypto_poly1305_init(struct shash_desc *desc) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); poly1305_core_init(&dctx->h); dctx->buflen = 0; dctx->rset = 0; dctx->sset = false; return 0; } static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { if (!dctx->sset) { if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { poly1305_core_setkey(&dctx->core_r, src); src += POLY1305_BLOCK_SIZE; srclen -= POLY1305_BLOCK_SIZE; dctx->rset = 2; } if (srclen >= POLY1305_BLOCK_SIZE) { dctx->s[0] = get_unaligned_le32(src + 0); dctx->s[1] = get_unaligned_le32(src + 4); dctx->s[2] = get_unaligned_le32(src + 8); dctx->s[3] = get_unaligned_le32(src + 12); src += POLY1305_BLOCK_SIZE; srclen -= POLY1305_BLOCK_SIZE; dctx->sset = true; } } return srclen; } static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { unsigned int datalen; if (unlikely(!dctx->sset)) { datalen = crypto_poly1305_setdesckey(dctx, src, srclen); src += srclen - datalen; srclen = datalen; } poly1305_core_blocks(&dctx->h, &dctx->core_r, src, srclen / POLY1305_BLOCK_SIZE, 1); } static int crypto_poly1305_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); unsigned int bytes; if (unlikely(dctx->buflen)) { bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); memcpy(dctx->buf + dctx->buflen, src, bytes); src += bytes; srclen -= bytes; dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE); dctx->buflen = 0; } } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { poly1305_blocks(dctx, src, srclen); src += srclen - (srclen % POLY1305_BLOCK_SIZE); srclen %= POLY1305_BLOCK_SIZE; } if (unlikely(srclen)) { dctx->buflen = srclen; memcpy(dctx->buf, src, srclen); } return 0; } static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); if (unlikely(!dctx->sset)) return -ENOKEY; poly1305_final_generic(dctx, dst); return 0; } static struct shash_alg poly1305_alg = { .digestsize = POLY1305_DIGEST_SIZE, .init = crypto_poly1305_init, .update = crypto_poly1305_update, .final = crypto_poly1305_final, .descsize = sizeof(struct poly1305_desc_ctx), .base = { .cra_name = "poly1305", .cra_driver_name = "poly1305-generic", .cra_priority = 100, .cra_blocksize = POLY1305_BLOCK_SIZE, .cra_module = THIS_MODULE, }, }; static int __init poly1305_mod_init(void) { return crypto_register_shash(&poly1305_alg); } static void __exit poly1305_mod_exit(void) { crypto_unregister_shash(&poly1305_alg); } subsys_initcall(poly1305_mod_init); module_exit(poly1305_mod_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); MODULE_DESCRIPTION("Poly1305 authenticator"); MODULE_ALIAS_CRYPTO("poly1305"); MODULE_ALIAS_CRYPTO("poly1305-generic");
12 12 13 12 12 12 12 12 12 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 // SPDX-License-Identifier: GPL-2.0-only /* * LED support for the input layer * * Copyright 2010-2015 Samuel Thibault <samuel.thibault@ens-lyon.org> */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/init.h> #include <linux/leds.h> #include <linux/input.h> #if IS_ENABLED(CONFIG_VT) #define VT_TRIGGER(_name) .trigger = _name #else #define VT_TRIGGER(_name) .trigger = NULL #endif #if IS_ENABLED(CONFIG_SND_CTL_LED) #define AUDIO_TRIGGER(_name) .trigger = _name #else #define AUDIO_TRIGGER(_name) .trigger = NULL #endif static const struct { const char *name; const char *trigger; } input_led_info[LED_CNT] = { [LED_NUML] = { "numlock", VT_TRIGGER("kbd-numlock") }, [LED_CAPSL] = { "capslock", VT_TRIGGER("kbd-capslock") }, [LED_SCROLLL] = { "scrolllock", VT_TRIGGER("kbd-scrolllock") }, [LED_COMPOSE] = { "compose" }, [LED_KANA] = { "kana", VT_TRIGGER("kbd-kanalock") }, [LED_SLEEP] = { "sleep" } , [LED_SUSPEND] = { "suspend" }, [LED_MUTE] = { "mute", AUDIO_TRIGGER("audio-mute") }, [LED_MISC] = { "misc" }, [LED_MAIL] = { "mail" }, [LED_CHARGING] = { "charging" }, }; struct input_led { struct led_classdev cdev; struct input_handle *handle; unsigned int code; /* One of LED_* constants */ }; struct input_leds { struct input_handle handle; unsigned int num_leds; struct input_led leds[] __counted_by(num_leds); }; static enum led_brightness input_leds_brightness_get(struct led_classdev *cdev) { struct input_led *led = container_of(cdev, struct input_led, cdev); struct input_dev *input = led->handle->dev; return test_bit(led->code, input->led) ? cdev->max_brightness : 0; } static void input_leds_brightness_set(struct led_classdev *cdev, enum led_brightness brightness) { struct input_led *led = container_of(cdev, struct input_led, cdev); input_inject_event(led->handle, EV_LED, led->code, !!brightness); } static void input_leds_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) { } static int input_leds_get_count(struct input_dev *dev) { unsigned int led_code; int count = 0; for_each_set_bit(led_code, dev->ledbit, LED_CNT) if (input_led_info[led_code].name) count++; return count; } static int input_leds_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { struct input_leds *leds; struct input_led *led; unsigned int num_leds; unsigned int led_code; int led_no; int error; num_leds = input_leds_get_count(dev); if (!num_leds) return -ENXIO; leds = kzalloc(struct_size(leds, leds, num_leds), GFP_KERNEL); if (!leds) return -ENOMEM; leds->num_leds = num_leds; leds->handle.dev = dev; leds->handle.handler = handler; leds->handle.name = "leds"; leds->handle.private = leds; error = input_register_handle(&leds->handle); if (error) goto err_free_mem; error = input_open_device(&leds->handle); if (error) goto err_unregister_handle; led_no = 0; for_each_set_bit(led_code, dev->ledbit, LED_CNT) { if (!input_led_info[led_code].name) continue; led = &leds->leds[led_no]; led->handle = &leds->handle; led->code = led_code; led->cdev.name = kasprintf(GFP_KERNEL, "%s::%s", dev_name(&dev->dev), input_led_info[led_code].name); if (!led->cdev.name) { error = -ENOMEM; goto err_unregister_leds; } led->cdev.max_brightness = 1; led->cdev.brightness_get = input_leds_brightness_get; led->cdev.brightness_set = input_leds_brightness_set; led->cdev.default_trigger = input_led_info[led_code].trigger; error = led_classdev_register(&dev->dev, &led->cdev); if (error) { dev_err(&dev->dev, "failed to register LED %s: %d\n", led->cdev.name, error); kfree(led->cdev.name); goto err_unregister_leds; } led_no++; } return 0; err_unregister_leds: while (--led_no >= 0) { struct input_led *led = &leds->leds[led_no]; led_classdev_unregister(&led->cdev); kfree(led->cdev.name); } input_close_device(&leds->handle); err_unregister_handle: input_unregister_handle(&leds->handle); err_free_mem: kfree(leds); return error; } static void input_leds_disconnect(struct input_handle *handle) { struct input_leds *leds = handle->private; int i; for (i = 0; i < leds->num_leds; i++) { struct input_led *led = &leds->leds[i]; led_classdev_unregister(&led->cdev); kfree(led->cdev.name); } input_close_device(handle); input_unregister_handle(handle); kfree(leds); } static const struct input_device_id input_leds_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT, .evbit = { BIT_MASK(EV_LED) }, }, { }, }; MODULE_DEVICE_TABLE(input, input_leds_ids); static struct input_handler input_leds_handler = { .event = input_leds_event, .connect = input_leds_connect, .disconnect = input_leds_disconnect, .name = "leds", .id_table = input_leds_ids, }; static int __init input_leds_init(void) { return input_register_handler(&input_leds_handler); } module_init(input_leds_init); static void __exit input_leds_exit(void) { input_unregister_handler(&input_leds_handler); } module_exit(input_leds_exit); MODULE_AUTHOR("Samuel Thibault <samuel.thibault@ens-lyon.org>"); MODULE_AUTHOR("Dmitry Torokhov <dmitry.torokhov@gmail.com>"); MODULE_DESCRIPTION("Input -> LEDs Bridge"); MODULE_LICENSE("GPL v2");
703 64 704 65 124 16 188 191 113 7 142 34 34 148 148 148 148 137 137 137 35 25 18 43 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HUGETLB_H #define _LINUX_HUGETLB_H #include <linux/mm.h> #include <linux/mm_types.h> #include <linux/mmdebug.h> #include <linux/fs.h> #include <linux/hugetlb_inline.h> #include <linux/cgroup.h> #include <linux/page_ref.h> #include <linux/list.h> #include <linux/kref.h> #include <linux/pgtable.h> #include <linux/gfp.h> #include <linux/userfaultfd_k.h> struct ctl_table; struct user_struct; struct mmu_gather; struct node; void free_huge_folio(struct folio *folio); #ifdef CONFIG_HUGETLB_PAGE #include <linux/pagemap.h> #include <linux/shm.h> #include <asm/tlbflush.h> /* * For HugeTLB page, there are more metadata to save in the struct page. But * the head struct page cannot meet our needs, so we have to abuse other tail * struct page to store the metadata. */ #define __NR_USED_SUBPAGE 3 struct hugepage_subpool { spinlock_t lock; long count; long max_hpages; /* Maximum huge pages or -1 if no maximum. */ long used_hpages; /* Used count against maximum, includes */ /* both allocated and reserved pages. */ struct hstate *hstate; long min_hpages; /* Minimum huge pages or -1 if no minimum. */ long rsv_hpages; /* Pages reserved against global pool to */ /* satisfy minimum size. */ }; struct resv_map { struct kref refs; spinlock_t lock; struct list_head regions; long adds_in_progress; struct list_head region_cache; long region_cache_count; struct rw_semaphore rw_sema; #ifdef CONFIG_CGROUP_HUGETLB /* * On private mappings, the counter to uncharge reservations is stored * here. If these fields are 0, then either the mapping is shared, or * cgroup accounting is disabled for this resv_map. */ struct page_counter *reservation_counter; unsigned long pages_per_hpage; struct cgroup_subsys_state *css; #endif }; /* * Region tracking -- allows tracking of reservations and instantiated pages * across the pages in a mapping. * * The region data structures are embedded into a resv_map and protected * by a resv_map's lock. The set of regions within the resv_map represent * reservations for huge pages, or huge pages that have already been * instantiated within the map. The from and to elements are huge page * indices into the associated mapping. from indicates the starting index * of the region. to represents the first index past the end of the region. * * For example, a file region structure with from == 0 and to == 4 represents * four huge pages in a mapping. It is important to note that the to element * represents the first element past the end of the region. This is used in * arithmetic as 4(to) - 0(from) = 4 huge pages in the region. * * Interval notation of the form [from, to) will be used to indicate that * the endpoint from is inclusive and to is exclusive. */ struct file_region { struct list_head link; long from; long to; #ifdef CONFIG_CGROUP_HUGETLB /* * On shared mappings, each reserved region appears as a struct * file_region in resv_map. These fields hold the info needed to * uncharge each reservation. */ struct page_counter *reservation_counter; struct cgroup_subsys_state *css; #endif }; struct hugetlb_vma_lock { struct kref refs; struct rw_semaphore rw_sema; struct vm_area_struct *vma; }; extern struct resv_map *resv_map_alloc(void); void resv_map_release(struct kref *ref); extern spinlock_t hugetlb_lock; extern int hugetlb_max_hstate __read_mostly; #define for_each_hstate(h) \ for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++) struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, long min_hpages); void hugepage_put_subpool(struct hugepage_subpool *spool); void hugetlb_dup_vma_private(struct vm_area_struct *vma); void clear_vma_resv_huge_pages(struct vm_area_struct *vma); int move_hugetlb_page_tables(struct vm_area_struct *vma, struct vm_area_struct *new_vma, unsigned long old_addr, unsigned long new_addr, unsigned long len); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *, struct vm_area_struct *); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *, zap_flags_t); void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags); void hugetlb_report_meminfo(struct seq_file *); int hugetlb_report_node_meminfo(char *buf, int len, int nid); void hugetlb_show_meminfo_node(int nid); unsigned long hugetlb_total_pages(void); vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); #ifdef CONFIG_USERFAULTFD int hugetlb_mfill_atomic_pte(pte_t *dst_pte, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, uffd_flags_t flags, struct folio **foliop); #endif /* CONFIG_USERFAULTFD */ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list); int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); void folio_putback_hugetlb(struct folio *folio); void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pud_t *pud); bool hugetlbfs_pagecache_present(struct hstate *h, struct vm_area_struct *vma, unsigned long address); struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio); extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages[MAX_NUMNODES]; /* arch callbacks */ #ifndef CONFIG_HIGHPTE /* * pte_offset_huge() and pte_alloc_huge() are helpers for those architectures * which may go down to the lowest PTE level in their huge_pte_offset() and * huge_pte_alloc(): to avoid reliance on pte_offset_map() without pte_unmap(). */ static inline pte_t *pte_offset_huge(pmd_t *pmd, unsigned long address) { return pte_offset_kernel(pmd, address); } static inline pte_t *pte_alloc_huge(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address); } #endif pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz); /* * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE. * Returns the pte_t* if found, or NULL if the address is not mapped. * * IMPORTANT: we should normally not directly call this function, instead * this is only a common interface to implement arch-specific * walker. Please use hugetlb_walk() instead, because that will attempt to * verify the locking for you. * * Since this function will walk all the pgtable pages (including not only * high-level pgtable page, but also PUD entry that can be unshared * concurrently for VM_SHARED), the caller of this function should be * responsible of its thread safety. One can follow this rule: * * (1) For private mappings: pmd unsharing is not possible, so holding the * mmap_lock for either read or write is sufficient. Most callers * already hold the mmap_lock, so normally, no special action is * required. * * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged * pgtable page can go away from under us! It can be done by a pmd * unshare with a follow up munmap() on the other process), then we * need either: * * (2.1) hugetlb vma lock read or write held, to make sure pmd unshare * won't happen upon the range (it also makes sure the pte_t we * read is the right and stable one), or, * * (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make * sure even if unshare happened the racy unmap() will wait until * i_mmap_rwsem is released. * * Option (2.1) is the safest, which guarantees pte stability from pmd * sharing pov, until the vma lock released. Option (2.2) doesn't protect * a concurrent pmd unshare, but it makes sure the pgtable page is safe to * access. */ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); unsigned long hugetlb_mask_last_page(struct hstate *h); int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); extern void __hugetlb_zap_begin(struct vm_area_struct *vma, unsigned long *begin, unsigned long *end); extern void __hugetlb_zap_end(struct vm_area_struct *vma, struct zap_details *details); static inline void hugetlb_zap_begin(struct vm_area_struct *vma, unsigned long *start, unsigned long *end) { if (is_vm_hugetlb_page(vma)) __hugetlb_zap_begin(vma, start, end); } static inline void hugetlb_zap_end(struct vm_area_struct *vma, struct zap_details *details) { if (is_vm_hugetlb_page(vma)) __hugetlb_zap_end(vma, details); } void hugetlb_vma_lock_read(struct vm_area_struct *vma); void hugetlb_vma_unlock_read(struct vm_area_struct *vma); void hugetlb_vma_lock_write(struct vm_area_struct *vma); void hugetlb_vma_unlock_write(struct vm_area_struct *vma); int hugetlb_vma_trylock_write(struct vm_area_struct *vma); void hugetlb_vma_assert_locked(struct vm_area_struct *vma); void hugetlb_vma_lock_release(struct kref *kref); long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot, unsigned long cp_flags); bool is_hugetlb_entry_migration(pte_t pte); bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma) { } static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma) { } static inline unsigned long hugetlb_total_pages(void) { return 0; } static inline struct address_space *hugetlb_folio_mapping_lock_write( struct folio *folio) { return NULL; } static inline int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { return 0; } static inline void adjust_range_if_pmd_sharing_possible( struct vm_area_struct *vma, unsigned long *start, unsigned long *end) { } static inline void hugetlb_zap_begin( struct vm_area_struct *vma, unsigned long *start, unsigned long *end) { } static inline void hugetlb_zap_end( struct vm_area_struct *vma, struct zap_details *details) { } static inline int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) { BUG(); return 0; } static inline int move_hugetlb_page_tables(struct vm_area_struct *vma, struct vm_area_struct *new_vma, unsigned long old_addr, unsigned long new_addr, unsigned long len) { BUG(); return 0; } static inline void hugetlb_report_meminfo(struct seq_file *m) { } static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid) { return 0; } static inline void hugetlb_show_meminfo_node(int nid) { } static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { return -EINVAL; } static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma) { } static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma) { } static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma) { } static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma) { } static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma) { return 1; } static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma) { } static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { return 0; } static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { BUG(); } #ifdef CONFIG_USERFAULTFD static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, uffd_flags_t flags, struct folio **foliop) { BUG(); return 0; } #endif /* CONFIG_USERFAULTFD */ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { return NULL; } static inline bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list) { return false; } static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison) { return 0; } static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared) { return 0; } static inline void folio_putback_hugetlb(struct folio *folio) { } static inline void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason) { } static inline long hugetlb_change_protection( struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot, unsigned long cp_flags) { return 0; } static inline void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags) { BUG(); } static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags) { BUG(); return 0; } static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write static inline int pgd_write(pgd_t pgd) { BUG(); return 0; } #endif #define HUGETLB_ANON_FILE "anon_hugepage" enum { /* * The file will be used as an shm file so shmfs accounting rules * apply */ HUGETLB_SHMFS_INODE = 1, /* * The file is being created on the internal vfs mount and shmfs * accounting rules do not apply */ HUGETLB_ANONHUGE_INODE = 2, }; #ifdef CONFIG_HUGETLBFS struct hugetlbfs_sb_info { long max_inodes; /* inodes allowed */ long free_inodes; /* inodes free */ spinlock_t stat_lock; struct hstate *hstate; struct hugepage_subpool *spool; kuid_t uid; kgid_t gid; umode_t mode; }; static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) { return sb->s_fs_info; } struct hugetlbfs_inode_info { struct inode vfs_inode; unsigned int seals; }; static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) { return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); } extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, int creat_flags, int page_size_log); static inline bool is_file_hugepages(const struct file *file) { return file->f_op->fop_flags & FOP_HUGE_PAGES; } static inline struct hstate *hstate_inode(struct inode *i) { return HUGETLBFS_SB(i->i_sb)->hstate; } #else /* !CONFIG_HUGETLBFS */ #define is_file_hugepages(file) false static inline struct file * hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, int creat_flags, int page_size_log) { return ERR_PTR(-ENOSYS); } static inline struct hstate *hstate_inode(struct inode *i) { return NULL; } #endif /* !CONFIG_HUGETLBFS */ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); /* * huegtlb page specific state flags. These flags are located in page.private * of the hugetlb head page. Functions created via the below macros should be * used to manipulate these flags. * * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at * allocation time. Cleared when page is fully instantiated. Free * routine checks flag to restore a reservation on error paths. * Synchronization: Examined or modified by code that knows it has * the only reference to page. i.e. After allocation but before use * or when the page is being freed. * HPG_migratable - Set after a newly allocated page is added to the page * cache and/or page tables. Indicates the page is a candidate for * migration. * Synchronization: Initially set after new page allocation with no * locking. When examined and modified during migration processing * (isolate, migrate, putback) the hugetlb_lock is held. * HPG_temporary - Set on a page that is temporarily allocated from the buddy * allocator. Typically used for migration target pages when no pages * are available in the pool. The hugetlb free page path will * immediately free pages with this flag set to the buddy allocator. * Synchronization: Can be set after huge page allocation from buddy when * code knows it has only reference. All other examinations and * modifications require hugetlb_lock. * HPG_freed - Set when page is on the free lists. * Synchronization: hugetlb_lock held for examination and modification. * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed. * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page * that is not tracked by raw_hwp_page list. */ enum hugetlb_page_flags { HPG_restore_reserve = 0, HPG_migratable, HPG_temporary, HPG_freed, HPG_vmemmap_optimized, HPG_raw_hwp_unreliable, __NR_HPAGEFLAGS, }; /* * Macros to create test, set and clear function definitions for * hugetlb specific page flags. */ #ifdef CONFIG_HUGETLB_PAGE #define TESTHPAGEFLAG(uname, flname) \ static __always_inline \ bool folio_test_hugetlb_##flname(struct folio *folio) \ { void *private = &folio->private; \ return test_bit(HPG_##flname, private); \ } #define SETHPAGEFLAG(uname, flname) \ static __always_inline \ void folio_set_hugetlb_##flname(struct folio *folio) \ { void *private = &folio->private; \ set_bit(HPG_##flname, private); \ } #define CLEARHPAGEFLAG(uname, flname) \ static __always_inline \ void folio_clear_hugetlb_##flname(struct folio *folio) \ { void *private = &folio->private; \ clear_bit(HPG_##flname, private); \ } #else #define TESTHPAGEFLAG(uname, flname) \ static inline bool \ folio_test_hugetlb_##flname(struct folio *folio) \ { return 0; } #define SETHPAGEFLAG(uname, flname) \ static inline void \ folio_set_hugetlb_##flname(struct folio *folio) \ { } #define CLEARHPAGEFLAG(uname, flname) \ static inline void \ folio_clear_hugetlb_##flname(struct folio *folio) \ { } #endif #define HPAGEFLAG(uname, flname) \ TESTHPAGEFLAG(uname, flname) \ SETHPAGEFLAG(uname, flname) \ CLEARHPAGEFLAG(uname, flname) \ /* * Create functions associated with hugetlb page flags */ HPAGEFLAG(RestoreReserve, restore_reserve) HPAGEFLAG(Migratable, migratable) HPAGEFLAG(Temporary, temporary) HPAGEFLAG(Freed, freed) HPAGEFLAG(VmemmapOptimized, vmemmap_optimized) HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable) #ifdef CONFIG_HUGETLB_PAGE #define HSTATE_NAME_LEN 32 /* Defines one hugetlb page size */ struct hstate { struct mutex resize_lock; struct lock_class_key resize_key; int next_nid_to_alloc; int next_nid_to_free; unsigned int order; unsigned int demote_order; unsigned long mask; unsigned long max_huge_pages; unsigned long nr_huge_pages; unsigned long free_huge_pages; unsigned long resv_huge_pages; unsigned long surplus_huge_pages; unsigned long nr_overcommit_huge_pages; struct list_head hugepage_activelist; struct list_head hugepage_freelists[MAX_NUMNODES]; unsigned int max_huge_pages_node[MAX_NUMNODES]; unsigned int nr_huge_pages_node[MAX_NUMNODES]; unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES]; char name[HSTATE_NAME_LEN]; }; struct huge_bootmem_page { struct list_head list; struct hstate *hstate; }; int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn); void wait_for_freed_hugetlb_folios(void); struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, bool cow_from_owner); struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback); struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, unsigned long address, struct folio *folio); /* arch callback */ int __init __alloc_bootmem_huge_page(struct hstate *h, int nid); int __init alloc_bootmem_huge_page(struct hstate *h, int nid); bool __init hugetlb_node_alloc_supported(void); void __init hugetlb_add_hstate(unsigned order); bool __init arch_hugetlb_valid_size(unsigned long size); struct hstate *size_to_hstate(unsigned long size); #ifndef HUGE_MAX_HSTATE #define HUGE_MAX_HSTATE 1 #endif extern struct hstate hstates[HUGE_MAX_HSTATE]; extern unsigned int default_hstate_idx; #define default_hstate (hstates[default_hstate_idx]) static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) { return folio->_hugetlb_subpool; } static inline void hugetlb_set_folio_subpool(struct folio *folio, struct hugepage_subpool *subpool) { folio->_hugetlb_subpool = subpool; } static inline struct hstate *hstate_file(struct file *f) { return hstate_inode(file_inode(f)); } static inline struct hstate *hstate_sizelog(int page_size_log) { if (!page_size_log) return &default_hstate; if (page_size_log < BITS_PER_LONG) return size_to_hstate(1UL << page_size_log); return NULL; } static inline struct hstate *hstate_vma(struct vm_area_struct *vma) { return hstate_file(vma->vm_file); } static inline unsigned long huge_page_size(const struct hstate *h) { return (unsigned long)PAGE_SIZE << h->order; } extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma); extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma); static inline unsigned long huge_page_mask(struct hstate *h) { return h->mask; } static inline unsigned int huge_page_order(struct hstate *h) { return h->order; } static inline unsigned huge_page_shift(struct hstate *h) { return h->order + PAGE_SHIFT; } static inline bool hstate_is_gigantic(struct hstate *h) { return huge_page_order(h) > MAX_PAGE_ORDER; } static inline unsigned int pages_per_huge_page(const struct hstate *h) { return 1 << h->order; } static inline unsigned int blocks_per_huge_page(struct hstate *h) { return huge_page_size(h) / 512; } static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, struct address_space *mapping, pgoff_t idx) { return filemap_lock_folio(mapping, idx << huge_page_order(h)); } #include <asm/hugetlb.h> #ifndef is_hugepage_only_range static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { return 0; } #define is_hugepage_only_range is_hugepage_only_range #endif #ifndef arch_clear_hugetlb_flags static inline void arch_clear_hugetlb_flags(struct folio *folio) { } #define arch_clear_hugetlb_flags arch_clear_hugetlb_flags #endif #ifndef arch_make_huge_pte static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) { return pte_mkhuge(entry); } #endif static inline struct hstate *folio_hstate(struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); return size_to_hstate(folio_size(folio)); } static inline unsigned hstate_index_to_shift(unsigned index) { return hstates[index].order + PAGE_SHIFT; } static inline int hstate_index(struct hstate *h) { return h - hstates; } int dissolve_free_hugetlb_folio(struct folio *folio); int dissolve_free_hugetlb_folios(unsigned long start_pfn, unsigned long end_pfn); #ifdef CONFIG_MEMORY_FAILURE extern void folio_clear_hugetlb_hwpoison(struct folio *folio); #else static inline void folio_clear_hugetlb_hwpoison(struct folio *folio) { } #endif #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION #ifndef arch_hugetlb_migration_supported static inline bool arch_hugetlb_migration_supported(struct hstate *h) { if ((huge_page_shift(h) == PMD_SHIFT) || (huge_page_shift(h) == PUD_SHIFT) || (huge_page_shift(h) == PGDIR_SHIFT)) return true; else return false; } #endif #else static inline bool arch_hugetlb_migration_supported(struct hstate *h) { return false; } #endif static inline bool hugepage_migration_supported(struct hstate *h) { return arch_hugetlb_migration_supported(h); } /* * Movability check is different as compared to migration check. * It determines whether or not a huge page should be placed on * movable zone or not. Movability of any huge page should be * required only if huge page size is supported for migration. * There won't be any reason for the huge page to be movable if * it is not migratable to start with. Also the size of the huge * page should be large enough to be placed under a movable zone * and still feasible enough to be migratable. Just the presence * in movable zone does not make the migration feasible. * * So even though large huge page sizes like the gigantic ones * are migratable they should not be movable because its not * feasible to migrate them from movable zone. */ static inline bool hugepage_movable_supported(struct hstate *h) { if (!hugepage_migration_supported(h)) return false; if (hstate_is_gigantic(h)) return false; return true; } /* Movability of hugepages depends on migration support. */ static inline gfp_t htlb_alloc_mask(struct hstate *h) { gfp_t gfp = __GFP_COMP | __GFP_NOWARN; gfp |= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER; return gfp; } static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) { gfp_t modified_mask = htlb_alloc_mask(h); /* Some callers might want to enforce node */ modified_mask |= (gfp_mask & __GFP_THISNODE); modified_mask |= (gfp_mask & __GFP_NOWARN); return modified_mask; } static inline bool htlb_allow_alloc_fallback(int reason) { bool allowed_fallback = false; /* * Note: the memory offline, memory failure and migration syscalls will * be allowed to fallback to other nodes due to lack of a better chioce, * that might break the per-node hugetlb pool. While other cases will * set the __GFP_THISNODE to avoid breaking the per-node hugetlb pool. */ switch (reason) { case MR_MEMORY_HOTPLUG: case MR_MEMORY_FAILURE: case MR_SYSCALL: case MR_MEMPOLICY_MBIND: allowed_fallback = true; break; default: break; } return allowed_fallback; } static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { const unsigned long size = huge_page_size(h); VM_WARN_ON(size == PAGE_SIZE); /* * hugetlb must use the exact same PT locks as core-mm page table * walkers would. When modifying a PTE table, hugetlb must take the * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD * PT lock etc. * * The expectation is that any hugetlb folio smaller than a PMD is * always mapped into a single PTE table and that any hugetlb folio * smaller than a PUD (but at least as big as a PMD) is always mapped * into a single PMD table. * * If that does not hold for an architecture, then that architecture * must disable split PT locks such that all *_lockptr() functions * will give us the same result: the per-MM PT lock. * * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr() * and core-mm would use pmd_lockptr(). However, in such configurations * split PMD locks are disabled -- they don't make sense on a single * PGDIR page table -- and the end result is the same. */ if (size >= PUD_SIZE) return pud_lockptr(mm, (pud_t *) pte); else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE)) return pmd_lockptr(mm, (pmd_t *) pte); /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */ return ptep_lockptr(mm, pte); } #ifndef hugepages_supported /* * Some platform decide whether they support huge pages at boot * time. Some of them, such as powerpc, set HPAGE_SHIFT to 0 * when there is no such support */ #define hugepages_supported() (HPAGE_SHIFT != 0) #endif void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm); static inline void hugetlb_count_init(struct mm_struct *mm) { atomic_long_set(&mm->hugetlb_usage, 0); } static inline void hugetlb_count_add(long l, struct mm_struct *mm) { atomic_long_add(l, &mm->hugetlb_usage); } static inline void hugetlb_count_sub(long l, struct mm_struct *mm) { atomic_long_sub(l, &mm->hugetlb_usage); } #ifndef huge_ptep_modify_prot_start #define huge_ptep_modify_prot_start huge_ptep_modify_prot_start static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { unsigned long psize = huge_page_size(hstate_vma(vma)); return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize); } #endif #ifndef huge_ptep_modify_prot_commit #define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { unsigned long psize = huge_page_size(hstate_vma(vma)); set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } #endif #ifdef CONFIG_NUMA void hugetlb_register_node(struct node *node); void hugetlb_unregister_node(struct node *node); #endif /* * Check if a given raw @page in a hugepage is HWPOISON. */ bool is_raw_hwpoison_page_in_hugepage(struct page *page); static inline unsigned long huge_page_mask_align(struct file *file) { return PAGE_MASK & ~huge_page_mask(hstate_file(file)); } #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; static inline unsigned long huge_page_mask_align(struct file *file) { return 0; } static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) { return NULL; } static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, struct address_space *mapping, pgoff_t idx) { return NULL; } static inline int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) { return -ENOMEM; } static inline int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn) { return 0; } static inline void wait_for_freed_hugetlb_folios(void) { } static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, bool cow_from_owner) { return NULL; } static inline struct folio * alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask) { return NULL; } static inline struct folio * alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback) { return NULL; } static inline int __alloc_bootmem_huge_page(struct hstate *h) { return 0; } static inline struct hstate *hstate_file(struct file *f) { return NULL; } static inline struct hstate *hstate_sizelog(int page_size_log) { return NULL; } static inline struct hstate *hstate_vma(struct vm_area_struct *vma) { return NULL; } static inline struct hstate *folio_hstate(struct folio *folio) { return NULL; } static inline struct hstate *size_to_hstate(unsigned long size) { return NULL; } static inline unsigned long huge_page_size(struct hstate *h) { return PAGE_SIZE; } static inline unsigned long huge_page_mask(struct hstate *h) { return PAGE_MASK; } static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma) { return PAGE_SIZE; } static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) { return PAGE_SIZE; } static inline unsigned int huge_page_order(struct hstate *h) { return 0; } static inline unsigned int huge_page_shift(struct hstate *h) { return PAGE_SHIFT; } static inline bool hstate_is_gigantic(struct hstate *h) { return false; } static inline unsigned int pages_per_huge_page(struct hstate *h) { return 1; } static inline unsigned hstate_index_to_shift(unsigned index) { return 0; } static inline int hstate_index(struct hstate *h) { return 0; } static inline int dissolve_free_hugetlb_folio(struct folio *folio) { return 0; } static inline int dissolve_free_hugetlb_folios(unsigned long start_pfn, unsigned long end_pfn) { return 0; } static inline bool hugepage_migration_supported(struct hstate *h) { return false; } static inline bool hugepage_movable_supported(struct hstate *h) { return false; } static inline gfp_t htlb_alloc_mask(struct hstate *h) { return 0; } static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) { return 0; } static inline bool htlb_allow_alloc_fallback(int reason) { return false; } static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { return &mm->page_table_lock; } static inline void hugetlb_count_init(struct mm_struct *mm) { } static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m) { } static inline void hugetlb_count_sub(long l, struct mm_struct *mm) { } static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { #ifdef CONFIG_MMU return ptep_get(ptep); #else return *ptep; #endif } static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz) { } static inline void hugetlb_register_node(struct node *node) { } static inline void hugetlb_unregister_node(struct node *node) { } static inline bool hugetlbfs_pagecache_present( struct hstate *h, struct vm_area_struct *vma, unsigned long address) { return false; } #endif /* CONFIG_HUGETLB_PAGE */ static inline spinlock_t *huge_pte_lock(struct hstate *h, struct mm_struct *mm, pte_t *pte) { spinlock_t *ptl; ptl = huge_pte_lockptr(h, mm, pte); spin_lock(ptl); return ptl; } #if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA) extern void __init hugetlb_cma_reserve(int order); #else static inline __init void hugetlb_cma_reserve(int order) { } #endif #ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING static inline bool hugetlb_pmd_shared(pte_t *pte) { return page_count(virt_to_page(pte)) > 1; } #else static inline bool hugetlb_pmd_shared(pte_t *pte) { return false; } #endif bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); #ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE /* * ARCHes with special requirements for evicting HUGETLB backing TLB entries can * implement this. */ #define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) #endif static inline bool __vma_shareable_lock(struct vm_area_struct *vma) { return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; } bool __vma_private_lock(struct vm_area_struct *vma); /* * Safe version of huge_pte_offset() to check the locks. See comments * above huge_pte_offset(). */ static inline pte_t * hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz) { #if defined(CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING) && defined(CONFIG_LOCKDEP) struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; /* * If pmd sharing possible, locking needed to safely walk the * hugetlb pgtables. More information can be found at the comment * above huge_pte_offset() in the same file. * * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP. */ if (__vma_shareable_lock(vma)) WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) && !lockdep_is_held( &vma->vm_file->f_mapping->i_mmap_rwsem)); #endif return huge_pte_offset(vma->vm_mm, addr, sz); } #endif /* _LINUX_HUGETLB_H */
5 178 178 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Checksumming functions for IPv6 * * Authors: Jorge Cwik, <jorge@laser.satlink.net> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Borrows very liberally from tcp.c and ip.c, see those * files for more names. */ /* * Fixes: * * Ralf Baechle : generic ipv6 checksum * <ralf@waldorf-gmbh.de> */ #ifndef _CHECKSUM_IPV6_H #define _CHECKSUM_IPV6_H #include <asm/types.h> #include <asm/byteorder.h> #include <net/ip.h> #include <asm/checksum.h> #include <linux/in6.h> #include <linux/tcp.h> #include <linux/ipv6.h> #ifndef _HAVE_ARCH_IPV6_CSUM __sum16 csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, __u32 len, __u8 proto, __wsum csum); #endif static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto) { return ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len, proto, 0)); } static __inline__ __sum16 tcp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, __wsum base) { return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); } static inline void __tcp_v6_send_check(struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr) { struct tcphdr *th = tcp_hdr(skb); th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); } static inline void tcp_v6_gso_csum_prep(struct sk_buff *skb) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); ipv6h->payload_len = 0; th->check = ~tcp_v6_check(0, &ipv6h->saddr, &ipv6h->daddr, 0); } static inline __sum16 udp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, __wsum base) { return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base); } void udp6_set_csum(bool nocheck, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, int len); int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto); #endif
447 449 353 355 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 // SPDX-License-Identifier: GPL-2.0 /* * shstk.c - Intel shadow stack support * * Copyright (c) 2021, Intel Corporation. * Yu-cheng Yu <yu-cheng.yu@intel.com> */ #include <linux/sched.h> #include <linux/bitops.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/sched/signal.h> #include <linux/compat.h> #include <linux/sizes.h> #include <linux/user.h> #include <linux/syscalls.h> #include <asm/msr.h> #include <asm/fpu/xstate.h> #include <asm/fpu/types.h> #include <asm/shstk.h> #include <asm/special_insns.h> #include <asm/fpu/api.h> #include <asm/prctl.h> #define SS_FRAME_SIZE 8 static bool features_enabled(unsigned long features) { return current->thread.features & features; } static void features_set(unsigned long features) { current->thread.features |= features; } static void features_clr(unsigned long features) { current->thread.features &= ~features; } /* * Create a restore token on the shadow stack. A token is always 8-byte * and aligned to 8. */ static int create_rstor_token(unsigned long ssp, unsigned long *token_addr) { unsigned long addr; /* Token must be aligned */ if (!IS_ALIGNED(ssp, 8)) return -EINVAL; addr = ssp - SS_FRAME_SIZE; /* * SSP is aligned, so reserved bits and mode bit are a zero, just mark * the token 64-bit. */ ssp |= BIT(0); if (write_user_shstk_64((u64 __user *)addr, (u64)ssp)) return -EFAULT; if (token_addr) *token_addr = addr; return 0; } /* * VM_SHADOW_STACK will have a guard page. This helps userspace protect * itself from attacks. The reasoning is as follows: * * The shadow stack pointer(SSP) is moved by CALL, RET, and INCSSPQ. The * INCSSP instruction can increment the shadow stack pointer. It is the * shadow stack analog of an instruction like: * * addq $0x80, %rsp * * However, there is one important difference between an ADD on %rsp * and INCSSP. In addition to modifying SSP, INCSSP also reads from the * memory of the first and last elements that were "popped". It can be * thought of as acting like this: * * READ_ONCE(ssp); // read+discard top element on stack * ssp += nr_to_pop * 8; // move the shadow stack * READ_ONCE(ssp-8); // read+discard last popped stack element * * The maximum distance INCSSP can move the SSP is 2040 bytes, before * it would read the memory. Therefore a single page gap will be enough * to prevent any operation from shifting the SSP to an adjacent stack, * since it would have to land in the gap at least once, causing a * fault. */ static unsigned long alloc_shstk(unsigned long addr, unsigned long size, unsigned long token_offset, bool set_res_tok) { int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G; struct mm_struct *mm = current->mm; unsigned long mapped_addr, unused; if (addr) flags |= MAP_FIXED_NOREPLACE; mmap_write_lock(mm); mapped_addr = do_mmap(NULL, addr, size, PROT_READ, flags, VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL); mmap_write_unlock(mm); if (!set_res_tok || IS_ERR_VALUE(mapped_addr)) goto out; if (create_rstor_token(mapped_addr + token_offset, NULL)) { vm_munmap(mapped_addr, size); return -EINVAL; } out: return mapped_addr; } static unsigned long adjust_shstk_size(unsigned long size) { if (size) return PAGE_ALIGN(size); return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G)); } static void unmap_shadow_stack(u64 base, u64 size) { int r; r = vm_munmap(base, size); /* * mmap_write_lock_killable() failed with -EINTR. This means * the process is about to die and have it's MM cleaned up. * This task shouldn't ever make it back to userspace. In this * case it is ok to leak a shadow stack, so just exit out. */ if (r == -EINTR) return; /* * For all other types of vm_munmap() failure, either the * system is out of memory or there is bug. */ WARN_ON_ONCE(r); } static int shstk_setup(void) { struct thread_shstk *shstk = &current->thread.shstk; unsigned long addr, size; /* Already enabled */ if (features_enabled(ARCH_SHSTK_SHSTK)) return 0; /* Also not supported for 32 bit */ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_ia32_syscall()) return -EOPNOTSUPP; size = adjust_shstk_size(0); addr = alloc_shstk(0, size, 0, false); if (IS_ERR_VALUE(addr)) return PTR_ERR((void *)addr); fpregs_lock_and_load(); wrmsrl(MSR_IA32_PL3_SSP, addr + size); wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN); fpregs_unlock(); shstk->base = addr; shstk->size = size; features_set(ARCH_SHSTK_SHSTK); return 0; } void reset_thread_features(void) { memset(&current->thread.shstk, 0, sizeof(struct thread_shstk)); current->thread.features = 0; current->thread.features_locked = 0; } unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags, unsigned long stack_size) { struct thread_shstk *shstk = &tsk->thread.shstk; unsigned long addr, size; /* * If shadow stack is not enabled on the new thread, skip any * switch to a new shadow stack. */ if (!features_enabled(ARCH_SHSTK_SHSTK)) return 0; /* * For CLONE_VFORK the child will share the parents shadow stack. * Make sure to clear the internal tracking of the thread shadow * stack so the freeing logic run for child knows to leave it alone. */ if (clone_flags & CLONE_VFORK) { shstk->base = 0; shstk->size = 0; return 0; } /* * For !CLONE_VM the child will use a copy of the parents shadow * stack. */ if (!(clone_flags & CLONE_VM)) return 0; size = adjust_shstk_size(stack_size); addr = alloc_shstk(0, size, 0, false); if (IS_ERR_VALUE(addr)) return addr; shstk->base = addr; shstk->size = size; return addr + size; } static unsigned long get_user_shstk_addr(void) { unsigned long long ssp; fpregs_lock_and_load(); rdmsrl(MSR_IA32_PL3_SSP, ssp); fpregs_unlock(); return ssp; } #define SHSTK_DATA_BIT BIT(63) static int put_shstk_data(u64 __user *addr, u64 data) { if (WARN_ON_ONCE(data & SHSTK_DATA_BIT)) return -EINVAL; /* * Mark the high bit so that the sigframe can't be processed as a * return address. */ if (write_user_shstk_64(addr, data | SHSTK_DATA_BIT)) return -EFAULT; return 0; } static int get_shstk_data(unsigned long *data, unsigned long __user *addr) { unsigned long ldata; if (unlikely(get_user(ldata, addr))) return -EFAULT; if (!(ldata & SHSTK_DATA_BIT)) return -EINVAL; *data = ldata & ~SHSTK_DATA_BIT; return 0; } static int shstk_push_sigframe(unsigned long *ssp) { unsigned long target_ssp = *ssp; /* Token must be aligned */ if (!IS_ALIGNED(target_ssp, 8)) return -EINVAL; *ssp -= SS_FRAME_SIZE; if (put_shstk_data((void __user *)*ssp, target_ssp)) return -EFAULT; return 0; } static int shstk_pop_sigframe(unsigned long *ssp) { struct vm_area_struct *vma; unsigned long token_addr; bool need_to_check_vma; int err = 1; /* * It is possible for the SSP to be off the end of a shadow stack by 4 * or 8 bytes. If the shadow stack is at the start of a page or 4 bytes * before it, it might be this case, so check that the address being * read is actually shadow stack. */ if (!IS_ALIGNED(*ssp, 8)) return -EINVAL; need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp; if (need_to_check_vma) mmap_read_lock_killable(current->mm); err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp); if (unlikely(err)) goto out_err; if (need_to_check_vma) { vma = find_vma(current->mm, *ssp); if (!vma || !(vma->vm_flags & VM_SHADOW_STACK)) { err = -EFAULT; goto out_err; } mmap_read_unlock(current->mm); } /* Restore SSP aligned? */ if (unlikely(!IS_ALIGNED(token_addr, 8))) return -EINVAL; /* SSP in userspace? */ if (unlikely(token_addr >= TASK_SIZE_MAX)) return -EINVAL; *ssp = token_addr; return 0; out_err: if (need_to_check_vma) mmap_read_unlock(current->mm); return err; } int setup_signal_shadow_stack(struct ksignal *ksig) { void __user *restorer = ksig->ka.sa.sa_restorer; unsigned long ssp; int err; if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || !features_enabled(ARCH_SHSTK_SHSTK)) return 0; if (!restorer) return -EINVAL; ssp = get_user_shstk_addr(); if (unlikely(!ssp)) return -EINVAL; err = shstk_push_sigframe(&ssp); if (unlikely(err)) return err; /* Push restorer address */ ssp -= SS_FRAME_SIZE; err = write_user_shstk_64((u64 __user *)ssp, (u64)restorer); if (unlikely(err)) return -EFAULT; fpregs_lock_and_load(); wrmsrl(MSR_IA32_PL3_SSP, ssp); fpregs_unlock(); return 0; } int restore_signal_shadow_stack(void) { unsigned long ssp; int err; if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || !features_enabled(ARCH_SHSTK_SHSTK)) return 0; ssp = get_user_shstk_addr(); if (unlikely(!ssp)) return -EINVAL; err = shstk_pop_sigframe(&ssp); if (unlikely(err)) return err; fpregs_lock_and_load(); wrmsrl(MSR_IA32_PL3_SSP, ssp); fpregs_unlock(); return 0; } void shstk_free(struct task_struct *tsk) { struct thread_shstk *shstk = &tsk->thread.shstk; if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || !features_enabled(ARCH_SHSTK_SHSTK)) return; /* * When fork() with CLONE_VM fails, the child (tsk) already has a * shadow stack allocated, and exit_thread() calls this function to * free it. In this case the parent (current) and the child share * the same mm struct. */ if (!tsk->mm || tsk->mm != current->mm) return; /* * If shstk->base is NULL, then this task is not managing its * own shadow stack (CLONE_VFORK). So skip freeing it. */ if (!shstk->base) return; /* * shstk->base is NULL for CLONE_VFORK child tasks, and so is * normal. But size = 0 on a shstk->base is not normal and * indicated an attempt to free the thread shadow stack twice. * Warn about it. */ if (WARN_ON(!shstk->size)) return; unmap_shadow_stack(shstk->base, shstk->size); shstk->size = 0; } static int wrss_control(bool enable) { u64 msrval; if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK)) return -EOPNOTSUPP; /* * Only enable WRSS if shadow stack is enabled. If shadow stack is not * enabled, WRSS will already be disabled, so don't bother clearing it * when disabling. */ if (!features_enabled(ARCH_SHSTK_SHSTK)) return -EPERM; /* Already enabled/disabled? */ if (features_enabled(ARCH_SHSTK_WRSS) == enable) return 0; fpregs_lock_and_load(); rdmsrl(MSR_IA32_U_CET, msrval); if (enable) { features_set(ARCH_SHSTK_WRSS); msrval |= CET_WRSS_EN; } else { features_clr(ARCH_SHSTK_WRSS); if (!(msrval & CET_WRSS_EN)) goto unlock; msrval &= ~CET_WRSS_EN; } wrmsrl(MSR_IA32_U_CET, msrval); unlock: fpregs_unlock(); return 0; } static int shstk_disable(void) { if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK)) return -EOPNOTSUPP; /* Already disabled? */ if (!features_enabled(ARCH_SHSTK_SHSTK)) return 0; fpregs_lock_and_load(); /* Disable WRSS too when disabling shadow stack */ wrmsrl(MSR_IA32_U_CET, 0); wrmsrl(MSR_IA32_PL3_SSP, 0); fpregs_unlock(); shstk_free(current); features_clr(ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS); return 0; } SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags) { bool set_tok = flags & SHADOW_STACK_SET_TOKEN; unsigned long aligned_size; if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK)) return -EOPNOTSUPP; if (flags & ~SHADOW_STACK_SET_TOKEN) return -EINVAL; /* If there isn't space for a token */ if (set_tok && size < 8) return -ENOSPC; if (addr && addr < SZ_4G) return -ERANGE; /* * An overflow would result in attempting to write the restore token * to the wrong location. Not catastrophic, but just return the right * error code and block it. */ aligned_size = PAGE_ALIGN(size); if (aligned_size < size) return -EOVERFLOW; return alloc_shstk(addr, aligned_size, size, set_tok); } long shstk_prctl(struct task_struct *task, int option, unsigned long arg2) { unsigned long features = arg2; if (option == ARCH_SHSTK_STATUS) { return put_user(task->thread.features, (unsigned long __user *)arg2); } if (option == ARCH_SHSTK_LOCK) { task->thread.features_locked |= features; return 0; } /* Only allow via ptrace */ if (task != current) { if (option == ARCH_SHSTK_UNLOCK && IS_ENABLED(CONFIG_CHECKPOINT_RESTORE)) { task->thread.features_locked &= ~features; return 0; } return -EINVAL; } /* Do not allow to change locked features */ if (features & task->thread.features_locked) return -EPERM; /* Only support enabling/disabling one feature at a time. */ if (hweight_long(features) > 1) return -EINVAL; if (option == ARCH_SHSTK_DISABLE) { if (features & ARCH_SHSTK_WRSS) return wrss_control(false); if (features & ARCH_SHSTK_SHSTK) return shstk_disable(); return -EINVAL; } /* Handle ARCH_SHSTK_ENABLE */ if (features & ARCH_SHSTK_SHSTK) return shstk_setup(); if (features & ARCH_SHSTK_WRSS) return wrss_control(true); return -EINVAL; } int shstk_update_last_frame(unsigned long val) { unsigned long ssp; if (!features_enabled(ARCH_SHSTK_SHSTK)) return 0; ssp = get_user_shstk_addr(); return write_user_shstk_64((u64 __user *)ssp, (u64)val); } bool shstk_is_enabled(void) { return features_enabled(ARCH_SHSTK_SHSTK); }
185 152 32 32 36 37 37 36 37 7 25 83 6 77 83 52 82 82 19 63 82 72 10 7 34 13 34 21 13 38 17 2 55 55 55 57 14 7 4 3 4 1 49 49 1 1 3 3 13 4 15 8 15 15 15 15 13 2 19 21 8 13 21 21 21 17 8 15 35 35 21 4 2 4 7 4 34 34 7 7 48 20 38 38 75 17 22 10 36 12 14 14 85 4 42 42 42 42 8 7 8 22 22 8 14 14 8 20 20 6 6 9 1 9 1 3 15 54 54 8 47 54 339 244 70 31 54 24 3 1 1 2 8 54 79 14 93 22 3 25 2 19 19 2 17 84 74 13 21 20 1 31 31 28 3 1 3 8 23 1 2 5 3 1 44 1 35 8 6 7 5 3 5 2 42 38 3 7 28 8 30 2 5 24 7 1 10 2 2 18 20 3 1 2 2 22 7 1 28 1 13 1 1 198 166 26 13 33 33 33 33 205 206 127 127 87 123 17 4 205 4 1 1 1 1 2 2 2 601 600 599 544 68 601 1 3 3 2 1946 1579 398 92 60 36 551 36 67 407 220 51 521 520 521 521 521 521 521 84 29 314 259 28 13 18 6 60 1 20 12 27 23 56 37 29 9 2 1 17 8 5 56 51 21 1 34 55 1 21 34 55 3 9 51 21 2 11 21 55 55 34 21 57 8 63 30 1 2 93 4 86 2 3 2 2 7 3 5 2 4 5 1 4 5 5 4 9 1 35 1 1 151 42 25 97 218 9 42 180 127 97 215 215 1 214 14 30 50 116 10 2 41 168 207 206 1 207 205 121 21 64 165 12 18 165 137 241 17 231 44 2 42 42 41 39 3 42 36 40 42 29 13 42 16 10 6 16 1 1 5 11 7 6 13 5 10 10 4 7 13 10 5 13 2 11 11 3 16 2 1 18 2 1 14 40 40 31 8 39 39 31 1 1 28 24 4 5 21 3 2 21 23 9 2 15 4 6 1 7 7 1 1 23 3 2 1 1 1 4 2 2 1 1 2 8 1 1 18 6 1 2 15 2 13 26 3 1 24 5 3 2 2 23 23 23 19 17 16 6 4 4 2 1 38 4 4 258 1 126 2 1 1 26 17 2 2 1 1 1 1 1 1 1 4 1 1 1 2 3 1 1 3 2 1 1 3 1 1 2 3 10 4 5 3 5 1 1 2 43 1 41 2 44 1 2 2 1 1 1 3 2 1 4 27 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2109 2107 2109 345 111 15 138 6 3 7 4 121 2 2 2107 101 99 2 1 58 50 11 58 50 9 10 10 14 14 20 1 19 2 2 38 38 69 8 1 64 3 35 18 3 3 47 2 2 2 2 38 6 4 1 4 22 20 11 39 45 8 44 11 39 44 2 20 20 1 19 19 15 7 2 13 13 13 13 13 13 131 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * PACKET - implements raw packet sockets. * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> * * Fixes: * Alan Cox : verify_area() now used correctly * Alan Cox : new skbuff lists, look ma no backlogs! * Alan Cox : tidied skbuff lists. * Alan Cox : Now uses generic datagram routines I * added. Also fixed the peek/read crash * from all old Linux datagram code. * Alan Cox : Uses the improved datagram code. * Alan Cox : Added NULL's for socket options. * Alan Cox : Re-commented the code. * Alan Cox : Use new kernel side addressing * Rob Janssen : Correct MTU usage. * Dave Platt : Counter leaks caused by incorrect * interrupt locking and some slightly * dubious gcc output. Can you read * compiler: it said _VOLATILE_ * Richard Kooijman : Timestamp fixes. * Alan Cox : New buffers. Use sk->mac.raw. * Alan Cox : sendmsg/recvmsg support. * Alan Cox : Protocol setting support * Alexey Kuznetsov : Untied from IPv4 stack. * Cyrus Durgin : Fixed kerneld for kmod. * Michal Ostrowski : Module initialization cleanup. * Ulises Alonso : Frame number limit removal and * packet_set_ring memory leak. * Eric Biederman : Allow for > 8 byte hardware addresses. * The convention is that longer addresses * will simply extend the hardware address * byte arrays at the end of sockaddr_ll * and packet_mreq. * Johann Baudy : Added TX RING. * Chetan Loke : Implemented TPACKET_V3 block abstraction * layer. * Copyright (C) 2011, <lokec@ccs.neu.edu> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/ethtool.h> #include <linux/filter.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/capability.h> #include <linux/fcntl.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/if_packet.h> #include <linux/wireless.h> #include <linux/kernel.h> #include <linux/kmod.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/errno.h> #include <linux/timer.h> #include <linux/uaccess.h> #include <asm/ioctls.h> #include <asm/page.h> #include <asm/cacheflush.h> #include <asm/io.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/poll.h> #include <linux/module.h> #include <linux/init.h> #include <linux/mutex.h> #include <linux/if_vlan.h> #include <linux/virtio_net.h> #include <linux/errqueue.h> #include <linux/net_tstamp.h> #include <linux/percpu.h> #ifdef CONFIG_INET #include <net/inet_common.h> #endif #include <linux/bpf.h> #include <net/compat.h> #include <linux/netfilter_netdev.h> #include "internal.h" /* Assumptions: - If the device has no dev->header_ops->create, there is no LL header visible above the device. In this case, its hard_header_len should be 0. The device may prepend its own header internally. In this case, its needed_headroom should be set to the space needed for it to add its internal header. For example, a WiFi driver pretending to be an Ethernet driver should set its hard_header_len to be the Ethernet header length, and set its needed_headroom to be (the real WiFi header length - the fake Ethernet header length). - packet socket receives packets with pulled ll header, so that SOCK_RAW should push it back. On receive: ----------- Incoming, dev_has_header(dev) == true mac_header -> ll header data -> data Outgoing, dev_has_header(dev) == true mac_header -> ll header data -> ll header Incoming, dev_has_header(dev) == false mac_header -> data However drivers often make it point to the ll header. This is incorrect because the ll header should be invisible to us. data -> data Outgoing, dev_has_header(dev) == false mac_header -> data. ll header is invisible to us. data -> data Resume If dev_has_header(dev) == false we are unable to restore the ll header, because it is invisible to us. On transmit: ------------ dev_has_header(dev) == true mac_header -> ll header data -> ll header dev_has_header(dev) == false (ll header is invisible to us) mac_header -> data data -> data We should set network_header on output to the correct position, packet classifier depends on it. */ /* Private packet socket structures. */ /* identical to struct packet_mreq except it has * a longer address field. */ struct packet_mreq_max { int mr_ifindex; unsigned short mr_type; unsigned short mr_alen; unsigned char mr_address[MAX_ADDR_LEN]; }; union tpacket_uhdr { struct tpacket_hdr *h1; struct tpacket2_hdr *h2; struct tpacket3_hdr *h3; void *raw; }; static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring); #define V3_ALIGNMENT (8) #define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) #define BLK_PLUS_PRIV(sz_of_priv) \ (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) #define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) #define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt) #define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len) #define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num) #define BLOCK_O2PRIV(x) ((x)->offset_to_priv) struct packet_sock; static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status); static void packet_increment_head(struct packet_ring_buffer *buff); static int prb_curr_blk_in_use(struct tpacket_block_desc *); static void *prb_dispatch_next_block(struct tpacket_kbdq_core *, struct packet_sock *); static void prb_retire_current_block(struct tpacket_kbdq_core *, struct packet_sock *, unsigned int status); static int prb_queue_frozen(struct tpacket_kbdq_core *); static void prb_open_block(struct tpacket_kbdq_core *, struct tpacket_block_desc *); static void prb_retire_rx_blk_timer_expired(struct timer_list *); static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *); static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void prb_clear_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void prb_fill_vlan_info(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); static u16 packet_pick_tx_queue(struct sk_buff *skb); struct packet_skb_cb { union { struct sockaddr_pkt pkt; union { /* Trick: alias skb original length with * ll.sll_family and ll.protocol in order * to save room. */ unsigned int origlen; struct sockaddr_ll ll; }; } sa; }; #define vio_le() virtio_legacy_is_little_endian() #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) #define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc)) #define GET_PBLOCK_DESC(x, bid) \ ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer)) #define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \ ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer)) #define GET_NEXT_PRB_BLK_NUM(x) \ (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \ ((x)->kactive_blk_num+1) : 0) static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); #ifdef CONFIG_NETFILTER_EGRESS static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb) { struct sk_buff *next, *head = NULL, *tail; int rc; rcu_read_lock(); for (; skb != NULL; skb = next) { next = skb->next; skb_mark_not_on_list(skb); if (!nf_hook_egress(skb, &rc, skb->dev)) continue; if (!head) head = skb; else tail->next = skb; tail = skb; } rcu_read_unlock(); return head; } #endif static int packet_xmit(const struct packet_sock *po, struct sk_buff *skb) { if (!packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS)) return dev_queue_xmit(skb); #ifdef CONFIG_NETFILTER_EGRESS if (nf_hook_egress_active()) { skb = nf_hook_direct_egress(skb); if (!skb) return NET_XMIT_DROP; } #endif return dev_direct_xmit(skb, packet_pick_tx_queue(skb)); } static struct net_device *packet_cached_dev_get(struct packet_sock *po) { struct net_device *dev; rcu_read_lock(); dev = rcu_dereference(po->cached_dev); dev_hold(dev); rcu_read_unlock(); return dev; } static void packet_cached_dev_assign(struct packet_sock *po, struct net_device *dev) { rcu_assign_pointer(po->cached_dev, dev); } static void packet_cached_dev_reset(struct packet_sock *po) { RCU_INIT_POINTER(po->cached_dev, NULL); } static u16 packet_pick_tx_queue(struct sk_buff *skb) { struct net_device *dev = skb->dev; const struct net_device_ops *ops = dev->netdev_ops; int cpu = raw_smp_processor_id(); u16 queue_index; #ifdef CONFIG_XPS skb->sender_cpu = cpu + 1; #endif skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues); if (ops->ndo_select_queue) { queue_index = ops->ndo_select_queue(dev, skb, NULL); queue_index = netdev_cap_txqueue(dev, queue_index); } else { queue_index = netdev_pick_tx(dev, skb, NULL); } return queue_index; } /* __register_prot_hook must be invoked through register_prot_hook * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). */ static void __register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); if (!packet_sock_flag(po, PACKET_SOCK_RUNNING)) { if (po->fanout) __fanout_link(sk, po); else dev_add_pack(&po->prot_hook); sock_hold(sk); packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 1); } } static void register_prot_hook(struct sock *sk) { lockdep_assert_held_once(&pkt_sk(sk)->bind_lock); __register_prot_hook(sk); } /* If the sync parameter is true, we will temporarily drop * the po->bind_lock and do a synchronize_net to make sure no * asynchronous packet processing paths still refer to the elements * of po->prot_hook. If the sync parameter is false, it is the * callers responsibility to take care of this. */ static void __unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); lockdep_assert_held_once(&po->bind_lock); packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 0); if (po->fanout) __fanout_unlink(sk, po); else __dev_remove_pack(&po->prot_hook); __sock_put(sk); if (sync) { spin_unlock(&po->bind_lock); synchronize_net(); spin_lock(&po->bind_lock); } } static void unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) __unregister_prot_hook(sk, sync); } static inline struct page * __pure pgv_to_page(void *addr) { if (is_vmalloc_addr(addr)) return vmalloc_to_page(addr); return virt_to_page(addr); } static void __packet_set_status(struct packet_sock *po, void *frame, int status) { union tpacket_uhdr h; /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */ h.raw = frame; switch (po->tp_version) { case TPACKET_V1: WRITE_ONCE(h.h1->tp_status, status); flush_dcache_page(pgv_to_page(&h.h1->tp_status)); break; case TPACKET_V2: WRITE_ONCE(h.h2->tp_status, status); flush_dcache_page(pgv_to_page(&h.h2->tp_status)); break; case TPACKET_V3: WRITE_ONCE(h.h3->tp_status, status); flush_dcache_page(pgv_to_page(&h.h3->tp_status)); break; default: WARN(1, "TPACKET version not supported.\n"); BUG(); } smp_wmb(); } static int __packet_get_status(const struct packet_sock *po, void *frame) { union tpacket_uhdr h; smp_rmb(); /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */ h.raw = frame; switch (po->tp_version) { case TPACKET_V1: flush_dcache_page(pgv_to_page(&h.h1->tp_status)); return READ_ONCE(h.h1->tp_status); case TPACKET_V2: flush_dcache_page(pgv_to_page(&h.h2->tp_status)); return READ_ONCE(h.h2->tp_status); case TPACKET_V3: flush_dcache_page(pgv_to_page(&h.h3->tp_status)); return READ_ONCE(h.h3->tp_status); default: WARN(1, "TPACKET version not supported.\n"); BUG(); return 0; } } static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts, unsigned int flags) { struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); if (shhwtstamps && (flags & SOF_TIMESTAMPING_RAW_HARDWARE) && ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts)) return TP_STATUS_TS_RAW_HARDWARE; if ((flags & SOF_TIMESTAMPING_SOFTWARE) && ktime_to_timespec64_cond(skb_tstamp(skb), ts)) return TP_STATUS_TS_SOFTWARE; return 0; } static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, struct sk_buff *skb) { union tpacket_uhdr h; struct timespec64 ts; __u32 ts_status; if (!(ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp)))) return 0; h.raw = frame; /* * versions 1 through 3 overflow the timestamps in y2106, since they * all store the seconds in a 32-bit unsigned integer. * If we create a version 4, that should have a 64-bit timestamp, * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit * nanoseconds. */ switch (po->tp_version) { case TPACKET_V1: h.h1->tp_sec = ts.tv_sec; h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; break; case TPACKET_V2: h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; break; case TPACKET_V3: h.h3->tp_sec = ts.tv_sec; h.h3->tp_nsec = ts.tv_nsec; break; default: WARN(1, "TPACKET version not supported.\n"); BUG(); } /* one flush is safe, as both fields always lie on the same cacheline */ flush_dcache_page(pgv_to_page(&h.h1->tp_sec)); smp_wmb(); return ts_status; } static void *packet_lookup_frame(const struct packet_sock *po, const struct packet_ring_buffer *rb, unsigned int position, int status) { unsigned int pg_vec_pos, frame_offset; union tpacket_uhdr h; pg_vec_pos = position / rb->frames_per_block; frame_offset = position % rb->frames_per_block; h.raw = rb->pg_vec[pg_vec_pos].buffer + (frame_offset * rb->frame_size); if (status != __packet_get_status(po, h.raw)) return NULL; return h.raw; } static void *packet_current_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { return packet_lookup_frame(po, rb, rb->head, status); } static u16 vlan_get_tci(const struct sk_buff *skb, struct net_device *dev) { struct vlan_hdr vhdr, *vh; unsigned int header_len; if (!dev) return 0; /* In the SOCK_DGRAM scenario, skb data starts at the network * protocol, which is after the VLAN headers. The outer VLAN * header is at the hard_header_len offset in non-variable * length link layer headers. If it's a VLAN device, the * min_header_len should be used to exclude the VLAN header * size. */ if (dev->min_header_len == dev->hard_header_len) header_len = dev->hard_header_len; else if (is_vlan_dev(dev)) header_len = dev->min_header_len; else return 0; vh = skb_header_pointer(skb, skb_mac_offset(skb) + header_len, sizeof(vhdr), &vhdr); if (unlikely(!vh)) return 0; return ntohs(vh->h_vlan_TCI); } static __be16 vlan_get_protocol_dgram(const struct sk_buff *skb) { __be16 proto = skb->protocol; if (unlikely(eth_type_vlan(proto))) proto = __vlan_get_protocol_offset(skb, proto, skb_mac_offset(skb), NULL); return proto; } static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc) { del_timer_sync(&pkc->retire_blk_timer); } static void prb_shutdown_retire_blk_timer(struct packet_sock *po, struct sk_buff_head *rb_queue) { struct tpacket_kbdq_core *pkc; pkc = GET_PBDQC_FROM_RB(&po->rx_ring); spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); } static void prb_setup_retire_blk_timer(struct packet_sock *po) { struct tpacket_kbdq_core *pkc; pkc = GET_PBDQC_FROM_RB(&po->rx_ring); timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired, 0); pkc->retire_blk_timer.expires = jiffies; } static int prb_calc_retire_blk_tmo(struct packet_sock *po, int blk_size_in_bytes) { struct net_device *dev; unsigned int mbits, div; struct ethtool_link_ksettings ecmd; int err; rtnl_lock(); dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); if (unlikely(!dev)) { rtnl_unlock(); return DEFAULT_PRB_RETIRE_TOV; } err = __ethtool_get_link_ksettings(dev, &ecmd); rtnl_unlock(); if (err) return DEFAULT_PRB_RETIRE_TOV; /* If the link speed is so slow you don't really * need to worry about perf anyways */ if (ecmd.base.speed < SPEED_1000 || ecmd.base.speed == SPEED_UNKNOWN) return DEFAULT_PRB_RETIRE_TOV; div = ecmd.base.speed / 1000; mbits = (blk_size_in_bytes * 8) / (1024 * 1024); if (div) mbits /= div; if (div) return mbits + 1; return mbits; } static void prb_init_ft_ops(struct tpacket_kbdq_core *p1, union tpacket_req_u *req_u) { p1->feature_req_word = req_u->req3.tp_feature_req_word; } static void init_prb_bdqc(struct packet_sock *po, struct packet_ring_buffer *rb, struct pgv *pg_vec, union tpacket_req_u *req_u) { struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb); struct tpacket_block_desc *pbd; memset(p1, 0x0, sizeof(*p1)); p1->knxt_seq_num = 1; p1->pkbdq = pg_vec; pbd = (struct tpacket_block_desc *)pg_vec[0].buffer; p1->pkblk_start = pg_vec[0].buffer; p1->kblk_size = req_u->req3.tp_block_size; p1->knum_blocks = req_u->req3.tp_block_nr; p1->hdrlen = po->tp_hdrlen; p1->version = po->tp_version; p1->last_kactive_blk_num = 0; po->stats.stats3.tp_freeze_q_cnt = 0; if (req_u->req3.tp_retire_blk_tov) p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov; else p1->retire_blk_tov = prb_calc_retire_blk_tmo(po, req_u->req3.tp_block_size); p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; rwlock_init(&p1->blk_fill_in_prog_lock); p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); prb_init_ft_ops(p1, req_u); prb_setup_retire_blk_timer(po); prb_open_block(p1, pbd); } /* Do NOT update the last_blk_num first. * Assumes sk_buff_head lock is held. */ static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) { mod_timer(&pkc->retire_blk_timer, jiffies + pkc->tov_in_jiffies); pkc->last_kactive_blk_num = pkc->kactive_blk_num; } /* * Timer logic: * 1) We refresh the timer only when we open a block. * By doing this we don't waste cycles refreshing the timer * on packet-by-packet basis. * * With a 1MB block-size, on a 1Gbps line, it will take * i) ~8 ms to fill a block + ii) memcpy etc. * In this cut we are not accounting for the memcpy time. * * So, if the user sets the 'tmo' to 10ms then the timer * will never fire while the block is still getting filled * (which is what we want). However, the user could choose * to close a block early and that's fine. * * But when the timer does fire, we check whether or not to refresh it. * Since the tmo granularity is in msecs, it is not too expensive * to refresh the timer, lets say every '8' msecs. * Either the user can set the 'tmo' or we can derive it based on * a) line-speed and b) block-size. * prb_calc_retire_blk_tmo() calculates the tmo. * */ static void prb_retire_rx_blk_timer_expired(struct timer_list *t) { struct packet_sock *po = from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer); struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring); unsigned int frozen; struct tpacket_block_desc *pbd; spin_lock(&po->sk.sk_receive_queue.lock); frozen = prb_queue_frozen(pkc); pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); if (unlikely(pkc->delete_blk_timer)) goto out; /* We only need to plug the race when the block is partially filled. * tpacket_rcv: * lock(); increment BLOCK_NUM_PKTS; unlock() * copy_bits() is in progress ... * timer fires on other cpu: * we can't retire the current block because copy_bits * is in progress. * */ if (BLOCK_NUM_PKTS(pbd)) { /* Waiting for skb_copy_bits to finish... */ write_lock(&pkc->blk_fill_in_prog_lock); write_unlock(&pkc->blk_fill_in_prog_lock); } if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { if (!frozen) { if (!BLOCK_NUM_PKTS(pbd)) { /* An empty block. Just refresh the timer. */ goto refresh_timer; } prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO); if (!prb_dispatch_next_block(pkc, po)) goto refresh_timer; else goto out; } else { /* Case 1. Queue was frozen because user-space was * lagging behind. */ if (prb_curr_blk_in_use(pbd)) { /* * Ok, user-space is still behind. * So just refresh the timer. */ goto refresh_timer; } else { /* Case 2. queue was frozen,user-space caught up, * now the link went idle && the timer fired. * We don't have a block to close.So we open this * block and restart the timer. * opening a block thaws the queue,restarts timer * Thawing/timer-refresh is a side effect. */ prb_open_block(pkc, pbd); goto out; } } } refresh_timer: _prb_refresh_rx_retire_blk_timer(pkc); out: spin_unlock(&po->sk.sk_receive_queue.lock); } static void prb_flush_block(struct tpacket_kbdq_core *pkc1, struct tpacket_block_desc *pbd1, __u32 status) { /* Flush everything minus the block header */ #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 u8 *start, *end; start = (u8 *)pbd1; /* Skip the block header(we know header WILL fit in 4K) */ start += PAGE_SIZE; end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end); for (; start < end; start += PAGE_SIZE) flush_dcache_page(pgv_to_page(start)); smp_wmb(); #endif /* Now update the block status. */ BLOCK_STATUS(pbd1) = status; /* Flush the block header */ #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 start = (u8 *)pbd1; flush_dcache_page(pgv_to_page(start)); smp_wmb(); #endif } /* * Side effect: * * 1) flush the block * 2) Increment active_blk_num * * Note:We DONT refresh the timer on purpose. * Because almost always the next block will be opened. */ static void prb_close_block(struct tpacket_kbdq_core *pkc1, struct tpacket_block_desc *pbd1, struct packet_sock *po, unsigned int stat) { __u32 status = TP_STATUS_USER | stat; struct tpacket3_hdr *last_pkt; struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; struct sock *sk = &po->sk; if (atomic_read(&po->tp_drops)) status |= TP_STATUS_LOSING; last_pkt = (struct tpacket3_hdr *)pkc1->prev; last_pkt->tp_next_offset = 0; /* Get the ts of the last pkt */ if (BLOCK_NUM_PKTS(pbd1)) { h1->ts_last_pkt.ts_sec = last_pkt->tp_sec; h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec; } else { /* Ok, we tmo'd - so get the current time. * * It shouldn't really happen as we don't close empty * blocks. See prb_retire_rx_blk_timer_expired(). */ struct timespec64 ts; ktime_get_real_ts64(&ts); h1->ts_last_pkt.ts_sec = ts.tv_sec; h1->ts_last_pkt.ts_nsec = ts.tv_nsec; } smp_wmb(); /* Flush the block */ prb_flush_block(pkc1, pbd1, status); sk->sk_data_ready(sk); pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1); } static void prb_thaw_queue(struct tpacket_kbdq_core *pkc) { pkc->reset_pending_on_curr_blk = 0; } /* * Side effect of opening a block: * * 1) prb_queue is thawed. * 2) retire_blk_timer is refreshed. * */ static void prb_open_block(struct tpacket_kbdq_core *pkc1, struct tpacket_block_desc *pbd1) { struct timespec64 ts; struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; smp_rmb(); /* We could have just memset this but we will lose the * flexibility of making the priv area sticky */ BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; BLOCK_NUM_PKTS(pbd1) = 0; BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); ktime_get_real_ts64(&ts); h1->ts_first_pkt.ts_sec = ts.tv_sec; h1->ts_first_pkt.ts_nsec = ts.tv_nsec; pkc1->pkblk_start = (char *)pbd1; pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; pbd1->version = pkc1->version; pkc1->prev = pkc1->nxt_offset; pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; prb_thaw_queue(pkc1); _prb_refresh_rx_retire_blk_timer(pkc1); smp_wmb(); } /* * Queue freeze logic: * 1) Assume tp_block_nr = 8 blocks. * 2) At time 't0', user opens Rx ring. * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7 * 4) user-space is either sleeping or processing block '0'. * 5) tpacket_rcv is currently filling block '7', since there is no space left, * it will close block-7,loop around and try to fill block '0'. * call-flow: * __packet_lookup_frame_in_block * prb_retire_current_block() * prb_dispatch_next_block() * |->(BLOCK_STATUS == USER) evaluates to true * 5.1) Since block-0 is currently in-use, we just freeze the queue. * 6) Now there are two cases: * 6.1) Link goes idle right after the queue is frozen. * But remember, the last open_block() refreshed the timer. * When this timer expires,it will refresh itself so that we can * re-open block-0 in near future. * 6.2) Link is busy and keeps on receiving packets. This is a simple * case and __packet_lookup_frame_in_block will check if block-0 * is free and can now be re-used. */ static void prb_freeze_queue(struct tpacket_kbdq_core *pkc, struct packet_sock *po) { pkc->reset_pending_on_curr_blk = 1; po->stats.stats3.tp_freeze_q_cnt++; } #define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT)) /* * If the next block is free then we will dispatch it * and return a good offset. * Else, we will freeze the queue. * So, caller must check the return value. */ static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc, struct packet_sock *po) { struct tpacket_block_desc *pbd; smp_rmb(); /* 1. Get current block num */ pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); /* 2. If this block is currently in_use then freeze the queue */ if (TP_STATUS_USER & BLOCK_STATUS(pbd)) { prb_freeze_queue(pkc, po); return NULL; } /* * 3. * open this block and return the offset where the first packet * needs to get stored. */ prb_open_block(pkc, pbd); return (void *)pkc->nxt_offset; } static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, struct packet_sock *po, unsigned int status) { struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); /* retire/close the current block */ if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) { /* * Plug the case where copy_bits() is in progress on * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't * have space to copy the pkt in the current block and * called prb_retire_current_block() * * We don't need to worry about the TMO case because * the timer-handler already handled this case. */ if (!(status & TP_STATUS_BLK_TMO)) { /* Waiting for skb_copy_bits to finish... */ write_lock(&pkc->blk_fill_in_prog_lock); write_unlock(&pkc->blk_fill_in_prog_lock); } prb_close_block(pkc, pbd, po, status); return; } } static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd) { return TP_STATUS_USER & BLOCK_STATUS(pbd); } static int prb_queue_frozen(struct tpacket_kbdq_core *pkc) { return pkc->reset_pending_on_curr_blk; } static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) __releases(&pkc->blk_fill_in_prog_lock) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); read_unlock(&pkc->blk_fill_in_prog_lock); } static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb); } static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { ppd->hv1.tp_rxhash = 0; } static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { struct packet_sock *po = container_of(pkc, struct packet_sock, rx_ring.prb_bdqc); if (skb_vlan_tag_present(pkc->skb)) { ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb); ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else if (unlikely(po->sk.sk_type == SOCK_DGRAM && eth_type_vlan(pkc->skb->protocol))) { ppd->hv1.tp_vlan_tci = vlan_get_tci(pkc->skb, pkc->skb->dev); ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->protocol); ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { ppd->hv1.tp_vlan_tci = 0; ppd->hv1.tp_vlan_tpid = 0; ppd->tp_status = TP_STATUS_AVAILABLE; } } static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { ppd->hv1.tp_padding = 0; prb_fill_vlan_info(pkc, ppd); if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH) prb_fill_rxhash(pkc, ppd); else prb_clear_rxhash(pkc, ppd); } static void prb_fill_curr_block(char *curr, struct tpacket_kbdq_core *pkc, struct tpacket_block_desc *pbd, unsigned int len) __acquires(&pkc->blk_fill_in_prog_lock) { struct tpacket3_hdr *ppd; ppd = (struct tpacket3_hdr *)curr; ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len); pkc->prev = curr; pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len); BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len); BLOCK_NUM_PKTS(pbd) += 1; read_lock(&pkc->blk_fill_in_prog_lock); prb_run_all_ft_ops(pkc, ppd); } /* Assumes caller has the sk->rx_queue.lock */ static void *__packet_lookup_frame_in_block(struct packet_sock *po, struct sk_buff *skb, unsigned int len ) { struct tpacket_kbdq_core *pkc; struct tpacket_block_desc *pbd; char *curr, *end; pkc = GET_PBDQC_FROM_RB(&po->rx_ring); pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); /* Queue is frozen when user space is lagging behind */ if (prb_queue_frozen(pkc)) { /* * Check if that last block which caused the queue to freeze, * is still in_use by user-space. */ if (prb_curr_blk_in_use(pbd)) { /* Can't record this packet */ return NULL; } else { /* * Ok, the block was released by user-space. * Now let's open that block. * opening a block also thaws the queue. * Thawing is a side effect. */ prb_open_block(pkc, pbd); } } smp_mb(); curr = pkc->nxt_offset; pkc->skb = skb; end = (char *)pbd + pkc->kblk_size; /* first try the current block */ if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) { prb_fill_curr_block(curr, pkc, pbd, len); return (void *)curr; } /* Ok, close the current block */ prb_retire_current_block(pkc, po, 0); /* Now, try to dispatch the next block */ curr = (char *)prb_dispatch_next_block(pkc, po); if (curr) { pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); prb_fill_curr_block(curr, pkc, pbd, len); return (void *)curr; } /* * No free blocks are available.user_space hasn't caught up yet. * Queue was just frozen and now this packet will get dropped. */ return NULL; } static void *packet_current_rx_frame(struct packet_sock *po, struct sk_buff *skb, int status, unsigned int len) { char *curr = NULL; switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: curr = packet_lookup_frame(po, &po->rx_ring, po->rx_ring.head, status); return curr; case TPACKET_V3: return __packet_lookup_frame_in_block(po, skb, len); default: WARN(1, "TPACKET version not supported\n"); BUG(); return NULL; } } static void *prb_lookup_block(const struct packet_sock *po, const struct packet_ring_buffer *rb, unsigned int idx, int status) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx); if (status != BLOCK_STATUS(pbd)) return NULL; return pbd; } static int prb_previous_blk_num(struct packet_ring_buffer *rb) { unsigned int prev; if (rb->prb_bdqc.kactive_blk_num) prev = rb->prb_bdqc.kactive_blk_num-1; else prev = rb->prb_bdqc.knum_blocks-1; return prev; } /* Assumes caller has held the rx_queue.lock */ static void *__prb_previous_block(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { unsigned int previous = prb_previous_blk_num(rb); return prb_lookup_block(po, rb, previous, status); } static void *packet_previous_rx_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { if (po->tp_version <= TPACKET_V2) return packet_previous_frame(po, rb, status); return __prb_previous_block(po, rb, status); } static void packet_increment_rx_head(struct packet_sock *po, struct packet_ring_buffer *rb) { switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: return packet_increment_head(rb); case TPACKET_V3: default: WARN(1, "TPACKET version not supported.\n"); BUG(); return; } } static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max; return packet_lookup_frame(po, rb, previous, status); } static void packet_increment_head(struct packet_ring_buffer *buff) { buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; } static void packet_inc_pending(struct packet_ring_buffer *rb) { this_cpu_inc(*rb->pending_refcnt); } static void packet_dec_pending(struct packet_ring_buffer *rb) { this_cpu_dec(*rb->pending_refcnt); } static unsigned int packet_read_pending(const struct packet_ring_buffer *rb) { unsigned int refcnt = 0; int cpu; /* We don't use pending refcount in rx_ring. */ if (rb->pending_refcnt == NULL) return 0; for_each_possible_cpu(cpu) refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu); return refcnt; } static int packet_alloc_pending(struct packet_sock *po) { po->rx_ring.pending_refcnt = NULL; po->tx_ring.pending_refcnt = alloc_percpu(unsigned int); if (unlikely(po->tx_ring.pending_refcnt == NULL)) return -ENOBUFS; return 0; } static void packet_free_pending(struct packet_sock *po) { free_percpu(po->tx_ring.pending_refcnt); } #define ROOM_POW_OFF 2 #define ROOM_NONE 0x0 #define ROOM_LOW 0x1 #define ROOM_NORMAL 0x2 static bool __tpacket_has_room(const struct packet_sock *po, int pow_off) { int idx, len; len = READ_ONCE(po->rx_ring.frame_max) + 1; idx = READ_ONCE(po->rx_ring.head); if (pow_off) idx += len >> pow_off; if (idx >= len) idx -= len; return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL); } static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off) { int idx, len; len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks); idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num); if (pow_off) idx += len >> pow_off; if (idx >= len) idx -= len; return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL); } static int __packet_rcv_has_room(const struct packet_sock *po, const struct sk_buff *skb) { const struct sock *sk = &po->sk; int ret = ROOM_NONE; if (po->prot_hook.func != tpacket_rcv) { int rcvbuf = READ_ONCE(sk->sk_rcvbuf); int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc) - (skb ? skb->truesize : 0); if (avail > (rcvbuf >> ROOM_POW_OFF)) return ROOM_NORMAL; else if (avail > 0) return ROOM_LOW; else return ROOM_NONE; } if (po->tp_version == TPACKET_V3) { if (__tpacket_v3_has_room(po, ROOM_POW_OFF)) ret = ROOM_NORMAL; else if (__tpacket_v3_has_room(po, 0)) ret = ROOM_LOW; } else { if (__tpacket_has_room(po, ROOM_POW_OFF)) ret = ROOM_NORMAL; else if (__tpacket_has_room(po, 0)) ret = ROOM_LOW; } return ret; } static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) { bool pressure; int ret; ret = __packet_rcv_has_room(po, skb); pressure = ret != ROOM_NORMAL; if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) != pressure) packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, pressure); return ret; } static void packet_rcv_try_clear_pressure(struct packet_sock *po) { if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, false); } static void packet_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_error_queue); WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(refcount_read(&sk->sk_wmem_alloc)); if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Attempt to release alive packet socket: %p\n", sk); return; } } static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb) { u32 *history = po->rollover->history; u32 victim, rxhash; int i, count = 0; rxhash = skb_get_hash(skb); for (i = 0; i < ROLLOVER_HLEN; i++) if (READ_ONCE(history[i]) == rxhash) count++; victim = get_random_u32_below(ROLLOVER_HLEN); /* Avoid dirtying the cache line if possible */ if (READ_ONCE(history[victim]) != rxhash) WRITE_ONCE(history[victim], rxhash); return count > (ROLLOVER_HLEN >> 1); } static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { return reciprocal_scale(__skb_get_hash_symmetric(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { unsigned int val = atomic_inc_return(&f->rr_cur); return val % num; } static unsigned int fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { return smp_processor_id() % num; } static unsigned int fanout_demux_rnd(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { return get_random_u32_below(num); } static unsigned int fanout_demux_rollover(struct packet_fanout *f, struct sk_buff *skb, unsigned int idx, bool try_self, unsigned int num) { struct packet_sock *po, *po_next, *po_skip = NULL; unsigned int i, j, room = ROOM_NONE; po = pkt_sk(rcu_dereference(f->arr[idx])); if (try_self) { room = packet_rcv_has_room(po, skb); if (room == ROOM_NORMAL || (room == ROOM_LOW && !fanout_flow_is_huge(po, skb))) return idx; po_skip = po; } i = j = min_t(int, po->rollover->sock, num - 1); do { po_next = pkt_sk(rcu_dereference(f->arr[i])); if (po_next != po_skip && !packet_sock_flag(po_next, PACKET_SOCK_PRESSURE) && packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { if (i != j) po->rollover->sock = i; atomic_long_inc(&po->rollover->num); if (room == ROOM_LOW) atomic_long_inc(&po->rollover->num_huge); return i; } if (++i == num) i = 0; } while (i != j); atomic_long_inc(&po->rollover->num_failed); return idx; } static unsigned int fanout_demux_qm(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { return skb_get_queue_mapping(skb) % num; } static unsigned int fanout_demux_bpf(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { struct bpf_prog *prog; unsigned int ret = 0; rcu_read_lock(); prog = rcu_dereference(f->bpf_prog); if (prog) ret = bpf_prog_run_clear_cb(prog, skb) % num; rcu_read_unlock(); return ret; } static bool fanout_has_flag(struct packet_fanout *f, u16 flag) { return f->flags & (flag >> 8); } static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct packet_fanout *f = pt->af_packet_priv; unsigned int num = READ_ONCE(f->num_members); struct net *net = read_pnet(&f->net); struct packet_sock *po; unsigned int idx; if (!net_eq(dev_net(dev), net) || !num) { kfree_skb(skb); return 0; } if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET); if (!skb) return 0; } switch (f->type) { case PACKET_FANOUT_HASH: default: idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: idx = fanout_demux_lb(f, skb, num); break; case PACKET_FANOUT_CPU: idx = fanout_demux_cpu(f, skb, num); break; case PACKET_FANOUT_RND: idx = fanout_demux_rnd(f, skb, num); break; case PACKET_FANOUT_QM: idx = fanout_demux_qm(f, skb, num); break; case PACKET_FANOUT_ROLLOVER: idx = fanout_demux_rollover(f, skb, 0, false, num); break; case PACKET_FANOUT_CBPF: case PACKET_FANOUT_EBPF: idx = fanout_demux_bpf(f, skb, num); break; } if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER)) idx = fanout_demux_rollover(f, skb, idx, true, num); po = pkt_sk(rcu_dereference(f->arr[idx])); return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } DEFINE_MUTEX(fanout_mutex); EXPORT_SYMBOL_GPL(fanout_mutex); static LIST_HEAD(fanout_list); static u16 fanout_next_id; static void __fanout_link(struct sock *sk, struct packet_sock *po) { struct packet_fanout *f = po->fanout; spin_lock(&f->lock); rcu_assign_pointer(f->arr[f->num_members], sk); smp_wmb(); f->num_members++; if (f->num_members == 1) dev_add_pack(&f->prot_hook); spin_unlock(&f->lock); } static void __fanout_unlink(struct sock *sk, struct packet_sock *po) { struct packet_fanout *f = po->fanout; int i; spin_lock(&f->lock); for (i = 0; i < f->num_members; i++) { if (rcu_dereference_protected(f->arr[i], lockdep_is_held(&f->lock)) == sk) break; } BUG_ON(i >= f->num_members); rcu_assign_pointer(f->arr[i], rcu_dereference_protected(f->arr[f->num_members - 1], lockdep_is_held(&f->lock))); f->num_members--; if (f->num_members == 0) __dev_remove_pack(&f->prot_hook); spin_unlock(&f->lock); } static bool match_fanout_group(struct packet_type *ptype, struct sock *sk) { if (sk->sk_family != PF_PACKET) return false; return ptype->af_packet_priv == pkt_sk(sk)->fanout; } static void fanout_init_data(struct packet_fanout *f) { switch (f->type) { case PACKET_FANOUT_LB: atomic_set(&f->rr_cur, 0); break; case PACKET_FANOUT_CBPF: case PACKET_FANOUT_EBPF: RCU_INIT_POINTER(f->bpf_prog, NULL); break; } } static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new) { struct bpf_prog *old; spin_lock(&f->lock); old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock)); rcu_assign_pointer(f->bpf_prog, new); spin_unlock(&f->lock); if (old) { synchronize_net(); bpf_prog_destroy(old); } } static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data, unsigned int len) { struct bpf_prog *new; struct sock_fprog fprog; int ret; if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) return -EPERM; ret = copy_bpf_fprog_from_user(&fprog, data, len); if (ret) return ret; ret = bpf_prog_create_from_user(&new, &fprog, NULL, false); if (ret) return ret; __fanout_set_data_bpf(po->fanout, new); return 0; } static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data, unsigned int len) { struct bpf_prog *new; u32 fd; if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) return -EPERM; if (len != sizeof(fd)) return -EINVAL; if (copy_from_sockptr(&fd, data, len)) return -EFAULT; new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); if (IS_ERR(new)) return PTR_ERR(new); __fanout_set_data_bpf(po->fanout, new); return 0; } static int fanout_set_data(struct packet_sock *po, sockptr_t data, unsigned int len) { switch (po->fanout->type) { case PACKET_FANOUT_CBPF: return fanout_set_data_cbpf(po, data, len); case PACKET_FANOUT_EBPF: return fanout_set_data_ebpf(po, data, len); default: return -EINVAL; } } static void fanout_release_data(struct packet_fanout *f) { switch (f->type) { case PACKET_FANOUT_CBPF: case PACKET_FANOUT_EBPF: __fanout_set_data_bpf(f, NULL); } } static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id) { struct packet_fanout *f; list_for_each_entry(f, &fanout_list, list) { if (f->id == candidate_id && read_pnet(&f->net) == sock_net(sk)) { return false; } } return true; } static bool fanout_find_new_id(struct sock *sk, u16 *new_id) { u16 id = fanout_next_id; do { if (__fanout_id_is_free(sk, id)) { *new_id = id; fanout_next_id = id + 1; return true; } id++; } while (id != fanout_next_id); return false; } static int fanout_add(struct sock *sk, struct fanout_args *args) { struct packet_rollover *rollover = NULL; struct packet_sock *po = pkt_sk(sk); u16 type_flags = args->type_flags; struct packet_fanout *f, *match; u8 type = type_flags & 0xff; u8 flags = type_flags >> 8; u16 id = args->id; int err; switch (type) { case PACKET_FANOUT_ROLLOVER: if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER) return -EINVAL; break; case PACKET_FANOUT_HASH: case PACKET_FANOUT_LB: case PACKET_FANOUT_CPU: case PACKET_FANOUT_RND: case PACKET_FANOUT_QM: case PACKET_FANOUT_CBPF: case PACKET_FANOUT_EBPF: break; default: return -EINVAL; } mutex_lock(&fanout_mutex); err = -EALREADY; if (po->fanout) goto out; if (type == PACKET_FANOUT_ROLLOVER || (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) { err = -ENOMEM; rollover = kzalloc(sizeof(*rollover), GFP_KERNEL); if (!rollover) goto out; atomic_long_set(&rollover->num, 0); atomic_long_set(&rollover->num_huge, 0); atomic_long_set(&rollover->num_failed, 0); } if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) { if (id != 0) { err = -EINVAL; goto out; } if (!fanout_find_new_id(sk, &id)) { err = -ENOMEM; goto out; } /* ephemeral flag for the first socket in the group: drop it */ flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8); } match = NULL; list_for_each_entry(f, &fanout_list, list) { if (f->id == id && read_pnet(&f->net) == sock_net(sk)) { match = f; break; } } err = -EINVAL; if (match) { if (match->flags != flags) goto out; if (args->max_num_members && args->max_num_members != match->max_num_members) goto out; } else { if (args->max_num_members > PACKET_FANOUT_MAX) goto out; if (!args->max_num_members) /* legacy PACKET_FANOUT_MAX */ args->max_num_members = 256; err = -ENOMEM; match = kvzalloc(struct_size(match, arr, args->max_num_members), GFP_KERNEL); if (!match) goto out; write_pnet(&match->net, sock_net(sk)); match->id = id; match->type = type; match->flags = flags; INIT_LIST_HEAD(&match->list); spin_lock_init(&match->lock); refcount_set(&match->sk_ref, 0); fanout_init_data(match); match->prot_hook.type = po->prot_hook.type; match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; match->max_num_members = args->max_num_members; match->prot_hook.ignore_outgoing = type_flags & PACKET_FANOUT_FLAG_IGNORE_OUTGOING; list_add(&match->list, &fanout_list); } err = -EINVAL; spin_lock(&po->bind_lock); if (po->num && match->type == type && match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { err = -ENOSPC; if (refcount_read(&match->sk_ref) < match->max_num_members) { /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ WRITE_ONCE(po->fanout, match); po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { __dev_remove_pack(&po->prot_hook); __fanout_link(sk, po); } err = 0; } } spin_unlock(&po->bind_lock); if (err && !refcount_read(&match->sk_ref)) { list_del(&match->list); kvfree(match); } out: kfree(rollover); mutex_unlock(&fanout_mutex); return err; } /* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout. * It is the responsibility of the caller to call fanout_release_data() and * free the returned packet_fanout (after synchronize_net()) */ static struct packet_fanout *fanout_release(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f; mutex_lock(&fanout_mutex); f = po->fanout; if (f) { po->fanout = NULL; if (refcount_dec_and_test(&f->sk_ref)) list_del(&f->list); else f = NULL; } mutex_unlock(&fanout_mutex); return f; } static bool packet_extra_vlan_len_allowed(const struct net_device *dev, struct sk_buff *skb) { /* Earlier code assumed this would be a VLAN pkt, double-check * this now that we have the actual packet in hand. We can only * do this check on Ethernet devices. */ if (unlikely(dev->type != ARPHRD_ETHER)) return false; skb_reset_mac_header(skb); return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)); } static const struct proto_ops packet_ops; static const struct proto_ops packet_ops_spkt; static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct sockaddr_pkt *spkt; /* * When we registered the protocol we saved the socket in the data * field for just this event. */ sk = pt->af_packet_priv; /* * Yank back the headers [hope the device set this * right or kerboom...] * * Incoming packets have ll header pulled, * push it back. * * For outgoing ones skb->data == skb_mac_header(skb) * so that this procedure is noop. */ if (skb->pkt_type == PACKET_LOOPBACK) goto out; if (!net_eq(dev_net(dev), sock_net(sk))) goto out; skb = skb_share_check(skb, GFP_ATOMIC); if (skb == NULL) goto oom; /* drop any routing info */ skb_dst_drop(skb); /* drop conntrack reference */ nf_reset_ct(skb); spkt = &PACKET_SKB_CB(skb)->sa.pkt; skb_push(skb, skb->data - skb_mac_header(skb)); /* * The SOCK_PACKET socket receives _all_ frames. */ spkt->spkt_family = dev->type; strscpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device)); spkt->spkt_protocol = skb->protocol; /* * Charge the memory to the socket. This is done specifically * to prevent sockets using all the memory up. */ if (sock_queue_rcv_skb(sk, skb) == 0) return 0; out: kfree_skb(skb); oom: return 0; } static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) { int depth; if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && sock->type == SOCK_RAW) { skb_reset_mac_header(skb); skb->protocol = dev_parse_header_protocol(skb); } /* Move network header to the right position for VLAN tagged packets */ if (likely(skb->dev->type == ARPHRD_ETHER) && eth_type_vlan(skb->protocol) && vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) skb_set_network_header(skb, depth); skb_probe_transport_header(skb); } /* * Output a raw packet to a device layer. This bypasses all the other * protocol layers and you must therefore supply it with a complete frame */ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); struct sk_buff *skb = NULL; struct net_device *dev; struct sockcm_cookie sockc; __be16 proto = 0; int err; int extra_len = 0; /* * Get and verify the address. */ if (saddr) { if (msg->msg_namelen < sizeof(struct sockaddr)) return -EINVAL; if (msg->msg_namelen == sizeof(struct sockaddr_pkt)) proto = saddr->spkt_protocol; } else return -ENOTCONN; /* SOCK_PACKET must be sent giving an address */ /* * Find the device first to size check it */ saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0; retry: rcu_read_lock(); dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device); err = -ENODEV; if (dev == NULL) goto out_unlock; err = -ENETDOWN; if (!(dev->flags & IFF_UP)) goto out_unlock; /* * You may not queue a frame bigger than the mtu. This is the lowest level * raw protocol and you must do your own fragmentation at this level. */ if (unlikely(sock_flag(sk, SOCK_NOFCS))) { if (!netif_supports_nofcs(dev)) { err = -EPROTONOSUPPORT; goto out_unlock; } extra_len = 4; /* We're doing our own CRC */ } err = -EMSGSIZE; if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len) goto out_unlock; if (!skb) { size_t reserved = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0; rcu_read_unlock(); skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL); if (skb == NULL) return -ENOBUFS; /* FIXME: Save some space for broken drivers that write a hard * header at transmission time by themselves. PPP is the notable * one here. This should really be fixed at the driver level. */ skb_reserve(skb, reserved); skb_reset_network_header(skb); /* Try to align data part correctly */ if (hhlen) { skb->data -= hhlen; skb->tail -= hhlen; if (len < hhlen) skb_reset_network_header(skb); } err = memcpy_from_msg(skb_put(skb, len), msg, len); if (err) goto out_free; goto retry; } if (!dev_validate_header(dev, skb->data, len) || !skb->len) { err = -EINVAL; goto out_unlock; } if (len > (dev->mtu + dev->hard_header_len + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { err = -EMSGSIZE; goto out_unlock; } sockcm_init(&sockc, sk); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) goto out_unlock; } skb->protocol = proto; skb->dev = dev; skb->priority = sockc.priority; skb->mark = sockc.mark; skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); skb_setup_tx_timestamp(skb, &sockc); if (unlikely(extra_len == 4)) skb->no_fcs = 1; packet_parse_headers(skb, sock); dev_queue_xmit(skb); rcu_read_unlock(); return len; out_unlock: rcu_read_unlock(); out_free: kfree_skb(skb); return err; } static unsigned int run_filter(struct sk_buff *skb, const struct sock *sk, unsigned int res) { struct sk_filter *filter; rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter != NULL) res = bpf_prog_run_clear_cb(filter->prog, skb); rcu_read_unlock(); return res; } static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb, size_t *len, int vnet_hdr_sz) { struct virtio_net_hdr_mrg_rxbuf vnet_hdr = { .num_buffers = 0 }; if (*len < vnet_hdr_sz) return -EINVAL; *len -= vnet_hdr_sz; if (virtio_net_hdr_from_skb(skb, (struct virtio_net_hdr *)&vnet_hdr, vio_le(), true, 0)) return -EINVAL; return memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_sz); } /* * This function makes lazy skb cloning in hope that most of packets * are discarded by BPF. * * Note tricky part: we DO mangle shared skb! skb->data, skb->len * and skb->cb are mangled. It works because (and until) packets * falling here are owned by current CPU. Output packets are cloned * by dev_queue_xmit_nit(), input packets are processed by net_bh * sequentially, so that if we return skb to original state on exit, * we will not harm anyone. */ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { enum skb_drop_reason drop_reason = SKB_CONSUMED; struct sock *sk = NULL; struct sockaddr_ll *sll; struct packet_sock *po; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; sk = pt->af_packet_priv; po = pkt_sk(sk); if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; skb->dev = dev; if (dev_has_header(dev)) { /* The device has an explicit notion of ll header, * exported to higher levels. * * Otherwise, the device hides details of its frame * structure, so that corresponding packet head is * never delivered to user. */ if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb_network_offset(skb)); } } snaplen = skb_frags_readable(skb) ? skb->len : skb_headlen(skb); res = run_filter(skb, sk, snaplen); if (!res) goto drop_n_restore; if (snaplen > res) snaplen = res; if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto drop_n_acct; if (skb_shared(skb)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); if (nskb == NULL) goto drop_n_acct; if (skb_head != skb->data) { skb->data = skb_head; skb->len = skb_len; } consume_skb(skb); skb = nskb; } sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8); sll = &PACKET_SKB_CB(skb)->sa.ll; sll->sll_hatype = dev->type; sll->sll_pkttype = skb->pkt_type; if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; sll->sll_halen = dev_parse_header(skb, sll->sll_addr); /* sll->sll_family and sll->sll_protocol are set in packet_recvmsg(). * Use their space for storing the original skb length. */ PACKET_SKB_CB(skb)->sa.origlen = skb->len; if (pskb_trim(skb, snaplen)) goto drop_n_acct; skb_set_owner_r(skb, sk); skb->dev = NULL; skb_dst_drop(skb); /* drop conntrack reference */ nf_reset_ct(skb); spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_packets++; sock_skb_set_dropcount(sk, skb); skb_clear_delivery_time(skb); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); return 0; drop_n_acct: atomic_inc(&po->tp_drops); atomic_inc(&sk->sk_drops); drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR; drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { skb->data = skb_head; skb->len = skb_len; } drop: sk_skb_reason_drop(sk, skb, drop_reason); return 0; } static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { enum skb_drop_reason drop_reason = SKB_CONSUMED; struct sock *sk = NULL; struct packet_sock *po; struct sockaddr_ll *sll; union tpacket_uhdr h; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; unsigned long status = TP_STATUS_USER; unsigned short macoff, hdrlen; unsigned int netoff; struct sk_buff *copy_skb = NULL; struct timespec64 ts; __u32 ts_status; unsigned int slot_id = 0; int vnet_hdr_sz = 0; /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. * We may add members to them until current aligned size without forcing * userspace to call getsockopt(..., PACKET_HDRLEN, ...). */ BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32); BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48); if (skb->pkt_type == PACKET_LOOPBACK) goto drop; sk = pt->af_packet_priv; po = pkt_sk(sk); if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; if (dev_has_header(dev)) { if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb_network_offset(skb)); } } snaplen = skb_frags_readable(skb) ? skb->len : skb_headlen(skb); res = run_filter(skb, sk, snaplen); if (!res) goto drop_n_restore; /* If we are flooded, just give up */ if (__packet_rcv_has_room(po, skb) == ROOM_NONE) { atomic_inc(&po->tp_drops); goto drop_n_restore; } if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && skb_csum_unnecessary(skb)) status |= TP_STATUS_CSUM_VALID; if (skb_is_gso(skb) && skb_is_gso_tcp(skb)) status |= TP_STATUS_GSO_TCP; if (snaplen > res) snaplen = res; if (sk->sk_type == SOCK_DGRAM) { macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 + po->tp_reserve; } else { unsigned int maclen = skb_network_offset(skb); netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 16 : maclen)) + po->tp_reserve; vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz); if (vnet_hdr_sz) netoff += vnet_hdr_sz; macoff = netoff - maclen; } if (netoff > USHRT_MAX) { atomic_inc(&po->tp_drops); goto drop_n_restore; } if (po->tp_version <= TPACKET_V2) { if (macoff + snaplen > po->rx_ring.frame_size) { if (READ_ONCE(po->copy_thresh) && atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { if (skb_shared(skb)) { copy_skb = skb_clone(skb, GFP_ATOMIC); } else { copy_skb = skb_get(skb); skb_head = skb->data; } if (copy_skb) { memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); skb_set_owner_r(copy_skb, sk); } } snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) { snaplen = 0; vnet_hdr_sz = 0; } } } else if (unlikely(macoff + snaplen > GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) { u32 nval; nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff; pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n", snaplen, nval, macoff); snaplen = nval; if (unlikely((int)snaplen < 0)) { snaplen = 0; macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len; vnet_hdr_sz = 0; } } spin_lock(&sk->sk_receive_queue.lock); h.raw = packet_current_rx_frame(po, skb, TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) goto drop_n_account; if (po->tp_version <= TPACKET_V2) { slot_id = po->rx_ring.head; if (test_bit(slot_id, po->rx_ring.rx_owner_map)) goto drop_n_account; __set_bit(slot_id, po->rx_ring.rx_owner_map); } if (vnet_hdr_sz && virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), vio_le(), true, 0)) { if (po->tp_version == TPACKET_V3) prb_clear_blk_fill_status(&po->rx_ring); goto drop_n_account; } if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); /* * LOSING will be reported till you read the stats, * because it's COR - Clear On Read. * Anyways, moving it for V1/V2 only as V3 doesn't need this * at packet level. */ if (atomic_read(&po->tp_drops)) status |= TP_STATUS_LOSING; } po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; skb_clear_delivery_time(copy_skb); __skb_queue_tail(&sk->sk_receive_queue, copy_skb); } spin_unlock(&sk->sk_receive_queue.lock); skb_copy_bits(skb, 0, h.raw + macoff, snaplen); /* Always timestamp; prefer an existing software timestamp taken * closer to the time of capture. */ ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp) | SOF_TIMESTAMPING_SOFTWARE); if (!ts_status) ktime_get_real_ts64(&ts); status |= ts_status; switch (po->tp_version) { case TPACKET_V1: h.h1->tp_len = skb->len; h.h1->tp_snaplen = snaplen; h.h1->tp_mac = macoff; h.h1->tp_net = netoff; h.h1->tp_sec = ts.tv_sec; h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; hdrlen = sizeof(*h.h1); break; case TPACKET_V2: h.h2->tp_len = skb->len; h.h2->tp_snaplen = snaplen; h.h2->tp_mac = macoff; h.h2->tp_net = netoff; h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; if (skb_vlan_tag_present(skb)) { h.h2->tp_vlan_tci = skb_vlan_tag_get(skb); h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else if (unlikely(sk->sk_type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) { h.h2->tp_vlan_tci = vlan_get_tci(skb, skb->dev); h.h2->tp_vlan_tpid = ntohs(skb->protocol); status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { h.h2->tp_vlan_tci = 0; h.h2->tp_vlan_tpid = 0; } memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding)); hdrlen = sizeof(*h.h2); break; case TPACKET_V3: /* tp_nxt_offset,vlan are already populated above. * So DONT clear those fields here */ h.h3->tp_status |= status; h.h3->tp_len = skb->len; h.h3->tp_snaplen = snaplen; h.h3->tp_mac = macoff; h.h3->tp_net = netoff; h.h3->tp_sec = ts.tv_sec; h.h3->tp_nsec = ts.tv_nsec; memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding)); hdrlen = sizeof(*h.h3); break; default: BUG(); } sll = h.raw + TPACKET_ALIGN(hdrlen); sll->sll_halen = dev_parse_header(skb, sll->sll_addr); sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; sll->sll_protocol = (sk->sk_type == SOCK_DGRAM) ? vlan_get_protocol_dgram(skb) : skb->protocol; sll->sll_pkttype = skb->pkt_type; if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; smp_mb(); #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 if (po->tp_version <= TPACKET_V2) { u8 *start, *end; end = (u8 *) PAGE_ALIGN((unsigned long) h.raw + macoff + snaplen); for (start = h.raw; start < end; start += PAGE_SIZE) flush_dcache_page(pgv_to_page(start)); } smp_wmb(); #endif if (po->tp_version <= TPACKET_V2) { spin_lock(&sk->sk_receive_queue.lock); __packet_set_status(po, h.raw, status); __clear_bit(slot_id, po->rx_ring.rx_owner_map); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); } else if (po->tp_version == TPACKET_V3) { prb_clear_blk_fill_status(&po->rx_ring); } drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { skb->data = skb_head; skb->len = skb_len; } drop: sk_skb_reason_drop(sk, skb, drop_reason); return 0; drop_n_account: spin_unlock(&sk->sk_receive_queue.lock); atomic_inc(&po->tp_drops); drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR; sk->sk_data_ready(sk); sk_skb_reason_drop(sk, copy_skb, drop_reason); goto drop_n_restore; } static void tpacket_destruct_skb(struct sk_buff *skb) { struct packet_sock *po = pkt_sk(skb->sk); if (likely(po->tx_ring.pg_vec)) { void *ph; __u32 ts; ph = skb_zcopy_get_nouarg(skb); packet_dec_pending(&po->tx_ring); ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); complete(&po->skb_completion); } sock_wfree(skb); } static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len) { if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 > __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len))) vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(), __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2); if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len) return -EINVAL; return 0; } static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len, struct virtio_net_hdr *vnet_hdr, int vnet_hdr_sz) { int ret; if (*len < vnet_hdr_sz) return -EINVAL; *len -= vnet_hdr_sz; if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter)) return -EFAULT; ret = __packet_snd_vnet_parse(vnet_hdr, *len); if (ret) return ret; /* move iter to point to the start of mac header */ if (vnet_hdr_sz != sizeof(struct virtio_net_hdr)) iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - sizeof(struct virtio_net_hdr)); return 0; } static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, void *data, int tp_len, __be16 proto, unsigned char *addr, int hlen, int copylen, const struct sockcm_cookie *sockc) { union tpacket_uhdr ph; int to_write, offset, len, nr_frags, len_max; struct socket *sock = po->sk.sk_socket; struct page *page; int err; ph.raw = frame; skb->protocol = proto; skb->dev = dev; skb->priority = sockc->priority; skb->mark = sockc->mark; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid); skb_setup_tx_timestamp(skb, sockc); skb_zcopy_set_nouarg(skb, ph.raw); skb_reserve(skb, hlen); skb_reset_network_header(skb); to_write = tp_len; if (sock->type == SOCK_DGRAM) { err = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, tp_len); if (unlikely(err < 0)) return -EINVAL; } else if (copylen) { int hdrlen = min_t(int, copylen, tp_len); skb_push(skb, dev->hard_header_len); skb_put(skb, copylen - dev->hard_header_len); err = skb_store_bits(skb, 0, data, hdrlen); if (unlikely(err)) return err; if (!dev_validate_header(dev, skb->data, hdrlen)) return -EINVAL; data += hdrlen; to_write -= hdrlen; } offset = offset_in_page(data); len_max = PAGE_SIZE - offset; len = ((to_write > len_max) ? len_max : to_write); skb->data_len = to_write; skb->len += to_write; skb->truesize += to_write; refcount_add(to_write, &po->sk.sk_wmem_alloc); while (likely(to_write)) { nr_frags = skb_shinfo(skb)->nr_frags; if (unlikely(nr_frags >= MAX_SKB_FRAGS)) { pr_err("Packet exceed the number of skb frags(%u)\n", (unsigned int)MAX_SKB_FRAGS); return -EFAULT; } page = pgv_to_page(data); data += len; flush_dcache_page(page); get_page(page); skb_fill_page_desc(skb, nr_frags, page, offset, len); to_write -= len; offset = 0; len_max = PAGE_SIZE; len = ((to_write > len_max) ? len_max : to_write); } packet_parse_headers(skb, sock); return tp_len; } static int tpacket_parse_header(struct packet_sock *po, void *frame, int size_max, void **data) { union tpacket_uhdr ph; int tp_len, off; ph.raw = frame; switch (po->tp_version) { case TPACKET_V3: if (ph.h3->tp_next_offset != 0) { pr_warn_once("variable sized slot not supported"); return -EINVAL; } tp_len = ph.h3->tp_len; break; case TPACKET_V2: tp_len = ph.h2->tp_len; break; default: tp_len = ph.h1->tp_len; break; } if (unlikely(tp_len > size_max)) { pr_err("packet size is too long (%d > %d)\n", tp_len, size_max); return -EMSGSIZE; } if (unlikely(packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF))) { int off_min, off_max; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); off_max = po->tx_ring.frame_size - tp_len; if (po->sk.sk_type == SOCK_DGRAM) { switch (po->tp_version) { case TPACKET_V3: off = ph.h3->tp_net; break; case TPACKET_V2: off = ph.h2->tp_net; break; default: off = ph.h1->tp_net; break; } } else { switch (po->tp_version) { case TPACKET_V3: off = ph.h3->tp_mac; break; case TPACKET_V2: off = ph.h2->tp_mac; break; default: off = ph.h1->tp_mac; break; } } if (unlikely((off < off_min) || (off_max < off))) return -EINVAL; } else { off = po->tp_hdrlen - sizeof(struct sockaddr_ll); } *data = frame + off; return tp_len; } static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb = NULL; struct net_device *dev; struct virtio_net_hdr *vnet_hdr = NULL; struct sockcm_cookie sockc; __be16 proto; int err, reserve = 0; void *ph; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz); unsigned char *addr = NULL; int tp_len, size_max; void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; int hlen, tlen, copylen = 0; long timeo = 0; mutex_lock(&po->pg_vec_lock); /* packet_sendmsg() check on tx_ring.pg_vec was lockless, * we need to confirm it under protection of pg_vec_lock. */ if (unlikely(!po->tx_ring.pg_vec)) { err = -EBUSY; goto out; } if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) goto out; if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); if (po->sk.sk_socket->type == SOCK_DGRAM) { if (dev && msg->msg_namelen < dev->addr_len + offsetof(struct sockaddr_ll, sll_addr)) goto out_put; addr = saddr->sll_addr; } } err = -ENXIO; if (unlikely(dev == NULL)) goto out; err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; sockcm_init(&sockc, &po->sk); if (msg->msg_controllen) { err = sock_cmsg_send(&po->sk, msg, &sockc); if (unlikely(err)) goto out_put; } if (po->sk.sk_socket->type == SOCK_RAW) reserve = dev->hard_header_len; size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz) size_max = dev->mtu + reserve + VLAN_HLEN; reinit_completion(&po->skb_completion); do { ph = packet_current_frame(po, &po->tx_ring, TP_STATUS_SEND_REQUEST); if (unlikely(ph == NULL)) { if (need_wait && skb) { timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT); timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo); if (timeo <= 0) { err = !timeo ? -ETIMEDOUT : -ERESTARTSYS; goto out_put; } } /* check for additional frames */ continue; } skb = NULL; tp_len = tpacket_parse_header(po, ph, size_max, &data); if (tp_len < 0) goto tpacket_error; status = TP_STATUS_SEND_REQUEST; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; if (vnet_hdr_sz) { vnet_hdr = data; data += vnet_hdr_sz; tp_len -= vnet_hdr_sz; if (tp_len < 0 || __packet_snd_vnet_parse(vnet_hdr, tp_len)) { tp_len = -EINVAL; goto tpacket_error; } copylen = __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len); } copylen = max_t(int, copylen, dev->hard_header_len); skb = sock_alloc_send_skb(&po->sk, hlen + tlen + sizeof(struct sockaddr_ll) + (copylen - dev->hard_header_len), !need_wait, &err); if (unlikely(skb == NULL)) { /* we assume the socket was initially writeable ... */ if (likely(len_sum > 0)) err = len_sum; goto out_status; } tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto, addr, hlen, copylen, &sockc); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && !vnet_hdr_sz && !packet_extra_vlan_len_allowed(dev, skb)) tp_len = -EMSGSIZE; if (unlikely(tp_len < 0)) { tpacket_error: if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) { __packet_set_status(po, ph, TP_STATUS_AVAILABLE); packet_increment_head(&po->tx_ring); kfree_skb(skb); continue; } else { status = TP_STATUS_WRONG_FORMAT; err = tp_len; goto out_status; } } if (vnet_hdr_sz) { if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { tp_len = -EINVAL; goto tpacket_error; } virtio_net_hdr_set_proto(skb, vnet_hdr); } skb->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); packet_inc_pending(&po->tx_ring); status = TP_STATUS_SEND_REQUEST; err = packet_xmit(po, skb); if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); if (err && __packet_get_status(po, ph) == TP_STATUS_AVAILABLE) { /* skb was destructed already */ skb = NULL; goto out_status; } /* * skb was dropped but not destructed yet; * let's treat it like congestion or err < 0 */ err = 0; } packet_increment_head(&po->tx_ring); len_sum += tp_len; } while (likely((ph != NULL) || /* Note: packet_read_pending() might be slow if we have * to call it as it's per_cpu variable, but in fast-path * we already short-circuit the loop with the first * condition, and luckily don't have to go that path * anyway. */ (need_wait && packet_read_pending(&po->tx_ring)))); err = len_sum; goto out_put; out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; } static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, size_t reserve, size_t len, size_t linear, int noblock, int *err) { struct sk_buff *skb; /* Under a page? Don't bother with paged skb. */ if (prepad + len < PAGE_SIZE || !linear) linear = len; if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER); skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, err, PAGE_ALLOC_COSTLY_ORDER); if (!skb) return NULL; skb_reserve(skb, reserve); skb_put(skb, linear); skb->data_len = len - linear; skb->len += len - linear; return skb; } static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); struct sk_buff *skb; struct net_device *dev; __be16 proto; unsigned char *addr = NULL; int err, reserve = 0; struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; struct packet_sock *po = pkt_sk(sk); int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz); int hlen, tlen, linear; int extra_len = 0; /* * Get and verify the address. */ if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) goto out; if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); if (sock->type == SOCK_DGRAM) { if (dev && msg->msg_namelen < dev->addr_len + offsetof(struct sockaddr_ll, sll_addr)) goto out_unlock; addr = saddr->sll_addr; } } err = -ENXIO; if (unlikely(dev == NULL)) goto out_unlock; err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; sockcm_init(&sockc, sk); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) goto out_unlock; } if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; if (vnet_hdr_sz) { err = packet_snd_vnet_parse(msg, &len, &vnet_hdr, vnet_hdr_sz); if (err) goto out_unlock; } if (unlikely(sock_flag(sk, SOCK_NOFCS))) { if (!netif_supports_nofcs(dev)) { err = -EPROTONOSUPPORT; goto out_unlock; } extra_len = 4; /* We're doing our own CRC */ } err = -EMSGSIZE; if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) goto out_unlock; err = -ENOBUFS; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len); linear = max(linear, min_t(int, len, dev->hard_header_len)); skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear, msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out_unlock; skb_reset_network_header(skb); err = -EINVAL; if (sock->type == SOCK_DGRAM) { offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (unlikely(offset < 0)) goto out_free; } else if (reserve) { skb_reserve(skb, -reserve); if (len < reserve + sizeof(struct ipv6hdr) && dev->min_header_len != dev->hard_header_len) skb_reset_network_header(skb); } /* Returns -EFAULT on error */ err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len); if (err) goto out_free; if ((sock->type == SOCK_RAW && !dev_validate_header(dev, skb->data, len)) || !skb->len) { err = -EINVAL; goto out_free; } skb_setup_tx_timestamp(skb, &sockc); if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { err = -EMSGSIZE; goto out_free; } skb->protocol = proto; skb->dev = dev; skb->priority = sockc.priority; skb->mark = sockc.mark; skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); if (unlikely(extra_len == 4)) skb->no_fcs = 1; packet_parse_headers(skb, sock); if (vnet_hdr_sz) { err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); if (err) goto out_free; len += vnet_hdr_sz; virtio_net_hdr_set_proto(skb, &vnet_hdr); } err = packet_xmit(po, skb); if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); if (err) goto out_unlock; } dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: dev_put(dev); out: return err; } static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. * tpacket_snd() will redo the check safely. */ if (data_race(po->tx_ring.pg_vec)) return tpacket_snd(po, msg); return packet_snd(sock, msg, len); } /* * Close a PACKET socket. This is fairly simple. We immediately go * to 'closed' state and remove our protocol entry in the device list. */ static int packet_release(struct socket *sock) { struct sock *sk = sock->sk; struct packet_sock *po; struct packet_fanout *f; struct net *net; union tpacket_req_u req_u; if (!sk) return 0; net = sock_net(sk); po = pkt_sk(sk); mutex_lock(&net->packet.sklist_lock); sk_del_node_init_rcu(sk); mutex_unlock(&net->packet.sklist_lock); sock_prot_inuse_add(net, sk->sk_prot, -1); spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); packet_cached_dev_reset(po); if (po->prot_hook.dev) { netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); packet_flush_mclist(sk); lock_sock(sk); if (po->rx_ring.pg_vec) { memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 0); } if (po->tx_ring.pg_vec) { memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 1); } release_sock(sk); f = fanout_release(sk); synchronize_net(); kfree(po->rollover); if (f) { fanout_release_data(f); kvfree(f); } /* * Now the socket is dead. No more input will appear. */ sock_orphan(sk); sock->sk = NULL; /* Purge queues */ skb_queue_purge(&sk->sk_receive_queue); packet_free_pending(po); sock_put(sk); return 0; } /* * Attach a packet hook. */ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, __be16 proto) { struct packet_sock *po = pkt_sk(sk); struct net_device *dev = NULL; bool unlisted = false; bool need_rehook; int ret = 0; lock_sock(sk); spin_lock(&po->bind_lock); if (!proto) proto = po->num; rcu_read_lock(); if (po->fanout) { ret = -EINVAL; goto out_unlock; } if (name) { dev = dev_get_by_name_rcu(sock_net(sk), name); if (!dev) { ret = -ENODEV; goto out_unlock; } } else if (ifindex) { dev = dev_get_by_index_rcu(sock_net(sk), ifindex); if (!dev) { ret = -ENODEV; goto out_unlock; } } need_rehook = po->prot_hook.type != proto || po->prot_hook.dev != dev; if (need_rehook) { dev_hold(dev); if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { rcu_read_unlock(); /* prevents packet_notifier() from calling * register_prot_hook() */ WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, true); rcu_read_lock(); if (dev) unlisted = !dev_get_by_index_rcu(sock_net(sk), dev->ifindex); } BUG_ON(packet_sock_flag(po, PACKET_SOCK_RUNNING)); WRITE_ONCE(po->num, proto); po->prot_hook.type = proto; netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); if (unlikely(unlisted)) { po->prot_hook.dev = NULL; WRITE_ONCE(po->ifindex, -1); packet_cached_dev_reset(po); } else { netdev_hold(dev, &po->prot_hook.dev_tracker, GFP_ATOMIC); po->prot_hook.dev = dev; WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0); packet_cached_dev_assign(po, dev); } dev_put(dev); } if (proto == 0 || !need_rehook) goto out_unlock; if (!unlisted && (!dev || (dev->flags & IFF_UP))) { register_prot_hook(sk); } else { sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } out_unlock: rcu_read_unlock(); spin_unlock(&po->bind_lock); release_sock(sk); return ret; } /* * Bind a packet socket to a device */ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; char name[sizeof(uaddr->sa_data_min) + 1]; /* * Check legality */ if (addr_len != sizeof(struct sockaddr)) return -EINVAL; /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. */ memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); name[sizeof(uaddr->sa_data_min)] = 0; return packet_do_bind(sk, name, 0, 0); } static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; struct sock *sk = sock->sk; /* * Check legality */ if (addr_len < sizeof(struct sockaddr_ll)) return -EINVAL; if (sll->sll_family != AF_PACKET) return -EINVAL; return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol); } static struct proto packet_proto = { .name = "PACKET", .owner = THIS_MODULE, .obj_size = sizeof(struct packet_sock), }; /* * Create a packet of type SOCK_PACKET. */ static int packet_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; struct packet_sock *po; __be16 proto = (__force __be16)protocol; /* weird, but documented */ int err; if (!ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && sock->type != SOCK_PACKET) return -ESOCKTNOSUPPORT; sock->state = SS_UNCONNECTED; err = -ENOBUFS; sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern); if (sk == NULL) goto out; sock->ops = &packet_ops; if (sock->type == SOCK_PACKET) sock->ops = &packet_ops_spkt; po = pkt_sk(sk); err = packet_alloc_pending(po); if (err) goto out_sk_free; sock_init_data(sock, sk); init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; /* * Attach a protocol block */ spin_lock_init(&po->bind_lock); mutex_init(&po->pg_vec_lock); po->rollover = NULL; po->prot_hook.func = packet_rcv; if (sock->type == SOCK_PACKET) po->prot_hook.func = packet_rcv_spkt; po->prot_hook.af_packet_priv = sk; po->prot_hook.af_packet_net = sock_net(sk); if (proto) { po->prot_hook.type = proto; __register_prot_hook(sk); } mutex_lock(&net->packet.sklist_lock); sk_add_node_tail_rcu(sk, &net->packet.sklist); mutex_unlock(&net->packet.sklist_lock); sock_prot_inuse_add(net, &packet_proto, 1); return 0; out_sk_free: sk_free(sk); out: return err; } /* * Pull a packet from our receive queue and hand it to the user. * If necessary we block. */ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; int vnet_hdr_len = READ_ONCE(pkt_sk(sk)->vnet_hdr_sz); unsigned int origlen = 0; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE)) goto out; #if 0 /* What error should we return now? EUNATTACH? */ if (pkt_sk(sk)->ifindex < 0) return -ENODEV; #endif if (flags & MSG_ERRQUEUE) { err = sock_recv_errqueue(sk, msg, len, SOL_PACKET, PACKET_TX_TIMESTAMP); goto out; } /* * Call the generic datagram receiver. This handles all sorts * of horrible races and re-entrancy so we can forget about it * in the protocol layers. * * Now it will return ENETDOWN, if device have just gone down, * but then it will block. */ skb = skb_recv_datagram(sk, flags, &err); /* * An error occurred so return it. Because skb_recv_datagram() * handles the blocking we don't see and worry about blocking * retries. */ if (skb == NULL) goto out; packet_rcv_try_clear_pressure(pkt_sk(sk)); if (vnet_hdr_len) { err = packet_rcv_vnet(msg, skb, &len, vnet_hdr_len); if (err) goto out_free; } /* You lose any data beyond the buffer you gave. If it worries * a user program they can ask the device for its MTU * anyway. */ copied = skb->len; if (copied > len) { copied = len; msg->msg_flags |= MSG_TRUNC; } err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free; if (sock->type != SOCK_PACKET) { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; /* Original length was stored in sockaddr_ll fields */ origlen = PACKET_SKB_CB(skb)->sa.origlen; sll->sll_family = AF_PACKET; sll->sll_protocol = (sock->type == SOCK_DGRAM) ? vlan_get_protocol_dgram(skb) : skb->protocol; } sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { const size_t max_len = min(sizeof(skb->cb), sizeof(struct sockaddr_storage)); int copy_len; /* If the address length field is there to be filled * in, we fill it in now. */ if (sock->type == SOCK_PACKET) { __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); copy_len = msg->msg_namelen; } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); copy_len = msg->msg_namelen; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) { memset(msg->msg_name + offsetof(struct sockaddr_ll, sll_addr), 0, sizeof(sll->sll_addr)); msg->msg_namelen = sizeof(struct sockaddr_ll); } } if (WARN_ON_ONCE(copy_len > max_len)) { copy_len = max_len; msg->msg_namelen = copy_len; } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) { struct tpacket_auxdata aux; aux.tp_status = TP_STATUS_USER; if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && skb_csum_unnecessary(skb)) aux.tp_status |= TP_STATUS_CSUM_VALID; if (skb_is_gso(skb) && skb_is_gso_tcp(skb)) aux.tp_status |= TP_STATUS_GSO_TCP; aux.tp_len = origlen; aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); if (skb_vlan_tag_present(skb)) { aux.tp_vlan_tci = skb_vlan_tag_get(skb); aux.tp_vlan_tpid = ntohs(skb->vlan_proto); aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else if (unlikely(sock->type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; struct net_device *dev; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), sll->sll_ifindex); if (dev) { aux.tp_vlan_tci = vlan_get_tci(skb, dev); aux.tp_vlan_tpid = ntohs(skb->protocol); aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { aux.tp_vlan_tci = 0; aux.tp_vlan_tpid = 0; } rcu_read_unlock(); } else { aux.tp_vlan_tci = 0; aux.tp_vlan_tpid = 0; } put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } /* * Free or return the buffer as appropriate. Again this * hides all the races and re-entrancy issues from us. */ err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied); out_free: skb_free_datagram(sk, skb); out: return err; } static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, int peer) { struct net_device *dev; struct sock *sk = sock->sk; if (peer) return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); rcu_read_unlock(); return sizeof(*uaddr); } static int packet_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct net_device *dev; struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); int ifindex; if (peer) return -EOPNOTSUPP; ifindex = READ_ONCE(po->ifindex); sll->sll_family = AF_PACKET; sll->sll_ifindex = ifindex; sll->sll_protocol = READ_ONCE(po->num); sll->sll_pkttype = 0; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; /* Let __fortify_memcpy_chk() know the actual buffer size. */ memcpy(((struct sockaddr_storage *)sll)->__data + offsetof(struct sockaddr_ll, sll_addr) - offsetofend(struct sockaddr_ll, sll_family), dev->dev_addr, dev->addr_len); } else { sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ sll->sll_halen = 0; } rcu_read_unlock(); return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; } static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what) { switch (i->type) { case PACKET_MR_MULTICAST: if (i->alen != dev->addr_len) return -EINVAL; if (what > 0) return dev_mc_add(dev, i->addr); else return dev_mc_del(dev, i->addr); break; case PACKET_MR_PROMISC: return dev_set_promiscuity(dev, what); case PACKET_MR_ALLMULTI: return dev_set_allmulti(dev, what); case PACKET_MR_UNICAST: if (i->alen != dev->addr_len) return -EINVAL; if (what > 0) return dev_uc_add(dev, i->addr); else return dev_uc_del(dev, i->addr); break; default: break; } return 0; } static void packet_dev_mclist_delete(struct net_device *dev, struct packet_mclist **mlp) { struct packet_mclist *ml; while ((ml = *mlp) != NULL) { if (ml->ifindex == dev->ifindex) { packet_dev_mc(dev, ml, -1); *mlp = ml->next; kfree(ml); } else mlp = &ml->next; } } static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_sock *po = pkt_sk(sk); struct packet_mclist *ml, *i; struct net_device *dev; int err; rtnl_lock(); err = -ENODEV; dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex); if (!dev) goto done; err = -EINVAL; if (mreq->mr_alen > dev->addr_len) goto done; err = -ENOBUFS; i = kmalloc(sizeof(*i), GFP_KERNEL); if (i == NULL) goto done; err = 0; for (ml = po->mclist; ml; ml = ml->next) { if (ml->ifindex == mreq->mr_ifindex && ml->type == mreq->mr_type && ml->alen == mreq->mr_alen && memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { ml->count++; /* Free the new element ... */ kfree(i); goto done; } } i->type = mreq->mr_type; i->ifindex = mreq->mr_ifindex; i->alen = mreq->mr_alen; memcpy(i->addr, mreq->mr_address, i->alen); memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen); i->count = 1; i->next = po->mclist; po->mclist = i; err = packet_dev_mc(dev, i, 1); if (err) { po->mclist = i->next; kfree(i); } done: rtnl_unlock(); return err; } static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_mclist *ml, **mlp; rtnl_lock(); for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) { if (ml->ifindex == mreq->mr_ifindex && ml->type == mreq->mr_type && ml->alen == mreq->mr_alen && memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { if (--ml->count == 0) { struct net_device *dev; *mlp = ml->next; dev = __dev_get_by_index(sock_net(sk), ml->ifindex); if (dev) packet_dev_mc(dev, ml, -1); kfree(ml); } break; } } rtnl_unlock(); return 0; } static void packet_flush_mclist(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); struct packet_mclist *ml; if (!po->mclist) return; rtnl_lock(); while ((ml = po->mclist) != NULL) { struct net_device *dev; po->mclist = ml->next; dev = __dev_get_by_index(sock_net(sk), ml->ifindex); if (dev != NULL) packet_dev_mc(dev, ml, -1); kfree(ml); } rtnl_unlock(); } static int packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); int ret; if (level != SOL_PACKET) return -ENOPROTOOPT; switch (optname) { case PACKET_ADD_MEMBERSHIP: case PACKET_DROP_MEMBERSHIP: { struct packet_mreq_max mreq; int len = optlen; memset(&mreq, 0, sizeof(mreq)); if (len < sizeof(struct packet_mreq)) return -EINVAL; if (len > sizeof(mreq)) len = sizeof(mreq); if (copy_from_sockptr(&mreq, optval, len)) return -EFAULT; if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) return -EINVAL; if (optname == PACKET_ADD_MEMBERSHIP) ret = packet_mc_add(sk, &mreq); else ret = packet_mc_drop(sk, &mreq); return ret; } case PACKET_RX_RING: case PACKET_TX_RING: { union tpacket_req_u req_u; ret = -EINVAL; lock_sock(sk); switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: if (optlen < sizeof(req_u.req)) break; ret = copy_from_sockptr(&req_u.req, optval, sizeof(req_u.req)) ? -EINVAL : 0; break; case TPACKET_V3: default: if (optlen < sizeof(req_u.req3)) break; ret = copy_from_sockptr(&req_u.req3, optval, sizeof(req_u.req3)) ? -EINVAL : 0; break; } if (!ret) ret = packet_set_ring(sk, &req_u, 0, optname == PACKET_TX_RING); release_sock(sk); return ret; } case PACKET_COPY_THRESH: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; WRITE_ONCE(pkt_sk(sk)->copy_thresh, val); return 0; } case PACKET_VERSION: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; switch (val) { case TPACKET_V1: case TPACKET_V2: case TPACKET_V3: break; default: return -EINVAL; } lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { po->tp_version = val; ret = 0; } release_sock(sk); return ret; } case PACKET_RESERVE: { unsigned int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; if (val > INT_MAX) return -EINVAL; lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { po->tp_reserve = val; ret = 0; } release_sock(sk); return ret; } case PACKET_LOSS: { unsigned int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { packet_sock_flag_set(po, PACKET_SOCK_TP_LOSS, val); ret = 0; } release_sock(sk); return ret; } case PACKET_AUXDATA: { int val; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val); return 0; } case PACKET_ORIGDEV: { int val; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val); return 0; } case PACKET_VNET_HDR: case PACKET_VNET_HDR_SZ: { int val, hdr_len; if (sock->type != SOCK_RAW) return -EINVAL; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; if (optname == PACKET_VNET_HDR_SZ) { if (val && val != sizeof(struct virtio_net_hdr) && val != sizeof(struct virtio_net_hdr_mrg_rxbuf)) return -EINVAL; hdr_len = val; } else { hdr_len = val ? sizeof(struct virtio_net_hdr) : 0; } lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { WRITE_ONCE(po->vnet_hdr_sz, hdr_len); ret = 0; } release_sock(sk); return ret; } case PACKET_TIMESTAMP: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; WRITE_ONCE(po->tp_tstamp, val); return 0; } case PACKET_FANOUT: { struct fanout_args args = { 0 }; if (optlen != sizeof(int) && optlen != sizeof(args)) return -EINVAL; if (copy_from_sockptr(&args, optval, optlen)) return -EFAULT; return fanout_add(sk, &args); } case PACKET_FANOUT_DATA: { /* Paired with the WRITE_ONCE() in fanout_add() */ if (!READ_ONCE(po->fanout)) return -EINVAL; return fanout_set_data(po, optval, optlen); } case PACKET_IGNORE_OUTGOING: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; if (val < 0 || val > 1) return -EINVAL; WRITE_ONCE(po->prot_hook.ignore_outgoing, !!val); return 0; } case PACKET_TX_HAS_OFF: { unsigned int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; lock_sock(sk); if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec) packet_sock_flag_set(po, PACKET_SOCK_TX_HAS_OFF, val); release_sock(sk); return 0; } case PACKET_QDISC_BYPASS: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; packet_sock_flag_set(po, PACKET_SOCK_QDISC_BYPASS, val); return 0; } default: return -ENOPROTOOPT; } } static int packet_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { int len; int val, lv = sizeof(val); struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); void *data = &val; union tpacket_stats_u st; struct tpacket_rollover_stats rstats; int drops; if (level != SOL_PACKET) return -ENOPROTOOPT; if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; switch (optname) { case PACKET_STATISTICS: spin_lock_bh(&sk->sk_receive_queue.lock); memcpy(&st, &po->stats, sizeof(st)); memset(&po->stats, 0, sizeof(po->stats)); spin_unlock_bh(&sk->sk_receive_queue.lock); drops = atomic_xchg(&po->tp_drops, 0); if (po->tp_version == TPACKET_V3) { lv = sizeof(struct tpacket_stats_v3); st.stats3.tp_drops = drops; st.stats3.tp_packets += drops; data = &st.stats3; } else { lv = sizeof(struct tpacket_stats); st.stats1.tp_drops = drops; st.stats1.tp_packets += drops; data = &st.stats1; } break; case PACKET_AUXDATA: val = packet_sock_flag(po, PACKET_SOCK_AUXDATA); break; case PACKET_ORIGDEV: val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); break; case PACKET_VNET_HDR: val = !!READ_ONCE(po->vnet_hdr_sz); break; case PACKET_VNET_HDR_SZ: val = READ_ONCE(po->vnet_hdr_sz); break; case PACKET_COPY_THRESH: val = READ_ONCE(pkt_sk(sk)->copy_thresh); break; case PACKET_VERSION: val = po->tp_version; break; case PACKET_HDRLEN: if (len > sizeof(int)) len = sizeof(int); if (len < sizeof(int)) return -EINVAL; if (copy_from_user(&val, optval, len)) return -EFAULT; switch (val) { case TPACKET_V1: val = sizeof(struct tpacket_hdr); break; case TPACKET_V2: val = sizeof(struct tpacket2_hdr); break; case TPACKET_V3: val = sizeof(struct tpacket3_hdr); break; default: return -EINVAL; } break; case PACKET_RESERVE: val = po->tp_reserve; break; case PACKET_LOSS: val = packet_sock_flag(po, PACKET_SOCK_TP_LOSS); break; case PACKET_TIMESTAMP: val = READ_ONCE(po->tp_tstamp); break; case PACKET_FANOUT: val = (po->fanout ? ((u32)po->fanout->id | ((u32)po->fanout->type << 16) | ((u32)po->fanout->flags << 24)) : 0); break; case PACKET_IGNORE_OUTGOING: val = READ_ONCE(po->prot_hook.ignore_outgoing); break; case PACKET_ROLLOVER_STATS: if (!po->rollover) return -EINVAL; rstats.tp_all = atomic_long_read(&po->rollover->num); rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); data = &rstats; lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: val = packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF); break; case PACKET_QDISC_BYPASS: val = packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS); break; default: return -ENOPROTOOPT; } if (len > lv) len = lv; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, data, len)) return -EFAULT; return 0; } static int packet_notifier(struct notifier_block *this, unsigned long msg, void *ptr) { struct sock *sk; struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); rcu_read_lock(); sk_for_each_rcu(sk, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { case NETDEV_UNREGISTER: if (po->mclist) packet_dev_mclist_delete(dev, &po->mclist); fallthrough; case NETDEV_DOWN: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { __unregister_prot_hook(sk, false); sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); WRITE_ONCE(po->ifindex, -1); netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); } break; case NETDEV_UP: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); if (po->num) register_prot_hook(sk); spin_unlock(&po->bind_lock); } break; } } rcu_read_unlock(); return NOTIFY_DONE; } static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; switch (cmd) { case SIOCOUTQ: { int amount = sk_wmem_alloc_get(sk); return put_user(amount, (int __user *)arg); } case SIOCINQ: { struct sk_buff *skb; int amount = 0; spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) amount = skb->len; spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } #ifdef CONFIG_INET case SIOCADDRT: case SIOCDELRT: case SIOCDARP: case SIOCGARP: case SIOCSARP: case SIOCGIFADDR: case SIOCSIFADDR: case SIOCGIFBRDADDR: case SIOCSIFBRDADDR: case SIOCGIFNETMASK: case SIOCSIFNETMASK: case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: return inet_dgram_ops.ioctl(sock, cmd, arg); #endif default: return -ENOIOCTLCMD; } return 0; } static __poll_t packet_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); __poll_t mask = datagram_poll(file, sock, wait); spin_lock_bh(&sk->sk_receive_queue.lock); if (po->rx_ring.pg_vec) { if (!packet_previous_rx_frame(po, &po->rx_ring, TP_STATUS_KERNEL)) mask |= EPOLLIN | EPOLLRDNORM; } packet_rcv_try_clear_pressure(po); spin_unlock_bh(&sk->sk_receive_queue.lock); spin_lock_bh(&sk->sk_write_queue.lock); if (po->tx_ring.pg_vec) { if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE)) mask |= EPOLLOUT | EPOLLWRNORM; } spin_unlock_bh(&sk->sk_write_queue.lock); return mask; } /* Dirty? Well, I still did not learn better way to account * for user mmaps. */ static void packet_mm_open(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct socket *sock = file->private_data; struct sock *sk = sock->sk; if (sk) atomic_long_inc(&pkt_sk(sk)->mapped); } static void packet_mm_close(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct socket *sock = file->private_data; struct sock *sk = sock->sk; if (sk) atomic_long_dec(&pkt_sk(sk)->mapped); } static const struct vm_operations_struct packet_mmap_ops = { .open = packet_mm_open, .close = packet_mm_close, }; static void free_pg_vec(struct pgv *pg_vec, unsigned int order, unsigned int len) { int i; for (i = 0; i < len; i++) { if (likely(pg_vec[i].buffer)) { if (is_vmalloc_addr(pg_vec[i].buffer)) vfree(pg_vec[i].buffer); else free_pages((unsigned long)pg_vec[i].buffer, order); pg_vec[i].buffer = NULL; } } kfree(pg_vec); } static char *alloc_one_pg_vec_page(unsigned long order) { char *buffer; gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; buffer = (char *) __get_free_pages(gfp_flags, order); if (buffer) return buffer; /* __get_free_pages failed, fall back to vmalloc */ buffer = vzalloc(array_size((1 << order), PAGE_SIZE)); if (buffer) return buffer; /* vmalloc failed, lets dig into swap here */ gfp_flags &= ~__GFP_NORETRY; buffer = (char *) __get_free_pages(gfp_flags, order); if (buffer) return buffer; /* complete and utter failure */ return NULL; } static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) { unsigned int block_nr = req->tp_block_nr; struct pgv *pg_vec; int i; pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!pg_vec)) goto out; for (i = 0; i < block_nr; i++) { pg_vec[i].buffer = alloc_one_pg_vec_page(order); if (unlikely(!pg_vec[i].buffer)) goto out_free_pgvec; } out: return pg_vec; out_free_pgvec: free_pg_vec(pg_vec, order, block_nr); pg_vec = NULL; goto out; } static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring) { struct pgv *pg_vec = NULL; struct packet_sock *po = pkt_sk(sk); unsigned long *rx_owner_map = NULL; int was_running, order = 0; struct packet_ring_buffer *rb; struct sk_buff_head *rb_queue; __be16 num; int err; /* Added to avoid minimal code churn */ struct tpacket_req *req = &req_u->req; rb = tx_ring ? &po->tx_ring : &po->rx_ring; rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; err = -EBUSY; if (!closing) { if (atomic_long_read(&po->mapped)) goto out; if (packet_read_pending(rb)) goto out; } if (req->tp_block_nr) { unsigned int min_frame_size; /* Sanity tests and some calculations */ err = -EBUSY; if (unlikely(rb->pg_vec)) goto out; switch (po->tp_version) { case TPACKET_V1: po->tp_hdrlen = TPACKET_HDRLEN; break; case TPACKET_V2: po->tp_hdrlen = TPACKET2_HDRLEN; break; case TPACKET_V3: po->tp_hdrlen = TPACKET3_HDRLEN; break; } err = -EINVAL; if (unlikely((int)req->tp_block_size <= 0)) goto out; if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; min_frame_size = po->tp_hdrlen + po->tp_reserve; if (po->tp_version >= TPACKET_V3 && req->tp_block_size < BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size) goto out; if (unlikely(req->tp_frame_size < min_frame_size)) goto out; if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) goto out; rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr)) goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) goto out; err = -ENOMEM; order = get_order(req->tp_block_size); pg_vec = alloc_pg_vec(req, order); if (unlikely(!pg_vec)) goto out; switch (po->tp_version) { case TPACKET_V3: /* Block transmit is not supported yet */ if (!tx_ring) { init_prb_bdqc(po, rb, pg_vec, req_u); } else { struct tpacket_req3 *req3 = &req_u->req3; if (req3->tp_retire_blk_tov || req3->tp_sizeof_priv || req3->tp_feature_req_word) { err = -EINVAL; goto out_free_pg_vec; } } break; default: if (!tx_ring) { rx_owner_map = bitmap_alloc(req->tp_frame_nr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); if (!rx_owner_map) goto out_free_pg_vec; } break; } } /* Done */ else { err = -EINVAL; if (unlikely(req->tp_frame_nr)) goto out; } /* Detach socket from network */ spin_lock(&po->bind_lock); was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING); num = po->num; if (was_running) { WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, false); } spin_unlock(&po->bind_lock); synchronize_net(); err = -EBUSY; mutex_lock(&po->pg_vec_lock); if (closing || atomic_long_read(&po->mapped) == 0) { err = 0; spin_lock_bh(&rb_queue->lock); swap(rb->pg_vec, pg_vec); if (po->tp_version <= TPACKET_V2) swap(rb->rx_owner_map, rx_owner_map); rb->frame_max = (req->tp_frame_nr - 1); rb->head = 0; rb->frame_size = req->tp_frame_size; spin_unlock_bh(&rb_queue->lock); swap(rb->pg_vec_order, order); swap(rb->pg_vec_len, req->tp_block_nr); rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE; po->prot_hook.func = (po->rx_ring.pg_vec) ? tpacket_rcv : packet_rcv; skb_queue_purge(rb_queue); if (atomic_long_read(&po->mapped)) pr_err("packet_mmap: vma is busy: %ld\n", atomic_long_read(&po->mapped)); } mutex_unlock(&po->pg_vec_lock); spin_lock(&po->bind_lock); if (was_running) { WRITE_ONCE(po->num, num); register_prot_hook(sk); } spin_unlock(&po->bind_lock); if (pg_vec && (po->tp_version > TPACKET_V2)) { /* Because we don't support block-based V3 on tx-ring */ if (!tx_ring) prb_shutdown_retire_blk_timer(po, rb_queue); } out_free_pg_vec: if (pg_vec) { bitmap_free(rx_owner_map); free_pg_vec(pg_vec, order, req->tp_block_nr); } out: return err; } static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); unsigned long size, expected_size; struct packet_ring_buffer *rb; unsigned long start; int err = -EINVAL; int i; if (vma->vm_pgoff) return -EINVAL; mutex_lock(&po->pg_vec_lock); expected_size = 0; for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) { if (rb->pg_vec) { expected_size += rb->pg_vec_len * rb->pg_vec_pages * PAGE_SIZE; } } if (expected_size == 0) goto out; size = vma->vm_end - vma->vm_start; if (size != expected_size) goto out; start = vma->vm_start; for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) { if (rb->pg_vec == NULL) continue; for (i = 0; i < rb->pg_vec_len; i++) { struct page *page; void *kaddr = rb->pg_vec[i].buffer; int pg_num; for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) { page = pgv_to_page(kaddr); err = vm_insert_page(vma, start, page); if (unlikely(err)) goto out; start += PAGE_SIZE; kaddr += PAGE_SIZE; } } } atomic_long_inc(&po->mapped); vma->vm_ops = &packet_mmap_ops; err = 0; out: mutex_unlock(&po->pg_vec_lock); return err; } static const struct proto_ops packet_ops_spkt = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, .bind = packet_bind_spkt, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = packet_getname_spkt, .poll = datagram_poll, .ioctl = packet_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .sendmsg = packet_sendmsg_spkt, .recvmsg = packet_recvmsg, .mmap = sock_no_mmap, }; static const struct proto_ops packet_ops = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, .bind = packet_bind, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = packet_getname, .poll = packet_poll, .ioctl = packet_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, .getsockopt = packet_getsockopt, .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, }; static const struct net_proto_family packet_family_ops = { .family = PF_PACKET, .create = packet_create, .owner = THIS_MODULE, }; static struct notifier_block packet_netdev_notifier = { .notifier_call = packet_notifier, }; #ifdef CONFIG_PROC_FS static void *packet_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct net *net = seq_file_net(seq); rcu_read_lock(); return seq_hlist_start_head_rcu(&net->packet.sklist, *pos); } static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net *net = seq_file_net(seq); return seq_hlist_next_rcu(v, &net->packet.sklist, pos); } static void packet_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int packet_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_printf(seq, "%*sRefCnt Type Proto Iface R Rmem User Inode\n", IS_ENABLED(CONFIG_64BIT) ? -17 : -9, "sk"); else { struct sock *s = sk_entry(v); const struct packet_sock *po = pkt_sk(s); seq_printf(seq, "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n", s, refcount_read(&s->sk_refcnt), s->sk_type, ntohs(READ_ONCE(po->num)), READ_ONCE(po->ifindex), packet_sock_flag(po, PACKET_SOCK_RUNNING), atomic_read(&s->sk_rmem_alloc), from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), sock_i_ino(s)); } return 0; } static const struct seq_operations packet_seq_ops = { .start = packet_seq_start, .next = packet_seq_next, .stop = packet_seq_stop, .show = packet_seq_show, }; #endif static int __net_init packet_net_init(struct net *net) { mutex_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); #ifdef CONFIG_PROC_FS if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; #endif /* CONFIG_PROC_FS */ return 0; } static void __net_exit packet_net_exit(struct net *net) { remove_proc_entry("packet", net->proc_net); WARN_ON_ONCE(!hlist_empty(&net->packet.sklist)); } static struct pernet_operations packet_net_ops = { .init = packet_net_init, .exit = packet_net_exit, }; static void __exit packet_exit(void) { sock_unregister(PF_PACKET); proto_unregister(&packet_proto); unregister_netdevice_notifier(&packet_netdev_notifier); unregister_pernet_subsys(&packet_net_ops); } static int __init packet_init(void) { int rc; rc = register_pernet_subsys(&packet_net_ops); if (rc) goto out; rc = register_netdevice_notifier(&packet_netdev_notifier); if (rc) goto out_pernet; rc = proto_register(&packet_proto, 0); if (rc) goto out_notifier; rc = sock_register(&packet_family_ops); if (rc) goto out_proto; return 0; out_proto: proto_unregister(&packet_proto); out_notifier: unregister_netdevice_notifier(&packet_netdev_notifier); out_pernet: unregister_pernet_subsys(&packet_net_ops); out: return rc; } module_init(packet_init); module_exit(packet_exit); MODULE_DESCRIPTION("Packet socket support (AF_PACKET)"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_PACKET);
116 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_FS_STRUCT_H #define _LINUX_FS_STRUCT_H #include <linux/path.h> #include <linux/spinlock.h> #include <linux/seqlock.h> struct fs_struct { int users; spinlock_t lock; seqcount_spinlock_t seq; int umask; int in_exec; struct path root, pwd; } __randomize_layout; extern struct kmem_cache *fs_cachep; extern void exit_fs(struct task_struct *); extern void set_fs_root(struct fs_struct *, const struct path *); extern void set_fs_pwd(struct fs_struct *, const struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); extern void free_fs_struct(struct fs_struct *); extern int unshare_fs_struct(void); static inline void get_fs_root(struct fs_struct *fs, struct path *root) { spin_lock(&fs->lock); *root = fs->root; path_get(root); spin_unlock(&fs->lock); } static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) { spin_lock(&fs->lock); *pwd = fs->pwd; path_get(pwd); spin_unlock(&fs->lock); } extern bool current_chrooted(void); #endif /* _LINUX_FS_STRUCT_H */
5 5 1 11 11 26 26 157 156 156 156 156 156 156 9 2 7 7 7 12 1 11 63 45 45 45 45 42 3 2 2 42 12 38 10 28 27 17 1 1 1 16 2 5 21 20 45 13 89 89 51 32 90 90 26 26 26 26 14 14 24 18 7 30 30 2 21 21 21 2 19 21 20 4 16 16 19 19 19 1 3 1 1 1 1 9 11 22 23 5 15 3 21 19 2 12 12 12 12 2173 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ #include "mesh-interface.h" #include "main.h" #include <linux/atomic.h> #include <linux/byteorder/generic.h> #include <linux/cache.h> #include <linux/compiler.h> #include <linux/container_of.h> #include <linux/cpumask.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/jiffies.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/percpu.h> #include <linux/random.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/socket.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> #include <net/net_namespace.h> #include <net/netlink.h> #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "gateway_client.h" #include "hard-interface.h" #include "multicast.h" #include "network-coding.h" #include "send.h" #include "translation-table.h" /** * batadv_skb_head_push() - Increase header size and move (push) head pointer * @skb: packet buffer which should be modified * @len: number of bytes to add * * Return: 0 on success or negative error number in case of failure */ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len) { int result; /* TODO: We must check if we can release all references to non-payload * data using __skb_header_release in our skbs to allow skb_cow_header * to work optimally. This means that those skbs are not allowed to read * or write any data which is before the current position of skb->data * after that call and thus allow other skbs with the same data buffer * to write freely in that area. */ result = skb_cow_head(skb, len); if (result < 0) return result; skb_push(skb, len); return 0; } static int batadv_interface_open(struct net_device *dev) { netif_start_queue(dev); return 0; } static int batadv_interface_release(struct net_device *dev) { netif_stop_queue(dev); return 0; } /** * batadv_sum_counter() - Sum the cpu-local counters for index 'idx' * @bat_priv: the bat priv with all the mesh interface information * @idx: index of counter to sum up * * Return: sum of all cpu-local counters */ static u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx) { u64 *counters, sum = 0; int cpu; for_each_possible_cpu(cpu) { counters = per_cpu_ptr(bat_priv->bat_counters, cpu); sum += counters[idx]; } return sum; } static struct net_device_stats *batadv_interface_stats(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; stats->tx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_TX); stats->tx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_TX_BYTES); stats->tx_dropped = batadv_sum_counter(bat_priv, BATADV_CNT_TX_DROPPED); stats->rx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_RX); stats->rx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_RX_BYTES); return stats; } static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) { struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_meshif_vlan *vlan; struct sockaddr *addr = p; u8 old_addr[ETH_ALEN]; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; ether_addr_copy(old_addr, dev->dev_addr); eth_hw_addr_set(dev, addr->sa_data); /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) return 0; rcu_read_lock(); hlist_for_each_entry_rcu(vlan, &bat_priv->meshif_vlan_list, list) { batadv_tt_local_remove(bat_priv, old_addr, vlan->vid, "mac address changed", false); batadv_tt_local_add(dev, addr->sa_data, vlan->vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); } rcu_read_unlock(); return 0; } static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) { struct batadv_priv *bat_priv = netdev_priv(dev); /* check ranges */ if (new_mtu < ETH_MIN_MTU || new_mtu > batadv_hardif_min_mtu(dev)) return -EINVAL; WRITE_ONCE(dev->mtu, new_mtu); bat_priv->mtu_set_by_user = new_mtu; return 0; } /** * batadv_interface_set_rx_mode() - set the rx mode of a device * @dev: registered network device to modify * * We do not actually need to set any rx filters for the virtual batman * mesh interface. However a dummy handler enables a user to set static * multicast listeners for instance. */ static void batadv_interface_set_rx_mode(struct net_device *dev) { } static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, struct net_device *mesh_iface) { struct ethhdr *ethhdr; struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct batadv_hard_iface *primary_if = NULL; struct batadv_bcast_packet *bcast_packet; static const u8 stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x00}; static const u8 ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00, 0x00, 0x00}; enum batadv_dhcp_recipient dhcp_rcp = BATADV_DHCP_NO; u8 *dst_hint = NULL, chaddr[ETH_ALEN]; struct vlan_ethhdr *vhdr; unsigned int header_len = 0; int data_len = skb->len, ret; unsigned long brd_delay = 0; bool do_bcast = false, client_added; unsigned short vid; u32 seqno; int gw_mode; enum batadv_forw_mode forw_mode = BATADV_FORW_BCAST; int mcast_is_routable = 0; int network_offset = ETH_HLEN; __be16 proto; if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; /* reset control block to avoid left overs from previous users */ memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); netif_trans_update(mesh_iface); vid = batadv_get_vid(skb, 0); skb_reset_mac_header(skb); ethhdr = eth_hdr(skb); proto = ethhdr->h_proto; switch (ntohs(proto)) { case ETH_P_8021Q: if (!pskb_may_pull(skb, sizeof(*vhdr))) goto dropped; vhdr = vlan_eth_hdr(skb); proto = vhdr->h_vlan_encapsulated_proto; /* drop batman-in-batman packets to prevent loops */ if (proto != htons(ETH_P_BATMAN)) { network_offset += VLAN_HLEN; break; } fallthrough; case ETH_P_BATMAN: goto dropped; } skb_set_network_header(skb, network_offset); if (batadv_bla_tx(bat_priv, skb, vid)) goto dropped; /* skb->data might have been reallocated by batadv_bla_tx() */ ethhdr = eth_hdr(skb); /* Register the client MAC in the transtable */ if (!is_multicast_ether_addr(ethhdr->h_source) && !batadv_bla_is_loopdetect_mac(ethhdr->h_source)) { client_added = batadv_tt_local_add(mesh_iface, ethhdr->h_source, vid, skb->skb_iif, skb->mark); if (!client_added) goto dropped; } /* Snoop address candidates from DHCPACKs for early DAT filling */ batadv_dat_snoop_outgoing_dhcp_ack(bat_priv, skb, proto, vid); /* don't accept stp packets. STP does not help in meshes. * better use the bridge loop avoidance ... * * The same goes for ECTP sent at least by some Cisco Switches, * it might confuse the mesh when used with bridge loop avoidance. */ if (batadv_compare_eth(ethhdr->h_dest, stp_addr)) goto dropped; if (batadv_compare_eth(ethhdr->h_dest, ectp_addr)) goto dropped; gw_mode = atomic_read(&bat_priv->gw.mode); if (is_multicast_ether_addr(ethhdr->h_dest)) { /* if gw mode is off, broadcast every packet */ if (gw_mode == BATADV_GW_MODE_OFF) { do_bcast = true; goto send; } dhcp_rcp = batadv_gw_dhcp_recipient_get(skb, &header_len, chaddr); /* skb->data may have been modified by * batadv_gw_dhcp_recipient_get() */ ethhdr = eth_hdr(skb); /* if gw_mode is on, broadcast any non-DHCP message. * All the DHCP packets are going to be sent as unicast */ if (dhcp_rcp == BATADV_DHCP_NO) { do_bcast = true; goto send; } if (dhcp_rcp == BATADV_DHCP_TO_CLIENT) dst_hint = chaddr; else if ((gw_mode == BATADV_GW_MODE_SERVER) && (dhcp_rcp == BATADV_DHCP_TO_SERVER)) /* gateways should not forward any DHCP message if * directed to a DHCP server */ goto dropped; send: if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) { forw_mode = batadv_mcast_forw_mode(bat_priv, skb, vid, &mcast_is_routable); switch (forw_mode) { case BATADV_FORW_BCAST: break; case BATADV_FORW_UCASTS: case BATADV_FORW_MCAST: do_bcast = false; break; case BATADV_FORW_NONE: fallthrough; default: goto dropped; } } } batadv_skb_set_priority(skb, 0); /* ethernet packet should be broadcasted */ if (do_bcast) { primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto dropped; /* in case of ARP request, we do not immediately broadcasti the * packet, instead we first wait for DAT to try to retrieve the * correct ARP entry */ if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) brd_delay = msecs_to_jiffies(ARP_REQ_DELAY); if (batadv_skb_head_push(skb, sizeof(*bcast_packet)) < 0) goto dropped; bcast_packet = (struct batadv_bcast_packet *)skb->data; bcast_packet->version = BATADV_COMPAT_VERSION; bcast_packet->ttl = BATADV_TTL - 1; /* batman packet type: broadcast */ bcast_packet->packet_type = BATADV_BCAST; bcast_packet->reserved = 0; /* hw address of first interface is the orig mac because only * this mac is known throughout the mesh */ ether_addr_copy(bcast_packet->orig, primary_if->net_dev->dev_addr); /* set broadcast sequence number */ seqno = atomic_inc_return(&bat_priv->bcast_seqno); bcast_packet->seqno = htonl(seqno); batadv_send_bcast_packet(bat_priv, skb, brd_delay, true); /* unicast packet */ } else { /* DHCP packets going to a server will use the GW feature */ if (dhcp_rcp == BATADV_DHCP_TO_SERVER) { ret = batadv_gw_out_of_range(bat_priv, skb); if (ret) goto dropped; ret = batadv_send_skb_via_gw(bat_priv, skb, vid); } else if (forw_mode == BATADV_FORW_UCASTS) { ret = batadv_mcast_forw_send(bat_priv, skb, vid, mcast_is_routable); } else if (forw_mode == BATADV_FORW_MCAST) { ret = batadv_mcast_forw_mcsend(bat_priv, skb); } else { if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) goto dropped; batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb); ret = batadv_send_skb_via_tt(bat_priv, skb, dst_hint, vid); } if (ret != NET_XMIT_SUCCESS) goto dropped_freed; } batadv_inc_counter(bat_priv, BATADV_CNT_TX); batadv_add_counter(bat_priv, BATADV_CNT_TX_BYTES, data_len); goto end; dropped: kfree_skb(skb); dropped_freed: batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED); end: batadv_hardif_put(primary_if); return NETDEV_TX_OK; } /** * batadv_interface_rx() - receive ethernet frame on local batman-adv interface * @mesh_iface: local interface which will receive the ethernet frame * @skb: ethernet frame for @mesh_iface * @hdr_size: size of already parsed batman-adv header * @orig_node: originator from which the batman-adv packet was sent * * Sends an ethernet frame to the receive path of the local @mesh_iface. * skb->data has still point to the batman-adv header with the size @hdr_size. * The caller has to have parsed this header already and made sure that at least * @hdr_size bytes are still available for pull in @skb. * * The packet may still get dropped. This can happen when the encapsulated * ethernet frame is invalid or contains again an batman-adv packet. Also * unicast packets will be dropped directly when it was sent between two * isolated clients. */ void batadv_interface_rx(struct net_device *mesh_iface, struct sk_buff *skb, int hdr_size, struct batadv_orig_node *orig_node) { struct batadv_bcast_packet *batadv_bcast_packet; struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct vlan_ethhdr *vhdr; struct ethhdr *ethhdr; unsigned short vid; int packet_type; batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data; packet_type = batadv_bcast_packet->packet_type; skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); /* clean the netfilter state now that the batman-adv header has been * removed */ nf_reset_ct(skb); if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) goto dropped; vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: if (!pskb_may_pull(skb, VLAN_ETH_HLEN)) goto dropped; vhdr = skb_vlan_eth_hdr(skb); /* drop batman-in-batman packets to prevent loops */ if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN)) break; fallthrough; case ETH_P_BATMAN: goto dropped; } /* skb->dev & skb->pkt_type are set here */ skb->protocol = eth_type_trans(skb, mesh_iface); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); batadv_inc_counter(bat_priv, BATADV_CNT_RX); batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, skb->len + ETH_HLEN); /* Let the bridge loop avoidance check the packet. If will * not handle it, we can safely push it up. */ if (batadv_bla_rx(bat_priv, skb, vid, packet_type)) goto out; if (orig_node) batadv_tt_add_temporary_global_entry(bat_priv, orig_node, ethhdr->h_source, vid); if (is_multicast_ether_addr(ethhdr->h_dest)) { /* set the mark on broadcast packets if AP isolation is ON and * the packet is coming from an "isolated" client */ if (batadv_vlan_ap_isola_get(bat_priv, vid) && batadv_tt_global_is_isolated(bat_priv, ethhdr->h_source, vid)) { /* save bits in skb->mark not covered by the mask and * apply the mark on the rest */ skb->mark &= ~bat_priv->isolation_mark_mask; skb->mark |= bat_priv->isolation_mark; } } else if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest, vid)) { goto dropped; } netif_rx(skb); goto out; dropped: kfree_skb(skb); out: return; } /** * batadv_meshif_vlan_release() - release vlan from lists and queue for free * after rcu grace period * @ref: kref pointer of the vlan object */ void batadv_meshif_vlan_release(struct kref *ref) { struct batadv_meshif_vlan *vlan; vlan = container_of(ref, struct batadv_meshif_vlan, refcount); spin_lock_bh(&vlan->bat_priv->meshif_vlan_list_lock); hlist_del_rcu(&vlan->list); spin_unlock_bh(&vlan->bat_priv->meshif_vlan_list_lock); kfree_rcu(vlan, rcu); } /** * batadv_meshif_vlan_get() - get the vlan object for a specific vid * @bat_priv: the bat priv with all the mesh interface information * @vid: the identifier of the vlan object to retrieve * * Return: the private data of the vlan matching the vid passed as argument or * NULL otherwise. The refcounter of the returned object is incremented by 1. */ struct batadv_meshif_vlan *batadv_meshif_vlan_get(struct batadv_priv *bat_priv, unsigned short vid) { struct batadv_meshif_vlan *vlan_tmp, *vlan = NULL; rcu_read_lock(); hlist_for_each_entry_rcu(vlan_tmp, &bat_priv->meshif_vlan_list, list) { if (vlan_tmp->vid != vid) continue; if (!kref_get_unless_zero(&vlan_tmp->refcount)) continue; vlan = vlan_tmp; break; } rcu_read_unlock(); return vlan; } /** * batadv_meshif_create_vlan() - allocate the needed resources for a new vlan * @bat_priv: the bat priv with all the mesh interface information * @vid: the VLAN identifier * * Return: 0 on success, a negative error otherwise. */ int batadv_meshif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) { struct batadv_meshif_vlan *vlan; spin_lock_bh(&bat_priv->meshif_vlan_list_lock); vlan = batadv_meshif_vlan_get(bat_priv, vid); if (vlan) { batadv_meshif_vlan_put(vlan); spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); return -EEXIST; } vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC); if (!vlan) { spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); return -ENOMEM; } vlan->bat_priv = bat_priv; vlan->vid = vid; kref_init(&vlan->refcount); atomic_set(&vlan->ap_isolation, 0); kref_get(&vlan->refcount); hlist_add_head_rcu(&vlan->list, &bat_priv->meshif_vlan_list); spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); /* add a new TT local entry. This one will be marked with the NOPURGE * flag */ batadv_tt_local_add(bat_priv->mesh_iface, bat_priv->mesh_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); /* don't return reference to new meshif_vlan */ batadv_meshif_vlan_put(vlan); return 0; } /** * batadv_meshif_destroy_vlan() - remove and destroy a meshif_vlan object * @bat_priv: the bat priv with all the mesh interface information * @vlan: the object to remove */ static void batadv_meshif_destroy_vlan(struct batadv_priv *bat_priv, struct batadv_meshif_vlan *vlan) { /* explicitly remove the associated TT local entry because it is marked * with the NOPURGE flag */ batadv_tt_local_remove(bat_priv, bat_priv->mesh_iface->dev_addr, vlan->vid, "vlan interface destroyed", false); batadv_meshif_vlan_put(vlan); } /** * batadv_interface_add_vid() - ndo_add_vid API implementation * @dev: the netdev of the mesh interface * @proto: protocol of the vlan id * @vid: identifier of the new vlan * * Set up all the internal structures for handling the new vlan on top of the * mesh interface * * Return: 0 on success or a negative error code in case of failure. */ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, unsigned short vid) { struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_meshif_vlan *vlan; /* only 802.1Q vlans are supported. * batman-adv does not know how to handle other types */ if (proto != htons(ETH_P_8021Q)) return -EINVAL; /* VID 0 is only used to indicate "priority tag" frames which only * contain priority information and no VID. No management structures * should be created for this VID and it should be handled like an * untagged frame. */ if (vid == 0) return 0; vid |= BATADV_VLAN_HAS_TAG; /* if a new vlan is getting created and it already exists, it means that * it was not deleted yet. batadv_meshif_vlan_get() increases the * refcount in order to revive the object. * * if it does not exist then create it. */ vlan = batadv_meshif_vlan_get(bat_priv, vid); if (!vlan) return batadv_meshif_create_vlan(bat_priv, vid); /* add a new TT local entry. This one will be marked with the NOPURGE * flag. This must be added again, even if the vlan object already * exists, because the entry was deleted by kill_vid() */ batadv_tt_local_add(bat_priv->mesh_iface, bat_priv->mesh_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); return 0; } /** * batadv_interface_kill_vid() - ndo_kill_vid API implementation * @dev: the netdev of the mesh interface * @proto: protocol of the vlan id * @vid: identifier of the deleted vlan * * Destroy all the internal structures used to handle the vlan identified by vid * on top of the mesh interface * * Return: 0 on success, -EINVAL if the specified prototype is not ETH_P_8021Q * or -ENOENT if the specified vlan id wasn't registered. */ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, unsigned short vid) { struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_meshif_vlan *vlan; /* only 802.1Q vlans are supported. batman-adv does not know how to * handle other types */ if (proto != htons(ETH_P_8021Q)) return -EINVAL; /* "priority tag" frames are handled like "untagged" frames * and no meshif_vlan needs to be destroyed */ if (vid == 0) return 0; vlan = batadv_meshif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); if (!vlan) return -ENOENT; batadv_meshif_destroy_vlan(bat_priv, vlan); /* finally free the vlan object */ batadv_meshif_vlan_put(vlan); return 0; } /* batman-adv network devices have devices nesting below it and are a special * "super class" of normal network devices; split their locks off into a * separate class since they always nest. */ static struct lock_class_key batadv_netdev_xmit_lock_key; static struct lock_class_key batadv_netdev_addr_lock_key; /** * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue * @dev: device which owns the tx queue * @txq: tx queue to modify * @_unused: always NULL */ static void batadv_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, void *_unused) { lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key); } /** * batadv_set_lockdep_class() - Set txq and addr_list lockdep class * @dev: network device to modify */ static void batadv_set_lockdep_class(struct net_device *dev) { lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL); } /** * batadv_meshif_init_late() - late stage initialization of mesh interface * @dev: registered network device to modify * * Return: error code on failures */ static int batadv_meshif_init_late(struct net_device *dev) { struct batadv_priv *bat_priv; u32 random_seqno; int ret; size_t cnt_len = sizeof(u64) * BATADV_CNT_NUM; batadv_set_lockdep_class(dev); bat_priv = netdev_priv(dev); bat_priv->mesh_iface = dev; /* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called */ bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(u64)); if (!bat_priv->bat_counters) return -ENOMEM; atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); #ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bridge_loop_avoidance, 1); #endif #ifdef CONFIG_BATMAN_ADV_DAT atomic_set(&bat_priv->distributed_arp_table, 1); #endif #ifdef CONFIG_BATMAN_ADV_MCAST atomic_set(&bat_priv->multicast_mode, 1); atomic_set(&bat_priv->multicast_fanout, 16); atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0); atomic_set(&bat_priv->mcast.num_no_mc_ptype_capa, 0); #endif atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF); atomic_set(&bat_priv->gw.bandwidth_down, 100); atomic_set(&bat_priv->gw.bandwidth_up, 20); atomic_set(&bat_priv->orig_interval, 1000); atomic_set(&bat_priv->hop_penalty, 30); #ifdef CONFIG_BATMAN_ADV_DEBUG atomic_set(&bat_priv->log_level, 0); #endif atomic_set(&bat_priv->fragmentation, 1); atomic_set(&bat_priv->packet_size_max, BATADV_MAX_MTU); atomic_set(&bat_priv->bcast_queue_left, BATADV_BCAST_QUEUE_LEN); atomic_set(&bat_priv->batman_queue_left, BATADV_BATMAN_QUEUE_LEN); atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); atomic_set(&bat_priv->bcast_seqno, 1); atomic_set(&bat_priv->tt.vn, 0); atomic_set(&bat_priv->tt.ogm_append_cnt, 0); #ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bla.num_requests, 0); #endif atomic_set(&bat_priv->tp_num, 0); WRITE_ONCE(bat_priv->tt.local_changes, 0); bat_priv->tt.last_changeset = NULL; bat_priv->tt.last_changeset_len = 0; bat_priv->isolation_mark = 0; bat_priv->isolation_mark_mask = 0; /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); atomic_set(&bat_priv->frag_seqno, random_seqno); bat_priv->primary_if = NULL; batadv_nc_init_bat_priv(bat_priv); if (!bat_priv->algo_ops) { ret = batadv_algo_select(bat_priv, batadv_routing_algo); if (ret < 0) goto free_bat_counters; } ret = batadv_mesh_init(dev); if (ret < 0) goto free_bat_counters; return 0; free_bat_counters: free_percpu(bat_priv->bat_counters); bat_priv->bat_counters = NULL; return ret; } /** * batadv_meshif_slave_add() - Add a slave interface to a batadv_mesh_interface * @dev: batadv_mesh_interface used as master interface * @slave_dev: net_device which should become the slave interface * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. */ static int batadv_meshif_slave_add(struct net_device *dev, struct net_device *slave_dev, struct netlink_ext_ack *extack) { struct batadv_hard_iface *hard_iface; int ret = -EINVAL; hard_iface = batadv_hardif_get_by_netdev(slave_dev); if (!hard_iface || hard_iface->mesh_iface) goto out; ret = batadv_hardif_enable_interface(hard_iface, dev); out: batadv_hardif_put(hard_iface); return ret; } /** * batadv_meshif_slave_del() - Delete a slave iface from a batadv_mesh_interface * @dev: batadv_mesh_interface used as master interface * @slave_dev: net_device which should be removed from the master interface * * Return: 0 if successful or error otherwise. */ static int batadv_meshif_slave_del(struct net_device *dev, struct net_device *slave_dev) { struct batadv_hard_iface *hard_iface; int ret = -EINVAL; hard_iface = batadv_hardif_get_by_netdev(slave_dev); if (!hard_iface || hard_iface->mesh_iface != dev) goto out; batadv_hardif_disable_interface(hard_iface); ret = 0; out: batadv_hardif_put(hard_iface); return ret; } static const struct net_device_ops batadv_netdev_ops = { .ndo_init = batadv_meshif_init_late, .ndo_open = batadv_interface_open, .ndo_stop = batadv_interface_release, .ndo_get_stats = batadv_interface_stats, .ndo_vlan_rx_add_vid = batadv_interface_add_vid, .ndo_vlan_rx_kill_vid = batadv_interface_kill_vid, .ndo_set_mac_address = batadv_interface_set_mac_addr, .ndo_change_mtu = batadv_interface_change_mtu, .ndo_set_rx_mode = batadv_interface_set_rx_mode, .ndo_start_xmit = batadv_interface_tx, .ndo_validate_addr = eth_validate_addr, .ndo_add_slave = batadv_meshif_slave_add, .ndo_del_slave = batadv_meshif_slave_del, }; static void batadv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { strscpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver)); strscpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); strscpy(info->fw_version, "N/A", sizeof(info->fw_version)); strscpy(info->bus_info, "batman", sizeof(info->bus_info)); } /* Inspired by drivers/net/ethernet/dlink/sundance.c:1702 * Declare each description string in struct.name[] to get fixed sized buffer * and compile time checking for strings longer than ETH_GSTRING_LEN. */ static const struct { const char name[ETH_GSTRING_LEN]; } batadv_counters_strings[] = { { "tx" }, { "tx_bytes" }, { "tx_dropped" }, { "rx" }, { "rx_bytes" }, { "forward" }, { "forward_bytes" }, { "mgmt_tx" }, { "mgmt_tx_bytes" }, { "mgmt_rx" }, { "mgmt_rx_bytes" }, { "frag_tx" }, { "frag_tx_bytes" }, { "frag_rx" }, { "frag_rx_bytes" }, { "frag_fwd" }, { "frag_fwd_bytes" }, { "tt_request_tx" }, { "tt_request_rx" }, { "tt_response_tx" }, { "tt_response_rx" }, { "tt_roam_adv_tx" }, { "tt_roam_adv_rx" }, #ifdef CONFIG_BATMAN_ADV_MCAST { "mcast_tx" }, { "mcast_tx_bytes" }, { "mcast_tx_local" }, { "mcast_tx_local_bytes" }, { "mcast_rx" }, { "mcast_rx_bytes" }, { "mcast_rx_local" }, { "mcast_rx_local_bytes" }, { "mcast_fwd" }, { "mcast_fwd_bytes" }, #endif #ifdef CONFIG_BATMAN_ADV_DAT { "dat_get_tx" }, { "dat_get_rx" }, { "dat_put_tx" }, { "dat_put_rx" }, { "dat_cached_reply_tx" }, #endif #ifdef CONFIG_BATMAN_ADV_NC { "nc_code" }, { "nc_code_bytes" }, { "nc_recode" }, { "nc_recode_bytes" }, { "nc_buffer" }, { "nc_decode" }, { "nc_decode_bytes" }, { "nc_decode_failed" }, { "nc_sniffed" }, #endif }; static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data) { if (stringset == ETH_SS_STATS) memcpy(data, batadv_counters_strings, sizeof(batadv_counters_strings)); } static void batadv_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct batadv_priv *bat_priv = netdev_priv(dev); int i; for (i = 0; i < BATADV_CNT_NUM; i++) data[i] = batadv_sum_counter(bat_priv, i); } static int batadv_get_sset_count(struct net_device *dev, int stringset) { if (stringset == ETH_SS_STATS) return BATADV_CNT_NUM; return -EOPNOTSUPP; } static const struct ethtool_ops batadv_ethtool_ops = { .get_drvinfo = batadv_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = batadv_get_strings, .get_ethtool_stats = batadv_get_ethtool_stats, .get_sset_count = batadv_get_sset_count, }; /** * batadv_meshif_free() - Deconstructor of batadv_mesh_interface * @dev: Device to cleanup and remove */ static void batadv_meshif_free(struct net_device *dev) { batadv_mesh_free(dev); /* some scheduled RCU callbacks need the bat_priv struct to accomplish * their tasks. Wait for them all to be finished before freeing the * netdev and its private data (bat_priv) */ rcu_barrier(); } /** * batadv_meshif_init_early() - early stage initialization of mesh interface * @dev: registered network device to modify */ static void batadv_meshif_init_early(struct net_device *dev) { ether_setup(dev); dev->netdev_ops = &batadv_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = batadv_meshif_free; dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; dev->priv_flags |= IFF_NO_QUEUE; dev->lltx = true; dev->netns_immutable = true; /* can't call min_mtu, because the needed variables * have not been initialized yet */ dev->mtu = ETH_DATA_LEN; dev->max_mtu = BATADV_MAX_MTU; /* generate random address */ eth_hw_addr_random(dev); dev->ethtool_ops = &batadv_ethtool_ops; } /** * batadv_meshif_validate() - validate configuration of new batadv link * @tb: IFLA_INFO_DATA netlink attributes * @data: enum batadv_ifla_attrs attributes * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. */ static int batadv_meshif_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct batadv_algo_ops *algo_ops; if (!data) return 0; if (data[IFLA_BATADV_ALGO_NAME]) { algo_ops = batadv_algo_get(nla_data(data[IFLA_BATADV_ALGO_NAME])); if (!algo_ops) return -EINVAL; } return 0; } /** * batadv_meshif_newlink() - pre-initialize and register new batadv link * @dev: network device to register * @params: rtnl newlink parameters * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. */ static int batadv_meshif_newlink(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct batadv_priv *bat_priv = netdev_priv(dev); struct nlattr **data = params->data; const char *algo_name; int err; if (data && data[IFLA_BATADV_ALGO_NAME]) { algo_name = nla_data(data[IFLA_BATADV_ALGO_NAME]); err = batadv_algo_select(bat_priv, algo_name); if (err) return -EINVAL; } return register_netdevice(dev); } /** * batadv_meshif_destroy_netlink() - deletion of batadv_mesh_interface via * netlink * @mesh_iface: the to-be-removed batman-adv interface * @head: list pointer */ static void batadv_meshif_destroy_netlink(struct net_device *mesh_iface, struct list_head *head) { struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct batadv_hard_iface *hard_iface; struct batadv_meshif_vlan *vlan; list_for_each_entry(hard_iface, &batadv_hardif_list, list) { if (hard_iface->mesh_iface == mesh_iface) batadv_hardif_disable_interface(hard_iface); } /* destroy the "untagged" VLAN */ vlan = batadv_meshif_vlan_get(bat_priv, BATADV_NO_FLAGS); if (vlan) { batadv_meshif_destroy_vlan(bat_priv, vlan); batadv_meshif_vlan_put(vlan); } unregister_netdevice_queue(mesh_iface, head); } /** * batadv_meshif_is_valid() - Check whether device is a batadv mesh interface * @net_dev: device which should be checked * * Return: true when net_dev is a batman-adv interface, false otherwise */ bool batadv_meshif_is_valid(const struct net_device *net_dev) { if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx) return true; return false; } static const struct nla_policy batadv_ifla_policy[IFLA_BATADV_MAX + 1] = { [IFLA_BATADV_ALGO_NAME] = { .type = NLA_NUL_STRING }, }; struct rtnl_link_ops batadv_link_ops __read_mostly = { .kind = "batadv", .priv_size = sizeof(struct batadv_priv), .setup = batadv_meshif_init_early, .maxtype = IFLA_BATADV_MAX, .policy = batadv_ifla_policy, .validate = batadv_meshif_validate, .newlink = batadv_meshif_newlink, .dellink = batadv_meshif_destroy_netlink, };
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 // SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API. * * DES & Triple DES EDE Cipher Algorithms. * * Copyright (c) 2005 Dag Arne Osvik <da@osvik.no> */ #include <asm/byteorder.h> #include <crypto/algapi.h> #include <linux/bitops.h> #include <linux/init.h> #include <linux/module.h> #include <linux/errno.h> #include <crypto/internal/des.h> static int des_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct des_ctx *dctx = crypto_tfm_ctx(tfm); int err; err = des_expand_key(dctx, key, keylen); if (err == -ENOKEY) { if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) err = -EINVAL; else err = 0; } if (err) memset(dctx, 0, sizeof(*dctx)); return err; } static void crypto_des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { const struct des_ctx *dctx = crypto_tfm_ctx(tfm); des_encrypt(dctx, dst, src); } static void crypto_des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { const struct des_ctx *dctx = crypto_tfm_ctx(tfm); des_decrypt(dctx, dst, src); } static int des3_ede_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm); int err; err = des3_ede_expand_key(dctx, key, keylen); if (err == -ENOKEY) { if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) err = -EINVAL; else err = 0; } if (err) memset(dctx, 0, sizeof(*dctx)); return err; } static void crypto_des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { const struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm); des3_ede_encrypt(dctx, dst, src); } static void crypto_des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { const struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm); des3_ede_decrypt(dctx, dst, src); } static struct crypto_alg des_algs[2] = { { .cra_name = "des", .cra_driver_name = "des-generic", .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct des_ctx), .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = DES_KEY_SIZE, .cia_max_keysize = DES_KEY_SIZE, .cia_setkey = des_setkey, .cia_encrypt = crypto_des_encrypt, .cia_decrypt = crypto_des_decrypt } } }, { .cra_name = "des3_ede", .cra_driver_name = "des3_ede-generic", .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES3_EDE_BLOCK_SIZE, .cra_ctxsize = sizeof(struct des3_ede_ctx), .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = DES3_EDE_KEY_SIZE, .cia_max_keysize = DES3_EDE_KEY_SIZE, .cia_setkey = des3_ede_setkey, .cia_encrypt = crypto_des3_ede_encrypt, .cia_decrypt = crypto_des3_ede_decrypt } } } }; static int __init des_generic_mod_init(void) { return crypto_register_algs(des_algs, ARRAY_SIZE(des_algs)); } static void __exit des_generic_mod_fini(void) { crypto_unregister_algs(des_algs, ARRAY_SIZE(des_algs)); } subsys_initcall(des_generic_mod_init); module_exit(des_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms"); MODULE_AUTHOR("Dag Arne Osvik <da@osvik.no>"); MODULE_ALIAS_CRYPTO("des"); MODULE_ALIAS_CRYPTO("des-generic"); MODULE_ALIAS_CRYPTO("des3_ede"); MODULE_ALIAS_CRYPTO("des3_ede-generic");
7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 // SPDX-License-Identifier: GPL-2.0-only /* * * Authors: * Alexander Aring <aar@pengutronix.de> * * Based on: net/wireless/sysfs.c */ #include <linux/device.h> #include <linux/rtnetlink.h> #include <net/cfg802154.h> #include "core.h" #include "sysfs.h" #include "rdev-ops.h" static inline struct cfg802154_registered_device * dev_to_rdev(struct device *dev) { return container_of(dev, struct cfg802154_registered_device, wpan_phy.dev); } #define SHOW_FMT(name, fmt, member) \ static ssize_t name ## _show(struct device *dev, \ struct device_attribute *attr, \ char *buf) \ { \ return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member); \ } \ static DEVICE_ATTR_RO(name) SHOW_FMT(index, "%d", wpan_phy_idx); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wpan_phy *wpan_phy = &dev_to_rdev(dev)->wpan_phy; return sprintf(buf, "%s\n", dev_name(&wpan_phy->dev)); } static DEVICE_ATTR_RO(name); static void wpan_phy_release(struct device *dev) { struct cfg802154_registered_device *rdev = dev_to_rdev(dev); cfg802154_dev_free(rdev); } static struct attribute *pmib_attrs[] = { &dev_attr_index.attr, &dev_attr_name.attr, NULL, }; ATTRIBUTE_GROUPS(pmib); #ifdef CONFIG_PM_SLEEP static int wpan_phy_suspend(struct device *dev) { struct cfg802154_registered_device *rdev = dev_to_rdev(dev); int ret = 0; if (rdev->ops->suspend) { rtnl_lock(); ret = rdev_suspend(rdev); rtnl_unlock(); } return ret; } static int wpan_phy_resume(struct device *dev) { struct cfg802154_registered_device *rdev = dev_to_rdev(dev); int ret = 0; if (rdev->ops->resume) { rtnl_lock(); ret = rdev_resume(rdev); rtnl_unlock(); } return ret; } static SIMPLE_DEV_PM_OPS(wpan_phy_pm_ops, wpan_phy_suspend, wpan_phy_resume); #define WPAN_PHY_PM_OPS (&wpan_phy_pm_ops) #else #define WPAN_PHY_PM_OPS NULL #endif const struct class wpan_phy_class = { .name = "ieee802154", .dev_release = wpan_phy_release, .dev_groups = pmib_groups, .pm = WPAN_PHY_PM_OPS, }; int wpan_phy_sysfs_init(void) { return class_register(&wpan_phy_class); } void wpan_phy_sysfs_exit(void) { class_unregister(&wpan_phy_class); }
2108 2104 5 3 2 1 1 1 22 2 11 10 19 1 18 1 17 131 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 // SPDX-License-Identifier: GPL-2.0-or-later /* * "TEE" target extension for Xtables * Copyright © Sebastian Claßen, 2007 * Jan Engelhardt, 2007-2010 * * based on ipt_ROUTE.c from Cédric de Launois * <delaunois@info.ucl.be> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/route.h> #include <linux/netfilter/x_tables.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/route.h> #include <net/netfilter/ipv4/nf_dup_ipv4.h> #include <net/netfilter/ipv6/nf_dup_ipv6.h> #include <linux/netfilter/xt_TEE.h> struct xt_tee_priv { struct list_head list; struct xt_tee_tginfo *tginfo; int oif; }; static unsigned int tee_net_id __read_mostly; static const union nf_inet_addr tee_zero_address; struct tee_net { struct list_head priv_list; /* lock protects the priv_list */ struct mutex lock; }; static unsigned int tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? info->priv->oif : 0; nf_dup_ipv4(xt_net(par), skb, xt_hooknum(par), &info->gw.in, oif); return XT_CONTINUE; } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static unsigned int tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? info->priv->oif : 0; nf_dup_ipv6(xt_net(par), skb, xt_hooknum(par), &info->gw.in6, oif); return XT_CONTINUE; } #endif static int tee_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct tee_net *tn = net_generic(net, tee_net_id); struct xt_tee_priv *priv; mutex_lock(&tn->lock); list_for_each_entry(priv, &tn->priv_list, list) { switch (event) { case NETDEV_REGISTER: if (!strcmp(dev->name, priv->tginfo->oif)) priv->oif = dev->ifindex; break; case NETDEV_UNREGISTER: if (dev->ifindex == priv->oif) priv->oif = -1; break; case NETDEV_CHANGENAME: if (!strcmp(dev->name, priv->tginfo->oif)) priv->oif = dev->ifindex; else if (dev->ifindex == priv->oif) priv->oif = -1; break; } } mutex_unlock(&tn->lock); return NOTIFY_DONE; } static int tee_tg_check(const struct xt_tgchk_param *par) { struct tee_net *tn = net_generic(par->net, tee_net_id); struct xt_tee_tginfo *info = par->targinfo; struct xt_tee_priv *priv; /* 0.0.0.0 and :: not allowed */ if (memcmp(&info->gw, &tee_zero_address, sizeof(tee_zero_address)) == 0) return -EINVAL; if (info->oif[0]) { struct net_device *dev; if (info->oif[sizeof(info->oif)-1] != '\0') return -EINVAL; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (priv == NULL) return -ENOMEM; priv->tginfo = info; priv->oif = -1; info->priv = priv; dev = dev_get_by_name(par->net, info->oif); if (dev) { priv->oif = dev->ifindex; dev_put(dev); } mutex_lock(&tn->lock); list_add(&priv->list, &tn->priv_list); mutex_unlock(&tn->lock); } else info->priv = NULL; static_key_slow_inc(&xt_tee_enabled); return 0; } static void tee_tg_destroy(const struct xt_tgdtor_param *par) { struct tee_net *tn = net_generic(par->net, tee_net_id); struct xt_tee_tginfo *info = par->targinfo; if (info->priv) { mutex_lock(&tn->lock); list_del(&info->priv->list); mutex_unlock(&tn->lock); kfree(info->priv); } static_key_slow_dec(&xt_tee_enabled); } static struct xt_target tee_tg_reg[] __read_mostly = { { .name = "TEE", .revision = 1, .family = NFPROTO_IPV4, .target = tee_tg4, .targetsize = sizeof(struct xt_tee_tginfo), .usersize = offsetof(struct xt_tee_tginfo, priv), .checkentry = tee_tg_check, .destroy = tee_tg_destroy, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "TEE", .revision = 1, .family = NFPROTO_IPV6, .target = tee_tg6, .targetsize = sizeof(struct xt_tee_tginfo), .usersize = offsetof(struct xt_tee_tginfo, priv), .checkentry = tee_tg_check, .destroy = tee_tg_destroy, .me = THIS_MODULE, }, #endif }; static int __net_init tee_net_init(struct net *net) { struct tee_net *tn = net_generic(net, tee_net_id); INIT_LIST_HEAD(&tn->priv_list); mutex_init(&tn->lock); return 0; } static struct pernet_operations tee_net_ops = { .init = tee_net_init, .id = &tee_net_id, .size = sizeof(struct tee_net), }; static struct notifier_block tee_netdev_notifier = { .notifier_call = tee_netdev_event, }; static int __init tee_tg_init(void) { int ret; ret = register_pernet_subsys(&tee_net_ops); if (ret < 0) return ret; ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); if (ret < 0) goto cleanup_subsys; ret = register_netdevice_notifier(&tee_netdev_notifier); if (ret < 0) goto unregister_targets; return 0; unregister_targets: xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); cleanup_subsys: unregister_pernet_subsys(&tee_net_ops); return ret; } static void __exit tee_tg_exit(void) { unregister_netdevice_notifier(&tee_netdev_notifier); xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); unregister_pernet_subsys(&tee_net_ops); } module_init(tee_tg_init); module_exit(tee_tg_exit); MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>"); MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: Reroute packet copy"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_TEE"); MODULE_ALIAS("ip6t_TEE");
72 34 1 2 33 16 1 3 2 1 2 2 7 9 2 8 1 3 2 2 4 5 1 2 2 2 5 1 2 2 8 1 2 3 2 4 2 2 7 1 3 3 4 5 1 2 2 7 1 2 2 2 6 2 2 2 33 36 3 3 2 1 1 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3 4 4 2 3 3 1 1 33 33 33 2 31 2 31 33 25 37 24 22 2 21 3 24 14 20 31 31 25 6 18 13 37 20 23 16 20 18 39 5 2 2 1 3 2 1 3 1 2 124 125 129 25 25 45 37 36 2 2 36 2 37 1 37 1 36 1 36 2 35 3 36 2 38 35 3 10 2 1 1 2 1 3 3 22 22 22 22 22 22 21 21 21 17 17 1 16 33 33 6 30 1 32 9 9 6 9 32 33 33 33 32 1 44 44 7 5 34 2 35 1 34 32 1 34 3 1 33 1 1 30 1 33 1 32 33 1 32 95 2 73 20 72 5 16 3 13 90 6 72 9 73 30 65 65 64 65 62 57 3 123 87 122 1 1 1 1 1 1 1 2 1 1 121 9 2105 2106 61 60 28 4 2 1 2 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 // SPDX-License-Identifier: GPL-2.0-or-later /* * drivers/net/macsec.c - MACsec device * * Copyright (c) 2015 Sabrina Dubroca <sd@queasysnail.net> */ #include <linux/types.h> #include <linux/skbuff.h> #include <linux/socket.h> #include <linux/module.h> #include <crypto/aead.h> #include <linux/etherdevice.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/refcount.h> #include <net/genetlink.h> #include <net/sock.h> #include <net/gro_cells.h> #include <net/macsec.h> #include <net/dst_metadata.h> #include <net/netdev_lock.h> #include <linux/phy.h> #include <linux/byteorder/generic.h> #include <linux/if_arp.h> #include <uapi/linux/if_macsec.h> /* SecTAG length = macsec_eth_header without the optional SCI */ #define MACSEC_TAG_LEN 6 struct macsec_eth_header { struct ethhdr eth; /* SecTAG */ u8 tci_an; #if defined(__LITTLE_ENDIAN_BITFIELD) u8 short_length:6, unused:2; #elif defined(__BIG_ENDIAN_BITFIELD) u8 unused:2, short_length:6; #else #error "Please fix <asm/byteorder.h>" #endif __be32 packet_number; u8 secure_channel_id[8]; /* optional */ } __packed; /* minimum secure data length deemed "not short", see IEEE 802.1AE-2006 9.7 */ #define MIN_NON_SHORT_LEN 48 #define GCM_AES_IV_LEN 12 #define for_each_rxsc(secy, sc) \ for (sc = rcu_dereference_bh(secy->rx_sc); \ sc; \ sc = rcu_dereference_bh(sc->next)) #define for_each_rxsc_rtnl(secy, sc) \ for (sc = rtnl_dereference(secy->rx_sc); \ sc; \ sc = rtnl_dereference(sc->next)) #define pn_same_half(pn1, pn2) (!(((pn1) >> 31) ^ ((pn2) >> 31))) struct gcm_iv_xpn { union { u8 short_secure_channel_id[4]; ssci_t ssci; }; __be64 pn; } __packed; struct gcm_iv { union { u8 secure_channel_id[8]; sci_t sci; }; __be32 pn; }; #define MACSEC_VALIDATE_DEFAULT MACSEC_VALIDATE_STRICT struct pcpu_secy_stats { struct macsec_dev_stats stats; struct u64_stats_sync syncp; }; /** * struct macsec_dev - private data * @secy: SecY config * @real_dev: pointer to underlying netdevice * @dev_tracker: refcount tracker for @real_dev reference * @stats: MACsec device stats * @secys: linked list of SecY's on the underlying device * @gro_cells: pointer to the Generic Receive Offload cell * @offload: status of offloading on the MACsec device * @insert_tx_tag: when offloading, device requires to insert an * additional tag */ struct macsec_dev { struct macsec_secy secy; struct net_device *real_dev; netdevice_tracker dev_tracker; struct pcpu_secy_stats __percpu *stats; struct list_head secys; struct gro_cells gro_cells; enum macsec_offload offload; bool insert_tx_tag; }; /** * struct macsec_rxh_data - rx_handler private argument * @secys: linked list of SecY's on this underlying device */ struct macsec_rxh_data { struct list_head secys; }; static struct macsec_dev *macsec_priv(const struct net_device *dev) { return (struct macsec_dev *)netdev_priv(dev); } static struct macsec_rxh_data *macsec_data_rcu(const struct net_device *dev) { return rcu_dereference_bh(dev->rx_handler_data); } static struct macsec_rxh_data *macsec_data_rtnl(const struct net_device *dev) { return rtnl_dereference(dev->rx_handler_data); } struct macsec_cb { struct aead_request *req; union { struct macsec_tx_sa *tx_sa; struct macsec_rx_sa *rx_sa; }; u8 assoc_num; bool valid; bool has_sci; }; static struct macsec_rx_sa *macsec_rxsa_get(struct macsec_rx_sa __rcu *ptr) { struct macsec_rx_sa *sa = rcu_dereference_bh(ptr); if (!sa || !sa->active) return NULL; if (!refcount_inc_not_zero(&sa->refcnt)) return NULL; return sa; } static void free_rx_sc_rcu(struct rcu_head *head) { struct macsec_rx_sc *rx_sc = container_of(head, struct macsec_rx_sc, rcu_head); free_percpu(rx_sc->stats); kfree(rx_sc); } static struct macsec_rx_sc *macsec_rxsc_get(struct macsec_rx_sc *sc) { return refcount_inc_not_zero(&sc->refcnt) ? sc : NULL; } static void macsec_rxsc_put(struct macsec_rx_sc *sc) { if (refcount_dec_and_test(&sc->refcnt)) call_rcu(&sc->rcu_head, free_rx_sc_rcu); } static void free_rxsa(struct rcu_head *head) { struct macsec_rx_sa *sa = container_of(head, struct macsec_rx_sa, rcu); crypto_free_aead(sa->key.tfm); free_percpu(sa->stats); kfree(sa); } static void macsec_rxsa_put(struct macsec_rx_sa *sa) { if (refcount_dec_and_test(&sa->refcnt)) call_rcu(&sa->rcu, free_rxsa); } static struct macsec_tx_sa *macsec_txsa_get(struct macsec_tx_sa __rcu *ptr) { struct macsec_tx_sa *sa = rcu_dereference_bh(ptr); if (!sa || !sa->active) return NULL; if (!refcount_inc_not_zero(&sa->refcnt)) return NULL; return sa; } static void free_txsa(struct rcu_head *head) { struct macsec_tx_sa *sa = container_of(head, struct macsec_tx_sa, rcu); crypto_free_aead(sa->key.tfm); free_percpu(sa->stats); kfree(sa); } static void macsec_txsa_put(struct macsec_tx_sa *sa) { if (refcount_dec_and_test(&sa->refcnt)) call_rcu(&sa->rcu, free_txsa); } static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) { BUILD_BUG_ON(sizeof(struct macsec_cb) > sizeof(skb->cb)); return (struct macsec_cb *)skb->cb; } #define MACSEC_PORT_SCB (0x0000) #define MACSEC_UNDEF_SCI ((__force sci_t)0xffffffffffffffffULL) #define MACSEC_UNDEF_SSCI ((__force ssci_t)0xffffffff) #define MACSEC_GCM_AES_128_SAK_LEN 16 #define MACSEC_GCM_AES_256_SAK_LEN 32 #define DEFAULT_SAK_LEN MACSEC_GCM_AES_128_SAK_LEN #define DEFAULT_XPN false #define DEFAULT_SEND_SCI true #define DEFAULT_ENCRYPT false #define DEFAULT_ENCODING_SA 0 #define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1)) static sci_t make_sci(const u8 *addr, __be16 port) { sci_t sci; memcpy(&sci, addr, ETH_ALEN); memcpy(((char *)&sci) + ETH_ALEN, &port, sizeof(port)); return sci; } static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present) { sci_t sci; if (sci_present) memcpy(&sci, hdr->secure_channel_id, sizeof(hdr->secure_channel_id)); else sci = make_sci(hdr->eth.h_source, MACSEC_PORT_ES); return sci; } static unsigned int macsec_sectag_len(bool sci_present) { return MACSEC_TAG_LEN + (sci_present ? MACSEC_SCI_LEN : 0); } static unsigned int macsec_hdr_len(bool sci_present) { return macsec_sectag_len(sci_present) + ETH_HLEN; } static unsigned int macsec_extra_len(bool sci_present) { return macsec_sectag_len(sci_present) + sizeof(__be16); } /* Fill SecTAG according to IEEE 802.1AE-2006 10.5.3 */ static void macsec_fill_sectag(struct macsec_eth_header *h, const struct macsec_secy *secy, u32 pn, bool sci_present) { const struct macsec_tx_sc *tx_sc = &secy->tx_sc; memset(&h->tci_an, 0, macsec_sectag_len(sci_present)); h->eth.h_proto = htons(ETH_P_MACSEC); if (sci_present) { h->tci_an |= MACSEC_TCI_SC; memcpy(&h->secure_channel_id, &secy->sci, sizeof(h->secure_channel_id)); } else { if (tx_sc->end_station) h->tci_an |= MACSEC_TCI_ES; if (tx_sc->scb) h->tci_an |= MACSEC_TCI_SCB; } h->packet_number = htonl(pn); /* with GCM, C/E clear for !encrypt, both set for encrypt */ if (tx_sc->encrypt) h->tci_an |= MACSEC_TCI_CONFID; else if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) h->tci_an |= MACSEC_TCI_C; h->tci_an |= tx_sc->encoding_sa; } static void macsec_set_shortlen(struct macsec_eth_header *h, size_t data_len) { if (data_len < MIN_NON_SHORT_LEN) h->short_length = data_len; } /* Checks if a MACsec interface is being offloaded to an hardware engine */ static bool macsec_is_offloaded(struct macsec_dev *macsec) { if (macsec->offload == MACSEC_OFFLOAD_MAC || macsec->offload == MACSEC_OFFLOAD_PHY) return true; return false; } /* Checks if underlying layers implement MACsec offloading functions. */ static bool macsec_check_offload(enum macsec_offload offload, struct macsec_dev *macsec) { if (!macsec || !macsec->real_dev) return false; if (offload == MACSEC_OFFLOAD_PHY) return macsec->real_dev->phydev && macsec->real_dev->phydev->macsec_ops; else if (offload == MACSEC_OFFLOAD_MAC) return macsec->real_dev->features & NETIF_F_HW_MACSEC && macsec->real_dev->macsec_ops; return false; } static const struct macsec_ops *__macsec_get_ops(enum macsec_offload offload, struct macsec_dev *macsec, struct macsec_context *ctx) { if (ctx) { memset(ctx, 0, sizeof(*ctx)); ctx->offload = offload; if (offload == MACSEC_OFFLOAD_PHY) ctx->phydev = macsec->real_dev->phydev; else if (offload == MACSEC_OFFLOAD_MAC) ctx->netdev = macsec->real_dev; } if (offload == MACSEC_OFFLOAD_PHY) return macsec->real_dev->phydev->macsec_ops; else return macsec->real_dev->macsec_ops; } /* Returns a pointer to the MACsec ops struct if any and updates the MACsec * context device reference if provided. */ static const struct macsec_ops *macsec_get_ops(struct macsec_dev *macsec, struct macsec_context *ctx) { if (!macsec_check_offload(macsec->offload, macsec)) return NULL; return __macsec_get_ops(macsec->offload, macsec, ctx); } /* validate MACsec packet according to IEEE 802.1AE-2018 9.12 */ static bool macsec_validate_skb(struct sk_buff *skb, u16 icv_len, bool xpn) { struct macsec_eth_header *h = (struct macsec_eth_header *)skb->data; int len = skb->len - 2 * ETH_ALEN; int extra_len = macsec_extra_len(!!(h->tci_an & MACSEC_TCI_SC)) + icv_len; /* a) It comprises at least 17 octets */ if (skb->len <= 16) return false; /* b) MACsec EtherType: already checked */ /* c) V bit is clear */ if (h->tci_an & MACSEC_TCI_VERSION) return false; /* d) ES or SCB => !SC */ if ((h->tci_an & MACSEC_TCI_ES || h->tci_an & MACSEC_TCI_SCB) && (h->tci_an & MACSEC_TCI_SC)) return false; /* e) Bits 7 and 8 of octet 4 of the SecTAG are clear */ if (h->unused) return false; /* rx.pn != 0 if not XPN (figure 10-5 with 802.11AEbw-2013 amendment) */ if (!h->packet_number && !xpn) return false; /* length check, f) g) h) i) */ if (h->short_length) return len == extra_len + h->short_length; return len >= extra_len + MIN_NON_SHORT_LEN; } #define MACSEC_NEEDED_HEADROOM (macsec_extra_len(true)) #define MACSEC_NEEDED_TAILROOM MACSEC_STD_ICV_LEN static void macsec_fill_iv_xpn(unsigned char *iv, ssci_t ssci, u64 pn, salt_t salt) { struct gcm_iv_xpn *gcm_iv = (struct gcm_iv_xpn *)iv; gcm_iv->ssci = ssci ^ salt.ssci; gcm_iv->pn = cpu_to_be64(pn) ^ salt.pn; } static void macsec_fill_iv(unsigned char *iv, sci_t sci, u32 pn) { struct gcm_iv *gcm_iv = (struct gcm_iv *)iv; gcm_iv->sci = sci; gcm_iv->pn = htonl(pn); } static struct macsec_eth_header *macsec_ethhdr(struct sk_buff *skb) { return (struct macsec_eth_header *)skb_mac_header(skb); } static void __macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa) { pr_debug("PN wrapped, transitioning to !oper\n"); tx_sa->active = false; if (secy->protect_frames) secy->operational = false; } void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa) { spin_lock_bh(&tx_sa->lock); __macsec_pn_wrapped(secy, tx_sa); spin_unlock_bh(&tx_sa->lock); } EXPORT_SYMBOL_GPL(macsec_pn_wrapped); static pn_t tx_sa_update_pn(struct macsec_tx_sa *tx_sa, struct macsec_secy *secy) { pn_t pn; spin_lock_bh(&tx_sa->lock); pn = tx_sa->next_pn_halves; if (secy->xpn) tx_sa->next_pn++; else tx_sa->next_pn_halves.lower++; if (tx_sa->next_pn == 0) __macsec_pn_wrapped(secy, tx_sa); spin_unlock_bh(&tx_sa->lock); return pn; } static void macsec_encrypt_finish(struct sk_buff *skb, struct net_device *dev) { struct macsec_dev *macsec = netdev_priv(dev); skb->dev = macsec->real_dev; skb_reset_mac_header(skb); skb->protocol = eth_hdr(skb)->h_proto; } static unsigned int macsec_msdu_len(struct sk_buff *skb) { struct macsec_dev *macsec = macsec_priv(skb->dev); struct macsec_secy *secy = &macsec->secy; bool sci_present = macsec_skb_cb(skb)->has_sci; return skb->len - macsec_hdr_len(sci_present) - secy->icv_len; } static void macsec_count_tx(struct sk_buff *skb, struct macsec_tx_sc *tx_sc, struct macsec_tx_sa *tx_sa) { unsigned int msdu_len = macsec_msdu_len(skb); struct pcpu_tx_sc_stats *txsc_stats = this_cpu_ptr(tx_sc->stats); u64_stats_update_begin(&txsc_stats->syncp); if (tx_sc->encrypt) { txsc_stats->stats.OutOctetsEncrypted += msdu_len; txsc_stats->stats.OutPktsEncrypted++; this_cpu_inc(tx_sa->stats->OutPktsEncrypted); } else { txsc_stats->stats.OutOctetsProtected += msdu_len; txsc_stats->stats.OutPktsProtected++; this_cpu_inc(tx_sa->stats->OutPktsProtected); } u64_stats_update_end(&txsc_stats->syncp); } static void count_tx(struct net_device *dev, int ret, int len) { if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) dev_sw_netstats_tx_add(dev, 1, len); } static void macsec_encrypt_done(void *data, int err) { struct sk_buff *skb = data; struct net_device *dev = skb->dev; struct macsec_dev *macsec = macsec_priv(dev); struct macsec_tx_sa *sa = macsec_skb_cb(skb)->tx_sa; int len, ret; aead_request_free(macsec_skb_cb(skb)->req); rcu_read_lock_bh(); macsec_count_tx(skb, &macsec->secy.tx_sc, macsec_skb_cb(skb)->tx_sa); /* packet is encrypted/protected so tx_bytes must be calculated */ len = macsec_msdu_len(skb) + 2 * ETH_ALEN; macsec_encrypt_finish(skb, dev); ret = dev_queue_xmit(skb); count_tx(dev, ret, len); rcu_read_unlock_bh(); macsec_txsa_put(sa); dev_put(dev); } static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm, unsigned char **iv, struct scatterlist **sg, int num_frags) { size_t size, iv_offset, sg_offset; struct aead_request *req; void *tmp; size = sizeof(struct aead_request) + crypto_aead_reqsize(tfm); iv_offset = size; size += GCM_AES_IV_LEN; size = ALIGN(size, __alignof__(struct scatterlist)); sg_offset = size; size += sizeof(struct scatterlist) * num_frags; tmp = kmalloc(size, GFP_ATOMIC); if (!tmp) return NULL; *iv = (unsigned char *)(tmp + iv_offset); *sg = (struct scatterlist *)(tmp + sg_offset); req = tmp; aead_request_set_tfm(req, tfm); return req; } static struct sk_buff *macsec_encrypt(struct sk_buff *skb, struct net_device *dev) { int ret; struct scatterlist *sg; struct sk_buff *trailer; unsigned char *iv; struct ethhdr *eth; struct macsec_eth_header *hh; size_t unprotected_len; struct aead_request *req; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; struct macsec_dev *macsec = macsec_priv(dev); bool sci_present; pn_t pn; secy = &macsec->secy; tx_sc = &secy->tx_sc; /* 10.5.1 TX SA assignment */ tx_sa = macsec_txsa_get(tx_sc->sa[tx_sc->encoding_sa]); if (!tx_sa) { secy->operational = false; kfree_skb(skb); return ERR_PTR(-EINVAL); } if (unlikely(skb_headroom(skb) < MACSEC_NEEDED_HEADROOM || skb_tailroom(skb) < MACSEC_NEEDED_TAILROOM)) { struct sk_buff *nskb = skb_copy_expand(skb, MACSEC_NEEDED_HEADROOM, MACSEC_NEEDED_TAILROOM, GFP_ATOMIC); if (likely(nskb)) { consume_skb(skb); skb = nskb; } else { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-ENOMEM); } } else { skb = skb_unshare(skb, GFP_ATOMIC); if (!skb) { macsec_txsa_put(tx_sa); return ERR_PTR(-ENOMEM); } } unprotected_len = skb->len; eth = eth_hdr(skb); sci_present = macsec_send_sci(secy); hh = skb_push(skb, macsec_extra_len(sci_present)); memmove(hh, eth, 2 * ETH_ALEN); pn = tx_sa_update_pn(tx_sa, secy); if (pn.full64 == 0) { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-ENOLINK); } macsec_fill_sectag(hh, secy, pn.lower, sci_present); macsec_set_shortlen(hh, unprotected_len - 2 * ETH_ALEN); skb_put(skb, secy->icv_len); if (skb->len - ETH_HLEN > macsec_priv(dev)->real_dev->mtu) { struct pcpu_secy_stats *secy_stats = this_cpu_ptr(macsec->stats); u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.OutPktsTooLong++; u64_stats_update_end(&secy_stats->syncp); macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-EINVAL); } ret = skb_cow_data(skb, 0, &trailer); if (unlikely(ret < 0)) { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(ret); } req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg, ret); if (!req) { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-ENOMEM); } if (secy->xpn) macsec_fill_iv_xpn(iv, tx_sa->ssci, pn.full64, tx_sa->key.salt); else macsec_fill_iv(iv, secy->sci, pn.lower); sg_init_table(sg, ret); ret = skb_to_sgvec(skb, sg, 0, skb->len); if (unlikely(ret < 0)) { aead_request_free(req); macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(ret); } if (tx_sc->encrypt) { int len = skb->len - macsec_hdr_len(sci_present) - secy->icv_len; aead_request_set_crypt(req, sg, sg, len, iv); aead_request_set_ad(req, macsec_hdr_len(sci_present)); } else { aead_request_set_crypt(req, sg, sg, 0, iv); aead_request_set_ad(req, skb->len - secy->icv_len); } macsec_skb_cb(skb)->req = req; macsec_skb_cb(skb)->tx_sa = tx_sa; macsec_skb_cb(skb)->has_sci = sci_present; aead_request_set_callback(req, 0, macsec_encrypt_done, skb); dev_hold(skb->dev); ret = crypto_aead_encrypt(req); if (ret == -EINPROGRESS) { return ERR_PTR(ret); } else if (ret != 0) { dev_put(skb->dev); kfree_skb(skb); aead_request_free(req); macsec_txsa_put(tx_sa); return ERR_PTR(-EINVAL); } dev_put(skb->dev); aead_request_free(req); macsec_txsa_put(tx_sa); return skb; } static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u32 pn) { struct macsec_rx_sa *rx_sa = macsec_skb_cb(skb)->rx_sa; struct pcpu_rx_sc_stats *rxsc_stats = this_cpu_ptr(rx_sa->sc->stats); struct macsec_eth_header *hdr = macsec_ethhdr(skb); u32 lowest_pn = 0; spin_lock(&rx_sa->lock); if (rx_sa->next_pn_halves.lower >= secy->replay_window) lowest_pn = rx_sa->next_pn_halves.lower - secy->replay_window; /* Now perform replay protection check again * (see IEEE 802.1AE-2006 figure 10-5) */ if (secy->replay_protect && pn < lowest_pn && (!secy->xpn || pn_same_half(pn, lowest_pn))) { spin_unlock(&rx_sa->lock); u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsLate++; u64_stats_update_end(&rxsc_stats->syncp); DEV_STATS_INC(secy->netdev, rx_dropped); return false; } if (secy->validate_frames != MACSEC_VALIDATE_DISABLED) { unsigned int msdu_len = macsec_msdu_len(skb); u64_stats_update_begin(&rxsc_stats->syncp); if (hdr->tci_an & MACSEC_TCI_E) rxsc_stats->stats.InOctetsDecrypted += msdu_len; else rxsc_stats->stats.InOctetsValidated += msdu_len; u64_stats_update_end(&rxsc_stats->syncp); } if (!macsec_skb_cb(skb)->valid) { spin_unlock(&rx_sa->lock); /* 10.6.5 */ if (hdr->tci_an & MACSEC_TCI_C || secy->validate_frames == MACSEC_VALIDATE_STRICT) { u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsNotValid++; u64_stats_update_end(&rxsc_stats->syncp); this_cpu_inc(rx_sa->stats->InPktsNotValid); DEV_STATS_INC(secy->netdev, rx_errors); return false; } u64_stats_update_begin(&rxsc_stats->syncp); if (secy->validate_frames == MACSEC_VALIDATE_CHECK) { rxsc_stats->stats.InPktsInvalid++; this_cpu_inc(rx_sa->stats->InPktsInvalid); } else if (pn < lowest_pn) { rxsc_stats->stats.InPktsDelayed++; } else { rxsc_stats->stats.InPktsUnchecked++; } u64_stats_update_end(&rxsc_stats->syncp); } else { u64_stats_update_begin(&rxsc_stats->syncp); if (pn < lowest_pn) { rxsc_stats->stats.InPktsDelayed++; } else { rxsc_stats->stats.InPktsOK++; this_cpu_inc(rx_sa->stats->InPktsOK); } u64_stats_update_end(&rxsc_stats->syncp); // Instead of "pn >=" - to support pn overflow in xpn if (pn + 1 > rx_sa->next_pn_halves.lower) { rx_sa->next_pn_halves.lower = pn + 1; } else if (secy->xpn && !pn_same_half(pn, rx_sa->next_pn_halves.lower)) { rx_sa->next_pn_halves.upper++; rx_sa->next_pn_halves.lower = pn + 1; } spin_unlock(&rx_sa->lock); } return true; } static void macsec_reset_skb(struct sk_buff *skb, struct net_device *dev) { skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, dev); skb_reset_network_header(skb); if (!skb_transport_header_was_set(skb)) skb_reset_transport_header(skb); skb_reset_mac_len(skb); } static void macsec_finalize_skb(struct sk_buff *skb, u8 icv_len, u8 hdr_len) { skb->ip_summed = CHECKSUM_NONE; memmove(skb->data + hdr_len, skb->data, 2 * ETH_ALEN); skb_pull(skb, hdr_len); pskb_trim_unique(skb, skb->len - icv_len); } static void count_rx(struct net_device *dev, int len) { dev_sw_netstats_rx_add(dev, len); } static void macsec_decrypt_done(void *data, int err) { struct sk_buff *skb = data; struct net_device *dev = skb->dev; struct macsec_dev *macsec = macsec_priv(dev); struct macsec_rx_sa *rx_sa = macsec_skb_cb(skb)->rx_sa; struct macsec_rx_sc *rx_sc = rx_sa->sc; int len; u32 pn; aead_request_free(macsec_skb_cb(skb)->req); if (!err) macsec_skb_cb(skb)->valid = true; rcu_read_lock_bh(); pn = ntohl(macsec_ethhdr(skb)->packet_number); if (!macsec_post_decrypt(skb, &macsec->secy, pn)) { rcu_read_unlock_bh(); kfree_skb(skb); goto out; } macsec_finalize_skb(skb, macsec->secy.icv_len, macsec_extra_len(macsec_skb_cb(skb)->has_sci)); len = skb->len; macsec_reset_skb(skb, macsec->secy.netdev); if (gro_cells_receive(&macsec->gro_cells, skb) == NET_RX_SUCCESS) count_rx(dev, len); rcu_read_unlock_bh(); out: macsec_rxsa_put(rx_sa); macsec_rxsc_put(rx_sc); dev_put(dev); } static struct sk_buff *macsec_decrypt(struct sk_buff *skb, struct net_device *dev, struct macsec_rx_sa *rx_sa, sci_t sci, struct macsec_secy *secy) { int ret; struct scatterlist *sg; struct sk_buff *trailer; unsigned char *iv; struct aead_request *req; struct macsec_eth_header *hdr; u32 hdr_pn; u16 icv_len = secy->icv_len; macsec_skb_cb(skb)->valid = false; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) return ERR_PTR(-ENOMEM); ret = skb_cow_data(skb, 0, &trailer); if (unlikely(ret < 0)) { kfree_skb(skb); return ERR_PTR(ret); } req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg, ret); if (!req) { kfree_skb(skb); return ERR_PTR(-ENOMEM); } hdr = (struct macsec_eth_header *)skb->data; hdr_pn = ntohl(hdr->packet_number); if (secy->xpn) { pn_t recovered_pn = rx_sa->next_pn_halves; recovered_pn.lower = hdr_pn; if (hdr_pn < rx_sa->next_pn_halves.lower && !pn_same_half(hdr_pn, rx_sa->next_pn_halves.lower)) recovered_pn.upper++; macsec_fill_iv_xpn(iv, rx_sa->ssci, recovered_pn.full64, rx_sa->key.salt); } else { macsec_fill_iv(iv, sci, hdr_pn); } sg_init_table(sg, ret); ret = skb_to_sgvec(skb, sg, 0, skb->len); if (unlikely(ret < 0)) { aead_request_free(req); kfree_skb(skb); return ERR_PTR(ret); } if (hdr->tci_an & MACSEC_TCI_E) { /* confidentiality: ethernet + macsec header * authenticated, encrypted payload */ int len = skb->len - macsec_hdr_len(macsec_skb_cb(skb)->has_sci); aead_request_set_crypt(req, sg, sg, len, iv); aead_request_set_ad(req, macsec_hdr_len(macsec_skb_cb(skb)->has_sci)); skb = skb_unshare(skb, GFP_ATOMIC); if (!skb) { aead_request_free(req); return ERR_PTR(-ENOMEM); } } else { /* integrity only: all headers + data authenticated */ aead_request_set_crypt(req, sg, sg, icv_len, iv); aead_request_set_ad(req, skb->len - icv_len); } macsec_skb_cb(skb)->req = req; skb->dev = dev; aead_request_set_callback(req, 0, macsec_decrypt_done, skb); dev_hold(dev); ret = crypto_aead_decrypt(req); if (ret == -EINPROGRESS) { return ERR_PTR(ret); } else if (ret != 0) { /* decryption/authentication failed * 10.6 if validateFrames is disabled, deliver anyway */ if (ret != -EBADMSG) { kfree_skb(skb); skb = ERR_PTR(ret); } } else { macsec_skb_cb(skb)->valid = true; } dev_put(dev); aead_request_free(req); return skb; } static struct macsec_rx_sc *find_rx_sc(struct macsec_secy *secy, sci_t sci) { struct macsec_rx_sc *rx_sc; for_each_rxsc(secy, rx_sc) { if (rx_sc->sci == sci) return rx_sc; } return NULL; } static struct macsec_rx_sc *find_rx_sc_rtnl(struct macsec_secy *secy, sci_t sci) { struct macsec_rx_sc *rx_sc; for_each_rxsc_rtnl(secy, rx_sc) { if (rx_sc->sci == sci) return rx_sc; } return NULL; } static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) { /* Deliver to the uncontrolled port by default */ enum rx_handler_result ret = RX_HANDLER_PASS; struct ethhdr *hdr = eth_hdr(skb); struct metadata_dst *md_dst; struct macsec_rxh_data *rxd; struct macsec_dev *macsec; bool is_macsec_md_dst; rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); md_dst = skb_metadata_dst(skb); is_macsec_md_dst = md_dst && md_dst->type == METADATA_MACSEC; list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct sk_buff *nskb; struct pcpu_secy_stats *secy_stats = this_cpu_ptr(macsec->stats); struct net_device *ndev = macsec->secy.netdev; /* If h/w offloading is enabled, HW decodes frames and strips * the SecTAG, so we have to deduce which port to deliver to. */ if (macsec_is_offloaded(macsec) && netif_running(ndev)) { const struct macsec_ops *ops; ops = macsec_get_ops(macsec, NULL); if (ops->rx_uses_md_dst && !is_macsec_md_dst) continue; if (is_macsec_md_dst) { struct macsec_rx_sc *rx_sc; /* All drivers that implement MACsec offload * support using skb metadata destinations must * indicate that they do so. */ DEBUG_NET_WARN_ON_ONCE(!ops->rx_uses_md_dst); rx_sc = find_rx_sc(&macsec->secy, md_dst->u.macsec_info.sci); if (!rx_sc) continue; /* device indicated macsec offload occurred */ skb->dev = ndev; skb->pkt_type = PACKET_HOST; eth_skb_pkt_type(skb, ndev); ret = RX_HANDLER_ANOTHER; goto out; } /* This datapath is insecure because it is unable to * enforce isolation of broadcast/multicast traffic and * unicast traffic with promiscuous mode on the macsec * netdev. Since the core stack has no mechanism to * check that the hardware did indeed receive MACsec * traffic, it is possible that the response handling * done by the MACsec port was to a plaintext packet. * This violates the MACsec protocol standard. */ if (ether_addr_equal_64bits(hdr->h_dest, ndev->dev_addr)) { /* exact match, divert skb to this port */ skb->dev = ndev; skb->pkt_type = PACKET_HOST; ret = RX_HANDLER_ANOTHER; goto out; } else if (is_multicast_ether_addr_64bits( hdr->h_dest)) { /* multicast frame, deliver on this port too */ nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) break; nskb->dev = ndev; eth_skb_pkt_type(nskb, ndev); __netif_rx(nskb); } else if (ndev->flags & IFF_PROMISC) { skb->dev = ndev; skb->pkt_type = PACKET_HOST; ret = RX_HANDLER_ANOTHER; goto out; } continue; } /* 10.6 If the management control validateFrames is not * Strict, frames without a SecTAG are received, counted, and * delivered to the Controlled Port */ if (macsec->secy.validate_frames == MACSEC_VALIDATE_STRICT) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsNoTag++; u64_stats_update_end(&secy_stats->syncp); DEV_STATS_INC(macsec->secy.netdev, rx_dropped); continue; } /* deliver on this port */ nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) break; nskb->dev = ndev; if (__netif_rx(nskb) == NET_RX_SUCCESS) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsUntagged++; u64_stats_update_end(&secy_stats->syncp); } } out: rcu_read_unlock(); return ret; } static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; struct macsec_eth_header *hdr; struct macsec_secy *secy = NULL; struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; struct macsec_rxh_data *rxd; struct macsec_dev *macsec; unsigned int len; sci_t sci; u32 hdr_pn; bool cbit; struct pcpu_rx_sc_stats *rxsc_stats; struct pcpu_secy_stats *secy_stats; bool pulled_sci; int ret; if (skb_headroom(skb) < ETH_HLEN) goto drop_direct; hdr = macsec_ethhdr(skb); if (hdr->eth.h_proto != htons(ETH_P_MACSEC)) return handle_not_macsec(skb); skb = skb_unshare(skb, GFP_ATOMIC); *pskb = skb; if (!skb) return RX_HANDLER_CONSUMED; pulled_sci = pskb_may_pull(skb, macsec_extra_len(true)); if (!pulled_sci) { if (!pskb_may_pull(skb, macsec_extra_len(false))) goto drop_direct; } hdr = macsec_ethhdr(skb); /* Frames with a SecTAG that has the TCI E bit set but the C * bit clear are discarded, as this reserved encoding is used * to identify frames with a SecTAG that are not to be * delivered to the Controlled Port. */ if ((hdr->tci_an & (MACSEC_TCI_C | MACSEC_TCI_E)) == MACSEC_TCI_E) return RX_HANDLER_PASS; /* now, pull the extra length */ if (hdr->tci_an & MACSEC_TCI_SC) { if (!pulled_sci) goto drop_direct; } /* ethernet header is part of crypto processing */ skb_push(skb, ETH_HLEN); macsec_skb_cb(skb)->has_sci = !!(hdr->tci_an & MACSEC_TCI_SC); macsec_skb_cb(skb)->assoc_num = hdr->tci_an & MACSEC_AN_MASK; sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci); rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct macsec_rx_sc *sc = find_rx_sc(&macsec->secy, sci); sc = sc ? macsec_rxsc_get(sc) : NULL; if (sc) { secy = &macsec->secy; rx_sc = sc; break; } } if (!secy) goto nosci; dev = secy->netdev; macsec = macsec_priv(dev); secy_stats = this_cpu_ptr(macsec->stats); rxsc_stats = this_cpu_ptr(rx_sc->stats); if (!macsec_validate_skb(skb, secy->icv_len, secy->xpn)) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsBadTag++; u64_stats_update_end(&secy_stats->syncp); DEV_STATS_INC(secy->netdev, rx_errors); goto drop_nosa; } rx_sa = macsec_rxsa_get(rx_sc->sa[macsec_skb_cb(skb)->assoc_num]); if (!rx_sa) { /* 10.6.1 if the SA is not in use */ /* If validateFrames is Strict or the C bit in the * SecTAG is set, discard */ if (hdr->tci_an & MACSEC_TCI_C || secy->validate_frames == MACSEC_VALIDATE_STRICT) { u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsNotUsingSA++; u64_stats_update_end(&rxsc_stats->syncp); DEV_STATS_INC(secy->netdev, rx_errors); goto drop_nosa; } /* not Strict, the frame (with the SecTAG and ICV * removed) is delivered to the Controlled Port. */ u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsUnusedSA++; u64_stats_update_end(&rxsc_stats->syncp); goto deliver; } /* First, PN check to avoid decrypting obviously wrong packets */ hdr_pn = ntohl(hdr->packet_number); if (secy->replay_protect) { bool late; spin_lock(&rx_sa->lock); late = rx_sa->next_pn_halves.lower >= secy->replay_window && hdr_pn < (rx_sa->next_pn_halves.lower - secy->replay_window); if (secy->xpn) late = late && pn_same_half(rx_sa->next_pn_halves.lower, hdr_pn); spin_unlock(&rx_sa->lock); if (late) { u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsLate++; u64_stats_update_end(&rxsc_stats->syncp); DEV_STATS_INC(macsec->secy.netdev, rx_dropped); goto drop; } } macsec_skb_cb(skb)->rx_sa = rx_sa; /* Disabled && !changed text => skip validation */ if (hdr->tci_an & MACSEC_TCI_C || secy->validate_frames != MACSEC_VALIDATE_DISABLED) skb = macsec_decrypt(skb, dev, rx_sa, sci, secy); if (IS_ERR(skb)) { /* the decrypt callback needs the reference */ if (PTR_ERR(skb) != -EINPROGRESS) { macsec_rxsa_put(rx_sa); macsec_rxsc_put(rx_sc); } rcu_read_unlock(); *pskb = NULL; return RX_HANDLER_CONSUMED; } if (!macsec_post_decrypt(skb, secy, hdr_pn)) goto drop; deliver: macsec_finalize_skb(skb, secy->icv_len, macsec_extra_len(macsec_skb_cb(skb)->has_sci)); len = skb->len; macsec_reset_skb(skb, secy->netdev); if (rx_sa) macsec_rxsa_put(rx_sa); macsec_rxsc_put(rx_sc); skb_orphan(skb); ret = gro_cells_receive(&macsec->gro_cells, skb); if (ret == NET_RX_SUCCESS) count_rx(dev, len); else DEV_STATS_INC(macsec->secy.netdev, rx_dropped); rcu_read_unlock(); *pskb = NULL; return RX_HANDLER_CONSUMED; drop: macsec_rxsa_put(rx_sa); drop_nosa: macsec_rxsc_put(rx_sc); rcu_read_unlock(); drop_direct: kfree_skb(skb); *pskb = NULL; return RX_HANDLER_CONSUMED; nosci: /* 10.6.1 if the SC is not found */ cbit = !!(hdr->tci_an & MACSEC_TCI_C); if (!cbit) macsec_finalize_skb(skb, MACSEC_DEFAULT_ICV_LEN, macsec_extra_len(macsec_skb_cb(skb)->has_sci)); list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct sk_buff *nskb; secy_stats = this_cpu_ptr(macsec->stats); /* If validateFrames is Strict or the C bit in the * SecTAG is set, discard */ if (cbit || macsec->secy.validate_frames == MACSEC_VALIDATE_STRICT) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsNoSCI++; u64_stats_update_end(&secy_stats->syncp); DEV_STATS_INC(macsec->secy.netdev, rx_errors); continue; } /* not strict, the frame (with the SecTAG and ICV * removed) is delivered to the Controlled Port. */ nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) break; macsec_reset_skb(nskb, macsec->secy.netdev); ret = __netif_rx(nskb); if (ret == NET_RX_SUCCESS) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsUnknownSCI++; u64_stats_update_end(&secy_stats->syncp); } else { DEV_STATS_INC(macsec->secy.netdev, rx_dropped); } } rcu_read_unlock(); *pskb = skb; return RX_HANDLER_PASS; } static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len, int icv_len) { struct crypto_aead *tfm; int ret; tfm = crypto_alloc_aead("gcm(aes)", 0, 0); if (IS_ERR(tfm)) return tfm; ret = crypto_aead_setkey(tfm, key, key_len); if (ret < 0) goto fail; ret = crypto_aead_setauthsize(tfm, icv_len); if (ret < 0) goto fail; return tfm; fail: crypto_free_aead(tfm); return ERR_PTR(ret); } static int init_rx_sa(struct macsec_rx_sa *rx_sa, char *sak, int key_len, int icv_len) { rx_sa->stats = alloc_percpu(struct macsec_rx_sa_stats); if (!rx_sa->stats) return -ENOMEM; rx_sa->key.tfm = macsec_alloc_tfm(sak, key_len, icv_len); if (IS_ERR(rx_sa->key.tfm)) { free_percpu(rx_sa->stats); return PTR_ERR(rx_sa->key.tfm); } rx_sa->ssci = MACSEC_UNDEF_SSCI; rx_sa->active = false; rx_sa->next_pn = 1; refcount_set(&rx_sa->refcnt, 1); spin_lock_init(&rx_sa->lock); return 0; } static void clear_rx_sa(struct macsec_rx_sa *rx_sa) { rx_sa->active = false; macsec_rxsa_put(rx_sa); } static void free_rx_sc(struct macsec_rx_sc *rx_sc) { int i; for (i = 0; i < MACSEC_NUM_AN; i++) { struct macsec_rx_sa *sa = rtnl_dereference(rx_sc->sa[i]); RCU_INIT_POINTER(rx_sc->sa[i], NULL); if (sa) clear_rx_sa(sa); } macsec_rxsc_put(rx_sc); } static struct macsec_rx_sc *del_rx_sc(struct macsec_secy *secy, sci_t sci) { struct macsec_rx_sc *rx_sc, __rcu **rx_scp; for (rx_scp = &secy->rx_sc, rx_sc = rtnl_dereference(*rx_scp); rx_sc; rx_scp = &rx_sc->next, rx_sc = rtnl_dereference(*rx_scp)) { if (rx_sc->sci == sci) { if (rx_sc->active) secy->n_rx_sc--; rcu_assign_pointer(*rx_scp, rx_sc->next); return rx_sc; } } return NULL; } static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci, bool active) { struct macsec_rx_sc *rx_sc; struct macsec_dev *macsec; struct net_device *real_dev = macsec_priv(dev)->real_dev; struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev); struct macsec_secy *secy; list_for_each_entry(macsec, &rxd->secys, secys) { if (find_rx_sc_rtnl(&macsec->secy, sci)) return ERR_PTR(-EEXIST); } rx_sc = kzalloc(sizeof(*rx_sc), GFP_KERNEL); if (!rx_sc) return ERR_PTR(-ENOMEM); rx_sc->stats = netdev_alloc_pcpu_stats(struct pcpu_rx_sc_stats); if (!rx_sc->stats) { kfree(rx_sc); return ERR_PTR(-ENOMEM); } rx_sc->sci = sci; rx_sc->active = active; refcount_set(&rx_sc->refcnt, 1); secy = &macsec_priv(dev)->secy; rcu_assign_pointer(rx_sc->next, secy->rx_sc); rcu_assign_pointer(secy->rx_sc, rx_sc); if (rx_sc->active) secy->n_rx_sc++; return rx_sc; } static int init_tx_sa(struct macsec_tx_sa *tx_sa, char *sak, int key_len, int icv_len) { tx_sa->stats = alloc_percpu(struct macsec_tx_sa_stats); if (!tx_sa->stats) return -ENOMEM; tx_sa->key.tfm = macsec_alloc_tfm(sak, key_len, icv_len); if (IS_ERR(tx_sa->key.tfm)) { free_percpu(tx_sa->stats); return PTR_ERR(tx_sa->key.tfm); } tx_sa->ssci = MACSEC_UNDEF_SSCI; tx_sa->active = false; refcount_set(&tx_sa->refcnt, 1); spin_lock_init(&tx_sa->lock); return 0; } static void clear_tx_sa(struct macsec_tx_sa *tx_sa) { tx_sa->active = false; macsec_txsa_put(tx_sa); } static struct genl_family macsec_fam; static struct net_device *get_dev_from_nl(struct net *net, struct nlattr **attrs) { int ifindex = nla_get_u32(attrs[MACSEC_ATTR_IFINDEX]); struct net_device *dev; dev = __dev_get_by_index(net, ifindex); if (!dev) return ERR_PTR(-ENODEV); if (!netif_is_macsec(dev)) return ERR_PTR(-ENODEV); return dev; } static enum macsec_offload nla_get_offload(const struct nlattr *nla) { return (__force enum macsec_offload)nla_get_u8(nla); } static sci_t nla_get_sci(const struct nlattr *nla) { return (__force sci_t)nla_get_u64(nla); } static int nla_put_sci(struct sk_buff *skb, int attrtype, sci_t value, int padattr) { return nla_put_u64_64bit(skb, attrtype, (__force u64)value, padattr); } static ssci_t nla_get_ssci(const struct nlattr *nla) { return (__force ssci_t)nla_get_u32(nla); } static int nla_put_ssci(struct sk_buff *skb, int attrtype, ssci_t value) { return nla_put_u32(skb, attrtype, (__force u64)value); } static struct macsec_tx_sa *get_txsa_from_nl(struct net *net, struct nlattr **attrs, struct nlattr **tb_sa, struct net_device **devp, struct macsec_secy **secyp, struct macsec_tx_sc **scp, u8 *assoc_num) { struct net_device *dev; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; if (!tb_sa[MACSEC_SA_ATTR_AN]) return ERR_PTR(-EINVAL); *assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]); dev = get_dev_from_nl(net, attrs); if (IS_ERR(dev)) return ERR_CAST(dev); if (*assoc_num >= MACSEC_NUM_AN) return ERR_PTR(-EINVAL); secy = &macsec_priv(dev)->secy; tx_sc = &secy->tx_sc; tx_sa = rtnl_dereference(tx_sc->sa[*assoc_num]); if (!tx_sa) return ERR_PTR(-ENODEV); *devp = dev; *scp = tx_sc; *secyp = secy; return tx_sa; } static struct macsec_rx_sc *get_rxsc_from_nl(struct net *net, struct nlattr **attrs, struct nlattr **tb_rxsc, struct net_device **devp, struct macsec_secy **secyp) { struct net_device *dev; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; sci_t sci; dev = get_dev_from_nl(net, attrs); if (IS_ERR(dev)) return ERR_CAST(dev); secy = &macsec_priv(dev)->secy; if (!tb_rxsc[MACSEC_RXSC_ATTR_SCI]) return ERR_PTR(-EINVAL); sci = nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]); rx_sc = find_rx_sc_rtnl(secy, sci); if (!rx_sc) return ERR_PTR(-ENODEV); *secyp = secy; *devp = dev; return rx_sc; } static struct macsec_rx_sa *get_rxsa_from_nl(struct net *net, struct nlattr **attrs, struct nlattr **tb_rxsc, struct nlattr **tb_sa, struct net_device **devp, struct macsec_secy **secyp, struct macsec_rx_sc **scp, u8 *assoc_num) { struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; if (!tb_sa[MACSEC_SA_ATTR_AN]) return ERR_PTR(-EINVAL); *assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]); if (*assoc_num >= MACSEC_NUM_AN) return ERR_PTR(-EINVAL); rx_sc = get_rxsc_from_nl(net, attrs, tb_rxsc, devp, secyp); if (IS_ERR(rx_sc)) return ERR_CAST(rx_sc); rx_sa = rtnl_dereference(rx_sc->sa[*assoc_num]); if (!rx_sa) return ERR_PTR(-ENODEV); *scp = rx_sc; return rx_sa; } static const struct nla_policy macsec_genl_policy[NUM_MACSEC_ATTR] = { [MACSEC_ATTR_IFINDEX] = { .type = NLA_U32 }, [MACSEC_ATTR_RXSC_CONFIG] = { .type = NLA_NESTED }, [MACSEC_ATTR_SA_CONFIG] = { .type = NLA_NESTED }, [MACSEC_ATTR_OFFLOAD] = { .type = NLA_NESTED }, }; static const struct nla_policy macsec_genl_rxsc_policy[NUM_MACSEC_RXSC_ATTR] = { [MACSEC_RXSC_ATTR_SCI] = { .type = NLA_U64 }, [MACSEC_RXSC_ATTR_ACTIVE] = { .type = NLA_U8 }, }; static const struct nla_policy macsec_genl_sa_policy[NUM_MACSEC_SA_ATTR] = { [MACSEC_SA_ATTR_AN] = { .type = NLA_U8 }, [MACSEC_SA_ATTR_ACTIVE] = { .type = NLA_U8 }, [MACSEC_SA_ATTR_PN] = NLA_POLICY_MIN_LEN(4), [MACSEC_SA_ATTR_KEYID] = { .type = NLA_BINARY, .len = MACSEC_KEYID_LEN, }, [MACSEC_SA_ATTR_KEY] = { .type = NLA_BINARY, .len = MACSEC_MAX_KEY_LEN, }, [MACSEC_SA_ATTR_SSCI] = { .type = NLA_U32 }, [MACSEC_SA_ATTR_SALT] = { .type = NLA_BINARY, .len = MACSEC_SALT_LEN, }, }; static const struct nla_policy macsec_genl_offload_policy[NUM_MACSEC_OFFLOAD_ATTR] = { [MACSEC_OFFLOAD_ATTR_TYPE] = { .type = NLA_U8 }, }; /* Offloads an operation to a device driver */ static int macsec_offload(int (* const func)(struct macsec_context *), struct macsec_context *ctx) { int ret; if (unlikely(!func)) return 0; if (ctx->offload == MACSEC_OFFLOAD_PHY) mutex_lock(&ctx->phydev->lock); ret = (*func)(ctx); if (ctx->offload == MACSEC_OFFLOAD_PHY) mutex_unlock(&ctx->phydev->lock); return ret; } static int parse_sa_config(struct nlattr **attrs, struct nlattr **tb_sa) { if (!attrs[MACSEC_ATTR_SA_CONFIG]) return -EINVAL; if (nla_parse_nested_deprecated(tb_sa, MACSEC_SA_ATTR_MAX, attrs[MACSEC_ATTR_SA_CONFIG], macsec_genl_sa_policy, NULL)) return -EINVAL; return 0; } static int parse_rxsc_config(struct nlattr **attrs, struct nlattr **tb_rxsc) { if (!attrs[MACSEC_ATTR_RXSC_CONFIG]) return -EINVAL; if (nla_parse_nested_deprecated(tb_rxsc, MACSEC_RXSC_ATTR_MAX, attrs[MACSEC_ATTR_RXSC_CONFIG], macsec_genl_rxsc_policy, NULL)) return -EINVAL; return 0; } static bool validate_add_rxsa(struct nlattr **attrs) { if (!attrs[MACSEC_SA_ATTR_AN] || !attrs[MACSEC_SA_ATTR_KEY] || !attrs[MACSEC_SA_ATTR_KEYID]) return false; if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) return false; if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { if (nla_get_u8(attrs[MACSEC_SA_ATTR_ACTIVE]) > 1) return false; } if (nla_len(attrs[MACSEC_SA_ATTR_KEYID]) != MACSEC_KEYID_LEN) return false; return true; } static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; struct nlattr **attrs = info->attrs; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; unsigned char assoc_num; int pn_len; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; int err; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; if (!validate_add_rxsa(tb_sa)) return -EINVAL; rtnl_lock(); rx_sc = get_rxsc_from_nl(genl_info_net(info), attrs, tb_rxsc, &dev, &secy); if (IS_ERR(rx_sc)) { rtnl_unlock(); return PTR_ERR(rx_sc); } assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]); if (nla_len(tb_sa[MACSEC_SA_ATTR_KEY]) != secy->key_len) { pr_notice("macsec: nl: add_rxsa: bad key length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len); rtnl_unlock(); return -EINVAL; } pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; if (tb_sa[MACSEC_SA_ATTR_PN] && nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); rtnl_unlock(); return -EINVAL; } if (secy->xpn) { if (!tb_sa[MACSEC_SA_ATTR_SSCI] || !tb_sa[MACSEC_SA_ATTR_SALT]) { rtnl_unlock(); return -EINVAL; } if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), MACSEC_SALT_LEN); rtnl_unlock(); return -EINVAL; } } rx_sa = rtnl_dereference(rx_sc->sa[assoc_num]); if (rx_sa) { rtnl_unlock(); return -EBUSY; } rx_sa = kmalloc(sizeof(*rx_sa), GFP_KERNEL); if (!rx_sa) { rtnl_unlock(); return -ENOMEM; } err = init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len, secy->icv_len); if (err < 0) { kfree(rx_sa); rtnl_unlock(); return err; } if (tb_sa[MACSEC_SA_ATTR_PN]) { spin_lock_bh(&rx_sa->lock); rx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&rx_sa->lock); } if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) rx_sa->active = !!nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]); rx_sa->sc = rx_sc; if (secy->xpn) { rx_sa->ssci = nla_get_ssci(tb_sa[MACSEC_SA_ATTR_SSCI]); nla_memcpy(rx_sa->key.salt.bytes, tb_sa[MACSEC_SA_ATTR_SALT], MACSEC_SALT_LEN); } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { err = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.rx_sa = rx_sa; ctx.secy = secy; memcpy(ctx.sa.key, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len); err = macsec_offload(ops->mdo_add_rxsa, &ctx); memzero_explicit(ctx.sa.key, secy->key_len); if (err) goto cleanup; } nla_memcpy(rx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEYID], MACSEC_KEYID_LEN); rcu_assign_pointer(rx_sc->sa[assoc_num], rx_sa); rtnl_unlock(); return 0; cleanup: macsec_rxsa_put(rx_sa); rtnl_unlock(); return err; } static bool validate_add_rxsc(struct nlattr **attrs) { if (!attrs[MACSEC_RXSC_ATTR_SCI]) return false; if (attrs[MACSEC_RXSC_ATTR_ACTIVE]) { if (nla_get_u8(attrs[MACSEC_RXSC_ATTR_ACTIVE]) > 1) return false; } return true; } static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; sci_t sci = MACSEC_UNDEF_SCI; struct nlattr **attrs = info->attrs; struct macsec_rx_sc *rx_sc; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct macsec_secy *secy; bool active = true; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; if (!validate_add_rxsc(tb_rxsc)) return -EINVAL; rtnl_lock(); dev = get_dev_from_nl(genl_info_net(info), attrs); if (IS_ERR(dev)) { rtnl_unlock(); return PTR_ERR(dev); } secy = &macsec_priv(dev)->secy; sci = nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]); if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) active = nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); rx_sc = create_rx_sc(dev, sci, active); if (IS_ERR(rx_sc)) { rtnl_unlock(); return PTR_ERR(rx_sc); } if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.rx_sc = rx_sc; ctx.secy = secy; ret = macsec_offload(ops->mdo_add_rxsc, &ctx); if (ret) goto cleanup; } rtnl_unlock(); return 0; cleanup: del_rx_sc(secy, sci); free_rx_sc(rx_sc); rtnl_unlock(); return ret; } static bool validate_add_txsa(struct nlattr **attrs) { if (!attrs[MACSEC_SA_ATTR_AN] || !attrs[MACSEC_SA_ATTR_PN] || !attrs[MACSEC_SA_ATTR_KEY] || !attrs[MACSEC_SA_ATTR_KEYID]) return false; if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) return false; if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { if (nla_get_u8(attrs[MACSEC_SA_ATTR_ACTIVE]) > 1) return false; } if (nla_len(attrs[MACSEC_SA_ATTR_KEYID]) != MACSEC_KEYID_LEN) return false; return true; } static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; struct nlattr **attrs = info->attrs; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; unsigned char assoc_num; int pn_len; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; bool was_operational; int err; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; if (!validate_add_txsa(tb_sa)) return -EINVAL; rtnl_lock(); dev = get_dev_from_nl(genl_info_net(info), attrs); if (IS_ERR(dev)) { rtnl_unlock(); return PTR_ERR(dev); } secy = &macsec_priv(dev)->secy; tx_sc = &secy->tx_sc; assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]); if (nla_len(tb_sa[MACSEC_SA_ATTR_KEY]) != secy->key_len) { pr_notice("macsec: nl: add_txsa: bad key length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len); rtnl_unlock(); return -EINVAL; } pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { pr_notice("macsec: nl: add_txsa: bad pn length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); rtnl_unlock(); return -EINVAL; } if (secy->xpn) { if (!tb_sa[MACSEC_SA_ATTR_SSCI] || !tb_sa[MACSEC_SA_ATTR_SALT]) { rtnl_unlock(); return -EINVAL; } if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), MACSEC_SALT_LEN); rtnl_unlock(); return -EINVAL; } } tx_sa = rtnl_dereference(tx_sc->sa[assoc_num]); if (tx_sa) { rtnl_unlock(); return -EBUSY; } tx_sa = kmalloc(sizeof(*tx_sa), GFP_KERNEL); if (!tx_sa) { rtnl_unlock(); return -ENOMEM; } err = init_tx_sa(tx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len, secy->icv_len); if (err < 0) { kfree(tx_sa); rtnl_unlock(); return err; } spin_lock_bh(&tx_sa->lock); tx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&tx_sa->lock); if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) tx_sa->active = !!nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]); was_operational = secy->operational; if (assoc_num == tx_sc->encoding_sa && tx_sa->active) secy->operational = true; if (secy->xpn) { tx_sa->ssci = nla_get_ssci(tb_sa[MACSEC_SA_ATTR_SSCI]); nla_memcpy(tx_sa->key.salt.bytes, tb_sa[MACSEC_SA_ATTR_SALT], MACSEC_SALT_LEN); } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { err = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.tx_sa = tx_sa; ctx.secy = secy; memcpy(ctx.sa.key, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len); err = macsec_offload(ops->mdo_add_txsa, &ctx); memzero_explicit(ctx.sa.key, secy->key_len); if (err) goto cleanup; } nla_memcpy(tx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEYID], MACSEC_KEYID_LEN); rcu_assign_pointer(tx_sc->sa[assoc_num], tx_sa); rtnl_unlock(); return 0; cleanup: secy->operational = was_operational; macsec_txsa_put(tx_sa); rtnl_unlock(); return err; } static int macsec_del_rxsa(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; u8 assoc_num; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; rtnl_lock(); rx_sa = get_rxsa_from_nl(genl_info_net(info), attrs, tb_rxsc, tb_sa, &dev, &secy, &rx_sc, &assoc_num); if (IS_ERR(rx_sa)) { rtnl_unlock(); return PTR_ERR(rx_sa); } if (rx_sa->active) { rtnl_unlock(); return -EBUSY; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.rx_sa = rx_sa; ctx.secy = secy; ret = macsec_offload(ops->mdo_del_rxsa, &ctx); if (ret) goto cleanup; } RCU_INIT_POINTER(rx_sc->sa[assoc_num], NULL); clear_rx_sa(rx_sa); rtnl_unlock(); return 0; cleanup: rtnl_unlock(); return ret; } static int macsec_del_rxsc(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; sci_t sci; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; if (!tb_rxsc[MACSEC_RXSC_ATTR_SCI]) return -EINVAL; rtnl_lock(); dev = get_dev_from_nl(genl_info_net(info), info->attrs); if (IS_ERR(dev)) { rtnl_unlock(); return PTR_ERR(dev); } secy = &macsec_priv(dev)->secy; sci = nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]); rx_sc = del_rx_sc(secy, sci); if (!rx_sc) { rtnl_unlock(); return -ENODEV; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.rx_sc = rx_sc; ctx.secy = secy; ret = macsec_offload(ops->mdo_del_rxsc, &ctx); if (ret) goto cleanup; } free_rx_sc(rx_sc); rtnl_unlock(); return 0; cleanup: rtnl_unlock(); return ret; } static int macsec_del_txsa(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; u8 assoc_num; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; rtnl_lock(); tx_sa = get_txsa_from_nl(genl_info_net(info), attrs, tb_sa, &dev, &secy, &tx_sc, &assoc_num); if (IS_ERR(tx_sa)) { rtnl_unlock(); return PTR_ERR(tx_sa); } if (tx_sa->active) { rtnl_unlock(); return -EBUSY; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.tx_sa = tx_sa; ctx.secy = secy; ret = macsec_offload(ops->mdo_del_txsa, &ctx); if (ret) goto cleanup; } RCU_INIT_POINTER(tx_sc->sa[assoc_num], NULL); clear_tx_sa(tx_sa); rtnl_unlock(); return 0; cleanup: rtnl_unlock(); return ret; } static bool validate_upd_sa(struct nlattr **attrs) { if (!attrs[MACSEC_SA_ATTR_AN] || attrs[MACSEC_SA_ATTR_KEY] || attrs[MACSEC_SA_ATTR_KEYID] || attrs[MACSEC_SA_ATTR_SSCI] || attrs[MACSEC_SA_ATTR_SALT]) return false; if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) return false; if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { if (nla_get_u8(attrs[MACSEC_SA_ATTR_ACTIVE]) > 1) return false; } return true; } static int macsec_upd_txsa(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; u8 assoc_num; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; bool was_operational, was_active; pn_t prev_pn; int ret = 0; prev_pn.full64 = 0; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; if (!validate_upd_sa(tb_sa)) return -EINVAL; rtnl_lock(); tx_sa = get_txsa_from_nl(genl_info_net(info), attrs, tb_sa, &dev, &secy, &tx_sc, &assoc_num); if (IS_ERR(tx_sa)) { rtnl_unlock(); return PTR_ERR(tx_sa); } if (tb_sa[MACSEC_SA_ATTR_PN]) { int pn_len; pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { pr_notice("macsec: nl: upd_txsa: bad pn length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); rtnl_unlock(); return -EINVAL; } spin_lock_bh(&tx_sa->lock); prev_pn = tx_sa->next_pn_halves; tx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&tx_sa->lock); } was_active = tx_sa->active; if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) tx_sa->active = nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]); was_operational = secy->operational; if (assoc_num == tx_sc->encoding_sa) secy->operational = tx_sa->active; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.tx_sa = tx_sa; ctx.sa.update_pn = !!prev_pn.full64; ctx.secy = secy; ret = macsec_offload(ops->mdo_upd_txsa, &ctx); if (ret) goto cleanup; } rtnl_unlock(); return 0; cleanup: if (tb_sa[MACSEC_SA_ATTR_PN]) { spin_lock_bh(&tx_sa->lock); tx_sa->next_pn_halves = prev_pn; spin_unlock_bh(&tx_sa->lock); } tx_sa->active = was_active; secy->operational = was_operational; rtnl_unlock(); return ret; } static int macsec_upd_rxsa(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; u8 assoc_num; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; bool was_active; pn_t prev_pn; int ret = 0; prev_pn.full64 = 0; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; if (!validate_upd_sa(tb_sa)) return -EINVAL; rtnl_lock(); rx_sa = get_rxsa_from_nl(genl_info_net(info), attrs, tb_rxsc, tb_sa, &dev, &secy, &rx_sc, &assoc_num); if (IS_ERR(rx_sa)) { rtnl_unlock(); return PTR_ERR(rx_sa); } if (tb_sa[MACSEC_SA_ATTR_PN]) { int pn_len; pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { pr_notice("macsec: nl: upd_rxsa: bad pn length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); rtnl_unlock(); return -EINVAL; } spin_lock_bh(&rx_sa->lock); prev_pn = rx_sa->next_pn_halves; rx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&rx_sa->lock); } was_active = rx_sa->active; if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) rx_sa->active = nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]); /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.rx_sa = rx_sa; ctx.sa.update_pn = !!prev_pn.full64; ctx.secy = secy; ret = macsec_offload(ops->mdo_upd_rxsa, &ctx); if (ret) goto cleanup; } rtnl_unlock(); return 0; cleanup: if (tb_sa[MACSEC_SA_ATTR_PN]) { spin_lock_bh(&rx_sa->lock); rx_sa->next_pn_halves = prev_pn; spin_unlock_bh(&rx_sa->lock); } rx_sa->active = was_active; rtnl_unlock(); return ret; } static int macsec_upd_rxsc(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; unsigned int prev_n_rx_sc; bool was_active; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; if (!validate_add_rxsc(tb_rxsc)) return -EINVAL; rtnl_lock(); rx_sc = get_rxsc_from_nl(genl_info_net(info), attrs, tb_rxsc, &dev, &secy); if (IS_ERR(rx_sc)) { rtnl_unlock(); return PTR_ERR(rx_sc); } was_active = rx_sc->active; prev_n_rx_sc = secy->n_rx_sc; if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) { bool new = !!nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); if (rx_sc->active != new) secy->n_rx_sc += new ? 1 : -1; rx_sc->active = new; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.rx_sc = rx_sc; ctx.secy = secy; ret = macsec_offload(ops->mdo_upd_rxsc, &ctx); if (ret) goto cleanup; } rtnl_unlock(); return 0; cleanup: secy->n_rx_sc = prev_n_rx_sc; rx_sc->active = was_active; rtnl_unlock(); return ret; } static bool macsec_is_configured(struct macsec_dev *macsec) { struct macsec_secy *secy = &macsec->secy; struct macsec_tx_sc *tx_sc = &secy->tx_sc; int i; if (secy->rx_sc) return true; for (i = 0; i < MACSEC_NUM_AN; i++) if (tx_sc->sa[i]) return true; return false; } static bool macsec_needs_tx_tag(struct macsec_dev *macsec, const struct macsec_ops *ops) { return macsec->offload == MACSEC_OFFLOAD_PHY && ops->mdo_insert_tx_tag; } static void macsec_set_head_tail_room(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; int needed_headroom, needed_tailroom; const struct macsec_ops *ops; ops = macsec_get_ops(macsec, NULL); if (ops) { needed_headroom = ops->needed_headroom; needed_tailroom = ops->needed_tailroom; } else { needed_headroom = MACSEC_NEEDED_HEADROOM; needed_tailroom = MACSEC_NEEDED_TAILROOM; } dev->needed_headroom = real_dev->needed_headroom + needed_headroom; dev->needed_tailroom = real_dev->needed_tailroom + needed_tailroom; } static void macsec_inherit_tso_max(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); /* if macsec is offloaded, we need to follow the lower * device's capabilities. otherwise, we can ignore them. */ if (macsec_is_offloaded(macsec)) netif_inherit_tso_max(dev, macsec->real_dev); } static int macsec_update_offload(struct net_device *dev, enum macsec_offload offload) { enum macsec_offload prev_offload; const struct macsec_ops *ops; struct macsec_context ctx; struct macsec_dev *macsec; int ret = 0; macsec = macsec_priv(dev); /* Check if the offloading mode is supported by the underlying layers */ if (offload != MACSEC_OFFLOAD_OFF && !macsec_check_offload(offload, macsec)) return -EOPNOTSUPP; /* Check if the net device is busy. */ if (netif_running(dev)) return -EBUSY; /* Check if the device already has rules configured: we do not support * rules migration. */ if (macsec_is_configured(macsec)) return -EBUSY; prev_offload = macsec->offload; ops = __macsec_get_ops(offload == MACSEC_OFFLOAD_OFF ? prev_offload : offload, macsec, &ctx); if (!ops) return -EOPNOTSUPP; macsec->offload = offload; ctx.secy = &macsec->secy; ret = offload == MACSEC_OFFLOAD_OFF ? macsec_offload(ops->mdo_del_secy, &ctx) : macsec_offload(ops->mdo_add_secy, &ctx); if (ret) { macsec->offload = prev_offload; return ret; } macsec_set_head_tail_room(dev); macsec->insert_tx_tag = macsec_needs_tx_tag(macsec, ops); macsec_inherit_tso_max(dev); netdev_update_features(dev); return ret; } static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb_offload[MACSEC_OFFLOAD_ATTR_MAX + 1]; struct nlattr **attrs = info->attrs; enum macsec_offload offload; struct macsec_dev *macsec; struct net_device *dev; int ret = 0; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (!attrs[MACSEC_ATTR_OFFLOAD]) return -EINVAL; if (nla_parse_nested_deprecated(tb_offload, MACSEC_OFFLOAD_ATTR_MAX, attrs[MACSEC_ATTR_OFFLOAD], macsec_genl_offload_policy, NULL)) return -EINVAL; rtnl_lock(); dev = get_dev_from_nl(genl_info_net(info), attrs); if (IS_ERR(dev)) { ret = PTR_ERR(dev); goto out; } macsec = macsec_priv(dev); if (!tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]) { ret = -EINVAL; goto out; } offload = nla_get_u8(tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]); if (macsec->offload != offload) ret = macsec_update_offload(dev, offload); out: rtnl_unlock(); return ret; } static void get_tx_sa_stats(struct net_device *dev, int an, struct macsec_tx_sa *tx_sa, struct macsec_tx_sa_stats *sum) { struct macsec_dev *macsec = macsec_priv(dev); int cpu; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (ops) { ctx.sa.assoc_num = an; ctx.sa.tx_sa = tx_sa; ctx.stats.tx_sa_stats = sum; ctx.secy = &macsec_priv(dev)->secy; macsec_offload(ops->mdo_get_tx_sa_stats, &ctx); } return; } for_each_possible_cpu(cpu) { const struct macsec_tx_sa_stats *stats = per_cpu_ptr(tx_sa->stats, cpu); sum->OutPktsProtected += stats->OutPktsProtected; sum->OutPktsEncrypted += stats->OutPktsEncrypted; } } static int copy_tx_sa_stats(struct sk_buff *skb, struct macsec_tx_sa_stats *sum) { if (nla_put_u32(skb, MACSEC_SA_STATS_ATTR_OUT_PKTS_PROTECTED, sum->OutPktsProtected) || nla_put_u32(skb, MACSEC_SA_STATS_ATTR_OUT_PKTS_ENCRYPTED, sum->OutPktsEncrypted)) return -EMSGSIZE; return 0; } static void get_rx_sa_stats(struct net_device *dev, struct macsec_rx_sc *rx_sc, int an, struct macsec_rx_sa *rx_sa, struct macsec_rx_sa_stats *sum) { struct macsec_dev *macsec = macsec_priv(dev); int cpu; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (ops) { ctx.sa.assoc_num = an; ctx.sa.rx_sa = rx_sa; ctx.stats.rx_sa_stats = sum; ctx.secy = &macsec_priv(dev)->secy; ctx.rx_sc = rx_sc; macsec_offload(ops->mdo_get_rx_sa_stats, &ctx); } return; } for_each_possible_cpu(cpu) { const struct macsec_rx_sa_stats *stats = per_cpu_ptr(rx_sa->stats, cpu); sum->InPktsOK += stats->InPktsOK; sum->InPktsInvalid += stats->InPktsInvalid; sum->InPktsNotValid += stats->InPktsNotValid; sum->InPktsNotUsingSA += stats->InPktsNotUsingSA; sum->InPktsUnusedSA += stats->InPktsUnusedSA; } } static int copy_rx_sa_stats(struct sk_buff *skb, struct macsec_rx_sa_stats *sum) { if (nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_OK, sum->InPktsOK) || nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_INVALID, sum->InPktsInvalid) || nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_VALID, sum->InPktsNotValid) || nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_USING_SA, sum->InPktsNotUsingSA) || nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_UNUSED_SA, sum->InPktsUnusedSA)) return -EMSGSIZE; return 0; } static void get_rx_sc_stats(struct net_device *dev, struct macsec_rx_sc *rx_sc, struct macsec_rx_sc_stats *sum) { struct macsec_dev *macsec = macsec_priv(dev); int cpu; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (ops) { ctx.stats.rx_sc_stats = sum; ctx.secy = &macsec_priv(dev)->secy; ctx.rx_sc = rx_sc; macsec_offload(ops->mdo_get_rx_sc_stats, &ctx); } return; } for_each_possible_cpu(cpu) { const struct pcpu_rx_sc_stats *stats; struct macsec_rx_sc_stats tmp; unsigned int start; stats = per_cpu_ptr(rx_sc->stats, cpu); do { start = u64_stats_fetch_begin(&stats->syncp); memcpy(&tmp, &stats->stats, sizeof(tmp)); } while (u64_stats_fetch_retry(&stats->syncp, start)); sum->InOctetsValidated += tmp.InOctetsValidated; sum->InOctetsDecrypted += tmp.InOctetsDecrypted; sum->InPktsUnchecked += tmp.InPktsUnchecked; sum->InPktsDelayed += tmp.InPktsDelayed; sum->InPktsOK += tmp.InPktsOK; sum->InPktsInvalid += tmp.InPktsInvalid; sum->InPktsLate += tmp.InPktsLate; sum->InPktsNotValid += tmp.InPktsNotValid; sum->InPktsNotUsingSA += tmp.InPktsNotUsingSA; sum->InPktsUnusedSA += tmp.InPktsUnusedSA; } } static int copy_rx_sc_stats(struct sk_buff *skb, struct macsec_rx_sc_stats *sum) { if (nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_OCTETS_VALIDATED, sum->InOctetsValidated, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_OCTETS_DECRYPTED, sum->InOctetsDecrypted, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNCHECKED, sum->InPktsUnchecked, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_DELAYED, sum->InPktsDelayed, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_OK, sum->InPktsOK, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_INVALID, sum->InPktsInvalid, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_LATE, sum->InPktsLate, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_VALID, sum->InPktsNotValid, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_USING_SA, sum->InPktsNotUsingSA, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNUSED_SA, sum->InPktsUnusedSA, MACSEC_RXSC_STATS_ATTR_PAD)) return -EMSGSIZE; return 0; } static void get_tx_sc_stats(struct net_device *dev, struct macsec_tx_sc_stats *sum) { struct macsec_dev *macsec = macsec_priv(dev); int cpu; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (ops) { ctx.stats.tx_sc_stats = sum; ctx.secy = &macsec_priv(dev)->secy; macsec_offload(ops->mdo_get_tx_sc_stats, &ctx); } return; } for_each_possible_cpu(cpu) { const struct pcpu_tx_sc_stats *stats; struct macsec_tx_sc_stats tmp; unsigned int start; stats = per_cpu_ptr(macsec_priv(dev)->secy.tx_sc.stats, cpu); do { start = u64_stats_fetch_begin(&stats->syncp); memcpy(&tmp, &stats->stats, sizeof(tmp)); } while (u64_stats_fetch_retry(&stats->syncp, start)); sum->OutPktsProtected += tmp.OutPktsProtected; sum->OutPktsEncrypted += tmp.OutPktsEncrypted; sum->OutOctetsProtected += tmp.OutOctetsProtected; sum->OutOctetsEncrypted += tmp.OutOctetsEncrypted; } } static int copy_tx_sc_stats(struct sk_buff *skb, struct macsec_tx_sc_stats *sum) { if (nla_put_u64_64bit(skb, MACSEC_TXSC_STATS_ATTR_OUT_PKTS_PROTECTED, sum->OutPktsProtected, MACSEC_TXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_TXSC_STATS_ATTR_OUT_PKTS_ENCRYPTED, sum->OutPktsEncrypted, MACSEC_TXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_PROTECTED, sum->OutOctetsProtected, MACSEC_TXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_ENCRYPTED, sum->OutOctetsEncrypted, MACSEC_TXSC_STATS_ATTR_PAD))